From oleg@redhat.com Fri Sep 17 18:14:53 2010
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 15 Mar 2010 10:10:03 +0100
Subject: sched: Kill the broken and deadlockable cpuset_lock/cpuset_cpus_allowed_locked code
To: stable <stable@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
Message-ID: <2ed3dbb00c3052ccb7ffda1e7a1d112e3d3f53f1.1283514307.git.efault@gmx.de>

From: Oleg Nesterov <oleg@redhat.com>

commit 897f0b3c3ff40b443c84e271bef19bd6ae885195 upstream

This patch just states the fact that the cpusets/cpuhotplug interaction is
broken and removes the deadlockable code which only pretends to work.

- cpuset_lock() doesn't really work. It is needed for
  cpuset_cpus_allowed_locked(), but we can't take this lock in the
  try_to_wake_up()->select_fallback_rq() path.

- cpuset_lock() is deadlockable. Suppose that a task T bound to a CPU takes
  callback_mutex. If cpu_down(CPU) happens before T drops callback_mutex,
  stop_machine() preempts T, then migration_call(CPU_DEAD) tries to take
  cpuset_lock() and hangs forever, because the CPU is already dead and thus
  T can't be scheduled (see the sketch below the list).

- cpuset_cpus_allowed_locked() is deadlockable too. It takes task_lock(),
  which is not irq-safe, but try_to_wake_up() can be called from irq.

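To make the second scenario concrete, here is a purely illustrative
userspace analogy (a sketch only, not kernel code; the thread and function
names below are made up). One thread plays task T, which takes
"callback_mutex" and can then never run again; main() plays
migration_call(CPU_DEAD), which needs the same mutex. A timed lock is used
only so the example terminates and reports the deadlock instead of hanging:

#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t callback_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Plays task T: takes callback_mutex, then (like a task whose CPU has
 * gone away) never runs far enough to release it. */
static void *bound_task(void *unused)
{
	pthread_mutex_lock(&callback_mutex);
	pause();			/* T is never scheduled again */
	return unused;
}

int main(void)
{
	pthread_t t;
	struct timespec deadline;

	pthread_create(&t, NULL, bound_task, NULL);
	sleep(1);			/* let T take the mutex first */

	/* Plays migration_call(CPU_DEAD) -> cpuset_lock(): it needs
	 * callback_mutex, whose owner can never run to release it. */
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 2;
	if (pthread_mutex_timedlock(&callback_mutex, &deadline) != 0)
		printf("deadlock: lock owner will never release it\n");
	return 0;
}
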
Kill them, and change select_fallback_rq() to use cpu_possible_mask, like
we currently do without CONFIG_CPUSETS.

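As a rough userspace model of that fallback (an illustrative sketch under
simplifying assumptions: cpumasks are modeled as 64-bit words and mask_any()
is a made-up stand-in for cpumask_any(); the real change is the
kernel/sched.c hunk below):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for cpumask_any(): return the first set bit, or -1 if empty. */
static int mask_any(uint64_t mask)
{
	for (int cpu = 0; cpu < 64; cpu++)
		if (mask & (1ULL << cpu))
			return cpu;
	return -1;
}

int main(void)
{
	uint64_t cpu_possible_mask = 0xf;	/* CPUs 0-3 exist */
	uint64_t cpu_active_mask = 0xe;		/* CPU 0 is going away */
	uint64_t cpus_allowed = 0x1;		/* task was bound to CPU 0 only */

	/* The old code consulted the task's cpuset here (under cpuset_lock());
	 * the new code simply widens the task to every possible CPU ... */
	cpus_allowed = cpu_possible_mask;
	/* ... and then picks any CPU that is still active. */
	int dest_cpu = mask_any(cpu_active_mask);

	printf("cpus_allowed=%#llx, dest_cpu=%d\n",
	       (unsigned long long)cpus_allowed, dest_cpu);
	return 0;
}
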
Also, with or without this patch, with or without CONFIG_CPUSETS, the
callers of select_fallback_rq() can race with each other or with
set_cpus_allowed() paths.

The subsequent patches try to fix these problems.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091003.GA9123@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/cpuset.h |   13 -------------
 kernel/cpuset.c        |   27 +--------------------------
 kernel/sched.c         |   10 +++-------
 3 files changed, 4 insertions(+), 46 deletions(-)

--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -21,8 +21,6 @@ extern int number_of_cpusets; /* How man
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
-extern void cpuset_cpus_allowed_locked(struct task_struct *p,
-				       struct cpumask *mask);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -69,9 +67,6 @@ struct seq_file;
 extern void cpuset_task_status_allowed(struct seq_file *m,
 					struct task_struct *task);
 
-extern void cpuset_lock(void);
-extern void cpuset_unlock(void);
-
 extern int cpuset_mem_spread_node(void);
 
 static inline int cpuset_do_page_mem_spread(void)
@@ -105,11 +100,6 @@ static inline void cpuset_cpus_allowed(s
 {
 	cpumask_copy(mask, cpu_possible_mask);
 }
-static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
-					      struct cpumask *mask)
-{
-	cpumask_copy(mask, cpu_possible_mask);
-}
 
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
@@ -157,9 +147,6 @@ static inline void cpuset_task_status_al
 {
 }
 
-static inline void cpuset_lock(void) {}
-static inline void cpuset_unlock(void) {}
-
 static inline int cpuset_mem_spread_node(void)
 {
 	return 0;
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2145,19 +2145,10 @@ void __init cpuset_init_smp(void)
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
 	mutex_lock(&callback_mutex);
-	cpuset_cpus_allowed_locked(tsk, pmask);
-	mutex_unlock(&callback_mutex);
-}
-
-/**
- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
- * Must be called with callback_mutex held.
- **/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
-{
 	task_lock(tsk);
 	guarantee_online_cpus(task_cs(tsk), pmask);
 	task_unlock(tsk);
+	mutex_unlock(&callback_mutex);
 }
 
 void cpuset_init_current_mems_allowed(void)
@@ -2346,22 +2337,6 @@ int __cpuset_node_allowed_hardwall(int n
 }
 
 /**
- * cpuset_lock - lock out any changes to cpuset structures
- *
- * The out of memory (oom) code needs to mutex_lock cpusets
- * from being changed while it scans the tasklist looking for a
- * task in an overlapping cpuset. Expose callback_mutex via this
- * cpuset_lock() routine, so the oom code can lock it, before
- * locking the task list. The tasklist_lock is a spinlock, so
- * must be taken inside callback_mutex.
- */
-
-void cpuset_lock(void)
-{
-	mutex_lock(&callback_mutex);
-}
-
-/**
  * cpuset_unlock - release lock on cpuset changes
  *
  * Undo the lock taken in a previous cpuset_lock() call.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2349,11 +2349,9 @@ static int select_fallback_rq(int cpu, s
 		return dest_cpu;
 
 	/* No more Mr. Nice Guy. */
-	if (dest_cpu >= nr_cpu_ids) {
-		rcu_read_lock();
-		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-		rcu_read_unlock();
-		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+	if (unlikely(dest_cpu >= nr_cpu_ids)) {
+		cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+		dest_cpu = cpumask_any(cpu_active_mask);
 
 		/*
 		 * Don't tell them about moving exiting tasks or
@@ -7833,7 +7831,6 @@ migration_call(struct notifier_block *nf
 
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
 		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		/* Idle task back to normal (off runqueue, low prio) */
@@ -7844,7 +7841,6 @@ migration_call(struct notifier_block *nf
 		rq->idle->sched_class = &idle_sched_class;
 		migrate_dead_tasks(cpu);
 		spin_unlock_irq(&rq->lock);
-		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
 		calc_global_load_remove(rq);