From 50e76632339d4655859523a39249dd95ee5e93e7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Sep 2017 11:13:38 +0200
Subject: sched/cpuset/pm: Fix cpuset vs. suspend-resume bugs

From: Peter Zijlstra <peterz@infradead.org>

commit 50e76632339d4655859523a39249dd95ee5e93e7 upstream.

Cpusets vs. suspend-resume is _completely_ broken. And it got noticed
because it now resulted in non-cpuset usage breaking too.

On suspend cpuset_cpu_inactive() doesn't call into
cpuset_update_active_cpus() because it doesn't want to move tasks about;
there is no need, all tasks are frozen and won't run again until after
we've resumed everything.

But this means that when we finally do call into
cpuset_update_active_cpus() after resuming the last frozen cpu in
cpuset_cpu_active(), the top_cpuset will not have any difference with
the cpu_active_mask and thus it will not in fact do _anything_.

So the cpuset configuration will not be restored. This was largely
hidden because we would unconditionally create identity domains and
mobile users would not in fact use cpusets much. And servers that do use
cpusets tend not to suspend-resume much.

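To make the failure mode concrete, here is a small userspace-style
illustration (a sketch with simplified names and a plain bitmask
standing in for struct cpumask, not the kernel code): once the last
frozen CPU is back online, the active mask again matches what the
top_cpuset last recorded, so a pure change-detection test has nothing
to react to.

  /*
   * Illustration only: simplified stand-in for the change detection in
   * cpuset_hotplug_workfn().  An unsigned long models the cpumask.
   */
  #include <stdbool.h>
  #include <stdio.h>

  int main(void)
  {
          unsigned long top_cpuset_cpus = 0x0f; /* what cpusets last observed */
          unsigned long cpu_active_mask = 0x0f; /* all CPUs online again after resume */
          bool cpus_updated = (top_cpuset_cpus != cpu_active_mask);

          if (cpus_updated)
                  printf("rebuild_sched_domains()\n");
          else
                  printf("no delta seen -> cpuset configuration stays collapsed\n");
          return 0;
  }
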
An additional problem is that we'd not in fact wait for the cpuset work to
finish before resuming the tasks, allowing spurious migrations outside
of the specified domains.

Fix the rebuild by introducing cpuset_force_rebuild() and fix the
ordering with cpuset_wait_for_hotplug().

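The shape of that fix, reduced to a single-threaded userspace sketch
(the names follow the patch, but flush_work() is only modelled here;
this is not the kernel implementation): the final CPU-up on resume sets
a sticky flag so the next hotplug work run rebuilds the sched domains
even without a visible cpumask delta, and thaw_processes() waits for
that work before letting tasks run again.

  #include <stdbool.h>
  #include <stdio.h>

  static bool force_rebuild;      /* sticky request, consumed by the work function */

  static void cpuset_force_rebuild(void)
  {
          force_rebuild = true;   /* set from the last CPU-up in the resume path */
  }

  static void cpuset_hotplug_workfn(bool cpus_updated)
  {
          if (cpus_updated || force_rebuild) {
                  force_rebuild = false;
                  printf("rebuild_sched_domains()\n");
          }
  }

  static void cpuset_wait_for_hotplug(void)
  {
          /*
           * Models flush_work(&cpuset_hotplug_work): by the time this
           * returns, the pending hotplug work has run to completion.
           */
          cpuset_hotplug_workfn(false);   /* no cpumask delta is visible on resume */
  }

  int main(void)
  {
          cpuset_force_rebuild();         /* cpuset_cpu_active(), last frozen CPU */
          cpuset_wait_for_hotplug();      /* thaw_processes(), before thawing tasks */
          return 0;
  }
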
Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: deb7aa308ea2 ("cpuset: reorganize CPU / memory hotplug handling")
Link: http://lkml.kernel.org/r/20170907091338.orwxrqkbfkki3c24@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cpuset.h |    6 ++++++
 kernel/cpuset.c        |   16 +++++++++++++++-
 kernel/power/process.c |    5 ++++-
 kernel/sched/core.c    |    7 +++----
 4 files changed, 28 insertions(+), 6 deletions(-)

--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -43,7 +43,9 @@ static inline void cpuset_dec(void)
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
+extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(bool cpu_online);
+extern void cpuset_wait_for_hotplug(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -147,11 +149,15 @@ static inline bool cpusets_enabled(void)
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
+static inline void cpuset_force_rebuild(void) { }
+
 static inline void cpuset_update_active_cpus(bool cpu_online)
 {
 	partition_sched_domains(1, NULL, NULL);
 }
 
+static inline void cpuset_wait_for_hotplug(void) { }
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
 				       struct cpumask *mask)
 {
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2281,6 +2281,13 @@ retry:
 	mutex_unlock(&cpuset_mutex);
 }
 
+static bool force_rebuild;
+
+void cpuset_force_rebuild(void)
+{
+	force_rebuild = true;
+}
+
 /**
  * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
  *
@@ -2355,8 +2362,10 @@ static void cpuset_hotplug_workfn(struct
 	}
 
 	/* rebuild sched domains if cpus_allowed has changed */
-	if (cpus_updated)
+	if (cpus_updated || force_rebuild) {
+		force_rebuild = false;
 		rebuild_sched_domains();
+	}
 }
 
 void cpuset_update_active_cpus(bool cpu_online)
@@ -2375,6 +2384,11 @@ void cpuset_update_active_cpus(bool cpu_
 	schedule_work(&cpuset_hotplug_work);
 }
 
+void cpuset_wait_for_hotplug(void)
+{
+	flush_work(&cpuset_hotplug_work);
+}
+
 /*
  * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
  * Call this routine anytime after node_states[N_MEMORY] changes.
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -18,8 +18,9 @@
 #include <linux/workqueue.h>
 #include <linux/kmod.h>
 #include <trace/events/power.h>
+#include <linux/cpuset.h>
 
-/* 
+/*
  * Timeout for stopping processes
  */
 unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
@@ -198,6 +199,8 @@ void thaw_processes(void)
 	__usermodehelper_set_disable_depth(UMH_FREEZING);
 	thaw_workqueues();
 
+	cpuset_wait_for_hotplug();
+
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, p) {
 		/* No other threads should have PF_SUSPEND_TASK set */
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7286,17 +7286,16 @@ static int cpuset_cpu_active(struct noti
 		 * operation in the resume sequence, just build a single sched
 		 * domain, ignoring cpusets.
 		 */
-		num_cpus_frozen--;
-		if (likely(num_cpus_frozen)) {
-			partition_sched_domains(1, NULL, NULL);
+		partition_sched_domains(1, NULL, NULL);
+		if (--num_cpus_frozen)
 			break;
-		}
 
 		/*
 		 * This is the last CPU online operation. So fall through and
 		 * restore the original sched domains by considering the
 		 * cpuset configurations.
 		 */
+		cpuset_force_rebuild();
 
 	case CPU_ONLINE:
 		cpuset_update_active_cpus(true);