From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 16 Mar 2017 16:54:24 -0400
Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups

From: Tejun Heo <tj@kernel.org>

commit 77f88796cee819b9c4562b0b6b44691b3b7755b1 upstream.

Creation of a kthread goes through a couple of interlocked stages
between the kthread itself and its creator.  Once the new kthread
starts running, it initializes itself and wakes up the creator.  The
creator can then further configure the kthread and let it start doing
its job by waking it up.

In this configuration-by-creator stage, the creator is the only one
that can wake the kthread up, but the kthread is already visible to
userland.  When altering the kthread's attributes from userland is
allowed, this is fine; however, for cases where CPU affinity is
critical, kthread_bind() is used to first disable affinity changes
from userland and then set the affinity.  This also prevents the
kthread from being migrated into non-root cgroups, as that can affect
the CPU affinity and many other things.
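
As an illustration of that creator-side sequence, a minimal sketch
follows (the thread function and names are hypothetical and not part
of this patch):

	#include <linux/err.h>
	#include <linux/kthread.h>
	#include <linux/sched.h>

	static int my_worker_fn(void *data)
	{
		/* per-cpu work loop would go here */
		return 0;
	}

	static struct task_struct *create_pinned_kthread(int cpu)
	{
		struct task_struct *tsk;

		/*
		 * The kthread is created but not yet running its threadfn;
		 * it is already visible to userland at this point.
		 */
		tsk = kthread_create(my_worker_fn, NULL, "my_worker/%d", cpu);
		if (IS_ERR(tsk))
			return tsk;

		/* sets PF_NO_SETAFFINITY and pins the kthread to @cpu */
		kthread_bind(tsk, cpu);

		/* only now let the kthread start doing its job */
		wake_up_process(tsk);
		return tsk;
	}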

Unfortunately, the cgroup side of protection is racy.  While the
PF_NO_SETAFFINITY flag prevents further migrations, userland can win
the race before the creator sets the flag with kthread_bind() and put
the kthread in a non-root cgroup, which can lead to all sorts of
problems including incorrect CPU affinity and starvation.
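
To make the window concrete, the racing userland side amounts to a
write of the new kthread's PID into some non-root cgroup's
cgroup.procs file before kthread_bind() has run.  A minimal userspace
sketch (cgroup v1 cpuset mount point assumed; the group name is
hypothetical):

	/* userspace: move a freshly created kthread into a non-root cpuset */
	#include <stdio.h>
	#include <sys/types.h>

	static int move_pid_to_cpuset(pid_t pid)
	{
		FILE *f = fopen("/sys/fs/cgroup/cpuset/nonroot/cgroup.procs", "w");

		if (!f)
			return -1;
		/* succeeds if it beats the creator's kthread_bind() */
		fprintf(f, "%d\n", (int)pid);
		return fclose(f);
	}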

This bug got triggered by userland which periodically tries to migrate
all processes in the root cpuset cgroup to a non-root one.  Per-cpu
workqueue workers got caught while being created and ended up with
incorrect CPU affinity, breaking concurrency management and sometimes
stalling workqueue execution.

This patch adds task->no_cgroup_migration, which disallows userland
from migrating the task.  kthreadd starts with the flag set, making
every child kthread start in the root cgroup with migration
disallowed.  The flag is cleared after the kthread finishes
initialization, by which time PF_NO_SETAFFINITY is set if the kthread
should stay in the root cgroup.

It'd be better to wait for the initialization instead of failing, but
I couldn't think of a way of implementing that without adding either a
new PF flag or sleeping and retrying from the waiting side.  Even if
userland depends on changing the cgroup membership of a kthread, it
either has to be synchronized with kthread_create() or repeated
periodically, so it's unlikely that this would break anything.

v2: Switch to a simpler implementation using a new task_struct bit
    field suggested by Oleg.

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Reported-and-debugged-by: Chris Mason <clm@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 include/linux/cgroup.h |   21 +++++++++++++++++++++
 include/linux/sched.h  |    4 ++++
 kernel/cgroup.c        |    9 +++++----
 kernel/kthread.c       |    3 +++
 4 files changed, 33 insertions(+), 4 deletions(-)

--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(s
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
+static inline void cgroup_init_kthreadd(void)
+{
+	/*
+	 * kthreadd is inherited by all kthreads, keep it in the root so
+	 * that the new kthreads are guaranteed to stay in the root until
+	 * initialization is finished.
+	 */
+	current->no_cgroup_migration = 1;
+}
+
+static inline void cgroup_kthread_ready(void)
+{
+	/*
+	 * This kthread finished initialization. The creator should have
+	 * set PF_NO_SETAFFINITY if this kthread should stay in the root.
+	 */
+	current->no_cgroup_migration = 0;
+}
+
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
@@ -590,6 +609,8 @@ static inline void cgroup_free(struct ta
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_kthreadd(void) {}
+static inline void cgroup_kthread_ready(void) {}
 
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1620,6 +1620,10 @@ struct task_struct {
 #ifdef CONFIG_COMPAT_BRK
 	unsigned brk_randomized:1;
 #endif
+#ifdef CONFIG_CGROUPS
+	/* disallow userland-initiated cgroup migration */
+	unsigned no_cgroup_migration:1;
+#endif
 
 	unsigned long atomic_flags; /* Flags needing atomic access. */
 
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2920,11 +2920,12 @@ static ssize_t __cgroup_procs_write(stru
 		tsk = tsk->group_leader;
 
 	/*
-	 * Workqueue threads may acquire PF_NO_SETAFFINITY and become
-	 * trapped in a cpuset, or RT worker may be born in a cgroup
-	 * with no rt_runtime allocated. Just say no.
+	 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+	 * If userland migrates such a kthread to a non-root cgroup, it can
+	 * become trapped in a cpuset, or RT kthread may be born in a
+	 * cgroup with no rt_runtime allocated. Just say no.
 	 */
-	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
 		goto out_unlock_rcu;
 	}
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -18,6 +18,7 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/cgroup.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -223,6 +224,7 @@ static int kthread(void *_create)
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		cgroup_kthread_ready();
 		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
@@ -536,6 +538,7 @@ int kthreadd(void *unused)
 	set_mems_allowed(node_states[N_MEMORY]);
 
 	current->flags |= PF_NOFREEZE;
+	cgroup_init_kthreadd();
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);