From: Greg Kroah-Hartman Date: Tue, 18 Apr 2017 10:40:23 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.4.63~43 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cdf49b34310c249f3f4b18bbfa1ef340b315d22c;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch --- diff --git a/queue-4.9/cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch b/queue-4.9/cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch new file mode 100644 index 00000000000..7847d8152bf --- /dev/null +++ b/queue-4.9/cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch @@ -0,0 +1,92 @@ +From bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e Mon Sep 17 00:00:00 2001 +From: Zefan Li +Date: Fri, 7 Apr 2017 16:51:55 +0800 +Subject: cgroup: avoid attaching a cgroup root to two different superblocks + +From: Zefan Li + +commit bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e upstream. + +Run this: + + touch file0 + for ((; ;)) + { + mount -t cpuset xxx file0 + } + +And this concurrently: + + touch file1 + for ((; ;)) + { + mount -t cpuset xxx file1 + } + +We'll trigger a warning like this: + + ------------[ cut here ]------------ + WARNING: CPU: 1 PID: 4675 at lib/percpu-refcount.c:317 percpu_ref_kill_and_confirm+0x92/0xb0 + percpu_ref_kill_and_confirm called more than once on css_release! + CPU: 1 PID: 4675 Comm: mount Not tainted 4.11.0-rc5+ #5 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 + Call Trace: + dump_stack+0x63/0x84 + __warn+0xd1/0xf0 + warn_slowpath_fmt+0x5f/0x80 + percpu_ref_kill_and_confirm+0x92/0xb0 + cgroup_kill_sb+0x95/0xb0 + deactivate_locked_super+0x43/0x70 + deactivate_super+0x46/0x60 + ... 
+ ---[ end trace a79f61c2a2633700 ]--- + +Here's a race: + + Thread A Thread B + + cgroup1_mount() + # alloc a new cgroup root + cgroup_setup_root() + cgroup1_mount() + # no sb yet, returns NULL + kernfs_pin_sb() + + # but succeeds in getting the refcnt, + # so re-use cgroup root + percpu_ref_tryget_live() + # alloc sb with cgroup root + cgroup_do_mount() + + cgroup_kill_sb() + # alloc another sb with same root + cgroup_do_mount() + + cgroup_kill_sb() + +We end up using the same cgroup root for two different superblocks, +so percpu_ref_kill() will be called twice on the same root when the +two superblocks are destroyed. + +Fix this by making sure the superblock pinning has really succeeded. + +Reported-by: Dmitry Vyukov +Signed-off-by: Zefan Li +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -2195,7 +2195,7 @@ static struct dentry *cgroup_mount(struc + * path is super cold. Let's just sleep a bit and retry. 
+ */ + pinned_sb = kernfs_pin_sb(root->kf_root, NULL); +- if (IS_ERR(pinned_sb) || ++ if (IS_ERR_OR_NULL(pinned_sb) || + !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { + mutex_unlock(&cgroup_mutex); + if (!IS_ERR_OR_NULL(pinned_sb)) diff --git a/queue-4.9/cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch b/queue-4.9/cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch new file mode 100644 index 00000000000..d1783da45b2 --- /dev/null +++ b/queue-4.9/cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch @@ -0,0 +1,165 @@ +From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Thu, 16 Mar 2017 16:54:24 -0400 +Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups + +From: Tejun Heo + +commit 77f88796cee819b9c4562b0b6b44691b3b7755b1 upstream. + +Creation of a kthread goes through a couple interlocked stages between +the kthread itself and its creator. Once the new kthread starts +running, it initializes itself and wakes up the creator. The creator +then can further configure the kthread and then let it start doing its +job by waking it up. + +In this configuration-by-creator stage, the creator is the only one +that can wake it up but the kthread is visible to userland. When +altering the kthread's attributes from userland is allowed, this is +fine; however, for cases where CPU affinity is critical, +kthread_bind() is used to first disable affinity changes from userland +and then set the affinity. This also prevents the kthread from being +migrated into non-root cgroups as that can affect the CPU affinity and +many other things. + +Unfortunately, the cgroup side of protection is racy. 
While the +PF_NO_SETAFFINITY flag prevents further migrations, userland can win +the race before the creator sets the flag with kthread_bind() and put +the kthread in a non-root cgroup, which can lead to all sorts of +problems including incorrect CPU affinity and starvation. + +This bug got triggered by userland which periodically tries to migrate +all processes in the root cpuset cgroup to a non-root one. Per-cpu +workqueue workers got caught while being created and ended up with +incorrect CPU affinity breaking concurrency management and sometimes +stalling workqueue execution. + +This patch adds task->no_cgroup_migration which disallows the task from +being migrated by userland. kthreadd starts with the flag set making +every child kthread start in the root cgroup with migration +disallowed. The flag is cleared after the kthread finishes +initialization by which time PF_NO_SETAFFINITY is set if the kthread +should stay in the root cgroup. + +It'd be better to wait for the initialization instead of failing but I +couldn't think of a way of implementing that without adding either a +new PF flag, or sleeping and retrying from the waiting side. Even if +userland depends on changing cgroup membership of a kthread, it either +has to be synchronized with kthread_create() or periodically repeat, +so it's unlikely that this would break anything. + +v2: Switch to a simpler implementation using a new task_struct bit + field suggested by Oleg. 
+ +Signed-off-by: Tejun Heo +Suggested-by: Oleg Nesterov +Cc: Linus Torvalds +Cc: Andrew Morton +Cc: Peter Zijlstra (Intel) +Cc: Thomas Gleixner +Reported-and-debugged-by: Chris Mason +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/cgroup.h | 21 +++++++++++++++++++++ + include/linux/sched.h | 4 ++++ + kernel/cgroup.c | 9 +++++---- + kernel/kthread.c | 3 +++ + 4 files changed, 33 insertions(+), 4 deletions(-) + +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(s + pr_cont_kernfs_path(cgrp->kn); + } + ++static inline void cgroup_init_kthreadd(void) ++{ ++ /* ++ * kthreadd is inherited by all kthreads, keep it in the root so ++ * that the new kthreads are guaranteed to stay in the root until ++ * initialization is finished. ++ */ ++ current->no_cgroup_migration = 1; ++} ++ ++static inline void cgroup_kthread_ready(void) ++{ ++ /* ++ * This kthread finished initialization. The creator should have ++ * set PF_NO_SETAFFINITY if this kthread should stay in the root. ++ */ ++ current->no_cgroup_migration = 0; ++} ++ + #else /* !CONFIG_CGROUPS */ + + struct cgroup_subsys_state; +@@ -590,6 +609,8 @@ static inline void cgroup_free(struct ta + + static inline int cgroup_init_early(void) { return 0; } + static inline int cgroup_init(void) { return 0; } ++static inline void cgroup_init_kthreadd(void) {} ++static inline void cgroup_kthread_ready(void) {} + + static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1584,6 +1584,10 @@ struct task_struct { + #ifdef CONFIG_COMPAT_BRK + unsigned brk_randomized:1; + #endif ++#ifdef CONFIG_CGROUPS ++ /* disallow userland-initiated cgroup migration */ ++ unsigned no_cgroup_migration:1; ++#endif + + unsigned long atomic_flags; /* Flags needing atomic access. 
*/ + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -2920,11 +2920,12 @@ static ssize_t __cgroup_procs_write(stru + tsk = tsk->group_leader; + + /* +- * Workqueue threads may acquire PF_NO_SETAFFINITY and become +- * trapped in a cpuset, or RT worker may be born in a cgroup +- * with no rt_runtime allocated. Just say no. ++ * kthreads may acquire PF_NO_SETAFFINITY during initialization. ++ * If userland migrates such a kthread to a non-root cgroup, it can ++ * become trapped in a cpuset, or RT kthread may be born in a ++ * cgroup with no rt_runtime allocated. Just say no. + */ +- if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { ++ if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { + ret = -EINVAL; + goto out_unlock_rcu; + } +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + + static DEFINE_SPINLOCK(kthread_create_lock); +@@ -205,6 +206,7 @@ static int kthread(void *_create) + ret = -EINTR; + + if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { ++ cgroup_kthread_ready(); + __kthread_parkme(&self); + ret = threadfn(data); + } +@@ -530,6 +532,7 @@ int kthreadd(void *unused) + set_mems_allowed(node_states[N_MEMORY]); + + current->flags |= PF_NOFREEZE; ++ cgroup_init_kthreadd(); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE);