4.10-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 18 Apr 2017 10:39:42 +0000 (12:39 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 18 Apr 2017 10:39:42 +0000 (12:39 +0200)
added patches:
cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch
cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch

queue-4.10/cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch [new file with mode: 0644]
queue-4.10/cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch [new file with mode: 0644]

diff --git a/queue-4.10/cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch b/queue-4.10/cgroup-avoid-attaching-a-cgroup-root-to-two-different-superblocks.patch
new file mode 100644 (file)
index 0000000..7847d81
--- /dev/null
@@ -0,0 +1,92 @@
+From bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e Mon Sep 17 00:00:00 2001
+From: Zefan Li <lizefan@huawei.com>
+Date: Fri, 7 Apr 2017 16:51:55 +0800
+Subject: cgroup: avoid attaching a cgroup root to two different superblocks
+
+From: Zefan Li <lizefan@huawei.com>
+
+commit bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e upstream.
+
+Run this:
+
+    touch file0
+    for ((; ;))
+    {
+        mount -t cpuset xxx file0
+    }
+
+And this concurrently:
+
+    touch file1
+    for ((; ;))
+    {
+        mount -t cpuset xxx file1
+    }
+
+We'll trigger a warning like this:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 1 PID: 4675 at lib/percpu-refcount.c:317 percpu_ref_kill_and_confirm+0x92/0xb0
+ percpu_ref_kill_and_confirm called more than once on css_release!
+ CPU: 1 PID: 4675 Comm: mount Not tainted 4.11.0-rc5+ #5
+ Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
+ Call Trace:
+  dump_stack+0x63/0x84
+  __warn+0xd1/0xf0
+  warn_slowpath_fmt+0x5f/0x80
+  percpu_ref_kill_and_confirm+0x92/0xb0
+  cgroup_kill_sb+0x95/0xb0
+  deactivate_locked_super+0x43/0x70
+  deactivate_super+0x46/0x60
+ ...
+ ---[ end trace a79f61c2a2633700 ]---
+
+Here's a race:
+
+  Thread A                             Thread B
+
+  cgroup1_mount()
+    # alloc a new cgroup root
+    cgroup_setup_root()
+                                       cgroup1_mount()
+                                         # no sb yet, returns NULL
+                                         kernfs_pin_sb()
+
+                                         # but succeeds in getting the refcnt,
+                                         # so re-use cgroup root
+                                         percpu_ref_tryget_live()
+    # alloc sb with cgroup root
+    cgroup_do_mount()
+
+  cgroup_kill_sb()
+                                         # alloc another sb with same root
+                                         cgroup_do_mount()
+
+                                       cgroup_kill_sb()
+
+We end up using the same cgroup root for two different superblocks,
+so percpu_ref_kill() will be called twice on the same root when the
+two superblocks are destroyed.
+
+Fix this by making sure the superblock pinning really succeeded.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -2195,7 +2195,7 @@ static struct dentry *cgroup_mount(struc
+                * path is super cold.  Let's just sleep a bit and retry.
+                */
+               pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+-              if (IS_ERR(pinned_sb) ||
++              if (IS_ERR_OR_NULL(pinned_sb) ||
+                   !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+                       mutex_unlock(&cgroup_mutex);
+                       if (!IS_ERR_OR_NULL(pinned_sb))
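
The one-line fix works because kernfs_pin_sb() has three possible outcomes: a valid superblock pointer, an ERR_PTR() on failure, or NULL when no superblock exists yet (the window thread B hits in the diagram above). The old IS_ERR() check treated the NULL case as success and re-used the half-initialized cgroup root. Below is a simplified sketch of the two helpers involved, modeled on include/linux/err.h; the in-tree definitions additionally carry unlikely() and __must_check annotations:

    #define MAX_ERRNO	4095

    /* An error pointer encodes a negative errno value in the last
     * page of the address space. */
    #define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

    static inline bool IS_ERR(const void *ptr)
    {
            return IS_ERR_VALUE((unsigned long)ptr);
    }

    /* The variant the fix switches to: NULL is rejected as well, so
     * the "no sb yet" case no longer passes the check. */
    static inline bool IS_ERR_OR_NULL(const void *ptr)
    {
            return !ptr || IS_ERR_VALUE((unsigned long)ptr);
    }
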
diff --git a/queue-4.10/cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch b/queue-4.10/cgroup-kthread-close-race-window-where-new-kthreads-can-be-migrated-to-non-root-cgroups.patch
new file mode 100644 (file)
index 0000000..431889c
--- /dev/null
@@ -0,0 +1,165 @@
+From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 16 Mar 2017 16:54:24 -0400
+Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 77f88796cee819b9c4562b0b6b44691b3b7755b1 upstream.
+
+Creation of a kthread goes through a couple of interlocked stages between
+the kthread itself and its creator.  Once the new kthread starts
+running, it initializes itself and wakes up the creator.  The creator
+then can further configure the kthread and then let it start doing its
+job by waking it up.
+
+In this configuration-by-creator stage, the creator is the only one
+that can wake it up but the kthread is visible to userland.  When
+altering the kthread's attributes from userland is allowed, this is
+fine; however, for cases where CPU affinity is critical,
+kthread_bind() is used to first disable affinity changes from userland
+and then set the affinity.  This also prevents the kthread from being
+migrated into non-root cgroups as that can affect the CPU affinity and
+many other things.
+
+Unfortunately, the cgroup side of protection is racy.  While the
+PF_NO_SETAFFINITY flag prevents further migrations, userland can win
+the race before the creator sets the flag with kthread_bind() and put
+the kthread in a non-root cgroup, which can lead to all sorts of
+problems including incorrect CPU affinity and starvation.
+
+This bug got triggered by userland which periodically tries to migrate
+all processes in the root cpuset cgroup to a non-root one.  Per-cpu
+workqueue workers got caught while being created and ended up with
+incorrect CPU affinity, breaking concurrency management and sometimes
+stalling workqueue execution.
+
+This patch adds task->no_cgroup_migration, which prevents the task
+from being migrated by userland.  kthreadd starts with the flag set,
+so every child kthread starts in the root cgroup with migration
+disallowed.  The flag is cleared after the kthread finishes
+initialization, by which time PF_NO_SETAFFINITY is set if the kthread
+should stay in the root cgroup.
+
+It'd be better to wait for the initialization instead of failing, but I
+couldn't think of a way of implementing that without adding either a
+new PF flag or sleeping and retrying from the waiting side.  Even if
+userland depends on changing the cgroup membership of a kthread, it
+either has to synchronize with kthread_create() or retry periodically,
+so it's unlikely that this would break anything.
+
+v2: Switch to a simpler implementation using a new task_struct bit
+    field suggested by Oleg.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Suggested-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Reported-and-debugged-by: Chris Mason <clm@fb.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/cgroup.h |   21 +++++++++++++++++++++
+ include/linux/sched.h  |    4 ++++
+ kernel/cgroup.c        |    9 +++++----
+ kernel/kthread.c       |    3 +++
+ 4 files changed, 33 insertions(+), 4 deletions(-)
+
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(s
+       pr_cont_kernfs_path(cgrp->kn);
+ }
++static inline void cgroup_init_kthreadd(void)
++{
++      /*
++       * kthreadd is inherited by all kthreads, keep it in the root so
++       * that the new kthreads are guaranteed to stay in the root until
++       * initialization is finished.
++       */
++      current->no_cgroup_migration = 1;
++}
++
++static inline void cgroup_kthread_ready(void)
++{
++      /*
++       * This kthread finished initialization.  The creator should have
++       * set PF_NO_SETAFFINITY if this kthread should stay in the root.
++       */
++      current->no_cgroup_migration = 0;
++}
++
+ #else /* !CONFIG_CGROUPS */
+ struct cgroup_subsys_state;
+@@ -590,6 +609,8 @@ static inline void cgroup_free(struct ta
+ static inline int cgroup_init_early(void) { return 0; }
+ static inline int cgroup_init(void) { return 0; }
++static inline void cgroup_init_kthreadd(void) {}
++static inline void cgroup_kthread_ready(void) {}
+ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+                                              struct cgroup *ancestor)
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1620,6 +1620,10 @@ struct task_struct {
+ #ifdef CONFIG_COMPAT_BRK
+       unsigned brk_randomized:1;
+ #endif
++#ifdef CONFIG_CGROUPS
++      /* disallow userland-initiated cgroup migration */
++      unsigned no_cgroup_migration:1;
++#endif
+       unsigned long atomic_flags; /* Flags needing atomic access. */
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -2920,11 +2920,12 @@ static ssize_t __cgroup_procs_write(stru
+               tsk = tsk->group_leader;
+       /*
+-       * Workqueue threads may acquire PF_NO_SETAFFINITY and become
+-       * trapped in a cpuset, or RT worker may be born in a cgroup
+-       * with no rt_runtime allocated.  Just say no.
++       * kthreads may acquire PF_NO_SETAFFINITY during initialization.
++       * If userland migrates such a kthread to a non-root cgroup, it can
++       * become trapped in a cpuset, or RT kthread may be born in a
++       * cgroup with no rt_runtime allocated.  Just say no.
+        */
+-      if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
++      if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
+               ret = -EINVAL;
+               goto out_unlock_rcu;
+       }
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -18,6 +18,7 @@
+ #include <linux/freezer.h>
+ #include <linux/ptrace.h>
+ #include <linux/uaccess.h>
++#include <linux/cgroup.h>
+ #include <trace/events/sched.h>
+ static DEFINE_SPINLOCK(kthread_create_lock);
+@@ -223,6 +224,7 @@ static int kthread(void *_create)
+       ret = -EINTR;
+       if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
++              cgroup_kthread_ready();
+               __kthread_parkme(self);
+               ret = threadfn(data);
+       }
+@@ -536,6 +538,7 @@ int kthreadd(void *unused)
+       set_mems_allowed(node_states[N_MEMORY]);
+       current->flags |= PF_NOFREEZE;
++      cgroup_init_kthreadd();
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
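
For reference, the creator-side sequence that the commit message describes looks roughly like the sketch below. This is hypothetical module code, not part of the patch; dummy_thread_fn and demo_start are illustrative names. The race window the patch closes lies between kthread_create(), where the new task becomes visible to userland, and kthread_bind(), which sets PF_NO_SETAFFINITY:

    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int dummy_thread_fn(void *data)
    {
            /* Canonical idle loop: mark ourselves sleeping before testing
             * the stop condition so a concurrent kthread_stop() cannot
             * slip its wakeup in between the check and the schedule(). */
            set_current_state(TASK_INTERRUPTIBLE);
            while (!kthread_should_stop()) {
                    schedule();
                    set_current_state(TASK_INTERRUPTIBLE);
            }
            __set_current_state(TASK_RUNNING);
            return 0;
    }

    static int demo_start(void)
    {
            struct task_struct *t;

            /* Created sleeping, but already visible to userland --
             * this is where the cgroup migration race could begin. */
            t = kthread_create(dummy_thread_fn, NULL, "demo");
            if (IS_ERR(t))
                    return PTR_ERR(t);

            /* Pin to CPU 0; this also sets PF_NO_SETAFFINITY, disabling
             * further affinity changes from userland. */
            kthread_bind(t, 0);

            /* Only now does the kthread start running dummy_thread_fn(). */
            wake_up_process(t);
            return 0;
    }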