--- /dev/null
+From bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e Mon Sep 17 00:00:00 2001
+From: Zefan Li <lizefan@huawei.com>
+Date: Fri, 7 Apr 2017 16:51:55 +0800
+Subject: cgroup: avoid attaching a cgroup root to two different superblocks
+
+From: Zefan Li <lizefan@huawei.com>
+
+commit bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e upstream.
+
+Run this:
+
+ touch file0
+ for ((; ;))
+ {
+ mount -t cpuset xxx file0
+ }
+
+And this concurrently:
+
+ touch file1
+ for ((; ;))
+ {
+ mount -t cpuset xxx file1
+ }
+
+We'll trigger a warning like this:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 1 PID: 4675 at lib/percpu-refcount.c:317 percpu_ref_kill_and_confirm+0x92/0xb0
+ percpu_ref_kill_and_confirm called more than once on css_release!
+ CPU: 1 PID: 4675 Comm: mount Not tainted 4.11.0-rc5+ #5
+ Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
+ Call Trace:
+ dump_stack+0x63/0x84
+ __warn+0xd1/0xf0
+ warn_slowpath_fmt+0x5f/0x80
+ percpu_ref_kill_and_confirm+0x92/0xb0
+ cgroup_kill_sb+0x95/0xb0
+ deactivate_locked_super+0x43/0x70
+ deactivate_super+0x46/0x60
+ ...
+ ---[ end trace a79f61c2a2633700 ]---
+
+Here's a race:
+
+ Thread A                               Thread B
+
+ cgroup1_mount()
+   # alloc a new cgroup root
+   cgroup_setup_root()
+                                        cgroup1_mount()
+                                          # no sb yet, returns NULL
+                                          kernfs_pin_sb()
+
+                                          # but succeeds in getting the refcnt,
+                                          # so re-use cgroup root
+                                          percpu_ref_tryget_live()
+   # alloc sb with cgroup root
+   cgroup_do_mount()
+
+ cgroup_kill_sb()
+                                          # alloc another sb with same root
+                                          cgroup_do_mount()
+
+                                        cgroup_kill_sb()
+
+We end up using the same cgroup root for two different superblocks,
+so percpu_ref_kill() will be called twice on the same root when the
+two superblocks are destroyed.
+
+Fix this by making sure that pinning the superblock actually succeeded
+before reusing the cgroup root.
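+
+For reference, the retry logic around kernfs_pin_sb() then looks like
+this (paraphrased from cgroup_mount() in kernel/cgroup.c, comments
+added; not a verbatim copy):
+
+        /*
+         * NULL from kernfs_pin_sb() means no superblock exists yet --
+         * possibly because another mount is still creating one -- so
+         * treat it like an error and retry instead of reusing the root.
+         */
+        pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+        if (IS_ERR_OR_NULL(pinned_sb) ||
+            !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+                mutex_unlock(&cgroup_mutex);
+                if (!IS_ERR_OR_NULL(pinned_sb))
+                        deactivate_super(pinned_sb);
+                /* this path is super cold, sleep a bit and retry */
+                msleep(10);
+                ret = restart_syscall();
+                goto out_free;
+        }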
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -2105,7 +2105,7 @@ static struct dentry *cgroup_mount(struc
+ * path is super cold. Let's just sleep a bit and retry.
+ */
+ pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+- if (IS_ERR(pinned_sb) ||
++ if (IS_ERR_OR_NULL(pinned_sb) ||
+ !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+ mutex_unlock(&cgroup_mutex);
+ if (!IS_ERR_OR_NULL(pinned_sb))
--- /dev/null
+From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Thu, 16 Mar 2017 16:54:24 -0400
+Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups
+
+From: Tejun Heo <tj@kernel.org>
+
+commit 77f88796cee819b9c4562b0b6b44691b3b7755b1 upstream.
+
+Creation of a kthread goes through a couple of interlocked stages
+between the kthread itself and its creator. Once the new kthread
+starts running, it initializes itself and wakes up the creator. The
+creator can then further configure the kthread and finally let it
+start doing its job by waking it up.
+
+During this configuration-by-creator stage, the creator is the only
+one that can wake the kthread up, but the kthread is already visible
+to userland. When altering the kthread's attributes from userland is
+allowed, this is fine; however, for cases where CPU affinity is
+critical, kthread_bind() is used to first disable affinity changes
+from userland and then set the affinity. This also prevents the
+kthread from being migrated into non-root cgroups, as that can affect
+the CPU affinity and many other things.
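+
+The creator-side pattern looks roughly like this (a schematic sketch;
+worker_fn, data and cpu are made-up names):
+
+        struct task_struct *t;
+
+        /* stage 1: create the kthread; it initializes itself, wakes
+         * us up and then waits to be woken again.  It is already
+         * visible to userland at this point. */
+        t = kthread_create(worker_fn, data, "worker/%d", cpu);
+
+        /* stage 2: configuration by the creator.  kthread_bind()
+         * sets the CPU affinity and PF_NO_SETAFFINITY so userland
+         * cannot change the affinity afterwards. */
+        kthread_bind(t, cpu);
+
+        /* stage 3: let the kthread start doing its job */
+        wake_up_process(t);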
+
+Unfortunately, the cgroup side of this protection is racy. While the
+PF_NO_SETAFFINITY flag prevents further migrations, userland can win
+the race against kthread_bind() and put the kthread in a non-root
+cgroup before the flag is set, which can lead to all sorts of problems
+including incorrect CPU affinity and starvation.
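+
+Schematically (an illustrative interleaving, not from an actual
+trace):
+
+  creator                               userland
+
+  kthread_create()
+    # new kthread initializes and
+    # becomes visible to userland
+                                        # attach the kthread to a
+                                        # non-root cgroup via cgroup.procs
+  kthread_bind()
+    # sets PF_NO_SETAFFINITY, but the
+    # kthread already sits in a
+    # non-root cgroup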
+
+This bug got triggered by userland which periodically tries to migrate
+all processes in the root cpuset cgroup to a non-root one. Per-cpu
+workqueue workers got caught while being created and ended up with
+incorrect CPU affinity, breaking concurrency management and sometimes
+stalling workqueue execution.
+
+This patch adds task->no_cgroup_migration, which prevents userland
+from migrating the task between cgroups. kthreadd starts with the
+flag set, so every child kthread begins in the root cgroup with
+migration disallowed. The flag is cleared once the kthread finishes
+initialization, by which time PF_NO_SETAFFINITY has been set if the
+kthread should stay in the root cgroup.
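+
+As a sketch, the lifecycle of the bit (paraphrasing the changes
+below):
+
+        /* kthreadd(), once at boot: every kthread is forked from
+         * kthreadd and inherits the bit, so it starts out with
+         * userland-initiated migration disallowed */
+        cgroup_init_kthreadd();         /* no_cgroup_migration = 1 */
+
+        /* kthread(), after initialization: the creator has had its
+         * chance to set PF_NO_SETAFFINITY via kthread_bind(), so
+         * re-allow migration for ordinary kthreads */
+        cgroup_kthread_ready();         /* no_cgroup_migration = 0 */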
+
+It'd be better to wait for the initialization to finish instead of
+failing, but I couldn't think of a way to implement that without
+adding either a new PF flag or sleeping and retrying on the waiting
+side. Even if userland depends on changing the cgroup membership of a
+kthread, it either has to synchronize with kthread_create() or retry
+periodically, so it's unlikely that this would break anything.
+
+v2: Switch to a simpler implementation using a new task_struct bit
+ field suggested by Oleg.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Suggested-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Reported-and-debugged-by: Chris Mason <clm@fb.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/cgroup.h | 21 +++++++++++++++++++++
+ include/linux/sched.h | 4 ++++
+ kernel/cgroup.c | 9 +++++----
+ kernel/kthread.c | 3 +++
+ 4 files changed, 33 insertions(+), 4 deletions(-)
+
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -528,6 +528,25 @@ static inline void pr_cont_cgroup_path(s
+ pr_cont_kernfs_path(cgrp->kn);
+ }
+
++static inline void cgroup_init_kthreadd(void)
++{
++ /*
++ * kthreadd is inherited by all kthreads, keep it in the root so
++ * that the new kthreads are guaranteed to stay in the root until
++ * initialization is finished.
++ */
++ current->no_cgroup_migration = 1;
++}
++
++static inline void cgroup_kthread_ready(void)
++{
++ /*
++ * This kthread finished initialization. The creator should have
++ * set PF_NO_SETAFFINITY if this kthread should stay in the root.
++ */
++ current->no_cgroup_migration = 0;
++}
++
+ #else /* !CONFIG_CGROUPS */
+
+ struct cgroup_subsys_state;
+@@ -551,6 +570,8 @@ static inline void cgroup_free(struct ta
+
+ static inline int cgroup_init_early(void) { return 0; }
+ static inline int cgroup_init(void) { return 0; }
++static inline void cgroup_init_kthreadd(void) {}
++static inline void cgroup_kthread_ready(void) {}
+
+ #endif /* !CONFIG_CGROUPS */
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1475,6 +1475,10 @@ struct task_struct {
+ #ifdef CONFIG_COMPAT_BRK
+ unsigned brk_randomized:1;
+ #endif
++#ifdef CONFIG_CGROUPS
++ /* disallow userland-initiated cgroup migration */
++ unsigned no_cgroup_migration:1;
++#endif
+
+ unsigned long atomic_flags; /* Flags needing atomic access. */
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -2752,11 +2752,12 @@ static ssize_t __cgroup_procs_write(stru
+ tsk = tsk->group_leader;
+
+ /*
+- * Workqueue threads may acquire PF_NO_SETAFFINITY and become
+- * trapped in a cpuset, or RT worker may be born in a cgroup
+- * with no rt_runtime allocated. Just say no.
++ * kthreads may acquire PF_NO_SETAFFINITY during initialization.
++ * If userland migrates such a kthread to a non-root cgroup, it can
++ * become trapped in a cpuset, or RT kthread may be born in a
++ * cgroup with no rt_runtime allocated. Just say no.
+ */
+- if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
++ if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
+ ret = -EINVAL;
+ goto out_unlock_rcu;
+ }
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -18,6 +18,7 @@
+ #include <linux/freezer.h>
+ #include <linux/ptrace.h>
+ #include <linux/uaccess.h>
++#include <linux/cgroup.h>
+ #include <trace/events/sched.h>
+
+ static DEFINE_SPINLOCK(kthread_create_lock);
+@@ -205,6 +206,7 @@ static int kthread(void *_create)
+ ret = -EINTR;
+
+ if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
++ cgroup_kthread_ready();
+ __kthread_parkme(&self);
+ ret = threadfn(data);
+ }
+@@ -510,6 +512,7 @@ int kthreadd(void *unused)
+ set_mems_allowed(node_states[N_MEMORY]);
+
+ current->flags |= PF_NOFREEZE;
++ cgroup_init_kthreadd();
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);