From 4b64cf1356e23d39cb55c2929d41371f189c8520 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Jul 2014 17:16:30 -0700 Subject: [PATCH] 3.15-stable patches added patches: cgroup-fix-a-race-between-cgroup_mount-and-cgroup_kill_sb.patch kernfs-implement-kernfs_root-supers-list.patch kernfs-introduce-kernfs_pin_sb.patch --- ...ween-cgroup_mount-and-cgroup_kill_sb.patch | 103 ++++++++++++++++ ...fs-implement-kernfs_root-supers-list.patch | 111 ++++++++++++++++++ .../kernfs-introduce-kernfs_pin_sb.patch | 78 ++++++++++++ queue-3.15/series | 3 + 4 files changed, 295 insertions(+) create mode 100644 queue-3.15/cgroup-fix-a-race-between-cgroup_mount-and-cgroup_kill_sb.patch create mode 100644 queue-3.15/kernfs-implement-kernfs_root-supers-list.patch create mode 100644 queue-3.15/kernfs-introduce-kernfs_pin_sb.patch diff --git a/queue-3.15/cgroup-fix-a-race-between-cgroup_mount-and-cgroup_kill_sb.patch b/queue-3.15/cgroup-fix-a-race-between-cgroup_mount-and-cgroup_kill_sb.patch new file mode 100644 index 00000000000..71f49171275 --- /dev/null +++ b/queue-3.15/cgroup-fix-a-race-between-cgroup_mount-and-cgroup_kill_sb.patch @@ -0,0 +1,103 @@ +From 3a32bd72d77058d768dbb38183ad517f720dd1bc Mon Sep 17 00:00:00 2001 +From: Li Zefan +Date: Mon, 30 Jun 2014 11:50:59 +0800 +Subject: cgroup: fix a race between cgroup_mount() and cgroup_kill_sb() + +From: Li Zefan + +commit 3a32bd72d77058d768dbb38183ad517f720dd1bc upstream. + +We've converted cgroup to kernfs so cgroup won't be intertwined with +vfs objects and locking, but there are dark areas. + +Run two instances of this script concurrently: + + for ((; ;)) + { + mount -t cgroup -o cpuacct xxx /cgroup + umount /cgroup + } + +After a while, I saw two mount processes were stuck at retrying, because +they were waiting for a subsystem to become free, but the root associated +with this subsystem never got freed. 
+ +This can happen, if thread A is in the process of killing superblock but +hasn't called percpu_ref_kill(), and at this time thread B is mounting +the same cgroup root and finds the root in the root list and performs +percpu_ref_try_get(). + +To fix this, we try to increase both the refcnt of the superblock and the +percpu refcnt of cgroup root. + +v2: +- we should try to get both the superblock refcnt and cgroup_root refcnt, + because cgroup_root may have no superblock associated with it. +- adjust/add comments. + +tj: Updated comments. Renamed @sb to @pinned_sb. + +Signed-off-by: Li Zefan +Signed-off-by: Tejun Heo +[lizf: Backported to 3.15: + - Adjust context + - s/percpu_tryget_live/atomic_inc_not_zero/] +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup.c | 28 +++++++++++++++++++++++++++- + 1 file changed, 27 insertions(+), 1 deletion(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -1484,6 +1484,7 @@ static struct dentry *cgroup_mount(struc + int flags, const char *unused_dev_name, + void *data) + { ++ struct super_block *pinned_sb = NULL; + struct cgroup_subsys *ss; + struct cgroup_root *root; + struct cgroup_sb_opts opts; +@@ -1584,10 +1585,25 @@ retry: + * destruction to complete so that the subsystems are free. + * We can use wait_queue for the wait but this path is + * super cold. Let's just sleep for a bit and retry. ++ ++ * We want to reuse @root whose lifetime is governed by its ++ * ->cgrp. Let's check whether @root is alive and keep it ++ * that way. As cgroup_kill_sb() can happen anytime, we ++ * want to block it by pinning the sb so that @root doesn't ++ * get killed before mount is complete. ++ * ++ * With the sb pinned, inc_not_zero can reliably indicate ++ * whether @root can be reused. If it's being killed, ++ * drain it. We can use wait_queue for the wait but this ++ * path is super cold. Let's just sleep a bit and retry. 
+ */ +- if (!atomic_inc_not_zero(&root->cgrp.refcnt)) { ++ pinned_sb = kernfs_pin_sb(root->kf_root, NULL); ++ if (IS_ERR(pinned_sb) || ++ !atomic_inc_not_zero(&root->cgrp.refcnt)) { + mutex_unlock(&cgroup_mutex); + mutex_unlock(&cgroup_tree_mutex); ++ if (!IS_ERR_OR_NULL(pinned_sb)) ++ deactivate_super(pinned_sb); + msleep(10); + mutex_lock(&cgroup_tree_mutex); + mutex_lock(&cgroup_mutex); +@@ -1634,6 +1650,16 @@ out_unlock: + CGROUP_SUPER_MAGIC, &new_sb); + if (IS_ERR(dentry) || !new_sb) + cgroup_put(&root->cgrp); ++ ++ /* ++ * If @pinned_sb, we're reusing an existing root and holding an ++ * extra ref on its sb. Mount is complete. Put the extra ref. ++ */ ++ if (pinned_sb) { ++ WARN_ON(new_sb); ++ deactivate_super(pinned_sb); ++ } ++ + return dentry; + } + diff --git a/queue-3.15/kernfs-implement-kernfs_root-supers-list.patch b/queue-3.15/kernfs-implement-kernfs_root-supers-list.patch new file mode 100644 index 00000000000..2ee3d7d8572 --- /dev/null +++ b/queue-3.15/kernfs-implement-kernfs_root-supers-list.patch @@ -0,0 +1,111 @@ +From 7d568a8383bbb9c1f5167781075906acb2bb1550 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 9 Apr 2014 11:07:30 -0400 +Subject: kernfs: implement kernfs_root->supers list + +From: Tejun Heo + +commit 7d568a8383bbb9c1f5167781075906acb2bb1550 upstream. + +Currently, there's no way to find out which super_blocks are +associated with a given kernfs_root. Let's implement it - the planned +inotify extension to kernfs_notify() needs it. + +Make kernfs_super_info point back to the super_block and chain it at +kernfs_root->supers. 
+ +Signed-off-by: Tejun Heo +[lizf: Backported to 3.15: Adjust context] +Signed-off-by: Li Zefan +Signed-off-by: Greg Kroah-Hartman +--- + fs/kernfs/dir.c | 1 + + fs/kernfs/kernfs-internal.h | 5 +++++ + fs/kernfs/mount.c | 11 +++++++++++ + include/linux/kernfs.h | 4 ++++ + 4 files changed, 21 insertions(+) + +--- a/fs/kernfs/dir.c ++++ b/fs/kernfs/dir.c +@@ -714,6 +714,7 @@ struct kernfs_root *kernfs_create_root(s + return ERR_PTR(-ENOMEM); + + ida_init(&root->ino_ida); ++ INIT_LIST_HEAD(&root->supers); + + kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, + KERNFS_DIR); +--- a/fs/kernfs/kernfs-internal.h ++++ b/fs/kernfs/kernfs-internal.h +@@ -49,6 +49,8 @@ static inline struct kernfs_root *kernfs + * mount.c + */ + struct kernfs_super_info { ++ struct super_block *sb; ++ + /* + * The root associated with this super_block. Each super_block is + * identified by the root and ns it's associated with. +@@ -62,6 +64,9 @@ struct kernfs_super_info { + * an array and compare kernfs_node tag against every entry. 
+ */ + const void *ns; ++ ++ /* anchored at kernfs_root->supers, protected by kernfs_mutex */ ++ struct list_head node; + }; + #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) + +--- a/fs/kernfs/mount.c ++++ b/fs/kernfs/mount.c +@@ -68,6 +68,7 @@ static int kernfs_fill_super(struct supe + struct inode *inode; + struct dentry *root; + ++ info->sb = sb; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = magic; +@@ -167,12 +168,18 @@ struct dentry *kernfs_mount_ns(struct fi + *new_sb_created = !sb->s_root; + + if (!sb->s_root) { ++ struct kernfs_super_info *info = kernfs_info(sb); ++ + error = kernfs_fill_super(sb, magic); + if (error) { + deactivate_locked_super(sb); + return ERR_PTR(error); + } + sb->s_flags |= MS_ACTIVE; ++ ++ mutex_lock(&kernfs_mutex); ++ list_add(&info->node, &root->supers); ++ mutex_unlock(&kernfs_mutex); + } + + return dget(sb->s_root); +@@ -191,6 +198,10 @@ void kernfs_kill_sb(struct super_block * + struct kernfs_super_info *info = kernfs_info(sb); + struct kernfs_node *root_kn = sb->s_root->d_fsdata; + ++ mutex_lock(&kernfs_mutex); ++ list_del(&info->node); ++ mutex_unlock(&kernfs_mutex); ++ + /* + * Remove the superblock from fs_supers/s_instances + * so we can't find it, before freeing kernfs_super_info. 
+--- a/include/linux/kernfs.h ++++ b/include/linux/kernfs.h +@@ -161,6 +161,10 @@ struct kernfs_root { + /* private fields, do not use outside kernfs proper */ + struct ida ino_ida; + struct kernfs_syscall_ops *syscall_ops; ++ ++ /* list of kernfs_super_info of this root, protected by kernfs_mutex */ ++ struct list_head supers; ++ + wait_queue_head_t deactivate_waitq; + }; + diff --git a/queue-3.15/kernfs-introduce-kernfs_pin_sb.patch b/queue-3.15/kernfs-introduce-kernfs_pin_sb.patch new file mode 100644 index 00000000000..1639fbb922c --- /dev/null +++ b/queue-3.15/kernfs-introduce-kernfs_pin_sb.patch @@ -0,0 +1,78 @@ +From 4e26445faad366d67d7723622bf6a60a6f0f5993 Mon Sep 17 00:00:00 2001 +From: Li Zefan +Date: Mon, 30 Jun 2014 11:50:28 +0800 +Subject: kernfs: introduce kernfs_pin_sb() + +From: Li Zefan + +commit 4e26445faad366d67d7723622bf6a60a6f0f5993 upstream. + +kernfs_pin_sb() tries to get a refcnt of the superblock. + +This will be used by cgroupfs. + +v2: +- make kernfs_pin_sb() return the superblock. +- drop kernfs_drop_sb(). + +tj: Updated the comment a bit. + +[ This is a prerequisite for a bugfix. ] +Acked-by: Greg Kroah-Hartman +Signed-off-by: Li Zefan +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + fs/kernfs/mount.c | 30 ++++++++++++++++++++++++++++++ + include/linux/kernfs.h | 1 + + 2 files changed, 31 insertions(+) + +--- a/fs/kernfs/mount.c ++++ b/fs/kernfs/mount.c +@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block * + kernfs_put(root_kn); + } + ++/** ++ * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root ++ * @kernfs_root: the kernfs_root in question ++ * @ns: the namespace tag ++ * ++ * Pin the superblock so the superblock won't be destroyed in subsequent ++ * operations. This can be used to block ->kill_sb() which may be useful ++ * for kernfs users which dynamically manage superblocks. 
++ * ++ * Returns NULL if there's no superblock associated to this kernfs_root, or ++ * -EINVAL if the superblock is being freed. ++ */ ++struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns) ++{ ++ struct kernfs_super_info *info; ++ struct super_block *sb = NULL; ++ ++ mutex_lock(&kernfs_mutex); ++ list_for_each_entry(info, &root->supers, node) { ++ if (info->ns == ns) { ++ sb = info->sb; ++ if (!atomic_inc_not_zero(&info->sb->s_active)) ++ sb = ERR_PTR(-EINVAL); ++ break; ++ } ++ } ++ mutex_unlock(&kernfs_mutex); ++ return sb; ++} ++ + void __init kernfs_init(void) + { + kernfs_node_cache = kmem_cache_create("kernfs_node_cache", +--- a/include/linux/kernfs.h ++++ b/include/linux/kernfs.h +@@ -304,6 +304,7 @@ struct dentry *kernfs_mount_ns(struct fi + struct kernfs_root *root, unsigned long magic, + bool *new_sb_created, const void *ns); + void kernfs_kill_sb(struct super_block *sb); ++struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns); + + void kernfs_init(void); + diff --git a/queue-3.15/series b/queue-3.15/series index 935cf9e4906..7bf3e73b27b 100644 --- a/queue-3.15/series +++ b/queue-3.15/series @@ -67,3 +67,6 @@ ring-buffer-check-if-buffer-exists-before-polling.patch i40e-fix-passing-wrong-error-code-to-i40e_open.patch mtd-nand-omap-fix-omap_calculate_ecc_bch-for-loop-error.patch cgroup-fix-mount-failure-in-a-corner-case.patch +kernfs-implement-kernfs_root-supers-list.patch +kernfs-introduce-kernfs_pin_sb.patch +cgroup-fix-a-race-between-cgroup_mount-and-cgroup_kill_sb.patch -- 2.47.3