--- /dev/null
+From 0ab02ca8f887908152d1a96db5130fc661d36a1e Mon Sep 17 00:00:00 2001
+From: Li Zefan <lizefan@huawei.com>
+Date: Tue, 11 Feb 2014 16:05:46 +0800
+Subject: cgroup: protect modifications to cgroup_idr with cgroup_mutex
+
+From: Li Zefan <lizefan@huawei.com>
+
+commit 0ab02ca8f887908152d1a96db5130fc661d36a1e upstream.
+
+Set up cgroupfs like this:
+ # mount -t cgroup -o cpuacct xxx /cgroup
+ # mkdir /cgroup/sub1
+ # mkdir /cgroup/sub2
+
+Then run these two commands:
+ # for ((; ;)) { mkdir /cgroup/sub1/tmp && rmdir /cgroup/sub1/tmp; } &
+ # for ((; ;)) { mkdir /cgroup/sub2/tmp && rmdir /cgroup/sub2/tmp; } &
+
+After a few seconds you may see this warning:
+
+------------[ cut here ]------------
+WARNING: CPU: 1 PID: 25243 at lib/idr.c:527 sub_remove+0x87/0x1b0()
+idr_remove called for id=6 which is not allocated.
+...
+Call Trace:
+ [<ffffffff8156063c>] dump_stack+0x7a/0x96
+ [<ffffffff810591ac>] warn_slowpath_common+0x8c/0xc0
+ [<ffffffff81059296>] warn_slowpath_fmt+0x46/0x50
+ [<ffffffff81300aa7>] sub_remove+0x87/0x1b0
+ [<ffffffff810f3f02>] ? css_killed_work_fn+0x32/0x1b0
+ [<ffffffff81300bf5>] idr_remove+0x25/0xd0
+ [<ffffffff810f2bab>] cgroup_destroy_css_killed+0x5b/0xc0
+ [<ffffffff810f4000>] css_killed_work_fn+0x130/0x1b0
+ [<ffffffff8107cdbc>] process_one_work+0x26c/0x550
+ [<ffffffff8107eefe>] worker_thread+0x12e/0x3b0
+ [<ffffffff81085f96>] kthread+0xe6/0xf0
+ [<ffffffff81570bac>] ret_from_fork+0x7c/0xb0
+---[ end trace 2d1577ec10cf80d0 ]---
+
+It's because cgroup ID allocation/removal is not properly synchronized.
+
+The bug was introduced when we converted cgroup_ida to cgroup_idr.
+While ida_simple_{get,remove}() handle synchronization internally,
+callers of idr_{alloc,remove}() must synchronize concurrent calls
+themselves.
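+
+The locking rule relied on here, as a minimal sketch (my_idr, my_lock
+and ptr are illustrative names, not the actual cgroup code):
+
+ static DEFINE_IDR(my_idr);
+ static DEFINE_MUTEX(my_lock);	/* idr_alloc()/idr_remove() don't lock */
+
+ int id;
+
+ mutex_lock(&my_lock);
+ id = idr_alloc(&my_idr, ptr, 1, 0, GFP_KERNEL);	/* < 0 on failure */
+ mutex_unlock(&my_lock);
+ ...
+ mutex_lock(&my_lock);
+ idr_remove(&my_idr, id);
+ mutex_unlock(&my_lock);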
+
+tj: Refreshed on top of b58c89986a77 ("cgroup: fix error return from
+cgroup_create()").
+
+[mhocko@suse.cz: ported to 3.12]
+Fixes: 4e96ee8e981b ("cgroup: convert cgroup_ida to cgroup_idr")
+Cc: <stable@vger.kernel.org> #3.12+
+Reported-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cgroup.h | 2 ++
+ kernel/cgroup.c | 26 +++++++++++++-------------
+ 2 files changed, 15 insertions(+), 13 deletions(-)
+
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -169,6 +169,8 @@ struct cgroup {
+ *
+ * The ID of the root cgroup is always 0, and a new cgroup
+ * will be assigned with a smallest available ID.
++ *
++ * Allocating/Removing ID must be protected by cgroup_mutex.
+ */
+ int id;
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -4363,16 +4363,6 @@ static long cgroup_create(struct cgroup
+ rcu_assign_pointer(cgrp->name, name);
+
+ /*
+- * Temporarily set the pointer to NULL, so idr_find() won't return
+- * a half-baked cgroup.
+- */
+- cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+- if (cgrp->id < 0) {
+- err = -ENOMEM;
+- goto err_free_name;
+- }
+-
+- /*
+ * Only live parents can have children. Note that the liveliness
+ * check isn't strictly necessary because cgroup_mkdir() and
+ * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
+@@ -4381,7 +4371,7 @@ static long cgroup_create(struct cgroup
+ */
+ if (!cgroup_lock_live_group(parent)) {
+ err = -ENODEV;
+- goto err_free_id;
++ goto err_free_name;
+ }
+
+ /* Grab a reference on the superblock so the hierarchy doesn't
+@@ -4391,6 +4381,16 @@ static long cgroup_create(struct cgroup
+ * fs */
+ atomic_inc(&sb->s_active);
+
++ /*
++ * Temporarily set the pointer to NULL, so idr_find() won't return
++ * a half-baked cgroup.
++ */
++ cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
++ if (cgrp->id < 0) {
++ err = -ENOMEM;
++ goto err_unlock;
++ }
++
+ init_cgroup_housekeeping(cgrp);
+
+ dentry->d_fsdata = cgrp;
+@@ -4491,11 +4491,11 @@ err_free_all:
+ ss->css_free(css);
+ }
+ }
++ idr_remove(&root->cgroup_idr, cgrp->id);
++err_unlock:
+ mutex_unlock(&cgroup_mutex);
+ /* Release the reference count that we took on the superblock */
+ deactivate_super(sb);
+-err_free_id:
+- idr_remove(&root->cgroup_idr, cgrp->id);
+ err_free_name:
+ kfree(rcu_dereference_raw(cgrp->name));
+ err_free_cgrp:
--- /dev/null
+From 0b1b901b5a98bb36943d10820efc796f7cd45ff3 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Fri, 21 Mar 2014 10:14:08 -0400
+Subject: don't bother with propagate_mnt() unless the target is shared
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 0b1b901b5a98bb36943d10820efc796f7cd45ff3 upstream.
+
+If the dest_mnt is not shared, propagate_mnt() does nothing -
+there are no mounts to propagate to and thus no copies to create.
+Might as well not bother calling it in that case.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1653,16 +1653,14 @@ static int attach_recursive_mnt(struct m
+ err = invent_group_ids(source_mnt, true);
+ if (err)
+ goto out;
+- }
+- err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+- if (err)
+- goto out_cleanup_ids;
+-
+- lock_mount_hash();
+-
+- if (IS_MNT_SHARED(dest_mnt)) {
++ err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
++ if (err)
++ goto out_cleanup_ids;
++ lock_mount_hash();
+ for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+ set_mnt_shared(p);
++ } else {
++ lock_mount_hash();
+ }
+ if (parent_path) {
+ detach_mnt(source_mnt, parent_path);
+@@ -1685,8 +1683,7 @@ static int attach_recursive_mnt(struct m
+ return 0;
+
+ out_cleanup_ids:
+- if (IS_MNT_SHARED(dest_mnt))
+- cleanup_group_ids(source_mnt, NULL);
++ cleanup_group_ids(source_mnt, NULL);
+ out:
+ return err;
+ }
--- /dev/null
+From 1d6a32acd70ab18499829c0a9a5dbe2bace72a13 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 20 Mar 2014 20:34:43 -0400
+Subject: keep shadowed vfsmounts together
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 1d6a32acd70ab18499829c0a9a5dbe2bace72a13 upstream.
+
+preparation for switching mnt_hash to hlist
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 32 +++++++++++++++++++++++---------
+ 1 file changed, 23 insertions(+), 9 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -621,12 +621,20 @@ struct mount *__lookup_mnt(struct vfsmou
+ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+ {
+ struct list_head *head = m_hash(mnt, dentry);
+- struct mount *p;
++ struct mount *p, *res = NULL;
+
+- list_for_each_entry_reverse(p, head, mnt_hash)
++ list_for_each_entry(p, head, mnt_hash)
+ if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+- return p;
+- return NULL;
++ goto found;
++ return res;
++found:
++ res = p;
++ list_for_each_entry_continue(p, head, mnt_hash) {
++ if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
++ break;
++ res = p;
++ }
++ return res;
+ }
+
+ /*
+@@ -769,14 +777,14 @@ static void attach_mnt(struct mount *mnt
+ struct mountpoint *mp)
+ {
+ mnt_set_mountpoint(parent, mp, mnt);
+- list_add_tail(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
++ list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ }
+
+ /*
+ * vfsmount lock must be held for write
+ */
+-static void commit_tree(struct mount *mnt)
++static void commit_tree(struct mount *mnt, struct mount *shadows)
+ {
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m;
+@@ -791,7 +799,10 @@ static void commit_tree(struct mount *mn
+
+ list_splice(&head, n->list.prev);
+
+- list_add_tail(&mnt->mnt_hash,
++ if (shadows)
++ list_add(&mnt->mnt_hash, &shadows->mnt_hash);
++ else
++ list_add(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ touch_mnt_namespace(n);
+@@ -1659,12 +1670,15 @@ static int attach_recursive_mnt(struct m
+ touch_mnt_namespace(source_mnt->mnt_ns);
+ } else {
+ mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
+- commit_tree(source_mnt);
++ commit_tree(source_mnt, NULL);
+ }
+
+ list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
++ struct mount *q;
+ list_del_init(&child->mnt_hash);
+- commit_tree(child);
++ q = __lookup_mnt_last(&child->mnt_parent->mnt,
++ child->mnt_mountpoint);
++ commit_tree(child, q);
+ }
+ unlock_mount_hash();
+
--- /dev/null
+From 668f9abbd4334e6c29fa8acd71635c4f9101caa7 Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Mon, 3 Mar 2014 15:38:18 -0800
+Subject: mm: close PageTail race
+
+From: David Rientjes <rientjes@google.com>
+
+commit 668f9abbd4334e6c29fa8acd71635c4f9101caa7 upstream.
+
+Commit bf6bddf1924e ("mm: introduce compaction and migration for
+ballooned pages") introduces page_count(page) into memory compaction
+which dereferences page->first_page if PageTail(page).
+
+This results in a very rare NULL pointer dereference on the
+aforementioned page_count(page). Indeed, anything that does
+compound_head(), including page_count() is susceptible to racing with
+prep_compound_page() and seeing a NULL or dangling page->first_page
+pointer.
+
+This patch uses Andrea's implementation of compound_trans_head() that
+deals with such a race and makes it the default compound_head()
+implementation. This includes a read memory barrier that ensures that
+if PageTail(head) is true we return a head page that is neither
+NULL nor dangling. The patch then adds a store memory barrier to
+prep_compound_page() to ensure page->first_page is set.
+
+This is the safest way to ensure we see the head page that we are
+expecting; PageTail(page) is already in the unlikely() path and the
+memory barriers are unfortunately required.
+
+Hugetlbfs is the exception: we don't enforce a store memory barrier
+during init since no race is possible.
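+
+Condensed, the pairing looks like this (the two fragments mirror the
+prep_compound_page() and compound_head() hunks below):
+
+ /* writer side, prep_compound_page() */
+ p->first_page = page;
+ smp_wmb();		/* publish first_page before setting PG_tail */
+ __SetPageTail(p);
+
+ /* reader side, compound_head() */
+ if (unlikely(PageTail(page))) {
+ 	struct page *head = page->first_page;
+
+ 	smp_rmb();			/* pairs with the smp_wmb() above */
+ 	if (likely(PageTail(page)))	/* still a tail => head isn't dangling */
+ 		return head;
+ }
+ return page;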
+
+Signed-off-by: David Rientjes <rientjes@google.com>
+Cc: Holger Kiehl <Holger.Kiehl@dwd.de>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Rafael Aquini <aquini@redhat.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ drivers/block/aoe/aoecmd.c | 4 ++--
+ drivers/vfio/vfio_iommu_type1.c | 4 ++--
+ fs/proc/page.c | 2 +-
+ include/linux/huge_mm.h | 18 ------------------
+ include/linux/mm.h | 14 ++++++++++++--
+ mm/ksm.c | 2 +-
+ mm/memory-failure.c | 2 +-
+ mm/page_alloc.c | 4 +++-
+ mm/swap.c | 4 ++--
+ 9 files changed, 24 insertions(+), 30 deletions(-)
+
+--- a/drivers/block/aoe/aoecmd.c
++++ b/drivers/block/aoe/aoecmd.c
+@@ -905,7 +905,7 @@ bio_pageinc(struct bio *bio)
+ /* Non-zero page count for non-head members of
+ * compound pages is no longer allowed by the kernel.
+ */
+- page = compound_trans_head(bv->bv_page);
++ page = compound_head(bv->bv_page);
+ atomic_inc(&page->_count);
+ }
+ }
+@@ -918,7 +918,7 @@ bio_pagedec(struct bio *bio)
+ int i;
+
+ bio_for_each_segment(bv, bio, i) {
+- page = compound_trans_head(bv->bv_page);
++ page = compound_head(bv->bv_page);
+ atomic_dec(&page->_count);
+ }
+ }
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -186,12 +186,12 @@ static bool is_invalid_reserved_pfn(unsi
+ if (pfn_valid(pfn)) {
+ bool reserved;
+ struct page *tail = pfn_to_page(pfn);
+- struct page *head = compound_trans_head(tail);
++ struct page *head = compound_head(tail);
+ reserved = !!(PageReserved(head));
+ if (head != tail) {
+ /*
+ * "head" is not a dangling pointer
+- * (compound_trans_head takes care of that)
++ * (compound_head takes care of that)
+ * but the hugepage may have been split
+ * from under us (and we may not hold a
+ * reference count on the head page so it can
+--- a/fs/proc/page.c
++++ b/fs/proc/page.c
+@@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page)
+ * just checks PG_head/PG_tail, so we need to check PageLRU to make
+ * sure a given page is a thp, not a non-huge compound page.
+ */
+- else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
++ else if (PageTransCompound(page) && PageLRU(compound_head(page)))
+ u |= 1 << KPF_THP;
+
+ /*
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -157,23 +157,6 @@ static inline int hpage_nr_pages(struct
+ return HPAGE_PMD_NR;
+ return 1;
+ }
+-static inline struct page *compound_trans_head(struct page *page)
+-{
+- if (PageTail(page)) {
+- struct page *head;
+- head = page->first_page;
+- smp_rmb();
+- /*
+- * head may be a dangling pointer.
+- * __split_huge_page_refcount clears PageTail before
+- * overwriting first_page, so if PageTail is still
+- * there it means the head pointer isn't dangling.
+- */
+- if (PageTail(page))
+- return head;
+- }
+- return page;
+-}
+
+ extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+@@ -203,7 +186,6 @@ static inline int split_huge_page(struct
+ do { } while (0)
+ #define split_huge_page_pmd_mm(__mm, __address, __pmd) \
+ do { } while (0)
+-#define compound_trans_head(page) compound_head(page)
+ static inline int hugepage_madvise(struct vm_area_struct *vma,
+ unsigned long *vm_flags, int advice)
+ {
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -389,8 +389,18 @@ static inline void compound_unlock_irqre
+
+ static inline struct page *compound_head(struct page *page)
+ {
+- if (unlikely(PageTail(page)))
+- return page->first_page;
++ if (unlikely(PageTail(page))) {
++ struct page *head = page->first_page;
++
++ /*
++ * page->first_page may be a dangling pointer to an old
++ * compound page, so recheck that it is still a tail
++ * page before returning.
++ */
++ smp_rmb();
++ if (likely(PageTail(page)))
++ return head;
++ }
+ return page;
+ }
+
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -444,7 +444,7 @@ static void break_cow(struct rmap_item *
+ static struct page *page_trans_compound_anon(struct page *page)
+ {
+ if (PageTransCompound(page)) {
+- struct page *head = compound_trans_head(page);
++ struct page *head = compound_head(page);
+ /*
+ * head may actually be splitted and freed from under
+ * us but it's ok here.
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1645,7 +1645,7 @@ int soft_offline_page(struct page *page,
+ {
+ int ret;
+ unsigned long pfn = page_to_pfn(page);
+- struct page *hpage = compound_trans_head(page);
++ struct page *hpage = compound_head(page);
+
+ if (PageHWPoison(page)) {
+ pr_info("soft offline: %#lx page already poisoned\n", pfn);
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -369,9 +369,11 @@ void prep_compound_page(struct page *pag
+ __SetPageHead(page);
+ for (i = 1; i < nr_pages; i++) {
+ struct page *p = page + i;
+- __SetPageTail(p);
+ set_page_count(p, 0);
+ p->first_page = page;
++ /* Make sure p->first_page is always valid for PageTail() */
++ smp_wmb();
++ __SetPageTail(p);
+ }
+ }
+
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -84,7 +84,7 @@ static void put_compound_page(struct pag
+ {
+ if (unlikely(PageTail(page))) {
+ /* __split_huge_page_refcount can run under us */
+- struct page *page_head = compound_trans_head(page);
++ struct page *page_head = compound_head(page);
+
+ if (likely(page != page_head &&
+ get_page_unless_zero(page_head))) {
+@@ -222,7 +222,7 @@ bool __get_page_tail(struct page *page)
+ */
+ unsigned long flags;
+ bool got = false;
+- struct page *page_head = compound_trans_head(page);
++ struct page *page_head = compound_head(page);
+
+ if (likely(page != page_head && get_page_unless_zero(page_head))) {
+ /* Ref to put_compound_page() comment. */
--- /dev/null
+From b22f5126a24b3b2f15448c3f2a254fc10cbc2b92 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Mon, 6 Jan 2014 00:57:54 +0100
+Subject: netfilter: nf_conntrack_dccp: fix skb_header_pointer API usages
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+commit b22f5126a24b3b2f15448c3f2a254fc10cbc2b92 upstream.
+
+Some occurrences in the netfilter tree use skb_header_pointer() in
+the following way ...
+
+ struct dccp_hdr _dh, *dh;
+ ...
+ skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
+
+... where dh itself is a pointer that is being passed as the copy
+buffer. Instead, we need to use &_dh as the fourth argument so that
+we're copying the data into an actual buffer that sits on the stack.
+
+Currently, we could probably overwrite memory on the stack (e.g. with
+a malformed DCCP packet), though unintentionally, as the data was only
+ever meant to be placed into the _dh variable.
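+
+For contrast, the intended pattern (skb and dataoff as in the handlers
+patched below):
+
+ struct dccp_hdr _dh, *dh;
+
+ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+ if (dh == NULL)
+ 	goto out_invalid;	/* header truncated or missing */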
+
+Fixes: 2bc780499aa3 ("[NETFILTER]: nf_conntrack: add DCCP protocol support")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conntrack_proto_dccp.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nf_conntrack_proto_dccp.c
++++ b/net/netfilter/nf_conntrack_proto_dccp.c
+@@ -428,7 +428,7 @@ static bool dccp_new(struct nf_conn *ct,
+ const char *msg;
+ u_int8_t state;
+
+- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
++ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+ BUG_ON(dh == NULL);
+
+ state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
+@@ -486,7 +486,7 @@ static int dccp_packet(struct nf_conn *c
+ u_int8_t type, old_state, new_state;
+ enum ct_dccp_roles role;
+
+- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
++ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+ BUG_ON(dh == NULL);
+ type = dh->dccph_type;
+
+@@ -577,7 +577,7 @@ static int dccp_error(struct net *net, s
+ unsigned int cscov;
+ const char *msg;
+
+- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
++ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+ if (dh == NULL) {
+ msg = "nf_ct_dccp: short packet ";
+ goto out_invalid;
net-mvneta-fix-usage-as-a-module-on-rgmii-configurations.patch
random32-avoid-attempt-to-late-reseed-if-in-the-middle-of-seeding.patch
resizable-namespace.c-hashes.patch
+keep-shadowed-vfsmounts-together.patch
+don-t-bother-with-propagate_mnt-unless-the-target-is-shared.patch
+switch-mnt_hash-to-hlist.patch
+mm-close-pagetail-race.patch
+cgroup-protect-modifications-to-cgroup_idr-with-cgroup_mutex.patch
+netfilter-nf_conntrack_dccp-fix-skb_header_pointer-api-usages.patch
--- /dev/null
+From 38129a13e6e71f666e0468e99fdd932a687b4d7e Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 20 Mar 2014 21:10:51 -0400
+Subject: switch mnt_hash to hlist
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 38129a13e6e71f666e0468e99fdd932a687b4d7e upstream.
+
+Fixes an RCU bug - walking through an hlist is safe in the face of
+element moves, since it's self-terminating. Cyclic lists are not - if
+we end up jumping to another hash chain, we'll loop infinitely without
+ever hitting the original list head.
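+
+The difference, sketched for a reader racing with an entry that gets
+moved to another hash chain (p and head are illustrative):
+
+ /* hlist: the walk terminates at NULL, so the worst case is that we
+  * finish the walk in the wrong chain. */
+ hlist_for_each_entry_rcu(p, head, mnt_hash)
+ 	...
+
+ /* cyclic list_head: the walk terminates only when we get back to
+  * 'head'; once carried into another chain we never see 'head' again
+  * and the walk never ends. */
+ list_for_each_entry_rcu(p, head, mnt_hash)
+ 	...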
+
+[fix for dumb braino folded]
+
+Spotted by: Max Kellermann <mk@cm4all.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/mount.h | 2 -
+ fs/namespace.c | 79 +++++++++++++++++++++++++++++++--------------------------
+ fs/pnode.c | 26 ++++++++++--------
+ fs/pnode.h | 4 +-
+ 4 files changed, 61 insertions(+), 50 deletions(-)
+
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -25,7 +25,7 @@ struct mountpoint {
+ };
+
+ struct mount {
+- struct list_head mnt_hash;
++ struct hlist_node mnt_hash;
+ struct mount *mnt_parent;
+ struct dentry *mnt_mountpoint;
+ struct vfsmount mnt;
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(mnt_id_lock);
+ static int mnt_id_start = 0;
+ static int mnt_group_start = 1;
+
+-static struct list_head *mount_hashtable __read_mostly;
++static struct hlist_head *mount_hashtable __read_mostly;
+ static struct hlist_head *mountpoint_hashtable __read_mostly;
+ static struct kmem_cache *mnt_cache __read_mostly;
+ static DECLARE_RWSEM(namespace_sem);
+@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
+ */
+ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+
+-static inline struct list_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
++static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
+ {
+ unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
+ tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
+@@ -217,7 +217,7 @@ static struct mount *alloc_vfsmnt(const
+ mnt->mnt_writers = 0;
+ #endif
+
+- INIT_LIST_HEAD(&mnt->mnt_hash);
++ INIT_HLIST_NODE(&mnt->mnt_hash);
+ INIT_LIST_HEAD(&mnt->mnt_child);
+ INIT_LIST_HEAD(&mnt->mnt_mounts);
+ INIT_LIST_HEAD(&mnt->mnt_list);
+@@ -605,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bas
+ */
+ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+ {
+- struct list_head *head = m_hash(mnt, dentry);
++ struct hlist_head *head = m_hash(mnt, dentry);
+ struct mount *p;
+
+- list_for_each_entry_rcu(p, head, mnt_hash)
++ hlist_for_each_entry_rcu(p, head, mnt_hash)
+ if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+ return p;
+ return NULL;
+@@ -620,20 +620,16 @@ struct mount *__lookup_mnt(struct vfsmou
+ */
+ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+ {
+- struct list_head *head = m_hash(mnt, dentry);
+- struct mount *p, *res = NULL;
+-
+- list_for_each_entry(p, head, mnt_hash)
+- if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+- goto found;
+- return res;
+-found:
+- res = p;
+- list_for_each_entry_continue(p, head, mnt_hash) {
++ struct mount *p, *res;
++ res = p = __lookup_mnt(mnt, dentry);
++ if (!p)
++ goto out;
++ hlist_for_each_entry_continue(p, mnt_hash) {
+ if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
+ break;
+ res = p;
+ }
++out:
+ return res;
+ }
+
+@@ -750,7 +746,7 @@ static void detach_mnt(struct mount *mnt
+ mnt->mnt_parent = mnt;
+ mnt->mnt_mountpoint = mnt->mnt.mnt_root;
+ list_del_init(&mnt->mnt_child);
+- list_del_init(&mnt->mnt_hash);
++ hlist_del_init_rcu(&mnt->mnt_hash);
+ put_mountpoint(mnt->mnt_mp);
+ mnt->mnt_mp = NULL;
+ }
+@@ -777,7 +773,7 @@ static void attach_mnt(struct mount *mnt
+ struct mountpoint *mp)
+ {
+ mnt_set_mountpoint(parent, mp, mnt);
+- list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
++ hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ }
+
+@@ -800,9 +796,9 @@ static void commit_tree(struct mount *mn
+ list_splice(&head, n->list.prev);
+
+ if (shadows)
+- list_add(&mnt->mnt_hash, &shadows->mnt_hash);
++ hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+ else
+- list_add(&mnt->mnt_hash,
++ hlist_add_head_rcu(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ touch_mnt_namespace(n);
+@@ -1193,26 +1189,28 @@ int may_umount(struct vfsmount *mnt)
+
+ EXPORT_SYMBOL(may_umount);
+
+-static LIST_HEAD(unmounted); /* protected by namespace_sem */
++static HLIST_HEAD(unmounted); /* protected by namespace_sem */
+
+ static void namespace_unlock(void)
+ {
+ struct mount *mnt;
+- LIST_HEAD(head);
++ struct hlist_head head = unmounted;
+
+- if (likely(list_empty(&unmounted))) {
++ if (likely(hlist_empty(&head))) {
+ up_write(&namespace_sem);
+ return;
+ }
+
+- list_splice_init(&unmounted, &head);
++ head.first->pprev = &head.first;
++ INIT_HLIST_HEAD(&unmounted);
++
+ up_write(&namespace_sem);
+
+ synchronize_rcu();
+
+- while (!list_empty(&head)) {
+- mnt = list_first_entry(&head, struct mount, mnt_hash);
+- list_del_init(&mnt->mnt_hash);
++ while (!hlist_empty(&head)) {
++ mnt = hlist_entry(head.first, struct mount, mnt_hash);
++ hlist_del_init(&mnt->mnt_hash);
+ if (mnt->mnt_ex_mountpoint.mnt)
+ path_put(&mnt->mnt_ex_mountpoint);
+ mntput(&mnt->mnt);
+@@ -1233,16 +1231,19 @@ static inline void namespace_lock(void)
+ */
+ void umount_tree(struct mount *mnt, int how)
+ {
+- LIST_HEAD(tmp_list);
++ HLIST_HEAD(tmp_list);
+ struct mount *p;
++ struct mount *last = NULL;
+
+- for (p = mnt; p; p = next_mnt(p, mnt))
+- list_move(&p->mnt_hash, &tmp_list);
++ for (p = mnt; p; p = next_mnt(p, mnt)) {
++ hlist_del_init_rcu(&p->mnt_hash);
++ hlist_add_head(&p->mnt_hash, &tmp_list);
++ }
+
+ if (how)
+ propagate_umount(&tmp_list);
+
+- list_for_each_entry(p, &tmp_list, mnt_hash) {
++ hlist_for_each_entry(p, &tmp_list, mnt_hash) {
+ list_del_init(&p->mnt_expire);
+ list_del_init(&p->mnt_list);
+ __touch_mnt_namespace(p->mnt_ns);
+@@ -1260,8 +1261,13 @@ void umount_tree(struct mount *mnt, int
+ p->mnt_mp = NULL;
+ }
+ change_mnt_propagation(p, MS_PRIVATE);
++ last = p;
++ }
++ if (last) {
++ last->mnt_hash.next = unmounted.first;
++ unmounted.first = tmp_list.first;
++ unmounted.first->pprev = &unmounted.first;
+ }
+- list_splice(&tmp_list, &unmounted);
+ }
+
+ static void shrink_submounts(struct mount *mnt);
+@@ -1645,8 +1651,9 @@ static int attach_recursive_mnt(struct m
+ struct mountpoint *dest_mp,
+ struct path *parent_path)
+ {
+- LIST_HEAD(tree_list);
++ HLIST_HEAD(tree_list);
+ struct mount *child, *p;
++ struct hlist_node *n;
+ int err;
+
+ if (IS_MNT_SHARED(dest_mnt)) {
+@@ -1671,9 +1678,9 @@ static int attach_recursive_mnt(struct m
+ commit_tree(source_mnt, NULL);
+ }
+
+- list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
++ hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
+ struct mount *q;
+- list_del_init(&child->mnt_hash);
++ hlist_del_init(&child->mnt_hash);
+ q = __lookup_mnt_last(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ commit_tree(child, q);
+@@ -2818,7 +2825,7 @@ void __init mnt_init(void)
+ 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+
+ mount_hashtable = alloc_large_system_hash("Mount-cache",
+- sizeof(struct list_head),
++ sizeof(struct hlist_head),
+ mhash_entries, 19,
+ 0,
+ &m_hash_shift, &m_hash_mask, 0, 0);
+@@ -2832,7 +2839,7 @@ void __init mnt_init(void)
+ panic("Failed to allocate mount hash table\n");
+
+ for (u = 0; u <= m_hash_mask; u++)
+- INIT_LIST_HEAD(&mount_hashtable[u]);
++ INIT_HLIST_HEAD(&mount_hashtable[u]);
+ for (u = 0; u <= mp_hash_mask; u++)
+ INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
+
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -220,14 +220,14 @@ static struct mount *get_source(struct m
+ * @tree_list : list of heads of trees to be attached.
+ */
+ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
+- struct mount *source_mnt, struct list_head *tree_list)
++ struct mount *source_mnt, struct hlist_head *tree_list)
+ {
+ struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+ struct mount *m, *child;
+ int ret = 0;
+ struct mount *prev_dest_mnt = dest_mnt;
+ struct mount *prev_src_mnt = source_mnt;
+- LIST_HEAD(tmp_list);
++ HLIST_HEAD(tmp_list);
+
+ for (m = propagation_next(dest_mnt, dest_mnt); m;
+ m = propagation_next(m, dest_mnt)) {
+@@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt
+ child = copy_tree(source, source->mnt.mnt_root, type);
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
+- list_splice(tree_list, tmp_list.prev);
++ tmp_list = *tree_list;
++ tmp_list.first->pprev = &tmp_list.first;
++ INIT_HLIST_HEAD(tree_list);
+ goto out;
+ }
+
+ if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+ mnt_set_mountpoint(m, dest_mp, child);
+- list_add_tail(&child->mnt_hash, tree_list);
++ hlist_add_head(&child->mnt_hash, tree_list);
+ } else {
+ /*
+ * This can happen if the parent mount was bind mounted
+ * on some subdirectory of a shared/slave mount.
+ */
+- list_add_tail(&child->mnt_hash, &tmp_list);
++ hlist_add_head(&child->mnt_hash, &tmp_list);
+ }
+ prev_dest_mnt = m;
+ prev_src_mnt = child;
+ }
+ out:
+ lock_mount_hash();
+- while (!list_empty(&tmp_list)) {
+- child = list_first_entry(&tmp_list, struct mount, mnt_hash);
++ while (!hlist_empty(&tmp_list)) {
++ child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
+ umount_tree(child, 0);
+ }
+ unlock_mount_hash();
+@@ -338,8 +340,10 @@ static void __propagate_umount(struct mo
+ * umount the child only if the child has no
+ * other children
+ */
+- if (child && list_empty(&child->mnt_mounts))
+- list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
++ if (child && list_empty(&child->mnt_mounts)) {
++ hlist_del_init_rcu(&child->mnt_hash);
++ hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
++ }
+ }
+ }
+
+@@ -350,11 +354,11 @@ static void __propagate_umount(struct mo
+ *
+ * vfsmount lock must be held for write
+ */
+-int propagate_umount(struct list_head *list)
++int propagate_umount(struct hlist_head *list)
+ {
+ struct mount *mnt;
+
+- list_for_each_entry(mnt, list, mnt_hash)
++ hlist_for_each_entry(mnt, list, mnt_hash)
+ __propagate_umount(mnt);
+ return 0;
+ }
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct
+
+ void change_mnt_propagation(struct mount *, int);
+ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
+- struct list_head *);
+-int propagate_umount(struct list_head *);
++ struct hlist_head *);
++int propagate_umount(struct hlist_head *);
+ int propagate_mount_busy(struct mount *, int);
+ void mnt_release_group_id(struct mount *);
+ int get_dominating_id(struct mount *mnt, const struct path *root);