git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.13-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Mon, 31 Mar 2014 23:59:31 +0000 (16:59 -0700)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Mon, 31 Mar 2014 23:59:31 +0000 (16:59 -0700)
added patches:
cgroup-protect-modifications-to-cgroup_idr-with-cgroup_mutex.patch
don-t-bother-with-propagate_mnt-unless-the-target-is-shared.patch
keep-shadowed-vfsmounts-together.patch
mm-close-pagetail-race.patch
netfilter-nf_conntrack_dccp-fix-skb_header_pointer-api-usages.patch
switch-mnt_hash-to-hlist.patch

queue-3.13/cgroup-protect-modifications-to-cgroup_idr-with-cgroup_mutex.patch [new file with mode: 0644]
queue-3.13/don-t-bother-with-propagate_mnt-unless-the-target-is-shared.patch [new file with mode: 0644]
queue-3.13/keep-shadowed-vfsmounts-together.patch [new file with mode: 0644]
queue-3.13/mm-close-pagetail-race.patch [new file with mode: 0644]
queue-3.13/netfilter-nf_conntrack_dccp-fix-skb_header_pointer-api-usages.patch [new file with mode: 0644]
queue-3.13/series
queue-3.13/switch-mnt_hash-to-hlist.patch [new file with mode: 0644]

diff --git a/queue-3.13/cgroup-protect-modifications-to-cgroup_idr-with-cgroup_mutex.patch b/queue-3.13/cgroup-protect-modifications-to-cgroup_idr-with-cgroup_mutex.patch
new file mode 100644 (file)
index 0000000..6f13d3f
--- /dev/null
@@ -0,0 +1,131 @@
+From 0ab02ca8f887908152d1a96db5130fc661d36a1e Mon Sep 17 00:00:00 2001
+From: Li Zefan <lizefan@huawei.com>
+Date: Tue, 11 Feb 2014 16:05:46 +0800
+Subject: cgroup: protect modifications to cgroup_idr with cgroup_mutex
+
+From: Li Zefan <lizefan@huawei.com>
+
+commit 0ab02ca8f887908152d1a96db5130fc661d36a1e upstream.
+
+Set up cgroupfs like this:
+  # mount -t cgroup -o cpuacct xxx /cgroup
+  # mkdir /cgroup/sub1
+  # mkdir /cgroup/sub2
+
+Then run these two commands:
+  # for ((; ;)) { mkdir /cgroup/sub1/tmp && rmdir /cgroup/sub1/tmp; } &
+  # for ((; ;)) { mkdir /cgroup/sub2/tmp && rmdir /cgroup/sub2/tmp; } &
+
+After a few seconds you may see this warning:
+
+------------[ cut here ]------------
+WARNING: CPU: 1 PID: 25243 at lib/idr.c:527 sub_remove+0x87/0x1b0()
+idr_remove called for id=6 which is not allocated.
+...
+Call Trace:
+ [<ffffffff8156063c>] dump_stack+0x7a/0x96
+ [<ffffffff810591ac>] warn_slowpath_common+0x8c/0xc0
+ [<ffffffff81059296>] warn_slowpath_fmt+0x46/0x50
+ [<ffffffff81300aa7>] sub_remove+0x87/0x1b0
+ [<ffffffff810f3f02>] ? css_killed_work_fn+0x32/0x1b0
+ [<ffffffff81300bf5>] idr_remove+0x25/0xd0
+ [<ffffffff810f2bab>] cgroup_destroy_css_killed+0x5b/0xc0
+ [<ffffffff810f4000>] css_killed_work_fn+0x130/0x1b0
+ [<ffffffff8107cdbc>] process_one_work+0x26c/0x550
+ [<ffffffff8107eefe>] worker_thread+0x12e/0x3b0
+ [<ffffffff81085f96>] kthread+0xe6/0xf0
+ [<ffffffff81570bac>] ret_from_fork+0x7c/0xb0
+---[ end trace 2d1577ec10cf80d0 ]---
+
+It's because allocating/removing a cgroup ID is not properly synchronized.
+
+The bug was introduced when we converted cgroup_ida to cgroup_idr.
+While synchronization is handled internally by ida_simple_{get,remove}(),
+users of idr_{alloc,remove}() are responsible for serializing concurrent
+calls themselves.
+
+tj: Refreshed on top of b58c89986a77 ("cgroup: fix error return from
+cgroup_create()").
+
+[mhocko@suse.cz: ported to 3.12]
+Fixes: 4e96ee8e981b ("cgroup: convert cgroup_ida to cgroup_idr")
+Cc: <stable@vger.kernel.org> #3.12+
+Reported-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Michal Hocko <mhocko@suse.cz>
+Signed-off-by: Jiri Slaby <jslaby@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cgroup.h |    2 ++
+ kernel/cgroup.c        |   26 +++++++++++++-------------
+ 2 files changed, 15 insertions(+), 13 deletions(-)
+
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -169,6 +169,8 @@ struct cgroup {
+        *
+        * The ID of the root cgroup is always 0, and a new cgroup
+        * will be assigned with a smallest available ID.
++       *
++       * Allocating/Removing ID must be protected by cgroup_mutex.
+        */
+       int id;
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -4363,16 +4363,6 @@ static long cgroup_create(struct cgroup
+       rcu_assign_pointer(cgrp->name, name);
+       /*
+-       * Temporarily set the pointer to NULL, so idr_find() won't return
+-       * a half-baked cgroup.
+-       */
+-      cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+-      if (cgrp->id < 0) {
+-              err = -ENOMEM;
+-              goto err_free_name;
+-      }
+-
+-      /*
+        * Only live parents can have children.  Note that the liveliness
+        * check isn't strictly necessary because cgroup_mkdir() and
+        * cgroup_rmdir() are fully synchronized by i_mutex; however, do it
+@@ -4381,7 +4371,7 @@ static long cgroup_create(struct cgroup
+        */
+       if (!cgroup_lock_live_group(parent)) {
+               err = -ENODEV;
+-              goto err_free_id;
++              goto err_free_name;
+       }
+       /* Grab a reference on the superblock so the hierarchy doesn't
+@@ -4391,6 +4381,16 @@ static long cgroup_create(struct cgroup
+        * fs */
+       atomic_inc(&sb->s_active);
++      /*
++       * Temporarily set the pointer to NULL, so idr_find() won't return
++       * a half-baked cgroup.
++       */
++      cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
++      if (cgrp->id < 0) {
++              err = -ENOMEM;
++              goto err_unlock;
++      }
++
+       init_cgroup_housekeeping(cgrp);
+       dentry->d_fsdata = cgrp;
+@@ -4491,11 +4491,11 @@ err_free_all:
+                       ss->css_free(css);
+               }
+       }
++      idr_remove(&root->cgroup_idr, cgrp->id);
++err_unlock:
+       mutex_unlock(&cgroup_mutex);
+       /* Release the reference count that we took on the superblock */
+       deactivate_super(sb);
+-err_free_id:
+-      idr_remove(&root->cgroup_idr, cgrp->id);
+ err_free_name:
+       kfree(rcu_dereference_raw(cgrp->name));
+ err_free_cgrp:
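
The rule the commit message describes - ida_simple_{get,remove}() lock internally, while idr_{alloc,remove}() callers must provide their own serialization - boils down to the pattern below, which the hunks above establish by allocating the ID under cgroup_mutex. This is an illustrative kernel-style sketch with made-up names, not code taken from the patch:

	#include <linux/idr.h>
	#include <linux/mutex.h>
	#include <linux/gfp.h>

	static DEFINE_IDR(example_idr);
	static DEFINE_MUTEX(example_mutex);	/* plays the role of cgroup_mutex */

	static int example_alloc_id(void *ptr)
	{
		int id;

		/* idr_alloc() does not lock for us; serialize against idr_remove(). */
		mutex_lock(&example_mutex);
		id = idr_alloc(&example_idr, ptr, 1, 0, GFP_KERNEL);
		mutex_unlock(&example_mutex);

		return id;	/* >= 1 on success, negative errno on failure */
	}

	static void example_free_id(int id)
	{
		mutex_lock(&example_mutex);
		idr_remove(&example_idr, id);
		mutex_unlock(&example_mutex);
	}
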
diff --git a/queue-3.13/don-t-bother-with-propagate_mnt-unless-the-target-is-shared.patch b/queue-3.13/don-t-bother-with-propagate_mnt-unless-the-target-is-shared.patch
new file mode 100644 (file)
index 0000000..05170c7
--- /dev/null
@@ -0,0 +1,55 @@
+From 0b1b901b5a98bb36943d10820efc796f7cd45ff3 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Fri, 21 Mar 2014 10:14:08 -0400
+Subject: don't bother with propagate_mnt() unless the target is shared
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 0b1b901b5a98bb36943d10820efc796f7cd45ff3 upstream.
+
+If dest_mnt is not shared, propagate_mnt() does nothing -
+there are no mounts to propagate to and thus no copies to create.
+We might as well not bother calling it in that case.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |   17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1653,16 +1653,14 @@ static int attach_recursive_mnt(struct m
+               err = invent_group_ids(source_mnt, true);
+               if (err)
+                       goto out;
+-      }
+-      err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+-      if (err)
+-              goto out_cleanup_ids;
+-
+-      lock_mount_hash();
+-
+-      if (IS_MNT_SHARED(dest_mnt)) {
++              err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
++              if (err)
++                      goto out_cleanup_ids;
++              lock_mount_hash();
+               for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+                       set_mnt_shared(p);
++      } else {
++              lock_mount_hash();
+       }
+       if (parent_path) {
+               detach_mnt(source_mnt, parent_path);
+@@ -1685,8 +1683,7 @@ static int attach_recursive_mnt(struct m
+       return 0;
+  out_cleanup_ids:
+-      if (IS_MNT_SHARED(dest_mnt))
+-              cleanup_group_ids(source_mnt, NULL);
++      cleanup_group_ids(source_mnt, NULL);
+  out:
+       return err;
+ }
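
Reassembled from the hunk above, the patched portion of attach_recursive_mnt() now reads roughly as follows (error labels and the rest of the function omitted); the whole propagation machinery is simply skipped when dest_mnt is not shared:

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		if (err)
			goto out_cleanup_ids;
		lock_mount_hash();
		/* every mount in the attached tree becomes shared as well */
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		/* nothing to propagate to - just take the hash lock */
		lock_mount_hash();
	}
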
diff --git a/queue-3.13/keep-shadowed-vfsmounts-together.patch b/queue-3.13/keep-shadowed-vfsmounts-together.patch
new file mode 100644 (file)
index 0000000..baec3b1
--- /dev/null
@@ -0,0 +1,92 @@
+From 1d6a32acd70ab18499829c0a9a5dbe2bace72a13 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 20 Mar 2014 20:34:43 -0400
+Subject: keep shadowed vfsmounts together
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 1d6a32acd70ab18499829c0a9a5dbe2bace72a13 upstream.
+
+Preparation for switching mnt_hash to hlist.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |   32 +++++++++++++++++++++++---------
+ 1 file changed, 23 insertions(+), 9 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -621,12 +621,20 @@ struct mount *__lookup_mnt(struct vfsmou
+ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+ {
+       struct list_head *head = m_hash(mnt, dentry);
+-      struct mount *p;
++      struct mount *p, *res = NULL;
+-      list_for_each_entry_reverse(p, head, mnt_hash)
++      list_for_each_entry(p, head, mnt_hash)
+               if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+-                      return p;
+-      return NULL;
++                      goto found;
++      return res;
++found:
++      res = p;
++      list_for_each_entry_continue(p, head, mnt_hash) {
++              if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
++                      break;
++              res = p;
++      }
++      return res;
+ }
+ /*
+@@ -769,14 +777,14 @@ static void attach_mnt(struct mount *mnt
+                       struct mountpoint *mp)
+ {
+       mnt_set_mountpoint(parent, mp, mnt);
+-      list_add_tail(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
++      list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
+       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ }
+ /*
+  * vfsmount lock must be held for write
+  */
+-static void commit_tree(struct mount *mnt)
++static void commit_tree(struct mount *mnt, struct mount *shadows)
+ {
+       struct mount *parent = mnt->mnt_parent;
+       struct mount *m;
+@@ -791,7 +799,10 @@ static void commit_tree(struct mount *mn
+       list_splice(&head, n->list.prev);
+-      list_add_tail(&mnt->mnt_hash,
++      if (shadows)
++              list_add(&mnt->mnt_hash, &shadows->mnt_hash);
++      else
++              list_add(&mnt->mnt_hash,
+                               m_hash(&parent->mnt, mnt->mnt_mountpoint));
+       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+       touch_mnt_namespace(n);
+@@ -1659,12 +1670,15 @@ static int attach_recursive_mnt(struct m
+               touch_mnt_namespace(source_mnt->mnt_ns);
+       } else {
+               mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
+-              commit_tree(source_mnt);
++              commit_tree(source_mnt, NULL);
+       }
+       list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
++              struct mount *q;
+               list_del_init(&child->mnt_hash);
+-              commit_tree(child);
++              q = __lookup_mnt_last(&child->mnt_parent->mnt,
++                                    child->mnt_mountpoint);
++              commit_tree(child, q);
+       }
+       unlock_mount_hash();
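
For readability, here is the patched __lookup_mnt_last() reassembled from the hunk above with comments added; it relies on the attach_mnt()/commit_tree() changes keeping all mounts that shadow each other at the same (parent, mountpoint) adjacent in the hash chain:

	struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
	{
		struct list_head *head = m_hash(mnt, dentry);
		struct mount *p, *res = NULL;

		/* Find the first mount hashed at this (parent, mountpoint)... */
		list_for_each_entry(p, head, mnt_hash)
			if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
				goto found;
		return res;
	found:
		res = p;
		/* ...then walk the rest of the contiguous run of shadowing mounts;
		 * stop at the first entry for a different mountpoint, leaving res
		 * pointing at the last match in the run. */
		list_for_each_entry_continue(p, head, mnt_hash) {
			if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
				break;
			res = p;
		}
		return res;
	}
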
diff --git a/queue-3.13/mm-close-pagetail-race.patch b/queue-3.13/mm-close-pagetail-race.patch
new file mode 100644 (file)
index 0000000..d1cf99b
--- /dev/null
@@ -0,0 +1,222 @@
+From 668f9abbd4334e6c29fa8acd71635c4f9101caa7 Mon Sep 17 00:00:00 2001
+From: David Rientjes <rientjes@google.com>
+Date: Mon, 3 Mar 2014 15:38:18 -0800
+Subject: mm: close PageTail race
+
+From: David Rientjes <rientjes@google.com>
+
+commit 668f9abbd4334e6c29fa8acd71635c4f9101caa7 upstream.
+
+Commit bf6bddf1924e ("mm: introduce compaction and migration for
+ballooned pages") introduces page_count(page) into memory compaction
+which dereferences page->first_page if PageTail(page).
+
+This results in a very rare NULL pointer dereference on the
+aforementioned page_count(page).  Indeed, anything that does
+compound_head(), including page_count() is susceptible to racing with
+prep_compound_page() and seeing a NULL or dangling page->first_page
+pointer.
+
+This patch uses Andrea's implementation of compound_trans_head() that
+deals with such a race and makes it the default compound_head()
+implementation.  This includes a read memory barrier that ensures that
+if PageTail(head) is true that we return a head page that is neither
+NULL nor dangling.  The patch then adds a store memory barrier to
+prep_compound_page() to ensure page->first_page is set.
+
+This is the safest way to ensure we see the head page that we are
+expecting; PageTail(page) is already in the unlikely() path, and the
+memory barriers are unfortunately required.
+
+Hugetlbfs is the exception: we don't enforce a store memory barrier
+during init since no race is possible.
+
+Signed-off-by: David Rientjes <rientjes@google.com>
+Cc: Holger Kiehl <Holger.Kiehl@dwd.de>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Rafael Aquini <aquini@redhat.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ drivers/block/aoe/aoecmd.c      |    4 ++--
+ drivers/vfio/vfio_iommu_type1.c |    4 ++--
+ fs/proc/page.c                  |    2 +-
+ include/linux/huge_mm.h         |   18 ------------------
+ include/linux/mm.h              |   14 ++++++++++++--
+ mm/ksm.c                        |    2 +-
+ mm/memory-failure.c             |    2 +-
+ mm/page_alloc.c                 |    4 +++-
+ mm/swap.c                       |    4 ++--
+ 9 files changed, 24 insertions(+), 30 deletions(-)
+
+--- a/drivers/block/aoe/aoecmd.c
++++ b/drivers/block/aoe/aoecmd.c
+@@ -905,7 +905,7 @@ bio_pageinc(struct bio *bio)
+               /* Non-zero page count for non-head members of
+                * compound pages is no longer allowed by the kernel.
+                */
+-              page = compound_trans_head(bv->bv_page);
++              page = compound_head(bv->bv_page);
+               atomic_inc(&page->_count);
+       }
+ }
+@@ -918,7 +918,7 @@ bio_pagedec(struct bio *bio)
+       int i;
+       bio_for_each_segment(bv, bio, i) {
+-              page = compound_trans_head(bv->bv_page);
++              page = compound_head(bv->bv_page);
+               atomic_dec(&page->_count);
+       }
+ }
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -186,12 +186,12 @@ static bool is_invalid_reserved_pfn(unsi
+       if (pfn_valid(pfn)) {
+               bool reserved;
+               struct page *tail = pfn_to_page(pfn);
+-              struct page *head = compound_trans_head(tail);
++              struct page *head = compound_head(tail);
+               reserved = !!(PageReserved(head));
+               if (head != tail) {
+                       /*
+                        * "head" is not a dangling pointer
+-                       * (compound_trans_head takes care of that)
++                       * (compound_head takes care of that)
+                        * but the hugepage may have been split
+                        * from under us (and we may not hold a
+                        * reference count on the head page so it can
+--- a/fs/proc/page.c
++++ b/fs/proc/page.c
+@@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page)
+        * just checks PG_head/PG_tail, so we need to check PageLRU to make
+        * sure a given page is a thp, not a non-huge compound page.
+        */
+-      else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
++      else if (PageTransCompound(page) && PageLRU(compound_head(page)))
+               u |= 1 << KPF_THP;
+       /*
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -157,23 +157,6 @@ static inline int hpage_nr_pages(struct
+               return HPAGE_PMD_NR;
+       return 1;
+ }
+-static inline struct page *compound_trans_head(struct page *page)
+-{
+-      if (PageTail(page)) {
+-              struct page *head;
+-              head = page->first_page;
+-              smp_rmb();
+-              /*
+-               * head may be a dangling pointer.
+-               * __split_huge_page_refcount clears PageTail before
+-               * overwriting first_page, so if PageTail is still
+-               * there it means the head pointer isn't dangling.
+-               */
+-              if (PageTail(page))
+-                      return head;
+-      }
+-      return page;
+-}
+ extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+                               unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+@@ -203,7 +186,6 @@ static inline int split_huge_page(struct
+       do { } while (0)
+ #define split_huge_page_pmd_mm(__mm, __address, __pmd)        \
+       do { } while (0)
+-#define compound_trans_head(page) compound_head(page)
+ static inline int hugepage_madvise(struct vm_area_struct *vma,
+                                  unsigned long *vm_flags, int advice)
+ {
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -389,8 +389,18 @@ static inline void compound_unlock_irqre
+ static inline struct page *compound_head(struct page *page)
+ {
+-      if (unlikely(PageTail(page)))
+-              return page->first_page;
++      if (unlikely(PageTail(page))) {
++              struct page *head = page->first_page;
++
++              /*
++               * page->first_page may be a dangling pointer to an old
++               * compound page, so recheck that it is still a tail
++               * page before returning.
++               */
++              smp_rmb();
++              if (likely(PageTail(page)))
++                      return head;
++      }
+       return page;
+ }
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -444,7 +444,7 @@ static void break_cow(struct rmap_item *
+ static struct page *page_trans_compound_anon(struct page *page)
+ {
+       if (PageTransCompound(page)) {
+-              struct page *head = compound_trans_head(page);
++              struct page *head = compound_head(page);
+               /*
+                * head may actually be splitted and freed from under
+                * us but it's ok here.
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1645,7 +1645,7 @@ int soft_offline_page(struct page *page,
+ {
+       int ret;
+       unsigned long pfn = page_to_pfn(page);
+-      struct page *hpage = compound_trans_head(page);
++      struct page *hpage = compound_head(page);
+       if (PageHWPoison(page)) {
+               pr_info("soft offline: %#lx page already poisoned\n", pfn);
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -369,9 +369,11 @@ void prep_compound_page(struct page *pag
+       __SetPageHead(page);
+       for (i = 1; i < nr_pages; i++) {
+               struct page *p = page + i;
+-              __SetPageTail(p);
+               set_page_count(p, 0);
+               p->first_page = page;
++              /* Make sure p->first_page is always valid for PageTail() */
++              smp_wmb();
++              __SetPageTail(p);
+       }
+ }
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -84,7 +84,7 @@ static void put_compound_page(struct pag
+ {
+       if (unlikely(PageTail(page))) {
+               /* __split_huge_page_refcount can run under us */
+-              struct page *page_head = compound_trans_head(page);
++              struct page *page_head = compound_head(page);
+               if (likely(page != page_head &&
+                          get_page_unless_zero(page_head))) {
+@@ -222,7 +222,7 @@ bool __get_page_tail(struct page *page)
+        */
+       unsigned long flags;
+       bool got = false;
+-      struct page *page_head = compound_trans_head(page);
++      struct page *page_head = compound_head(page);
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /* Ref to put_compound_page() comment. */
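
Taken together, the two hunks that carry the fix pair a store barrier in prep_compound_page() with a read barrier in the new compound_head(); condensed from the diff above with comments added, the ordering argument looks like this:

	/* Writer side - prep_compound_page(), condensed from the hunk above. */
	for (i = 1; i < nr_pages; i++) {
		struct page *p = page + i;

		set_page_count(p, 0);
		p->first_page = page;	/* publish the head pointer...    */
		smp_wmb();		/* ...strictly before...          */
		__SetPageTail(p);	/* ...marking the page as a tail. */
	}

	/* Reader side - the new compound_head(), condensed from the hunk above. */
	static inline struct page *compound_head(struct page *page)
	{
		if (unlikely(PageTail(page))) {
			struct page *head = page->first_page;

			/* If PageTail is still set after the read barrier, then
			 * first_page had not been torn down when we read it, so
			 * head is neither NULL nor dangling. */
			smp_rmb();
			if (likely(PageTail(page)))
				return head;
		}
		return page;
	}
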
diff --git a/queue-3.13/netfilter-nf_conntrack_dccp-fix-skb_header_pointer-api-usages.patch b/queue-3.13/netfilter-nf_conntrack_dccp-fix-skb_header_pointer-api-usages.patch
new file mode 100644 (file)
index 0000000..7a39612
--- /dev/null
@@ -0,0 +1,62 @@
+From b22f5126a24b3b2f15448c3f2a254fc10cbc2b92 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Mon, 6 Jan 2014 00:57:54 +0100
+Subject: netfilter: nf_conntrack_dccp: fix skb_header_pointer API usages
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+commit b22f5126a24b3b2f15448c3f2a254fc10cbc2b92 upstream.
+
+Some occurrences in the netfilter tree use skb_header_pointer() in
+the following way ...
+
+  struct dccp_hdr _dh, *dh;
+  ...
+  skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
+
+... where dh itself is a pointer that is being passed as the copy
+buffer. Instead, we need to use &_dh as the fourth argument so that
+we're copying the data into an actual buffer that sits on the stack.
+
+Currently, we could probably overwrite memory on the stack (e.g.
+with a possibly malformed DCCP packet), albeit unintentionally, as
+we only want the data to be copied into the _dh variable.
+
+Fixes: 2bc780499aa3 ("[NETFILTER]: nf_conntrack: add DCCP protocol support")
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/netfilter/nf_conntrack_proto_dccp.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/netfilter/nf_conntrack_proto_dccp.c
++++ b/net/netfilter/nf_conntrack_proto_dccp.c
+@@ -428,7 +428,7 @@ static bool dccp_new(struct nf_conn *ct,
+       const char *msg;
+       u_int8_t state;
+-      dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
++      dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+       BUG_ON(dh == NULL);
+       state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
+@@ -486,7 +486,7 @@ static int dccp_packet(struct nf_conn *c
+       u_int8_t type, old_state, new_state;
+       enum ct_dccp_roles role;
+-      dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
++      dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+       BUG_ON(dh == NULL);
+       type = dh->dccph_type;
+@@ -577,7 +577,7 @@ static int dccp_error(struct net *net, s
+       unsigned int cscov;
+       const char *msg;
+-      dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
++      dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+       if (dh == NULL) {
+               msg = "nf_ct_dccp: short packet ";
+               goto out_invalid;
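
For reference, the contract of skb_header_pointer(skb, offset, len, buffer): if len bytes at offset are in the skb's linear area it returns a pointer straight into the packet data; otherwise it copies them into the caller-supplied buffer and returns that buffer, or NULL if the packet is too short. The fourth argument therefore has to be the scratch struct itself, not the pointer variable. A sketch of the intended pattern, mirroring the patched lines rather than adding new code:

	struct dccp_hdr _dh, *dh;

	/* Correct: _dh is the on-stack scratch buffer for non-linear skbs. */
	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
	if (dh == NULL)
		goto out_invalid;	/* packet too short for a DCCP header */

	/* Buggy pre-patch form: &dh is the address of a pointer-sized
	 * variable, so copying sizeof(_dh) bytes into it scribbles past it
	 * on the stack:
	 *	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
	 */
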
diff --git a/queue-3.13/series b/queue-3.13/series
index bfcda6e6cada379b8b35be0e4906e520dfd82e6c..f16d672c7bfa798e9c09ada23d20119ee9ff24c8 100644 (file)
--- a/queue-3.13/series
+++ b/queue-3.13/series
@@ -14,3 +14,9 @@ net-mvneta-rename-mvneta_gmac2_psc_enable-to-mvneta_gmac2_pcs_enable.patch
 net-mvneta-fix-usage-as-a-module-on-rgmii-configurations.patch
 random32-avoid-attempt-to-late-reseed-if-in-the-middle-of-seeding.patch
 resizable-namespace.c-hashes.patch
+keep-shadowed-vfsmounts-together.patch
+don-t-bother-with-propagate_mnt-unless-the-target-is-shared.patch
+switch-mnt_hash-to-hlist.patch
+mm-close-pagetail-race.patch
+cgroup-protect-modifications-to-cgroup_idr-with-cgroup_mutex.patch
+netfilter-nf_conntrack_dccp-fix-skb_header_pointer-api-usages.patch
diff --git a/queue-3.13/switch-mnt_hash-to-hlist.patch b/queue-3.13/switch-mnt_hash-to-hlist.patch
new file mode 100644 (file)
index 0000000..e40c80e
--- /dev/null
@@ -0,0 +1,347 @@
+From 38129a13e6e71f666e0468e99fdd932a687b4d7e Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Thu, 20 Mar 2014 21:10:51 -0400
+Subject: switch mnt_hash to hlist
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 38129a13e6e71f666e0468e99fdd932a687b4d7e upstream.
+
+Fixes an RCU bug - walking through an hlist is safe in the face of element
+moves, since it is self-terminating.  Cyclic lists are not - if we end up
+jumping to another hash chain, we'll loop infinitely without ever hitting
+the original list head.
+
+[fix for dumb braino folded]
+
+Spotted by: Max Kellermann <mk@cm4all.com>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/mount.h     |    2 -
+ fs/namespace.c |   79 +++++++++++++++++++++++++++++++--------------------------
+ fs/pnode.c     |   26 ++++++++++--------
+ fs/pnode.h     |    4 +-
+ 4 files changed, 61 insertions(+), 50 deletions(-)
+
+--- a/fs/mount.h
++++ b/fs/mount.h
+@@ -25,7 +25,7 @@ struct mountpoint {
+ };
+ struct mount {
+-      struct list_head mnt_hash;
++      struct hlist_node mnt_hash;
+       struct mount *mnt_parent;
+       struct dentry *mnt_mountpoint;
+       struct vfsmount mnt;
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(mnt_id_lock);
+ static int mnt_id_start = 0;
+ static int mnt_group_start = 1;
+-static struct list_head *mount_hashtable __read_mostly;
++static struct hlist_head *mount_hashtable __read_mostly;
+ static struct hlist_head *mountpoint_hashtable __read_mostly;
+ static struct kmem_cache *mnt_cache __read_mostly;
+ static DECLARE_RWSEM(namespace_sem);
+@@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
+  */
+ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+-static inline struct list_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
++static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
+ {
+       unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
+       tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
+@@ -217,7 +217,7 @@ static struct mount *alloc_vfsmnt(const
+               mnt->mnt_writers = 0;
+ #endif
+-              INIT_LIST_HEAD(&mnt->mnt_hash);
++              INIT_HLIST_NODE(&mnt->mnt_hash);
+               INIT_LIST_HEAD(&mnt->mnt_child);
+               INIT_LIST_HEAD(&mnt->mnt_mounts);
+               INIT_LIST_HEAD(&mnt->mnt_list);
+@@ -605,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bas
+  */
+ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+ {
+-      struct list_head *head = m_hash(mnt, dentry);
++      struct hlist_head *head = m_hash(mnt, dentry);
+       struct mount *p;
+-      list_for_each_entry_rcu(p, head, mnt_hash)
++      hlist_for_each_entry_rcu(p, head, mnt_hash)
+               if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+                       return p;
+       return NULL;
+@@ -620,20 +620,16 @@ struct mount *__lookup_mnt(struct vfsmou
+  */
+ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
+ {
+-      struct list_head *head = m_hash(mnt, dentry);
+-      struct mount *p, *res = NULL;
+-
+-      list_for_each_entry(p, head, mnt_hash)
+-              if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
+-                      goto found;
+-      return res;
+-found:
+-      res = p;
+-      list_for_each_entry_continue(p, head, mnt_hash) {
++      struct mount *p, *res;
++      res = p = __lookup_mnt(mnt, dentry);
++      if (!p)
++              goto out;
++      hlist_for_each_entry_continue(p, mnt_hash) {
+               if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
+                       break;
+               res = p;
+       }
++out:
+       return res;
+ }
+@@ -750,7 +746,7 @@ static void detach_mnt(struct mount *mnt
+       mnt->mnt_parent = mnt;
+       mnt->mnt_mountpoint = mnt->mnt.mnt_root;
+       list_del_init(&mnt->mnt_child);
+-      list_del_init(&mnt->mnt_hash);
++      hlist_del_init_rcu(&mnt->mnt_hash);
+       put_mountpoint(mnt->mnt_mp);
+       mnt->mnt_mp = NULL;
+ }
+@@ -777,7 +773,7 @@ static void attach_mnt(struct mount *mnt
+                       struct mountpoint *mp)
+ {
+       mnt_set_mountpoint(parent, mp, mnt);
+-      list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
++      hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
+       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ }
+@@ -800,9 +796,9 @@ static void commit_tree(struct mount *mn
+       list_splice(&head, n->list.prev);
+       if (shadows)
+-              list_add(&mnt->mnt_hash, &shadows->mnt_hash);
++              hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+       else
+-              list_add(&mnt->mnt_hash,
++              hlist_add_head_rcu(&mnt->mnt_hash,
+                               m_hash(&parent->mnt, mnt->mnt_mountpoint));
+       list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+       touch_mnt_namespace(n);
+@@ -1193,26 +1189,28 @@ int may_umount(struct vfsmount *mnt)
+ EXPORT_SYMBOL(may_umount);
+-static LIST_HEAD(unmounted);  /* protected by namespace_sem */
++static HLIST_HEAD(unmounted); /* protected by namespace_sem */
+ static void namespace_unlock(void)
+ {
+       struct mount *mnt;
+-      LIST_HEAD(head);
++      struct hlist_head head = unmounted;
+-      if (likely(list_empty(&unmounted))) {
++      if (likely(hlist_empty(&head))) {
+               up_write(&namespace_sem);
+               return;
+       }
+-      list_splice_init(&unmounted, &head);
++      head.first->pprev = &head.first;
++      INIT_HLIST_HEAD(&unmounted);
++
+       up_write(&namespace_sem);
+       synchronize_rcu();
+-      while (!list_empty(&head)) {
+-              mnt = list_first_entry(&head, struct mount, mnt_hash);
+-              list_del_init(&mnt->mnt_hash);
++      while (!hlist_empty(&head)) {
++              mnt = hlist_entry(head.first, struct mount, mnt_hash);
++              hlist_del_init(&mnt->mnt_hash);
+               if (mnt->mnt_ex_mountpoint.mnt)
+                       path_put(&mnt->mnt_ex_mountpoint);
+               mntput(&mnt->mnt);
+@@ -1233,16 +1231,19 @@ static inline void namespace_lock(void)
+  */
+ void umount_tree(struct mount *mnt, int how)
+ {
+-      LIST_HEAD(tmp_list);
++      HLIST_HEAD(tmp_list);
+       struct mount *p;
++      struct mount *last = NULL;
+-      for (p = mnt; p; p = next_mnt(p, mnt))
+-              list_move(&p->mnt_hash, &tmp_list);
++      for (p = mnt; p; p = next_mnt(p, mnt)) {
++              hlist_del_init_rcu(&p->mnt_hash);
++              hlist_add_head(&p->mnt_hash, &tmp_list);
++      }
+       if (how)
+               propagate_umount(&tmp_list);
+-      list_for_each_entry(p, &tmp_list, mnt_hash) {
++      hlist_for_each_entry(p, &tmp_list, mnt_hash) {
+               list_del_init(&p->mnt_expire);
+               list_del_init(&p->mnt_list);
+               __touch_mnt_namespace(p->mnt_ns);
+@@ -1260,8 +1261,13 @@ void umount_tree(struct mount *mnt, int
+                       p->mnt_mp = NULL;
+               }
+               change_mnt_propagation(p, MS_PRIVATE);
++              last = p;
++      }
++      if (last) {
++              last->mnt_hash.next = unmounted.first;
++              unmounted.first = tmp_list.first;
++              unmounted.first->pprev = &unmounted.first;
+       }
+-      list_splice(&tmp_list, &unmounted);
+ }
+ static void shrink_submounts(struct mount *mnt);
+@@ -1645,8 +1651,9 @@ static int attach_recursive_mnt(struct m
+                       struct mountpoint *dest_mp,
+                       struct path *parent_path)
+ {
+-      LIST_HEAD(tree_list);
++      HLIST_HEAD(tree_list);
+       struct mount *child, *p;
++      struct hlist_node *n;
+       int err;
+       if (IS_MNT_SHARED(dest_mnt)) {
+@@ -1671,9 +1678,9 @@ static int attach_recursive_mnt(struct m
+               commit_tree(source_mnt, NULL);
+       }
+-      list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
++      hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
+               struct mount *q;
+-              list_del_init(&child->mnt_hash);
++              hlist_del_init(&child->mnt_hash);
+               q = __lookup_mnt_last(&child->mnt_parent->mnt,
+                                     child->mnt_mountpoint);
+               commit_tree(child, q);
+@@ -2818,7 +2825,7 @@ void __init mnt_init(void)
+                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+       mount_hashtable = alloc_large_system_hash("Mount-cache",
+-                              sizeof(struct list_head),
++                              sizeof(struct hlist_head),
+                               mhash_entries, 19,
+                               0,
+                               &m_hash_shift, &m_hash_mask, 0, 0);
+@@ -2832,7 +2839,7 @@ void __init mnt_init(void)
+               panic("Failed to allocate mount hash table\n");
+       for (u = 0; u <= m_hash_mask; u++)
+-              INIT_LIST_HEAD(&mount_hashtable[u]);
++              INIT_HLIST_HEAD(&mount_hashtable[u]);
+       for (u = 0; u <= mp_hash_mask; u++)
+               INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
+--- a/fs/pnode.c
++++ b/fs/pnode.c
+@@ -220,14 +220,14 @@ static struct mount *get_source(struct m
+  * @tree_list : list of heads of trees to be attached.
+  */
+ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
+-                  struct mount *source_mnt, struct list_head *tree_list)
++                  struct mount *source_mnt, struct hlist_head *tree_list)
+ {
+       struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
+       struct mount *m, *child;
+       int ret = 0;
+       struct mount *prev_dest_mnt = dest_mnt;
+       struct mount *prev_src_mnt  = source_mnt;
+-      LIST_HEAD(tmp_list);
++      HLIST_HEAD(tmp_list);
+       for (m = propagation_next(dest_mnt, dest_mnt); m;
+                       m = propagation_next(m, dest_mnt)) {
+@@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt
+               child = copy_tree(source, source->mnt.mnt_root, type);
+               if (IS_ERR(child)) {
+                       ret = PTR_ERR(child);
+-                      list_splice(tree_list, tmp_list.prev);
++                      tmp_list = *tree_list;
++                      tmp_list.first->pprev = &tmp_list.first;
++                      INIT_HLIST_HEAD(tree_list);
+                       goto out;
+               }
+               if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+                       mnt_set_mountpoint(m, dest_mp, child);
+-                      list_add_tail(&child->mnt_hash, tree_list);
++                      hlist_add_head(&child->mnt_hash, tree_list);
+               } else {
+                       /*
+                        * This can happen if the parent mount was bind mounted
+                        * on some subdirectory of a shared/slave mount.
+                        */
+-                      list_add_tail(&child->mnt_hash, &tmp_list);
++                      hlist_add_head(&child->mnt_hash, &tmp_list);
+               }
+               prev_dest_mnt = m;
+               prev_src_mnt  = child;
+       }
+ out:
+       lock_mount_hash();
+-      while (!list_empty(&tmp_list)) {
+-              child = list_first_entry(&tmp_list, struct mount, mnt_hash);
++      while (!hlist_empty(&tmp_list)) {
++              child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
+               umount_tree(child, 0);
+       }
+       unlock_mount_hash();
+@@ -338,8 +340,10 @@ static void __propagate_umount(struct mo
+                * umount the child only if the child has no
+                * other children
+                */
+-              if (child && list_empty(&child->mnt_mounts))
+-                      list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
++              if (child && list_empty(&child->mnt_mounts)) {
++                      hlist_del_init_rcu(&child->mnt_hash);
++                      hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
++              }
+       }
+ }
+@@ -350,11 +354,11 @@ static void __propagate_umount(struct mo
+  *
+  * vfsmount lock must be held for write
+  */
+-int propagate_umount(struct list_head *list)
++int propagate_umount(struct hlist_head *list)
+ {
+       struct mount *mnt;
+-      list_for_each_entry(mnt, list, mnt_hash)
++      hlist_for_each_entry(mnt, list, mnt_hash)
+               __propagate_umount(mnt);
+       return 0;
+ }
+--- a/fs/pnode.h
++++ b/fs/pnode.h
+@@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct
+ void change_mnt_propagation(struct mount *, int);
+ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
+-              struct list_head *);
+-int propagate_umount(struct list_head *);
++              struct hlist_head *);
++int propagate_umount(struct hlist_head *);
+ int propagate_mount_busy(struct mount *, int);
+ void mnt_release_group_id(struct mount *);
+ int get_dominating_id(struct mount *mnt, const struct path *root);
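
The RCU argument in the commit message, spelled out: an hlist chain ends in a NULL ->next pointer, so a lockless reader that races with an element being rehashed onto another chain simply walks off the end of whichever chain it finds itself on and terminates. A cyclic list_head chain only terminates when the reader gets back to the head it started from, which never happens once the walk has been carried onto a foreign chain. A sketch of the reader pattern this makes safe, paraphrasing the patched __lookup_mnt() with an illustrative rcu_read_lock() around it (fragment, not a complete function):

	struct mount *p;

	rcu_read_lock();
	/* hlist_for_each_entry_rcu() stops at a NULL ->next, so this loop
	 * terminates even if an entry is concurrently moved to another
	 * hash chain mid-walk. */
	hlist_for_each_entry_rcu(p, m_hash(mnt, dentry), mnt_hash) {
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			break;		/* found the mount hashed here */
	}
	rcu_read_unlock();
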