]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 29 Jan 2014 18:39:26 +0000 (10:39 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 29 Jan 2014 18:39:26 +0000 (10:39 -0800)
added patches:
mm-hugetlbfs-fix-hugetlbfs-optimization.patch

queue-3.4/mm-hugetlbfs-fix-hugetlbfs-optimization.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/mm-hugetlbfs-fix-hugetlbfs-optimization.patch b/queue-3.4/mm-hugetlbfs-fix-hugetlbfs-optimization.patch
new file mode 100644 (file)
index 0000000..937bfb8
--- /dev/null
@@ -0,0 +1,271 @@
+From 27c73ae759774e63313c1fbfeb17ba076cea64c5 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli <aarcange@redhat.com>
+Date: Thu, 21 Nov 2013 14:32:02 -0800
+Subject: mm: hugetlbfs: fix hugetlbfs optimization
+
+From: Andrea Arcangeli <aarcange@redhat.com>
+
+commit 27c73ae759774e63313c1fbfeb17ba076cea64c5 upstream.
+
+Commit 7cb2ef56e6a8 ("mm: fix aio performance regression for database
+caused by THP") can cause dereference of a dangling pointer if
+split_huge_page runs during PageHuge() if there are updates to the
+tail_page->private field.
+
+Also it is repeating compound_head twice for hugetlbfs and it is running
+compound_head+compound_trans_head for THP when a single one is needed in
+both cases.
+
+The new code within the PageSlab() check doesn't need to verify that the
+THP page size is never bigger than the smallest hugetlbfs page size, to
+avoid memory corruption.
+
+A longstanding theoretical race condition was found while fixing the
+above (see the change right after the skip_unlock label, that is
+relevant for the compound_lock path too).
+
+By re-establishing the _mapcount tail refcounting for all compound
+pages, this also fixes the below problem:
+
+  echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+  BUG: Bad page state in process bash  pfn:59a01
+  page:ffffea000139b038 count:0 mapcount:10 mapping:          (null) index:0x0
+  page flags: 0x1c00000000008000(tail)
+  Modules linked in:
+  CPU: 6 PID: 2018 Comm: bash Not tainted 3.12.0+ #25
+  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+  Call Trace:
+    dump_stack+0x55/0x76
+    bad_page+0xd5/0x130
+    free_pages_prepare+0x213/0x280
+    __free_pages+0x36/0x80
+    update_and_free_page+0xc1/0xd0
+    free_pool_huge_page+0xc2/0xe0
+    set_max_huge_pages.part.58+0x14c/0x220
+    nr_hugepages_store_common.isra.60+0xd0/0xf0
+    nr_hugepages_store+0x13/0x20
+    kobj_attr_store+0xf/0x20
+    sysfs_write_file+0x189/0x1e0
+    vfs_write+0xc5/0x1f0
+    SyS_write+0x55/0xb0
+    system_call_fastpath+0x16/0x1b
+
+Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com>
+Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
+Tested-by: Khalid Aziz <khalid.aziz@oracle.com>
+Cc: Pravin Shelar <pshelar@nicira.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Ben Hutchings <bhutchings@solarflare.com>
+Cc: Christoph Lameter <cl@linux.com>
+Cc: Johannes Weiner <jweiner@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Andi Kleen <andi@firstfloor.org>
+Cc: Minchan Kim <minchan@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Guillaume Morin <guillaume@morinfr.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+
+---
+ include/linux/hugetlb.h |    6 ++
+ mm/hugetlb.c            |   17 +++++++
+ mm/swap.c               |  107 ++++++++++++++++++++++++++++++++++--------------
+ 3 files changed, 101 insertions(+), 29 deletions(-)
+
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -24,6 +24,7 @@ struct hugepage_subpool *hugepage_new_su
+ void hugepage_put_subpool(struct hugepage_subpool *spool);
+ int PageHuge(struct page *page);
++int PageHeadHuge(struct page *page_head);
+ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+ int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+@@ -84,6 +85,11 @@ static inline int PageHuge(struct page *
+ {
+       return 0;
+ }
++
++static inline int PageHeadHuge(struct page *page_head)
++{
++      return 0;
++}
+ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+ {
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -679,6 +679,23 @@ int PageHuge(struct page *page)
+ }
+ EXPORT_SYMBOL_GPL(PageHuge);
++/*
++ * PageHeadHuge() only returns true for hugetlbfs head page, but not for
++ * normal or transparent huge pages.
++ */
++int PageHeadHuge(struct page *page_head)
++{
++      compound_page_dtor *dtor;
++
++      if (!PageHead(page_head))
++              return 0;
++
++      dtor = get_compound_page_dtor(page_head);
++
++      return dtor == free_huge_page;
++}
++EXPORT_SYMBOL_GPL(PageHeadHuge);
++
+ pgoff_t __basepage_index(struct page *page)
+ {
+       struct page *page_head = compound_head(page);
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -77,18 +77,6 @@ static void __put_compound_page(struct p
+ static void put_compound_page(struct page *page)
+ {
+-      /*
+-       * hugetlbfs pages cannot be split from under us.  If this is a
+-       * hugetlbfs page, check refcount on head page and release the page if
+-       * the refcount becomes zero.
+-       */
+-      if (PageHuge(page)) {
+-              page = compound_head(page);
+-              if (put_page_testzero(page))
+-                      __put_compound_page(page);
+-              return;
+-      }
+-
+       if (unlikely(PageTail(page))) {
+               /* __split_huge_page_refcount can run under us */
+               struct page *page_head = compound_trans_head(page);
+@@ -96,6 +84,35 @@ static void put_compound_page(struct pag
+               if (likely(page != page_head &&
+                          get_page_unless_zero(page_head))) {
+                       unsigned long flags;
++
++                       if (PageHeadHuge(page_head)) {
++                              if (likely(PageTail(page))) {
++                                      /*
++                                       * __split_huge_page_refcount
++                                       * cannot race here.
++                                       */
++                                      VM_BUG_ON(!PageHead(page_head));
++                                      atomic_dec(&page->_mapcount);
++                                      if (put_page_testzero(page_head))
++                                              VM_BUG_ON(1);
++                                      if (put_page_testzero(page_head))
++                                              __put_compound_page(page_head);
++                                      return;
++                              } else {
++                                      /*
++                                       * __split_huge_page_refcount
++                                       * run before us, "page" was a
++                                       * THP tail. The split
++                                       * page_head has been freed
++                                       * and reallocated as slab or
++                                       * hugetlbfs page of smaller
++                                       * order (only possible if
++                                       * reallocated as slab on
++                                       * x86).
++                                       */
++                                      goto skip_lock;
++                              }
++                      }
+                       /*
+                        * page_head wasn't a dangling pointer but it
+                        * may not be a head page anymore by the time
+@@ -107,9 +124,29 @@ static void put_compound_page(struct pag
+                               /* __split_huge_page_refcount run before us */
+                               compound_unlock_irqrestore(page_head, flags);
+                               VM_BUG_ON(PageHead(page_head));
+-                              if (put_page_testzero(page_head))
+-                                      __put_single_page(page_head);
+-                      out_put_single:
++skip_lock:
++                              if (put_page_testzero(page_head)) {
++                                      /*
++                                       * The head page may have been
++                                       * freed and reallocated as a
++                                       * compound page of smaller
++                                       * order and then freed again.
++                                       * All we know is that it
++                                       * cannot have become: a THP
++                                       * page, a compound page of
++                                       * higher order, a tail page.
++                                       * That is because we still
++                                       * hold the refcount of the
++                                       * split THP tail and
++                                       * page_head was the THP head
++                                       * before the split.
++                                       */
++                                      if (PageHead(page_head))
++                                              __put_compound_page(page_head);
++                                      else
++                                              __put_single_page(page_head);
++                              }
++out_put_single:
+                               if (put_page_testzero(page))
+                                       __put_single_page(page);
+                               return;
+@@ -173,21 +210,34 @@ bool __get_page_tail(struct page *page)
+        */
+       unsigned long flags;
+       bool got = false;
+-      struct page *page_head;
++      struct page *page_head = compound_trans_head(page);
+-      /*
+-       * If this is a hugetlbfs page it cannot be split under us.  Simply
+-       * increment refcount for the head page.
+-       */
+-      if (PageHuge(page)) {
+-              page_head = compound_head(page);
+-              atomic_inc(&page_head->_count);
+-              got = true;
+-              goto out;
+-      }
+-
+-      page_head = compound_trans_head(page);
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
++              /* Ref to put_compound_page() comment. */
++              if (PageHeadHuge(page_head)) {
++                      if (likely(PageTail(page))) {
++                              /*
++                               * This is a hugetlbfs
++                               * page. __split_huge_page_refcount
++                               * cannot race here.
++                               */
++                              VM_BUG_ON(!PageHead(page_head));
++                              __get_page_tail_foll(page, false);
++                              return true;
++                      } else {
++                              /*
++                               * __split_huge_page_refcount run
++                               * before us, "page" was a THP
++                               * tail. The split page_head has been
++                               * freed and reallocated as slab or
++                               * hugetlbfs page of smaller order
++                               * (only possible if reallocated as
++                               * slab on x86).
++                               */
++                              put_page(page_head);
++                              return false;
++                      }
++              }
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+@@ -204,7 +254,6 @@ bool __get_page_tail(struct page *page)
+               if (unlikely(!got))
+                       put_page(page_head);
+       }
+-out:
+       return got;
+ }
+ EXPORT_SYMBOL(__get_page_tail);
index 2902dc1d45beb022b62197dd8f32b4fb02fcc95b..bf6ff413e1e03a2d00153246bf56c276ac3c4d80 100644 (file)
@@ -1 +1,2 @@
 md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch
+mm-hugetlbfs-fix-hugetlbfs-optimization.patch