From: Greg Kroah-Hartman Date: Wed, 29 Jan 2014 18:39:26 +0000 (-0800) Subject: 3.4-stable patches X-Git-Tag: v3.4.79~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c51552bf5abe89eac6918b3f6f5ec17b1cb782c3;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: mm-hugetlbfs-fix-hugetlbfs-optimization.patch --- diff --git a/queue-3.4/mm-hugetlbfs-fix-hugetlbfs-optimization.patch b/queue-3.4/mm-hugetlbfs-fix-hugetlbfs-optimization.patch new file mode 100644 index 00000000000..937bfb8a2cd --- /dev/null +++ b/queue-3.4/mm-hugetlbfs-fix-hugetlbfs-optimization.patch @@ -0,0 +1,271 @@ +From 27c73ae759774e63313c1fbfeb17ba076cea64c5 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Thu, 21 Nov 2013 14:32:02 -0800 +Subject: mm: hugetlbfs: fix hugetlbfs optimization + +From: Andrea Arcangeli + +commit 27c73ae759774e63313c1fbfeb17ba076cea64c5 upstream. + +Commit 7cb2ef56e6a8 ("mm: fix aio performance regression for database +caused by THP") can cause dereference of a dangling pointer if +split_huge_page runs during PageHuge() if there are updates to the +tail_page->private field. + +Also it is repeating compound_head twice for hugetlbfs and it is running +compound_head+compound_trans_head for THP when a single one is needed in +both cases. + +The new code within the PageSlab() check doesn't need to verify that the +THP page size is never bigger than the smallest hugetlbfs page size, to +avoid memory corruption. + +A longstanding theoretical race condition was found while fixing the +above (see the change right after the skip_unlock label, that is +relevant for the compound_lock path too). + +By re-establishing the _mapcount tail refcounting for all compound +pages, this also fixes the below problem: + + echo 0 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + + BUG: Bad page state in process bash pfn:59a01 + page:ffffea000139b038 count:0 mapcount:10 mapping: (null) index:0x0 + page flags: 0x1c00000000008000(tail) + Modules linked in: + CPU: 6 PID: 2018 Comm: bash Not tainted 3.12.0+ #25 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 + Call Trace: + dump_stack+0x55/0x76 + bad_page+0xd5/0x130 + free_pages_prepare+0x213/0x280 + __free_pages+0x36/0x80 + update_and_free_page+0xc1/0xd0 + free_pool_huge_page+0xc2/0xe0 + set_max_huge_pages.part.58+0x14c/0x220 + nr_hugepages_store_common.isra.60+0xd0/0xf0 + nr_hugepages_store+0x13/0x20 + kobj_attr_store+0xf/0x20 + sysfs_write_file+0x189/0x1e0 + vfs_write+0xc5/0x1f0 + SyS_write+0x55/0xb0 + system_call_fastpath+0x16/0x1b + +Signed-off-by: Khalid Aziz +Signed-off-by: Andrea Arcangeli +Tested-by: Khalid Aziz +Cc: Pravin Shelar +Cc: Greg Kroah-Hartman +Cc: Ben Hutchings +Cc: Christoph Lameter +Cc: Johannes Weiner +Cc: Mel Gorman +Cc: Rik van Riel +Cc: Andi Kleen +Cc: Minchan Kim +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Guillaume Morin +Signed-off-by: Greg Kroah-Hartman + + +--- + include/linux/hugetlb.h | 6 ++ + mm/hugetlb.c | 17 +++++++ + mm/swap.c | 107 ++++++++++++++++++++++++++++++++++-------------- + 3 files changed, 101 insertions(+), 29 deletions(-) + +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -24,6 +24,7 @@ struct hugepage_subpool *hugepage_new_su + void hugepage_put_subpool(struct hugepage_subpool *spool); + + int PageHuge(struct page *page); ++int PageHeadHuge(struct page *page_head); + + void reset_vma_resv_huge_pages(struct vm_area_struct *vma); + int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +@@ -84,6 +85,11 @@ static inline int PageHuge(struct page * + { + return 0; + } ++ ++static inline int PageHeadHuge(struct page *page_head) ++{ ++ return 0; ++} + + static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) + { +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -679,6 +679,23 @@ int PageHuge(struct page *page) + } + EXPORT_SYMBOL_GPL(PageHuge); + ++/* ++ * PageHeadHuge() only returns true for hugetlbfs head page, but not for ++ * normal or transparent huge pages. ++ */ ++int PageHeadHuge(struct page *page_head) ++{ ++ compound_page_dtor *dtor; ++ ++ if (!PageHead(page_head)) ++ return 0; ++ ++ dtor = get_compound_page_dtor(page_head); ++ ++ return dtor == free_huge_page; ++} ++EXPORT_SYMBOL_GPL(PageHeadHuge); ++ + pgoff_t __basepage_index(struct page *page) + { + struct page *page_head = compound_head(page); +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -77,18 +77,6 @@ static void __put_compound_page(struct p + + static void put_compound_page(struct page *page) + { +- /* +- * hugetlbfs pages cannot be split from under us. If this is a +- * hugetlbfs page, check refcount on head page and release the page if +- * the refcount becomes zero. +- */ +- if (PageHuge(page)) { +- page = compound_head(page); +- if (put_page_testzero(page)) +- __put_compound_page(page); +- return; +- } +- + if (unlikely(PageTail(page))) { + /* __split_huge_page_refcount can run under us */ + struct page *page_head = compound_trans_head(page); +@@ -96,6 +84,35 @@ static void put_compound_page(struct pag + if (likely(page != page_head && + get_page_unless_zero(page_head))) { + unsigned long flags; ++ ++ if (PageHeadHuge(page_head)) { ++ if (likely(PageTail(page))) { ++ /* ++ * __split_huge_page_refcount ++ * cannot race here. ++ */ ++ VM_BUG_ON(!PageHead(page_head)); ++ atomic_dec(&page->_mapcount); ++ if (put_page_testzero(page_head)) ++ VM_BUG_ON(1); ++ if (put_page_testzero(page_head)) ++ __put_compound_page(page_head); ++ return; ++ } else { ++ /* ++ * __split_huge_page_refcount ++ * run before us, "page" was a ++ * THP tail. The split ++ * page_head has been freed ++ * and reallocated as slab or ++ * hugetlbfs page of smaller ++ * order (only possible if ++ * reallocated as slab on ++ * x86). ++ */ ++ goto skip_lock; ++ } ++ } + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time +@@ -107,9 +124,29 @@ static void put_compound_page(struct pag + /* __split_huge_page_refcount run before us */ + compound_unlock_irqrestore(page_head, flags); + VM_BUG_ON(PageHead(page_head)); +- if (put_page_testzero(page_head)) +- __put_single_page(page_head); +- out_put_single: ++skip_lock: ++ if (put_page_testzero(page_head)) { ++ /* ++ * The head page may have been ++ * freed and reallocated as a ++ * compound page of smaller ++ * order and then freed again. ++ * All we know is that it ++ * cannot have become: a THP ++ * page, a compound page of ++ * higher order, a tail page. ++ * That is because we still ++ * hold the refcount of the ++ * split THP tail and ++ * page_head was the THP head ++ * before the split. ++ */ ++ if (PageHead(page_head)) ++ __put_compound_page(page_head); ++ else ++ __put_single_page(page_head); ++ } ++out_put_single: + if (put_page_testzero(page)) + __put_single_page(page); + return; +@@ -173,21 +210,34 @@ bool __get_page_tail(struct page *page) + */ + unsigned long flags; + bool got = false; +- struct page *page_head; ++ struct page *page_head = compound_trans_head(page); + +- /* +- * If this is a hugetlbfs page it cannot be split under us. Simply +- * increment refcount for the head page. +- */ +- if (PageHuge(page)) { +- page_head = compound_head(page); +- atomic_inc(&page_head->_count); +- got = true; +- goto out; +- } +- +- page_head = compound_trans_head(page); + if (likely(page != page_head && get_page_unless_zero(page_head))) { ++ /* Ref to put_compound_page() comment. */ ++ if (PageHeadHuge(page_head)) { ++ if (likely(PageTail(page))) { ++ /* ++ * This is a hugetlbfs ++ * page. __split_huge_page_refcount ++ * cannot race here. ++ */ ++ VM_BUG_ON(!PageHead(page_head)); ++ __get_page_tail_foll(page, false); ++ return true; ++ } else { ++ /* ++ * __split_huge_page_refcount run ++ * before us, "page" was a THP ++ * tail. The split page_head has been ++ * freed and reallocated as slab or ++ * hugetlbfs page of smaller order ++ * (only possible if reallocated as ++ * slab on x86). ++ */ ++ put_page(page_head); ++ return false; ++ } ++ } + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time +@@ -204,7 +254,6 @@ bool __get_page_tail(struct page *page) + if (unlikely(!got)) + put_page(page_head); + } +-out: + return got; + } + EXPORT_SYMBOL(__get_page_tail); diff --git a/queue-3.4/series b/queue-3.4/series index 2902dc1d45b..bf6ff413e1e 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -1 +1,2 @@ md-raid5-fix-long-standing-problem-with-bitmap-handling-on-write-failure.patch +mm-hugetlbfs-fix-hugetlbfs-optimization.patch