From a86503361db701d18e82b855fdf3a1b32b4f3288 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 May 2014 12:43:38 +0900 Subject: [PATCH] 3.10-stable patches added patches: md-raid1-r1buf_pool_alloc-free-allocate-pages-when-subsequent-allocation-fails.patch mm-hugetlb.c-add-cond_resched_lock-in-return_unused_surplus_pages.patch mm-use-paravirt-friendly-ops-for-numa-hinting-ptes.patch --- ...ges-when-subsequent-allocation-fails.patch | 76 +++++++++++ ..._lock-in-return_unused_surplus_pages.patch | 37 ++++++ ...t-friendly-ops-for-numa-hinting-ptes.patch | 120 ++++++++++++++++++ queue-3.10/series | 3 + 4 files changed, 236 insertions(+) create mode 100644 queue-3.10/md-raid1-r1buf_pool_alloc-free-allocate-pages-when-subsequent-allocation-fails.patch create mode 100644 queue-3.10/mm-hugetlb.c-add-cond_resched_lock-in-return_unused_surplus_pages.patch create mode 100644 queue-3.10/mm-use-paravirt-friendly-ops-for-numa-hinting-ptes.patch diff --git a/queue-3.10/md-raid1-r1buf_pool_alloc-free-allocate-pages-when-subsequent-allocation-fails.patch b/queue-3.10/md-raid1-r1buf_pool_alloc-free-allocate-pages-when-subsequent-allocation-fails.patch new file mode 100644 index 00000000000..149e92816d8 --- /dev/null +++ b/queue-3.10/md-raid1-r1buf_pool_alloc-free-allocate-pages-when-subsequent-allocation-fails.patch @@ -0,0 +1,76 @@ +From da1aab3dca9aa88ae34ca392470b8943159e25fe Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Wed, 9 Apr 2014 12:25:43 +1000 +Subject: md/raid1: r1buf_pool_alloc: free allocate pages when subsequent allocation fails. + +From: NeilBrown + +commit da1aab3dca9aa88ae34ca392470b8943159e25fe upstream. + +When performing a user-request check/repair (MD_RECOVERY_REQUEST is set) +on a raid1, we allocate multiple bios each with their own set of pages. + +If the page allocations for one bio fails, we currently do *not* free +the pages allocated for the previous bios, nor do we free the bio itself. 
+ +This patch frees all the already-allocate pages, and makes sure that +all the bios are freed as well. + +This bug can cause a memory leak which can ultimately OOM a machine. +It was introduced in 3.10-rc1. + +Fixes: a07876064a0b73ab5ef1ebcf14b1cf0231c07858 +Cc: Kent Overstreet +Reported-by: Russell King - ARM Linux +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid1.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +--- a/drivers/md/raid1.c ++++ b/drivers/md/raid1.c +@@ -94,6 +94,7 @@ static void * r1buf_pool_alloc(gfp_t gfp + struct pool_info *pi = data; + struct r1bio *r1_bio; + struct bio *bio; ++ int need_pages; + int i, j; + + r1_bio = r1bio_pool_alloc(gfp_flags, pi); +@@ -116,15 +117,15 @@ static void * r1buf_pool_alloc(gfp_t gfp + * RESYNC_PAGES for each bio. + */ + if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) +- j = pi->raid_disks; ++ need_pages = pi->raid_disks; + else +- j = 1; +- while(j--) { ++ need_pages = 1; ++ for (j = 0; j < need_pages; j++) { + bio = r1_bio->bios[j]; + bio->bi_vcnt = RESYNC_PAGES; + + if (bio_alloc_pages(bio, gfp_flags)) +- goto out_free_bio; ++ goto out_free_pages; + } + /* If not user-requests, copy the page pointers to all bios */ + if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { +@@ -138,6 +139,14 @@ static void * r1buf_pool_alloc(gfp_t gfp + + return r1_bio; + ++out_free_pages: ++ while (--j >= 0) { ++ struct bio_vec *bv; ++ ++ bio_for_each_segment_all(bv, r1_bio->bios[j], i) ++ __free_page(bv->bv_page); ++ } ++ + out_free_bio: + while (++j < pi->raid_disks) + bio_put(r1_bio->bios[j]); diff --git a/queue-3.10/mm-hugetlb.c-add-cond_resched_lock-in-return_unused_surplus_pages.patch b/queue-3.10/mm-hugetlb.c-add-cond_resched_lock-in-return_unused_surplus_pages.patch new file mode 100644 index 00000000000..e949e0a4ddc --- /dev/null +++ b/queue-3.10/mm-hugetlb.c-add-cond_resched_lock-in-return_unused_surplus_pages.patch @@ -0,0 +1,37 @@ 
+From 7848a4bf51b34f41fcc9bd77e837126d99ae84e3 Mon Sep 17 00:00:00 2001 +From: "Mizuma, Masayoshi" +Date: Fri, 18 Apr 2014 15:07:18 -0700 +Subject: mm/hugetlb.c: add cond_resched_lock() in return_unused_surplus_pages() + +From: "Mizuma, Masayoshi" + +commit 7848a4bf51b34f41fcc9bd77e837126d99ae84e3 upstream. + +soft lockup in freeing gigantic hugepage fixed in commit 55f67141a892 "mm: +hugetlb: fix softlockup when a large number of hugepages are freed." can +happen in return_unused_surplus_pages(), so let's fix it. + +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Naoya Horiguchi +Cc: Joonsoo Kim +Cc: Michal Hocko +Cc: Aneesh Kumar +Cc: KOSAKI Motohiro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/hugetlb.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1100,6 +1100,7 @@ static void return_unused_surplus_pages( + while (nr_pages--) { + if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) + break; ++ cond_resched_lock(&hugetlb_lock); + } + } + diff --git a/queue-3.10/mm-use-paravirt-friendly-ops-for-numa-hinting-ptes.patch b/queue-3.10/mm-use-paravirt-friendly-ops-for-numa-hinting-ptes.patch new file mode 100644 index 00000000000..4c5f58e406e --- /dev/null +++ b/queue-3.10/mm-use-paravirt-friendly-ops-for-numa-hinting-ptes.patch @@ -0,0 +1,120 @@ +From 29c7787075c92ca8af353acd5301481e6f37082f Mon Sep 17 00:00:00 2001 +From: Mel Gorman +Date: Fri, 18 Apr 2014 15:07:21 -0700 +Subject: mm: use paravirt friendly ops for NUMA hinting ptes + +From: Mel Gorman + +commit 29c7787075c92ca8af353acd5301481e6f37082f upstream. + +David Vrabel identified a regression when using automatic NUMA balancing +under Xen whereby page table entries were getting corrupted due to the +use of native PTE operations. 
Quoting him: + + Xen PV guest page tables require that their entries use machine + addresses if the present bit (_PAGE_PRESENT) is set, and (for + successful migration) non-present PTEs must use pseudo-physical + addresses. This is because on migration MFNs in present PTEs are + translated to PFNs (canonicalised) so they may be translated back + to the new MFN in the destination domain (uncanonicalised). + + pte_mknonnuma(), pmd_mknonnuma(), pte_mknuma() and pmd_mknuma() + set and clear the _PAGE_PRESENT bit using pte_set_flags(), + pte_clear_flags(), etc. + + In a Xen PV guest, these functions must translate MFNs to PFNs + when clearing _PAGE_PRESENT and translate PFNs to MFNs when setting + _PAGE_PRESENT. + +His suggested fix converted p[te|md]_[set|clear]_flags to using +paravirt-friendly ops but this is overkill. He suggested an alternative +of using p[te|md]_modify in the NUMA page table operations but this +does more work than necessary and would require looking up a VMA for +protections. + +This patch modifies the NUMA page table operations to use paravirt +friendly operations to set/clear the flags of interest. Unfortunately +this will take a performance hit when updating the PTEs on +CONFIG_PARAVIRT but I do not see a way around it that does not break +Xen. 
+ +Signed-off-by: Mel Gorman +Acked-by: David Vrabel +Tested-by: David Vrabel +Cc: Ingo Molnar +Cc: Peter Anvin +Cc: Fengguang Wu +Cc: Linus Torvalds +Cc: Steven Noonan +Cc: Rik van Riel +Cc: Peter Zijlstra +Cc: Andrea Arcangeli +Cc: Dave Hansen +Cc: Srikar Dronamraju +Cc: Cyrill Gorcunov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/asm-generic/pgtable.h | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +--- a/include/asm-generic/pgtable.h ++++ b/include/asm-generic/pgtable.h +@@ -620,32 +620,47 @@ static inline int pmd_numa(pmd_t pmd) + #ifndef pte_mknonnuma + static inline pte_t pte_mknonnuma(pte_t pte) + { +- pte = pte_clear_flags(pte, _PAGE_NUMA); +- return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); ++ pteval_t val = pte_val(pte); ++ ++ val &= ~_PAGE_NUMA; ++ val |= (_PAGE_PRESENT|_PAGE_ACCESSED); ++ return __pte(val); + } + #endif + + #ifndef pmd_mknonnuma + static inline pmd_t pmd_mknonnuma(pmd_t pmd) + { +- pmd = pmd_clear_flags(pmd, _PAGE_NUMA); +- return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); ++ pmdval_t val = pmd_val(pmd); ++ ++ val &= ~_PAGE_NUMA; ++ val |= (_PAGE_PRESENT|_PAGE_ACCESSED); ++ ++ return __pmd(val); + } + #endif + + #ifndef pte_mknuma + static inline pte_t pte_mknuma(pte_t pte) + { +- pte = pte_set_flags(pte, _PAGE_NUMA); +- return pte_clear_flags(pte, _PAGE_PRESENT); ++ pteval_t val = pte_val(pte); ++ ++ val &= ~_PAGE_PRESENT; ++ val |= _PAGE_NUMA; ++ ++ return __pte(val); + } + #endif + + #ifndef pmd_mknuma + static inline pmd_t pmd_mknuma(pmd_t pmd) + { +- pmd = pmd_set_flags(pmd, _PAGE_NUMA); +- return pmd_clear_flags(pmd, _PAGE_PRESENT); ++ pmdval_t val = pmd_val(pmd); ++ ++ val &= ~_PAGE_PRESENT; ++ val |= _PAGE_NUMA; ++ ++ return __pmd(val); + } + #endif + #else diff --git a/queue-3.10/series b/queue-3.10/series index 2aacacf3f59..68f7eea62db 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -14,3 
+14,6 @@ powerpc-add-vr-save-restore-functions.patch tgafb-fix-mode-setting-with-fbset.patch parisc-fix-epoll_pwait-syscall-on-compat-kernel.patch don-t-bother-with-get-put-_write_access-on-non-regular-files.patch +md-raid1-r1buf_pool_alloc-free-allocate-pages-when-subsequent-allocation-fails.patch +mm-hugetlb.c-add-cond_resched_lock-in-return_unused_surplus_pages.patch +mm-use-paravirt-friendly-ops-for-numa-hinting-ptes.patch -- 2.47.3