From: Greg Kroah-Hartman Date: Thu, 18 Jun 2020 16:22:10 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v4.4.228~48 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d58476ad499979f2567840b6118f63a95d493449;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: mm-initialize-deferred-pages-with-interrupts-enabled.patch mm-thp-make-the-thp-mapcount-atomic-against-__split_huge_pmd_locked.patch powerpc-mm-fix-conditions-to-perform-mmu-specific-management-by-blocks-on-ppc32.patch --- diff --git a/queue-5.4/mm-initialize-deferred-pages-with-interrupts-enabled.patch b/queue-5.4/mm-initialize-deferred-pages-with-interrupts-enabled.patch new file mode 100644 index 00000000000..3e97fe8576d --- /dev/null +++ b/queue-5.4/mm-initialize-deferred-pages-with-interrupts-enabled.patch @@ -0,0 +1,109 @@ +From 3d060856adfc59afb9d029c233141334cfaba418 Mon Sep 17 00:00:00 2001 +From: Pavel Tatashin +Date: Wed, 3 Jun 2020 15:59:24 -0700 +Subject: mm: initialize deferred pages with interrupts enabled + +From: Pavel Tatashin + +commit 3d060856adfc59afb9d029c233141334cfaba418 upstream. + +Initializing struct pages is a long task and keeping interrupts disabled +for the duration of this operation introduces a number of problems. + +1. jiffies are not updated for long period of time, and thus incorrect time + is reported. See proposed solution and discussion here: + lkml/20200311123848.118638-1-shile.zhang@linux.alibaba.com +2. It prevents farther improving deferred page initialization by allowing + intra-node multi-threading. + +We are keeping interrupts disabled to solve a rather theoretical problem +that was never observed in real world (See 3a2d7fa8a3d5). + +Let's keep interrupts enabled. In case we ever encounter a scenario where +an interrupt thread wants to allocate large amount of memory this early in +boot we can deal with that by growing zone (see deferred_grow_zone()) by +the needed amount before starting deferred_init_memmap() threads. + +Before: +[ 1.232459] node 0 initialised, 12058412 pages in 1ms + +After: +[ 1.632580] node 0 initialised, 12051227 pages in 436ms + +Fixes: 3a2d7fa8a3d5 ("mm: disable interrupts while initializing deferred pages") +Reported-by: Shile Zhang +Signed-off-by: Pavel Tatashin +Signed-off-by: Andrew Morton +Reviewed-by: Daniel Jordan +Reviewed-by: David Hildenbrand +Acked-by: Michal Hocko +Acked-by: Vlastimil Babka +Cc: Dan Williams +Cc: James Morris +Cc: Kirill Tkhai +Cc: Sasha Levin +Cc: Yiqian Wei +Cc: [4.17+] +Link: http://lkml.kernel.org/r/20200403140952.17177-3-pasha.tatashin@soleen.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/mmzone.h | 2 ++ + mm/page_alloc.c | 20 +++++++------------- + 2 files changed, 9 insertions(+), 13 deletions(-) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -709,6 +709,8 @@ typedef struct pglist_data { + /* + * Must be held any time you expect node_start_pfn, + * node_present_pages, node_spanned_pages or nr_zones to stay constant. ++ * Also synchronizes pgdat->first_deferred_pfn during deferred page ++ * init. 
+ * + * pgdat_resize_lock() and pgdat_resize_unlock() are provided to + * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1793,6 +1793,13 @@ static int __init deferred_init_memmap(v + BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat)); + pgdat->first_deferred_pfn = ULONG_MAX; + ++ /* ++ * Once we unlock here, the zone cannot be grown anymore, thus if an ++ * interrupt thread must allocate this early in boot, zone must be ++ * pre-grown prior to start of deferred page initialization. ++ */ ++ pgdat_resize_unlock(pgdat, &flags); ++ + /* Only the highest zone is deferred so find it */ + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + zone = pgdat->node_zones + zid; +@@ -1813,8 +1820,6 @@ static int __init deferred_init_memmap(v + while (spfn < epfn) + nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn); + zone_empty: +- pgdat_resize_unlock(pgdat, &flags); +- + /* Sanity check that the next zone really is unpopulated */ + WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); + +@@ -1857,17 +1862,6 @@ deferred_grow_zone(struct zone *zone, un + pgdat_resize_lock(pgdat, &flags); + + /* +- * If deferred pages have been initialized while we were waiting for +- * the lock, return true, as the zone was grown. The caller will retry +- * this zone. We won't return to this function since the caller also +- * has this static branch. +- */ +- if (!static_branch_unlikely(&deferred_pages)) { +- pgdat_resize_unlock(pgdat, &flags); +- return true; +- } +- +- /* + * If someone grew this zone while we were waiting for spinlock, return + * true, as there might be enough pages already. + */ diff --git a/queue-5.4/mm-thp-make-the-thp-mapcount-atomic-against-__split_huge_pmd_locked.patch b/queue-5.4/mm-thp-make-the-thp-mapcount-atomic-against-__split_huge_pmd_locked.patch new file mode 100644 index 00000000000..f0567ef29f9 --- /dev/null +++ b/queue-5.4/mm-thp-make-the-thp-mapcount-atomic-against-__split_huge_pmd_locked.patch @@ -0,0 +1,102 @@ +From c444eb564fb16645c172d550359cb3d75fe8a040 Mon Sep 17 00:00:00 2001 +From: Andrea Arcangeli +Date: Wed, 27 May 2020 19:06:24 -0400 +Subject: mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked() + +From: Andrea Arcangeli + +commit c444eb564fb16645c172d550359cb3d75fe8a040 upstream. + +Write protect anon page faults require an accurate mapcount to decide +if to break the COW or not. This is implemented in the THP path with +reuse_swap_page() -> +page_trans_huge_map_swapcount()/page_trans_huge_mapcount(). + +If the COW triggers while the other processes sharing the page are +under a huge pmd split, to do an accurate reading, we must ensure the +mapcount isn't computed while it's being transferred from the head +page to the tail pages. + +reuse_swap_cache() already runs serialized by the page lock, so it's +enough to add the page lock around __split_huge_pmd_locked too, in +order to add the missing serialization. + +Note: the commit in "Fixes" is just to facilitate the backporting, +because the code before such commit didn't try to do an accurate THP +mapcount calculation and it instead used the page_count() to decide if +to COW or not. Both the page_count and the pin_count are THP-wide +refcounts, so they're inaccurate if used in +reuse_swap_page(). Reverting such commit (besides the unrelated fix to +the local anon_vma assignment) would have also opened the window for +memory corruption side effects to certain workloads as documented in +such commit header. 
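
As a rough userspace sketch of the lock-ordering idea this fix relies on in
__split_huge_pmd(): while holding a spinlock you may only trylock a sleeping
lock; if that fails you must drop the spinlock, sleep on the lock, re-take the
spinlock and revalidate the state you sampled before dropping it. Plain
pthreads stand in for the kernel's pmd spinlock and page lock here; nothing
below is kernel API, and the retry logic is simplified compared to the diff
that follows.

/* Minimal pthread sketch of the trylock / drop / revalidate pattern. */
#include <pthread.h>

static pthread_spinlock_t ptl;                  /* stands in for the pmd lock */
static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long pmd_state;                 /* stands in for *pmd */

static void split_like_operation(void)
{
	unsigned long snapshot;

	pthread_spin_lock(&ptl);
again:
	if (pthread_mutex_trylock(&page_lock)) {
		/* Contended: never sleep while holding the spinlock. */
		snapshot = pmd_state;
		pthread_spin_unlock(&ptl);
		pthread_mutex_lock(&page_lock);
		pthread_spin_lock(&ptl);
		if (pmd_state != snapshot) {
			/* State changed while we slept: retry from scratch. */
			pthread_mutex_unlock(&page_lock);
			goto again;
		}
	}
	/* Both locks held and state validated: the split itself would go here. */
	pmd_state++;
	pthread_mutex_unlock(&page_lock);
	pthread_spin_unlock(&ptl);
}

int main(void)
{
	pthread_spin_init(&ptl, PTHREAD_PROCESS_PRIVATE);
	split_like_operation();
	return pthread_spin_destroy(&ptl);
}
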
+ +Signed-off-by: Andrea Arcangeli +Suggested-by: Jann Horn +Reported-by: Jann Horn +Acked-by: Kirill A. Shutemov +Fixes: 6d0a07edd17c ("mm: thp: calculate the mapcount correctly for THP pages during WP faults") +Cc: stable@vger.kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/huge_memory.c | 31 ++++++++++++++++++++++++++++--- + 1 file changed, 28 insertions(+), 3 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2301,6 +2301,8 @@ void __split_huge_pmd(struct vm_area_str + { + spinlock_t *ptl; + struct mmu_notifier_range range; ++ bool was_locked = false; ++ pmd_t _pmd; + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, + address & HPAGE_PMD_MASK, +@@ -2313,11 +2315,32 @@ void __split_huge_pmd(struct vm_area_str + * pmd against. Otherwise we can end up replacing wrong page. + */ + VM_BUG_ON(freeze && !page); +- if (page && page != pmd_page(*pmd)) +- goto out; ++ if (page) { ++ VM_WARN_ON_ONCE(!PageLocked(page)); ++ was_locked = true; ++ if (page != pmd_page(*pmd)) ++ goto out; ++ } + ++repeat: + if (pmd_trans_huge(*pmd)) { +- page = pmd_page(*pmd); ++ if (!page) { ++ page = pmd_page(*pmd); ++ if (unlikely(!trylock_page(page))) { ++ get_page(page); ++ _pmd = *pmd; ++ spin_unlock(ptl); ++ lock_page(page); ++ spin_lock(ptl); ++ if (unlikely(!pmd_same(*pmd, _pmd))) { ++ unlock_page(page); ++ put_page(page); ++ page = NULL; ++ goto repeat; ++ } ++ put_page(page); ++ } ++ } + if (PageMlocked(page)) + clear_page_mlock(page); + } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd))) +@@ -2325,6 +2348,8 @@ void __split_huge_pmd(struct vm_area_str + __split_huge_pmd_locked(vma, pmd, range.start, freeze); + out: + spin_unlock(ptl); ++ if (!was_locked && page) ++ unlock_page(page); + /* + * No need to double call mmu_notifier->invalidate_range() callback. + * They are 3 cases to consider inside __split_huge_pmd_locked(): diff --git a/queue-5.4/powerpc-mm-fix-conditions-to-perform-mmu-specific-management-by-blocks-on-ppc32.patch b/queue-5.4/powerpc-mm-fix-conditions-to-perform-mmu-specific-management-by-blocks-on-ppc32.patch new file mode 100644 index 00000000000..1e33bec8ed8 --- /dev/null +++ b/queue-5.4/powerpc-mm-fix-conditions-to-perform-mmu-specific-management-by-blocks-on-ppc32.patch @@ -0,0 +1,46 @@ +From 4e3319c23a66dabfd6c35f4d2633d64d99b68096 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Tue, 19 May 2020 05:48:59 +0000 +Subject: powerpc/mm: Fix conditions to perform MMU specific management by blocks on PPC32. + +From: Christophe Leroy + +commit 4e3319c23a66dabfd6c35f4d2633d64d99b68096 upstream. + +Setting init mem to NX shall depend on sinittext being mapped by +block, not on stext being mapped by block. + +Setting text and rodata to RO shall depend on stext being mapped by +block, not on sinittext being mapped by block. 
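
Restating the corrected pairing as a stubbed, userspace-buildable sketch (the
helpers below are placeholders standing in for the real routines touched by
the diff, and the addresses are invented for illustration): the NX pass keys
its block-mapping test off _sinittext, the RO pass keys it off _stext.

/* Stubbed sketch of the corrected conditions; not the real powerpc code. */
#include <stdbool.h>
#include <stdio.h>

/* Pretend only the region below 0xc0800000 is covered by block mappings. */
static unsigned long _stext = 0xc0000000, _sinittext = 0xc0800000;

static bool v_block_mapped(unsigned long va) { return va < 0xc0800000; }
static void mmu_mark_initmem_nx(void) { puts("init text: set NX on the block mapping"); }
static void mmu_mark_rodata_ro(void)  { puts("text/rodata: set RO on the block mapping"); }
static void change_page_attr(const char *what) { printf("%s: change per-page attributes\n", what); }

static void mark_initmem_nx(void)
{
	/* Init text starts at _sinittext, so that is the address to test. */
	if (v_block_mapped(_sinittext))
		mmu_mark_initmem_nx();
	else
		change_page_attr("init text");
}

static void mark_rodata_ro(void)
{
	/* Kernel text starts at _stext, so test just inside that region. */
	if (v_block_mapped(_stext + 1))
		mmu_mark_rodata_ro();
	else
		change_page_attr("text/rodata");
}

int main(void)
{
	mark_initmem_nx();
	mark_rodata_ro();
	return 0;
}
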
+ +Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX") +Cc: stable@vger.kernel.org +Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/7d565fb8f51b18a3d98445a830b2f6548cb2da2a.1589866984.git.christophe.leroy@csgroup.eu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/mm/pgtable_32.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/mm/pgtable_32.c ++++ b/arch/powerpc/mm/pgtable_32.c +@@ -207,7 +207,7 @@ void mark_initmem_nx(void) + unsigned long numpages = PFN_UP((unsigned long)_einittext) - + PFN_DOWN((unsigned long)_sinittext); + +- if (v_block_mapped((unsigned long)_stext + 1)) ++ if (v_block_mapped((unsigned long)_sinittext)) + mmu_mark_initmem_nx(); + else + change_page_attr(page, numpages, PAGE_KERNEL); +@@ -219,7 +219,7 @@ void mark_rodata_ro(void) + struct page *page; + unsigned long numpages; + +- if (v_block_mapped((unsigned long)_sinittext)) { ++ if (v_block_mapped((unsigned long)_stext + 1)) { + mmu_mark_rodata_ro(); + ptdump_check_wx(); + return; diff --git a/queue-5.4/series b/queue-5.4/series index 6fbe053d37f..95be475a95c 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -165,3 +165,6 @@ btrfs-fix-error-handling-when-submitting-direct-i-o-bio.patch btrfs-fix-wrong-file-range-cleanup-after-an-error-filling-dealloc-range.patch btrfs-fix-space_info-bytes_may_use-underflow-after-nocow-buffered-write.patch btrfs-fix-space_info-bytes_may_use-underflow-during-space-cache-writeout.patch +powerpc-mm-fix-conditions-to-perform-mmu-specific-management-by-blocks-on-ppc32.patch +mm-thp-make-the-thp-mapcount-atomic-against-__split_huge_pmd_locked.patch +mm-initialize-deferred-pages-with-interrupts-enabled.patch