From f11d102a72156934a1a2f62b59f211d4323eb2df Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 28 Jun 2021 14:01:08 +0200 Subject: [PATCH] 5.4-stable patches added patches: mm-add-vm_warn_on_once_page-macro.patch mm-page_vma_mapped_walk-add-a-level-of-indentation.patch mm-page_vma_mapped_walk-crossing-page-table-boundary.patch mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch mm-rmap-use-page_not_mapped-in-try_to_unmap.patch mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch mm-thp-use-head-page-in-__migration_entry_wait.patch --- .../mm-add-vm_warn_on_once_page-macro.patch | 59 +++++++ ...hared-futex-pgoff-on-shmem-huge-page.patch | 2 +- ...pped_walk-add-a-level-of-indentation.patch | 153 ++++++++++++++++++ ...ed_walk-crossing-page-table-boundary.patch | 54 +++++++ ...ped_walk-get-vma_address_end-earlier.patch | 61 +++++++ ...d_walk-prettify-pvmw_migration-block.patch | 73 +++++++++ ...mapped_walk-settle-pagehuge-on-entry.patch | 66 ++++++++ ...ped_walk-use-goto-instead-of-while-1.patch | 59 +++++++ ...a_mapped_walk-use-page-for-pvmw-page.patch | 71 ++++++++ ...ma_mapped_walk-use-pmde-for-pvmw-pmd.patch | 56 +++++++ ...nneeded-semicolon-in-page_not_mapped.patch | 31 ++++ ...-use-page_not_mapped-in-try_to_unmap.patch | 60 +++++++ ...vmw_sync-fix-in-page_vma_mapped_walk.patch | 46 ++++++ ...it_huge_pmd_locked-on-shmem-migratio.patch | 130 +++++++++++++++ 
...age_address_in_vma-on-file-thp-tails.patch | 61 +++++++ ...ma_mapped_walk-if-thp-mapped-by-ptes.patch | 119 ++++++++++++++ ...if-virtual-address-below-file-offset.patch | 6 +- ...ke-is_huge_zero_pmd-safe-and-quicker.patch | 116 +++++++++++++ ...h-vm_warn-when-unmap-fails-for-split.patch | 12 +- ...nmap-use-ttu_sync-for-safe-splitting.patch | 6 +- ...age-to-fix-thp-truncate_cleanup_page.patch | 2 +- ...-head-page-in-__migration_entry_wait.patch | 65 ++++++++ queue-5.4/series | 17 ++ 23 files changed, 1311 insertions(+), 14 deletions(-) create mode 100644 queue-5.4/mm-add-vm_warn_on_once_page-macro.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch create mode 100644 queue-5.4/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch create mode 100644 queue-5.4/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch create mode 100644 queue-5.4/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch create mode 100644 queue-5.4/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch create mode 100644 queue-5.4/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch create mode 100644 queue-5.4/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch create mode 100644 queue-5.4/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch create mode 100644 queue-5.4/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch create mode 100644 queue-5.4/mm-thp-use-head-page-in-__migration_entry_wait.patch diff --git 
a/queue-5.4/mm-add-vm_warn_on_once_page-macro.patch b/queue-5.4/mm-add-vm_warn_on_once_page-macro.patch new file mode 100644 index 00000000000..918baa14b15 --- /dev/null +++ b/queue-5.4/mm-add-vm_warn_on_once_page-macro.patch @@ -0,0 +1,59 @@ +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 +From: Alex Shi +Date: Fri, 18 Dec 2020 14:01:31 -0800 +Subject: mm: add VM_WARN_ON_ONCE_PAGE() macro + +From: Alex Shi + +[ Upstream commit a4055888629bc0467d12d912cd7c90acdf3d9b12 part ] + +Add VM_WARN_ON_ONCE_PAGE() macro. + +Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com +Signed-off-by: Alex Shi +Acked-by: Michal Hocko +Acked-by: Hugh Dickins +Acked-by: Johannes Weiner +Cc: Vladimir Davydov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +Note on stable backport: original commit was titled +mm/memcg: warning on !memcg after readahead page charged +which included uses of this macro in mm/memcontrol.c: here omitted. + +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mmdebug.h | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/include/linux/mmdebug.h ++++ b/include/linux/mmdebug.h +@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm) + BUG(); \ + } \ + } while (0) ++#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ ++ static bool __section(".data.once") __warned; \ ++ int __ret_warn_once = !!(cond); \ ++ \ ++ if (unlikely(__ret_warn_once && !__warned)) { \ ++ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ ++ __warned = true; \ ++ WARN_ON(1); \ ++ } \ ++ unlikely(__ret_warn_once); \ ++}) ++ + #define VM_WARN_ON(cond) (void)WARN_ON(cond) + #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) + #define VM_WARN_ONCE(cond, format...) 
(void)WARN_ONCE(cond, format) +@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm) + #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) + #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) + #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) ++#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) + #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) + #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) + #endif diff --git a/queue-5.4/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch b/queue-5.4/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch index ee847d04609..89cdb896470 100644 --- a/queue-5.4/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch +++ b/queue-5.4/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch @@ -1,4 +1,4 @@ -From foo@baz Mon Jun 28 01:37:10 PM CEST 2021 +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 From: Hugh Dickins Date: Thu, 24 Jun 2021 18:39:52 -0700 Subject: mm, futex: fix shared futex pgoff on shmem huge page diff --git a/queue-5.4/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch b/queue-5.4/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch new file mode 100644 index 00000000000..382f21002de --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch @@ -0,0 +1,153 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:17 -0700 +Subject: mm: page_vma_mapped_walk(): add a level of indentation + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: add a level of indentation to much of +the body, making no functional change in this commit, but reducing the +later diff when this is all converted to a loop. 
+ +[hughd@google.com: page_vma_mapped_walk(): add a level of indentation fix] + Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com + +Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 105 ++++++++++++++++++++++++++------------------------- + 1 file changed, 55 insertions(+), 50 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -168,62 +168,67 @@ bool page_vma_mapped_walk(struct page_vm + if (pvmw->pte) + goto next_pte; + restart: +- pgd = pgd_offset(mm, pvmw->address); +- if (!pgd_present(*pgd)) +- return false; +- p4d = p4d_offset(pgd, pvmw->address); +- if (!p4d_present(*p4d)) +- return false; +- pud = pud_offset(p4d, pvmw->address); +- if (!pud_present(*pud)) +- return false; +- pvmw->pmd = pmd_offset(pud, pvmw->address); +- /* +- * Make sure the pmd value isn't cached in a register by the +- * compiler and used as a stale value after we've observed a +- * subsequent update.
+- */ +- pmde = READ_ONCE(*pvmw->pmd); +- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { +- pvmw->ptl = pmd_lock(mm, pvmw->pmd); +- pmde = *pvmw->pmd; +- if (likely(pmd_trans_huge(pmde))) { +- if (pvmw->flags & PVMW_MIGRATION) +- return not_found(pvmw); +- if (pmd_page(pmde) != page) +- return not_found(pvmw); +- return true; +- } +- if (!pmd_present(pmde)) { +- swp_entry_t entry; ++ { ++ pgd = pgd_offset(mm, pvmw->address); ++ if (!pgd_present(*pgd)) ++ return false; ++ p4d = p4d_offset(pgd, pvmw->address); ++ if (!p4d_present(*p4d)) ++ return false; ++ pud = pud_offset(p4d, pvmw->address); ++ if (!pud_present(*pud)) ++ return false; + +- if (!thp_migration_supported() || +- !(pvmw->flags & PVMW_MIGRATION)) +- return not_found(pvmw); +- entry = pmd_to_swp_entry(pmde); +- if (!is_migration_entry(entry) || +- migration_entry_to_page(entry) != page) +- return not_found(pvmw); +- return true; +- } +- /* THP pmd was split under us: handle on pte level */ +- spin_unlock(pvmw->ptl); +- pvmw->ptl = NULL; +- } else if (!pmd_present(pmde)) { ++ pvmw->pmd = pmd_offset(pud, pvmw->address); + /* +- * If PVMW_SYNC, take and drop THP pmd lock so that we +- * cannot return prematurely, while zap_huge_pmd() has +- * cleared *pmd but not decremented compound_mapcount(). ++ * Make sure the pmd value isn't cached in a register by the ++ * compiler and used as a stale value after we've observed a ++ * subsequent update. 
+ */ +- if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) { +- spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); ++ pmde = READ_ONCE(*pvmw->pmd); ++ ++ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { ++ pvmw->ptl = pmd_lock(mm, pvmw->pmd); ++ pmde = *pvmw->pmd; ++ if (likely(pmd_trans_huge(pmde))) { ++ if (pvmw->flags & PVMW_MIGRATION) ++ return not_found(pvmw); ++ if (pmd_page(pmde) != page) ++ return not_found(pvmw); ++ return true; ++ } ++ if (!pmd_present(pmde)) { ++ swp_entry_t entry; + +- spin_unlock(ptl); ++ if (!thp_migration_supported() || ++ !(pvmw->flags & PVMW_MIGRATION)) ++ return not_found(pvmw); ++ entry = pmd_to_swp_entry(pmde); ++ if (!is_migration_entry(entry) || ++ migration_entry_to_page(entry) != page) ++ return not_found(pvmw); ++ return true; ++ } ++ /* THP pmd was split under us: handle on pte level */ ++ spin_unlock(pvmw->ptl); ++ pvmw->ptl = NULL; ++ } else if (!pmd_present(pmde)) { ++ /* ++ * If PVMW_SYNC, take and drop THP pmd lock so that we ++ * cannot return prematurely, while zap_huge_pmd() has ++ * cleared *pmd but not decremented compound_mapcount(). 
++ */ ++ if ((pvmw->flags & PVMW_SYNC) && ++ PageTransCompound(page)) { ++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); ++ ++ spin_unlock(ptl); ++ } ++ return false; + } +- return false; ++ if (!map_pte(pvmw)) ++ goto next_pte; + } +- if (!map_pte(pvmw)) +- goto next_pte; + while (1) { + unsigned long end; + diff --git a/queue-5.4/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch b/queue-5.4/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch new file mode 100644 index 00000000000..c75be1be645 --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch @@ -0,0 +1,54 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:14 -0700 +Subject: mm: page_vma_mapped_walk(): crossing page table boundary + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: adjust the test for crossing page table +boundary - I believe pvmw->address is always page-aligned, but nothing +else here assumed that; and remember to reset pvmw->pte to NULL after +unmapping the page table, though I never saw any bug from that. + +Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -239,16 +239,16 @@ next_pte: + if (pvmw->address >= end) + return not_found(pvmw); + /* Did we cross page table boundary? 
*/ +- if (pvmw->address % PMD_SIZE == 0) { +- pte_unmap(pvmw->pte); ++ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { + if (pvmw->ptl) { + spin_unlock(pvmw->ptl); + pvmw->ptl = NULL; + } ++ pte_unmap(pvmw->pte); ++ pvmw->pte = NULL; + goto restart; +- } else { +- pvmw->pte++; + } ++ pvmw->pte++; + } while (pte_none(*pvmw->pte)); + + if (!pvmw->ptl) { diff --git a/queue-5.4/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch b/queue-5.4/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch new file mode 100644 index 00000000000..900a6620819 --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch @@ -0,0 +1,61 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:23 -0700 +Subject: mm: page_vma_mapped_walk(): get vma_address_end() earlier + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the +start, rather than later at next_pte. + +It's a little unnecessary overhead on the first call, but makes for a +simpler loop in the following commit. + +Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -166,6 +166,15 @@ bool page_vma_mapped_walk(struct page_vm + return true; + } + ++ /* ++ * Seek to next pte only makes sense for THP. ++ * But more important than that optimization, is to filter out ++ * any PageKsm page: whose page->index misleads vma_address() ++ * and vma_address_end() to disaster. ++ */ ++ end = PageTransCompound(page) ? 
++ vma_address_end(page, pvmw->vma) : ++ pvmw->address + PAGE_SIZE; + if (pvmw->pte) + goto next_pte; + restart: +@@ -233,10 +242,6 @@ this_pte: + if (check_pte(pvmw)) + return true; + next_pte: +- /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(page)) +- return not_found(pvmw); +- end = vma_address_end(page, pvmw->vma); + do { + pvmw->address += PAGE_SIZE; + if (pvmw->address >= end) diff --git a/queue-5.4/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch b/queue-5.4/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch new file mode 100644 index 00000000000..4a094dedd49 --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch @@ -0,0 +1,73 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:10 -0700 +Subject: mm: page_vma_mapped_walk(): prettify PVMW_MIGRATION block + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to +follow the same "return not_found, return not_found, return true" +pattern as the block above it (note: returning not_found there is never +premature, since existence or prior existence of huge pmd guarantees +good alignment). + +Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
Shutemov +Reviewed-by: Peter Xu +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 30 ++++++++++++++---------------- + 1 file changed, 14 insertions(+), 16 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -193,24 +193,22 @@ restart: + if (pmd_page(pmde) != page) + return not_found(pvmw); + return true; +- } else if (!pmd_present(pmde)) { +- if (thp_migration_supported()) { +- if (!(pvmw->flags & PVMW_MIGRATION)) +- return not_found(pvmw); +- if (is_migration_entry(pmd_to_swp_entry(pmde))) { +- swp_entry_t entry = pmd_to_swp_entry(pmde); ++ } ++ if (!pmd_present(pmde)) { ++ swp_entry_t entry; + +- if (migration_entry_to_page(entry) != page) +- return not_found(pvmw); +- return true; +- } +- } +- return not_found(pvmw); +- } else { +- /* THP pmd was split under us: handle on pte level */ +- spin_unlock(pvmw->ptl); +- pvmw->ptl = NULL; ++ if (!thp_migration_supported() || ++ !(pvmw->flags & PVMW_MIGRATION)) ++ return not_found(pvmw); ++ entry = pmd_to_swp_entry(pmde); ++ if (!is_migration_entry(entry) || ++ migration_entry_to_page(entry) != page) ++ return not_found(pvmw); ++ return true; + } ++ /* THP pmd was split under us: handle on pte level */ ++ spin_unlock(pvmw->ptl); ++ pvmw->ptl = NULL; + } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we diff --git a/queue-5.4/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch b/queue-5.4/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch new file mode 100644 index 00000000000..8862408be29 --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch @@ -0,0 +1,66 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:04 -0700 +Subject: mm: page_vma_mapped_walk(): settle PageHuge on 
entry + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of +the way at the start, so no need to worry about it later. + +Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Reviewed-by: Peter Xu +Cc: Alistair Popple +Cc: "Kirill A. Shutemov" +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -148,10 +148,11 @@ bool page_vma_mapped_walk(struct page_vm + if (pvmw->pmd && !pvmw->pte) + return not_found(pvmw); + +- if (pvmw->pte) +- goto next_pte; +- + if (unlikely(PageHuge(page))) { ++ /* The only possible mapping was handled on last iteration */ ++ if (pvmw->pte) ++ return not_found(pvmw); ++ + /* when pud is not present, pte will be NULL */ + pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); + if (!pvmw->pte) +@@ -163,6 +164,9 @@ bool page_vma_mapped_walk(struct page_vm + return not_found(pvmw); + return true; + } ++ ++ if (pvmw->pte) ++ goto next_pte; + restart: + pgd = pgd_offset(mm, pvmw->address); + if (!pgd_present(*pgd)) +@@ -228,7 +232,7 @@ restart: + return true; + next_pte: + /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(page) || PageHuge(page)) ++ if (!PageTransHuge(page)) + return not_found(pvmw); + end = vma_address_end(page, pvmw->vma); + do { diff --git a/queue-5.4/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch b/queue-5.4/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch new file mode 100644 index 00000000000..2aa4dc8ed10 --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch @@ -0,0 +1,59 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 
2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:20 -0700 +Subject: mm: page_vma_mapped_walk(): use goto instead of while (1) + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte, +and use "goto this_pte", in place of the "while (1)" loop at the end. + +Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -139,6 +139,7 @@ bool page_vma_mapped_walk(struct page_vm + { + struct mm_struct *mm = pvmw->vma->vm_mm; + struct page *page = pvmw->page; ++ unsigned long end; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; +@@ -228,10 +229,7 @@ restart: + } + if (!map_pte(pvmw)) + goto next_pte; +- } +- while (1) { +- unsigned long end; +- ++this_pte: + if (check_pte(pvmw)) + return true; + next_pte: +@@ -260,6 +258,7 @@ next_pte: + pvmw->ptl = pte_lockptr(mm, pvmw->pmd); + spin_lock(pvmw->ptl); + } ++ goto this_pte; + } + } + diff --git a/queue-5.4/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch b/queue-5.4/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch new file mode 100644 index 00000000000..ee70f1fa740 --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch @@ -0,0 +1,71 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:01 -0700 +Subject: mm: page_vma_mapped_walk(): use page for pvmw->page + +From: Hugh Dickins + +Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes". 
+ +I've marked all of these for stable: many are merely cleanups, but I +think they are much better before the main fix than after. + +This patch (of 11): + +page_vma_mapped_walk() cleanup: sometimes the local copy of pvmw->page +was used, sometimes pvmw->page itself: use the local copy "page" +throughout. + +Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com +Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com +Signed-off-by: Hugh Dickins +Reviewed-by: Alistair Popple +Acked-by: Kirill A. Shutemov +Reviewed-by: Peter Xu +Cc: Yang Shi +Cc: Wang Yugui +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Zi Yan +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -151,7 +151,7 @@ bool page_vma_mapped_walk(struct page_vm + if (pvmw->pte) + goto next_pte; + +- if (unlikely(PageHuge(pvmw->page))) { ++ if (unlikely(PageHuge(page))) { + /* when pud is not present, pte will be NULL */ + pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); + if (!pvmw->pte) +@@ -212,8 +212,7 @@ restart: + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount().
+ */ +- if ((pvmw->flags & PVMW_SYNC) && +- PageTransCompound(pvmw->page)) { ++ if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); +@@ -229,9 +228,9 @@ restart: + return true; + next_pte: + /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) ++ if (!PageTransHuge(page) || PageHuge(page)) + return not_found(pvmw); +- end = vma_address_end(pvmw->page, pvmw->vma); ++ end = vma_address_end(page, pvmw->vma); + do { + pvmw->address += PAGE_SIZE; + if (pvmw->address >= end) diff --git a/queue-5.4/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch b/queue-5.4/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch new file mode 100644 index 00000000000..2abedb4fc3e --- /dev/null +++ b/queue-5.4/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch @@ -0,0 +1,56 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:07 -0700 +Subject: mm: page_vma_mapped_walk(): use pmde for *pvmw->pmd + +From: Hugh Dickins + +page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then +use it in subsequent tests, instead of repeatedly dereferencing pointer. + +Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
Shutemov +Reviewed-by: Peter Xu +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -186,18 +186,19 @@ restart: + pmde = READ_ONCE(*pvmw->pmd); + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { + pvmw->ptl = pmd_lock(mm, pvmw->pmd); +- if (likely(pmd_trans_huge(*pvmw->pmd))) { ++ pmde = *pvmw->pmd; ++ if (likely(pmd_trans_huge(pmde))) { + if (pvmw->flags & PVMW_MIGRATION) + return not_found(pvmw); +- if (pmd_page(*pvmw->pmd) != page) ++ if (pmd_page(pmde) != page) + return not_found(pvmw); + return true; +- } else if (!pmd_present(*pvmw->pmd)) { ++ } else if (!pmd_present(pmde)) { + if (thp_migration_supported()) { + if (!(pvmw->flags & PVMW_MIGRATION)) + return not_found(pvmw); +- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { +- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); ++ if (is_migration_entry(pmd_to_swp_entry(pmde))) { ++ swp_entry_t entry = pmd_to_swp_entry(pmde); + + if (migration_entry_to_page(entry) != page) + return not_found(pvmw); diff --git a/queue-5.4/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch b/queue-5.4/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch new file mode 100644 index 00000000000..f4cc9c2a35c --- /dev/null +++ b/queue-5.4/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch @@ -0,0 +1,31 @@ +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 +From: Miaohe Lin +Date: Thu, 25 Feb 2021 17:17:56 -0800 +Subject: mm/rmap: remove unneeded semicolon in page_not_mapped() + +From: Miaohe Lin + +[ Upstream commit e0af87ff7afcde2660be44302836d2d5618185af ] + +Remove extra semicolon without any functional change intended. 
+ +Link: https://lkml.kernel.org/r/20210127093425.39640-1-linmiaohe@huawei.com +Signed-off-by: Miaohe Lin +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/rmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1737,7 +1737,7 @@ bool try_to_unmap(struct page *page, enu + static int page_not_mapped(struct page *page) + { + return !page_mapped(page); +-}; ++} + + /** + * try_to_munlock - try to munlock a page diff --git a/queue-5.4/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch b/queue-5.4/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch new file mode 100644 index 00000000000..330e8f69a5c --- /dev/null +++ b/queue-5.4/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch @@ -0,0 +1,60 @@ +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 +From: Miaohe Lin +Date: Thu, 25 Feb 2021 17:18:03 -0800 +Subject: mm/rmap: use page_not_mapped in try_to_unmap() + +From: Miaohe Lin + +[ Upstream commit b7e188ec98b1644ff70a6d3624ea16aadc39f5e0 ] + +page_mapcount_is_zero() calculates accurately how many mappings a hugepage +has in order to check against 0 only. This is a waste of cpu time. We +can do this via page_not_mapped() to save some possible atomic_read +cycles. Remove the function page_mapcount_is_zero() as it's not used +anymore and move page_not_mapped() above try_to_unmap() to avoid +identifier undeclared compilation error. 
+ +Link: https://lkml.kernel.org/r/20210130084904.35307-1-linmiaohe@huawei.com +Signed-off-by: Miaohe Lin +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/rmap.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1690,9 +1690,9 @@ static bool invalid_migration_vma(struct + return is_vma_temporary_stack(vma); + } + +-static int page_mapcount_is_zero(struct page *page) ++static int page_not_mapped(struct page *page) + { +- return !total_mapcount(page); ++ return !page_mapped(page); + } + + /** +@@ -1710,7 +1710,7 @@ bool try_to_unmap(struct page *page, enu + struct rmap_walk_control rwc = { + .rmap_one = try_to_unmap_one, + .arg = (void *)flags, +- .done = page_mapcount_is_zero, ++ .done = page_not_mapped, + .anon_lock = page_lock_anon_vma_read, + }; + +@@ -1734,11 +1734,6 @@ bool try_to_unmap(struct page *page, enu + return !page_mapcount(page) ? true : false; + } + +-static int page_not_mapped(struct page *page) +-{ +- return !page_mapped(page); +-} +- + /** + * try_to_munlock - try to munlock a page + * @page: the page to be munlocked diff --git a/queue-5.4/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch b/queue-5.4/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch new file mode 100644 index 00000000000..276653e883f --- /dev/null +++ b/queue-5.4/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch @@ -0,0 +1,46 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:30 -0700 +Subject: mm/thp: another PVMW_SYNC fix in page_vma_mapped_walk() + +From: Hugh Dickins + +Aha! Shouldn't that quick scan over pte_none()s make sure that it holds +ptlock in the PVMW_SYNC case? That too might have been responsible for +BUGs or WARNs in split_huge_page_to_list() or its unmap_page(), though +I've never seen any. 
+ +Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com +Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/ +Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Tested-by: Wang Yugui +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -271,6 +271,10 @@ next_pte: + goto restart; + } + pvmw->pte++; ++ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) { ++ pvmw->ptl = pte_lockptr(mm, pvmw->pmd); ++ spin_lock(pvmw->ptl); ++ } + } while (pte_none(*pvmw->pte)); + + if (!pvmw->ptl) { diff --git a/queue-5.4/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch b/queue-5.4/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch new file mode 100644 index 00000000000..6a765788800 --- /dev/null +++ b/queue-5.4/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch @@ -0,0 +1,130 @@ +From 908f7f557f881612724bba31024565890984cd24 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Tue, 15 Jun 2021 18:23:45 -0700 +Subject: mm/thp: fix __split_huge_pmd_locked() on shmem migration entry + +From: Hugh Dickins + +[ Upstream commit 99fa8a48203d62b3743d866fc48ef6abaee682be ] + +Patch series "mm/thp: fix THP splitting unmap BUGs and related", v10. + +Here is v2 batch of long-standing THP bug fixes that I had not got +around to sending before, but prompted now by Wang Yugui's report +https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/ + +Wang Yugui has tested a rollup of these fixes applied to 5.10.39, and +they have done no harm, but have *not* fixed that issue: something more +is needed and I have no idea of what. 
+ +This patch (of 7): + +Stressing huge tmpfs page migration racing hole punch often crashed on +the VM_BUG_ON(!pmd_present) in pmdp_huge_clear_flush(), with DEBUG_VM=y +kernel; or shortly afterwards, on a bad dereference in +__split_huge_pmd_locked() when DEBUG_VM=n. They forgot to allow for pmd +migration entries in the non-anonymous case. + +Full disclosure: those particular experiments were on a kernel with more +relaxed mmap_lock and i_mmap_rwsem locking, and were not repeated on the +vanilla kernel: it is conceivable that stricter locking happens to avoid +those cases, or makes them less likely; but __split_huge_pmd_locked() +already allowed for pmd migration entries when handling anonymous THPs, +so this commit brings the shmem and file THP handling into line. + +And while there: use old_pmd rather than _pmd, as in the following +blocks; and make it clearer to the eye that the !vma_is_anonymous() +block is self-contained, making an early return after accounting for +unmapping. + +Link: https://lkml.kernel.org/r/af88612-1473-2eaa-903-8d1a448b26@google.com +Link: https://lkml.kernel.org/r/dd221a99-efb3-cd1d-6256-7e646af29314@google.com +Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp") +Signed-off-by: Hugh Dickins +Cc: Kirill A. Shutemov +Cc: Yang Shi +Cc: Wang Yugui +Cc: "Matthew Wilcox (Oracle)" +Cc: Naoya Horiguchi +Cc: Alistair Popple +Cc: Ralph Campbell +Cc: Zi Yan +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Jue Wang +Cc: Peter Xu +Cc: Jan Kara +Cc: Shakeel Butt +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +Note on stable backport: this commit made intervening cleanups in +pmdp_huge_clear_flush() redundant: here it's rediffed to skip them. 
+ +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 27 ++++++++++++++++++--------- + mm/pgtable-generic.c | 4 ++-- + 2 files changed, 20 insertions(+), 11 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2155,7 +2155,7 @@ static void __split_huge_pmd_locked(stru + count_vm_event(THP_SPLIT_PMD); + + if (!vma_is_anonymous(vma)) { +- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); ++ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + /* + * We are going to unmap this huge page. So + * just go ahead and zap it +@@ -2164,16 +2164,25 @@ static void __split_huge_pmd_locked(stru + zap_deposited_table(mm, pmd); + if (vma_is_dax(vma)) + return; +- page = pmd_page(_pmd); +- if (!PageDirty(page) && pmd_dirty(_pmd)) +- set_page_dirty(page); +- if (!PageReferenced(page) && pmd_young(_pmd)) +- SetPageReferenced(page); +- page_remove_rmap(page, true); +- put_page(page); ++ if (unlikely(is_pmd_migration_entry(old_pmd))) { ++ swp_entry_t entry; ++ ++ entry = pmd_to_swp_entry(old_pmd); ++ page = migration_entry_to_page(entry); ++ } else { ++ page = pmd_page(old_pmd); ++ if (!PageDirty(page) && pmd_dirty(old_pmd)) ++ set_page_dirty(page); ++ if (!PageReferenced(page) && pmd_young(old_pmd)) ++ SetPageReferenced(page); ++ page_remove_rmap(page, true); ++ put_page(page); ++ } + add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); + return; +- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { ++ } ++ ++ if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { + /* + * FIXME: Do we want to invalidate secondary mmu by calling + * mmu_notifier_invalidate_range() see comments below inside +--- a/mm/pgtable-generic.c ++++ b/mm/pgtable-generic.c +@@ -126,8 +126,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_ar + { + pmd_t pmd; + VM_BUG_ON(address & ~HPAGE_PMD_MASK); +- VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && +- !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); ++ VM_BUG_ON(pmd_present(*pmdp) && 
!pmd_trans_huge(*pmdp) && ++ !pmd_devmap(*pmdp)); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return pmd; diff --git a/queue-5.4/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch b/queue-5.4/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch new file mode 100644 index 00000000000..a28c08318ac --- /dev/null +++ b/queue-5.4/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch @@ -0,0 +1,61 @@ +From 31657170deaf1d8d2f6a1955fbc6fa9d228be036 Mon Sep 17 00:00:00 2001 +From: Jue Wang +Date: Tue, 15 Jun 2021 18:24:00 -0700 +Subject: mm/thp: fix page_address_in_vma() on file THP tails + +From: Jue Wang + +commit 31657170deaf1d8d2f6a1955fbc6fa9d228be036 upstream. + +Anon THP tails were already supported, but memory-failure may need to +use page_address_in_vma() on file THP tails, which its page->mapping +check did not permit: fix it. + +hughd adds: no current usage is known to hit the issue, but this does +fix a subtle trap in a general helper: best fixed in stable sooner than +later. + +Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com +Fixes: 800d8c63b2e9 ("shmem: add huge pages support") +Signed-off-by: Jue Wang +Signed-off-by: Hugh Dickins +Reviewed-by: Matthew Wilcox (Oracle) +Reviewed-by: Yang Shi +Acked-by: Kirill A. 
Shutemov +Cc: Alistair Popple +Cc: Jan Kara +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/rmap.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -696,11 +696,11 @@ unsigned long page_address_in_vma(struct + if (!vma->anon_vma || !page__anon_vma || + vma->anon_vma->root != page__anon_vma->root) + return -EFAULT; +- } else if (page->mapping) { +- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) +- return -EFAULT; +- } else ++ } else if (!vma->vm_file) { ++ return -EFAULT; ++ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { + return -EFAULT; ++ } + + return vma_address(page, vma); + } diff --git a/queue-5.4/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch b/queue-5.4/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch new file mode 100644 index 00000000000..4c2bfbdf776 --- /dev/null +++ b/queue-5.4/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch @@ -0,0 +1,119 @@ +From foo@baz Mon Jun 28 01:59:50 PM CEST 2021 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:26 -0700 +Subject: mm/thp: fix page_vma_mapped_walk() if THP mapped by ptes + +From: Hugh Dickins + +Running certain tests with a DEBUG_VM kernel would crash within hours, +on the total_mapcount BUG() in split_huge_page_to_list(), while trying +to free up some memory by punching a hole in a shmem huge page: split's +try_to_unmap() was unable to find all the mappings of the page (which, +on a !DEBUG_VM kernel, would then keep the huge page pinned in memory). 
+ +Crash dumps showed two tail pages of a shmem huge page remained mapped +by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end +of a long unmapped range; and no page table had yet been allocated for +the head of the huge page to be mapped into. + +Although designed to handle these odd misaligned huge-page-mapped-by-pte +cases, page_vma_mapped_walk() falls short by returning false prematurely +when !pmd_present or !pud_present or !p4d_present or !pgd_present: there +are cases when a huge page may span the boundary, with ptes present in +the next. + +Restructure page_vma_mapped_walk() as a loop to continue in these cases, +while keeping its layout much as before. Add a step_forward() helper to +advance pvmw->address across those boundaries: originally I tried to use +mm's standard p?d_addr_end() macros, but hit the same crash 512 times +less often: because of the way redundant levels are folded together, but +folded differently in different configurations, it was just too +difficult to use them correctly; and step_forward() is simpler anyway. + +Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com +Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 34 +++++++++++++++++++++++++--------- + 1 file changed, 25 insertions(+), 9 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -111,6 +111,13 @@ static bool check_pte(struct page_vma_ma + return pfn_in_hpage(pvmw->page, pfn); + } + ++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) ++{ ++ pvmw->address = (pvmw->address + size) & ~(size - 1); ++ if (!pvmw->address) ++ pvmw->address = ULONG_MAX; ++} ++ + /** + * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at + * @pvmw->address +@@ -178,16 +185,22 @@ bool page_vma_mapped_walk(struct page_vm + if (pvmw->pte) + goto next_pte; + restart: +- { ++ do { + pgd = pgd_offset(mm, pvmw->address); +- if (!pgd_present(*pgd)) +- return false; ++ if (!pgd_present(*pgd)) { ++ step_forward(pvmw, PGDIR_SIZE); ++ continue; ++ } + p4d = p4d_offset(pgd, pvmw->address); +- if (!p4d_present(*p4d)) +- return false; ++ if (!p4d_present(*p4d)) { ++ step_forward(pvmw, P4D_SIZE); ++ continue; ++ } + pud = pud_offset(p4d, pvmw->address); +- if (!pud_present(*pud)) +- return false; ++ if (!pud_present(*pud)) { ++ step_forward(pvmw, PUD_SIZE); ++ continue; ++ } + + pvmw->pmd = pmd_offset(pud, pvmw->address); + /* +@@ -234,7 +247,8 @@ restart: + + spin_unlock(ptl); + } +- return false; ++ step_forward(pvmw, PMD_SIZE); ++ continue; + } + if (!map_pte(pvmw)) + goto next_pte; +@@ -264,7 +278,9 @@ next_pte: + spin_lock(pvmw->ptl); + } + goto this_pte; +- } ++ } while (pvmw->address < end); ++ ++ return false; + } + + /** diff --git a/queue-5.4/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch b/queue-5.4/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch index 
fd576b3494b..4453d3d6ef1 100644 --- a/queue-5.4/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch +++ b/queue-5.4/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch @@ -1,4 +1,4 @@ -From foo@baz Mon Jun 28 01:37:10 PM CEST 2021 +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 From: Hugh Dickins Date: Tue, 15 Jun 2021 18:23:56 -0700 Subject: mm/thp: fix vma_address() if virtual address below file offset @@ -233,7 +233,7 @@ Signed-off-by: Greg Kroah-Hartman if (PageHuge(page)) { /* * If sharing is possible, start and end will be adjusted -@@ -1848,6 +1846,7 @@ static void rmap_walk_anon(struct page * +@@ -1843,6 +1841,7 @@ static void rmap_walk_anon(struct page * struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); @@ -241,7 +241,7 @@ Signed-off-by: Greg Kroah-Hartman cond_resched(); if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) -@@ -1902,6 +1901,7 @@ static void rmap_walk_file(struct page * +@@ -1897,6 +1896,7 @@ static void rmap_walk_file(struct page * pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma); diff --git a/queue-5.4/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch b/queue-5.4/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch new file mode 100644 index 00000000000..75ec223b232 --- /dev/null +++ b/queue-5.4/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch @@ -0,0 +1,116 @@ +From 3b77e8c8cde581dadab9a0f1543a347e24315f11 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Tue, 15 Jun 2021 18:23:49 -0700 +Subject: mm/thp: make is_huge_zero_pmd() safe and quicker + +From: Hugh Dickins + +commit 3b77e8c8cde581dadab9a0f1543a347e24315f11 upstream. 
+ +Most callers of is_huge_zero_pmd() supply a pmd already verified +present; but a few (notably zap_huge_pmd()) do not - it might be a pmd +migration entry, in which the pfn is encoded differently from a present +pmd: which might pass the is_huge_zero_pmd() test (though not on x86, +since L1TF forced us to protect against that); or perhaps even crash in +pmd_page() applied to a swap-like entry. + +Make it safe by adding pmd_present() check into is_huge_zero_pmd() +itself; and make it quicker by saving huge_zero_pfn, so that +is_huge_zero_pmd() will not need to do that pmd_page() lookup each time. + +__split_huge_pmd_locked() checked pmd_trans_huge() before: that worked, +but is unnecessary now that is_huge_zero_pmd() checks present. + +Link: https://lkml.kernel.org/r/21ea9ca-a1f5-8b90-5e88-95fb1c49bbfa@google.com +Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Reviewed-by: Yang Shi +Cc: Alistair Popple +Cc: Jan Kara +Cc: Jue Wang +Cc: "Matthew Wilcox (Oracle)" +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/huge_mm.h | 8 +++++++- + mm/huge_memory.c | 5 ++++- + 2 files changed, 11 insertions(+), 2 deletions(-) + +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -259,6 +259,7 @@ struct page *follow_devmap_pud(struct vm + extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); + + extern struct page *huge_zero_page; ++extern unsigned long huge_zero_pfn; + + static inline bool is_huge_zero_page(struct page *page) + { +@@ -267,7 +268,7 @@ static inline bool is_huge_zero_page(str + + static inline bool is_huge_zero_pmd(pmd_t pmd) + { +- return is_huge_zero_page(pmd_page(pmd)); ++ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && 
pmd_present(pmd); + } + + static inline bool is_huge_zero_pud(pud_t pud) +@@ -397,6 +398,11 @@ static inline bool is_huge_zero_page(str + { + return false; + } ++ ++static inline bool is_huge_zero_pmd(pmd_t pmd) ++{ ++ return false; ++} + + static inline bool is_huge_zero_pud(pud_t pud) + { +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -61,6 +61,7 @@ static struct shrinker deferred_split_sh + + static atomic_t huge_zero_refcount; + struct page *huge_zero_page __read_mostly; ++unsigned long huge_zero_pfn __read_mostly = ~0UL; + + bool transparent_hugepage_enabled(struct vm_area_struct *vma) + { +@@ -97,6 +98,7 @@ retry: + __free_pages(zero_page, compound_order(zero_page)); + goto retry; + } ++ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); + + /* We take additional reference here. It will be put back by shrinker */ + atomic_set(&huge_zero_refcount, 2); +@@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_pa + if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { + struct page *zero_page = xchg(&huge_zero_page, NULL); + BUG_ON(zero_page == NULL); ++ WRITE_ONCE(huge_zero_pfn, ~0UL); + __free_pages(zero_page, compound_order(zero_page)); + return HPAGE_PMD_NR; + } +@@ -2182,7 +2185,7 @@ static void __split_huge_pmd_locked(stru + return; + } + +- if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { ++ if (is_huge_zero_pmd(*pmd)) { + /* + * FIXME: Do we want to invalidate secondary mmu by calling + * mmu_notifier_invalidate_range() see comments below inside diff --git a/queue-5.4/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch b/queue-5.4/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch index 754e9498166..47b7f1d1669 100644 --- a/queue-5.4/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch +++ b/queue-5.4/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch @@ -1,4 +1,4 @@ -From foo@baz Mon Jun 28 01:37:10 PM CEST 2021 +From foo@baz Mon Jun 28 01:55:13 PM CEST 
2021 From: Yang Shi Date: Tue, 15 Jun 2021 18:24:07 -0700 Subject: mm: thp: replace DEBUG_VM BUG with VM_WARN when unmap fails for split @@ -50,7 +50,7 @@ Signed-off-by: Greg Kroah-Hartman --- a/mm/huge_memory.c +++ b/mm/huge_memory.c -@@ -2450,15 +2450,15 @@ static void unmap_page(struct page *page +@@ -2462,15 +2462,15 @@ static void unmap_page(struct page *page { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC; @@ -69,7 +69,7 @@ Signed-off-by: Greg Kroah-Hartman } static void remap_page(struct page *page) -@@ -2737,7 +2737,7 @@ int split_huge_page_to_list(struct page +@@ -2749,7 +2749,7 @@ int split_huge_page_to_list(struct page struct deferred_split *ds_queue = get_deferred_split_queue(page); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; @@ -78,7 +78,7 @@ Signed-off-by: Greg Kroah-Hartman bool mlocked; unsigned long flags; pgoff_t end; -@@ -2799,7 +2799,6 @@ int split_huge_page_to_list(struct page +@@ -2811,7 +2811,6 @@ int split_huge_page_to_list(struct page mlocked = PageMlocked(page); unmap_page(head); @@ -86,7 +86,7 @@ Signed-off-by: Greg Kroah-Hartman /* Make sure the page is not on per-CPU pagevec as it takes pin */ if (mlocked) -@@ -2822,9 +2821,7 @@ int split_huge_page_to_list(struct page +@@ -2834,9 +2833,7 @@ int split_huge_page_to_list(struct page /* Prevent deferred_split_scan() touching ->_refcount */ spin_lock(&ds_queue->split_queue_lock); @@ -97,7 +97,7 @@ Signed-off-by: Greg Kroah-Hartman if (!list_empty(page_deferred_list(head))) { ds_queue->split_queue_len--; list_del(page_deferred_list(head)); -@@ -2845,16 +2842,9 @@ int split_huge_page_to_list(struct page +@@ -2857,16 +2854,9 @@ int split_huge_page_to_list(struct page } else ret = 0; } else { diff --git a/queue-5.4/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch b/queue-5.4/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch index e0569a44b44..9cbbec1cf91 100644 --- 
a/queue-5.4/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch +++ b/queue-5.4/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch @@ -1,4 +1,4 @@ -From foo@baz Mon Jun 28 01:37:10 PM CEST 2021 +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 From: Hugh Dickins Date: Tue, 15 Jun 2021 18:23:53 -0700 Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting @@ -84,7 +84,7 @@ Signed-off-by: Greg Kroah-Hartman #ifdef CONFIG_MMU --- a/mm/huge_memory.c +++ b/mm/huge_memory.c -@@ -2449,7 +2449,7 @@ void vma_adjust_trans_huge(struct vm_are +@@ -2461,7 +2461,7 @@ void vma_adjust_trans_huge(struct vm_are static void unmap_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | @@ -145,4 +145,4 @@ Signed-off-by: Greg Kroah-Hartman + return !page_mapcount(page); } - static int page_not_mapped(struct page *page) + /** diff --git a/queue-5.4/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch b/queue-5.4/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch index 112073ce64c..7364cee4eb1 100644 --- a/queue-5.4/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch +++ b/queue-5.4/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch @@ -1,4 +1,4 @@ -From foo@baz Mon Jun 28 01:37:10 PM CEST 2021 +From foo@baz Mon Jun 28 01:55:13 PM CEST 2021 From: Hugh Dickins Date: Tue, 15 Jun 2021 18:24:03 -0700 Subject: mm/thp: unmap_mapping_page() to fix THP truncate_cleanup_page() diff --git a/queue-5.4/mm-thp-use-head-page-in-__migration_entry_wait.patch b/queue-5.4/mm-thp-use-head-page-in-__migration_entry_wait.patch new file mode 100644 index 00000000000..828473e3b78 --- /dev/null +++ b/queue-5.4/mm-thp-use-head-page-in-__migration_entry_wait.patch @@ -0,0 +1,65 @@ +From ffc90cbb2970ab88b66ea51dd580469eede57b67 Mon Sep 17 00:00:00 2001 +From: Xu Yu +Date: Tue, 15 Jun 2021 18:23:42 -0700 +Subject: mm, thp: use head page in __migration_entry_wait() + +From: Xu Yu + +commit 
ffc90cbb2970ab88b66ea51dd580469eede57b67 upstream. + +We notice that hung task happens in a corner but practical scenario when +CONFIG_PREEMPT_NONE is enabled, as follows. + +Process 0 Process 1 Process 2..Inf +split_huge_page_to_list + unmap_page + split_huge_pmd_address + __migration_entry_wait(head) + __migration_entry_wait(tail) + remap_page (roll back) + remove_migration_ptes + rmap_walk_anon + cond_resched + +Where __migration_entry_wait(tail) is occurred in kernel space, e.g., +copy_to_user in fstat, which will immediately fault again without +rescheduling, and thus occupy the cpu fully. + +When there are too many processes performing __migration_entry_wait on +tail page, remap_page will never be done after cond_resched. + +This makes __migration_entry_wait operate on the compound head page, +thus waits for remap_page to complete, whether the THP is split +successfully or roll back. + +Note that put_and_wait_on_page_locked helps to drop the page reference +acquired with get_page_unless_zero, as soon as the page is on the wait +queue, before actually waiting. So splitting the THP is only prevented +for a brief interval. + +Link: https://lkml.kernel.org/r/b9836c1dd522e903891760af9f0c86a2cce987eb.1623144009.git.xuyu@linux.alibaba.com +Fixes: ba98828088ad ("thp: add option to setup migration entries during PMD split") +Suggested-by: Hugh Dickins +Signed-off-by: Gang Deng +Signed-off-by: Xu Yu +Acked-by: Kirill A. 
Shutemov +Acked-by: Hugh Dickins +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/migrate.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -321,6 +321,7 @@ void __migration_entry_wait(struct mm_st + goto out; + + page = migration_entry_to_page(entry); ++ page = compound_head(page); + + /* + * Once page cache replacement of page migration started, page_count diff --git a/queue-5.4/series b/queue-5.4/series index 7bc9b7e04a0..31fe4bd5525 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -42,8 +42,25 @@ kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch i2c-robotfuzz-osif-fix-control-request-directions.patch kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch +mm-add-vm_warn_on_once_page-macro.patch +mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch +mm-rmap-use-page_not_mapped-in-try_to_unmap.patch +mm-thp-use-head-page-in-__migration_entry_wait.patch +mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch +mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch +mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch +mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch +mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch +mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch +mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch +mm-page_vma_mapped_walk-crossing-page-table-boundary.patch +mm-page_vma_mapped_walk-add-a-level-of-indentation.patch +mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch 
+mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch +mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch +mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch -- 2.47.2