--- /dev/null
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
+From: Alex Shi <alex.shi@linux.alibaba.com>
+Date: Fri, 18 Dec 2020 14:01:31 -0800
+Subject: mm: add VM_WARN_ON_ONCE_PAGE() macro
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit a4055888629bc0467d12d912cd7c90acdf3d9b12 part ]
+
+Add VM_WARN_ON_ONCE_PAGE() macro.
+
+Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: original commit was titled
+mm/memcg: warning on !memcg after readahead page charged
+which included uses of this macro in mm/memcontrol.c: here omitted.
+
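+A minimal usage sketch (not part of this backport; the mm/memcontrol.c
+hunks omitted above used the macro along these lines):
+
+	/* warn once, dumping the page, if it got here uncharged */
+	VM_WARN_ON_ONCE_PAGE(!memcg, page);
+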
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mmdebug.h | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/include/linux/mmdebug.h
++++ b/include/linux/mmdebug.h
+@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm)
+ BUG(); \
+ } \
+ } while (0)
++#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \
++ static bool __section(".data.once") __warned; \
++ int __ret_warn_once = !!(cond); \
++ \
++ if (unlikely(__ret_warn_once && !__warned)) { \
++ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
++ __warned = true; \
++ WARN_ON(1); \
++ } \
++ unlikely(__ret_warn_once); \
++})
++
+ #define VM_WARN_ON(cond) (void)WARN_ON(cond)
+ #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+ #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
+@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm)
+ #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
+ #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
++#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #endif
-From foo@baz Mon Jun 28 01:37:10 PM CEST 2021
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:52 -0700
Subject: mm, futex: fix shared futex pgoff on shmem huge page
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:17 -0700
+Subject: mm: page_vma_mapped_walk(): add a level of indentation
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: add a level of indentation to much of
+the body, making no functional change in this commit, but reducing the
+later diff when this is all converted to a loop.
+
+[hughd@google.com: page_vma_mapped_walk(): add a level of indentation fix]
+ Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com
+
+Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 105 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 55 insertions(+), 50 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -168,62 +168,67 @@ bool page_vma_mapped_walk(struct page_vm
+ if (pvmw->pte)
+ goto next_pte;
+ restart:
+- pgd = pgd_offset(mm, pvmw->address);
+- if (!pgd_present(*pgd))
+- return false;
+- p4d = p4d_offset(pgd, pvmw->address);
+- if (!p4d_present(*p4d))
+- return false;
+- pud = pud_offset(p4d, pvmw->address);
+- if (!pud_present(*pud))
+- return false;
+- pvmw->pmd = pmd_offset(pud, pvmw->address);
+- /*
+- * Make sure the pmd value isn't cached in a register by the
+- * compiler and used as a stale value after we've observed a
+- * subsequent update.
+- */
+- pmde = READ_ONCE(*pvmw->pmd);
+- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+- pmde = *pvmw->pmd;
+- if (likely(pmd_trans_huge(pmde))) {
+- if (pvmw->flags & PVMW_MIGRATION)
+- return not_found(pvmw);
+- if (pmd_page(pmde) != page)
+- return not_found(pvmw);
+- return true;
+- }
+- if (!pmd_present(pmde)) {
+- swp_entry_t entry;
++ {
++ pgd = pgd_offset(mm, pvmw->address);
++ if (!pgd_present(*pgd))
++ return false;
++ p4d = p4d_offset(pgd, pvmw->address);
++ if (!p4d_present(*p4d))
++ return false;
++ pud = pud_offset(p4d, pvmw->address);
++ if (!pud_present(*pud))
++ return false;
+
+- if (!thp_migration_supported() ||
+- !(pvmw->flags & PVMW_MIGRATION))
+- return not_found(pvmw);
+- entry = pmd_to_swp_entry(pmde);
+- if (!is_migration_entry(entry) ||
+- migration_entry_to_page(entry) != page)
+- return not_found(pvmw);
+- return true;
+- }
+- /* THP pmd was split under us: handle on pte level */
+- spin_unlock(pvmw->ptl);
+- pvmw->ptl = NULL;
+- } else if (!pmd_present(pmde)) {
++ pvmw->pmd = pmd_offset(pud, pvmw->address);
+ /*
+- * If PVMW_SYNC, take and drop THP pmd lock so that we
+- * cannot return prematurely, while zap_huge_pmd() has
+- * cleared *pmd but not decremented compound_mapcount().
++ * Make sure the pmd value isn't cached in a register by the
++ * compiler and used as a stale value after we've observed a
++ * subsequent update.
+ */
+- if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
+- spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++ pmde = READ_ONCE(*pvmw->pmd);
++
++ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
++ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
++ pmde = *pvmw->pmd;
++ if (likely(pmd_trans_huge(pmde))) {
++ if (pvmw->flags & PVMW_MIGRATION)
++ return not_found(pvmw);
++ if (pmd_page(pmde) != page)
++ return not_found(pvmw);
++ return true;
++ }
++ if (!pmd_present(pmde)) {
++ swp_entry_t entry;
+
+- spin_unlock(ptl);
++ if (!thp_migration_supported() ||
++ !(pvmw->flags & PVMW_MIGRATION))
++ return not_found(pvmw);
++ entry = pmd_to_swp_entry(pmde);
++ if (!is_migration_entry(entry) ||
++ migration_entry_to_page(entry) != page)
++ return not_found(pvmw);
++ return true;
++ }
++ /* THP pmd was split under us: handle on pte level */
++ spin_unlock(pvmw->ptl);
++ pvmw->ptl = NULL;
++ } else if (!pmd_present(pmde)) {
++ /*
++ * If PVMW_SYNC, take and drop THP pmd lock so that we
++ * cannot return prematurely, while zap_huge_pmd() has
++ * cleared *pmd but not decremented compound_mapcount().
++ */
++ if ((pvmw->flags & PVMW_SYNC) &&
++ PageTransCompound(page)) {
++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++
++ spin_unlock(ptl);
++ }
++ return false;
+ }
+- return false;
++ if (!map_pte(pvmw))
++ goto next_pte;
+ }
+- if (!map_pte(pvmw))
+- goto next_pte;
+ while (1) {
+ unsigned long end;
+
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:14 -0700
+Subject: mm: page_vma_mapped_walk(): crossing page table boundary
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: adjust the test for crossing page table
+boundary - I believe pvmw->address is always page-aligned, but nothing
+else here assumed that; and remember to reset pvmw->pte to NULL after
+unmapping the page table, though I never saw any bug from that.
+
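+As an illustration (not part of the change itself), with 4K pages and
+2M pmds the two tests differ only in the low, sub-page bits:
+
+	/* old: also requires the page-offset bits of address to be zero */
+	if (pvmw->address % PMD_SIZE == 0)
+
+	/* new: masks only the pte-index bits, so it holds for any address
+	 * within the first pte of a page table, page-aligned or not */
+	if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0)
+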
+Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -239,16 +239,16 @@ next_pte:
+ if (pvmw->address >= end)
+ return not_found(pvmw);
+ /* Did we cross page table boundary? */
+- if (pvmw->address % PMD_SIZE == 0) {
+- pte_unmap(pvmw->pte);
++ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
+ if (pvmw->ptl) {
+ spin_unlock(pvmw->ptl);
+ pvmw->ptl = NULL;
+ }
++ pte_unmap(pvmw->pte);
++ pvmw->pte = NULL;
+ goto restart;
+- } else {
+- pvmw->pte++;
+ }
++ pvmw->pte++;
+ } while (pte_none(*pvmw->pte));
+
+ if (!pvmw->ptl) {
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:23 -0700
+Subject: mm: page_vma_mapped_walk(): get vma_address_end() earlier
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the
+start, rather than later at next_pte.
+
+It's a little unnecessary overhead on the first call, but makes for a
+simpler loop in the following commit.
+
+Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -166,6 +166,15 @@ bool page_vma_mapped_walk(struct page_vm
+ return true;
+ }
+
++ /*
++ * Seek to next pte only makes sense for THP.
++ * But more important than that optimization, is to filter out
++ * any PageKsm page: whose page->index misleads vma_address()
++ * and vma_address_end() to disaster.
++ */
++ end = PageTransCompound(page) ?
++ vma_address_end(page, pvmw->vma) :
++ pvmw->address + PAGE_SIZE;
+ if (pvmw->pte)
+ goto next_pte;
+ restart:
+@@ -233,10 +242,6 @@ this_pte:
+ if (check_pte(pvmw))
+ return true;
+ next_pte:
+- /* Seek to next pte only makes sense for THP */
+- if (!PageTransHuge(page))
+- return not_found(pvmw);
+- end = vma_address_end(page, pvmw->vma);
+ do {
+ pvmw->address += PAGE_SIZE;
+ if (pvmw->address >= end)
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:10 -0700
+Subject: mm: page_vma_mapped_walk(): prettify PVMW_MIGRATION block
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to
+follow the same "return not_found, return not_found, return true"
+pattern as the block above it (note: returning not_found there is never
+premature, since existence or prior existence of huge pmd guarantees
+good alignment).
+
+Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 30 ++++++++++++++----------------
+ 1 file changed, 14 insertions(+), 16 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -193,24 +193,22 @@ restart:
+ if (pmd_page(pmde) != page)
+ return not_found(pvmw);
+ return true;
+- } else if (!pmd_present(pmde)) {
+- if (thp_migration_supported()) {
+- if (!(pvmw->flags & PVMW_MIGRATION))
+- return not_found(pvmw);
+- if (is_migration_entry(pmd_to_swp_entry(pmde))) {
+- swp_entry_t entry = pmd_to_swp_entry(pmde);
++ }
++ if (!pmd_present(pmde)) {
++ swp_entry_t entry;
+
+- if (migration_entry_to_page(entry) != page)
+- return not_found(pvmw);
+- return true;
+- }
+- }
+- return not_found(pvmw);
+- } else {
+- /* THP pmd was split under us: handle on pte level */
+- spin_unlock(pvmw->ptl);
+- pvmw->ptl = NULL;
++ if (!thp_migration_supported() ||
++ !(pvmw->flags & PVMW_MIGRATION))
++ return not_found(pvmw);
++ entry = pmd_to_swp_entry(pmde);
++ if (!is_migration_entry(entry) ||
++ migration_entry_to_page(entry) != page)
++ return not_found(pvmw);
++ return true;
+ }
++ /* THP pmd was split under us: handle on pte level */
++ spin_unlock(pvmw->ptl);
++ pvmw->ptl = NULL;
+ } else if (!pmd_present(pmde)) {
+ /*
+ * If PVMW_SYNC, take and drop THP pmd lock so that we
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:04 -0700
+Subject: mm: page_vma_mapped_walk(): settle PageHuge on entry
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of
+the way at the start, so no need to worry about it later.
+
+Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -148,10 +148,11 @@ bool page_vma_mapped_walk(struct page_vm
+ if (pvmw->pmd && !pvmw->pte)
+ return not_found(pvmw);
+
+- if (pvmw->pte)
+- goto next_pte;
+-
+ if (unlikely(PageHuge(page))) {
++ /* The only possible mapping was handled on last iteration */
++ if (pvmw->pte)
++ return not_found(pvmw);
++
+ /* when pud is not present, pte will be NULL */
+ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
+ if (!pvmw->pte)
+@@ -163,6 +164,9 @@ bool page_vma_mapped_walk(struct page_vm
+ return not_found(pvmw);
+ return true;
+ }
++
++ if (pvmw->pte)
++ goto next_pte;
+ restart:
+ pgd = pgd_offset(mm, pvmw->address);
+ if (!pgd_present(*pgd))
+@@ -228,7 +232,7 @@ restart:
+ return true;
+ next_pte:
+ /* Seek to next pte only makes sense for THP */
+- if (!PageTransHuge(page) || PageHuge(page))
++ if (!PageTransHuge(page))
+ return not_found(pvmw);
+ end = vma_address_end(page, pvmw->vma);
+ do {
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:20 -0700
+Subject: mm: page_vma_mapped_walk(): use goto instead of while (1)
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte,
+and use "goto this_pte", in place of the "while (1)" loop at the end.
+
+Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -139,6 +139,7 @@ bool page_vma_mapped_walk(struct page_vm
+ {
+ struct mm_struct *mm = pvmw->vma->vm_mm;
+ struct page *page = pvmw->page;
++ unsigned long end;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+@@ -228,10 +229,7 @@ restart:
+ }
+ if (!map_pte(pvmw))
+ goto next_pte;
+- }
+- while (1) {
+- unsigned long end;
+-
++this_pte:
+ if (check_pte(pvmw))
+ return true;
+ next_pte:
+@@ -260,6 +258,7 @@ next_pte:
+ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
+ }
++ goto this_pte;
+ }
+ }
+
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:01 -0700
+Subject: mm: page_vma_mapped_walk(): use page for pvmw->page
+
+From: Hugh Dickins <hughd@google.com>
+
+Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes".
+
+I've marked all of these for stable: many are merely cleanups, but I
+think they are much better before the main fix than after.
+
+This patch (of 11):
+
+page_vma_mapped_walk() cleanup: sometimes the local copy of pvmw->page
+was used, sometimes pvmw->page itself: use the local copy "page"
+throughout.
+
+Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com
+Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Alistair Popple <apopple@nvidia.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -151,7 +151,7 @@ bool page_vma_mapped_walk(struct page_vm
+ if (pvmw->pte)
+ goto next_pte;
+
+- if (unlikely(PageHuge(pvmw->page))) {
++ if (unlikely(PageHuge(page))) {
+ /* when pud is not present, pte will be NULL */
+ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
+ if (!pvmw->pte)
+@@ -212,8 +212,7 @@ restart:
+ * cannot return prematurely, while zap_huge_pmd() has
+ * cleared *pmd but not decremented compound_mapcount().
+ */
+- if ((pvmw->flags & PVMW_SYNC) &&
+- PageTransCompound(pvmw->page)) {
++ if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
+ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+ spin_unlock(ptl);
+@@ -229,9 +228,9 @@ restart:
+ return true;
+ next_pte:
+ /* Seek to next pte only makes sense for THP */
+- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
++ if (!PageTransHuge(page) || PageHuge(page))
+ return not_found(pvmw);
+- end = vma_address_end(pvmw->page, pvmw->vma);
++ end = vma_address_end(page, pvmw->vma);
+ do {
+ pvmw->address += PAGE_SIZE;
+ if (pvmw->address >= end)
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:07 -0700
+Subject: mm: page_vma_mapped_walk(): use pmde for *pvmw->pmd
+
+From: Hugh Dickins <hughd@google.com>
+
+page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then
+use it in subsequent tests, instead of repeatedly dereferencing pointer.
+
+Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -186,18 +186,19 @@ restart:
+ pmde = READ_ONCE(*pvmw->pmd);
+ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+- if (likely(pmd_trans_huge(*pvmw->pmd))) {
++ pmde = *pvmw->pmd;
++ if (likely(pmd_trans_huge(pmde))) {
+ if (pvmw->flags & PVMW_MIGRATION)
+ return not_found(pvmw);
+- if (pmd_page(*pvmw->pmd) != page)
++ if (pmd_page(pmde) != page)
+ return not_found(pvmw);
+ return true;
+- } else if (!pmd_present(*pvmw->pmd)) {
++ } else if (!pmd_present(pmde)) {
+ if (thp_migration_supported()) {
+ if (!(pvmw->flags & PVMW_MIGRATION))
+ return not_found(pvmw);
+- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
+- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
++ if (is_migration_entry(pmd_to_swp_entry(pmde))) {
++ swp_entry_t entry = pmd_to_swp_entry(pmde);
+
+ if (migration_entry_to_page(entry) != page)
+ return not_found(pvmw);
--- /dev/null
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Thu, 25 Feb 2021 17:17:56 -0800
+Subject: mm/rmap: remove unneeded semicolon in page_not_mapped()
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit e0af87ff7afcde2660be44302836d2d5618185af ]
+
+Remove extra semicolon without any functional change intended.
+
+Link: https://lkml.kernel.org/r/20210127093425.39640-1-linmiaohe@huawei.com
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/rmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1737,7 +1737,7 @@ bool try_to_unmap(struct page *page, enu
+ static int page_not_mapped(struct page *page)
+ {
+ return !page_mapped(page);
+-};
++}
+
+ /**
+ * try_to_munlock - try to munlock a page
--- /dev/null
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Thu, 25 Feb 2021 17:18:03 -0800
+Subject: mm/rmap: use page_not_mapped in try_to_unmap()
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit b7e188ec98b1644ff70a6d3624ea16aadc39f5e0 ]
+
+page_mapcount_is_zero() accurately calculates how many mappings a
+hugepage has, only to check the result against 0. This is a waste of
+cpu time: we can do this via page_not_mapped() and save some possible
+atomic_read cycles. Remove page_mapcount_is_zero() as it is no longer
+used, and move page_not_mapped() above try_to_unmap() to avoid an
+"identifier undeclared" compilation error.
+
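+A rough sketch of the difference in the rmap_walk_control setup
+(comments are mine, not from the patch):
+
+	/* old .done callback: sums _mapcount over every subpage */
+	.done = page_mapcount_is_zero,	/* !total_mapcount(page) */
+
+	/* new .done callback: may return as soon as a mapping is found */
+	.done = page_not_mapped,	/* !page_mapped(page) */
+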
+Link: https://lkml.kernel.org/r/20210130084904.35307-1-linmiaohe@huawei.com
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/rmap.c | 11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1690,9 +1690,9 @@ static bool invalid_migration_vma(struct
+ return is_vma_temporary_stack(vma);
+ }
+
+-static int page_mapcount_is_zero(struct page *page)
++static int page_not_mapped(struct page *page)
+ {
+- return !total_mapcount(page);
++ return !page_mapped(page);
+ }
+
+ /**
+@@ -1710,7 +1710,7 @@ bool try_to_unmap(struct page *page, enu
+ struct rmap_walk_control rwc = {
+ .rmap_one = try_to_unmap_one,
+ .arg = (void *)flags,
+- .done = page_mapcount_is_zero,
++ .done = page_not_mapped,
+ .anon_lock = page_lock_anon_vma_read,
+ };
+
+@@ -1734,11 +1734,6 @@ bool try_to_unmap(struct page *page, enu
+ return !page_mapcount(page) ? true : false;
+ }
+
+-static int page_not_mapped(struct page *page)
+-{
+- return !page_mapped(page);
+-}
+-
+ /**
+ * try_to_munlock - try to munlock a page
+ * @page: the page to be munlocked
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:30 -0700
+Subject: mm/thp: another PVMW_SYNC fix in page_vma_mapped_walk()
+
+From: Hugh Dickins <hughd@google.com>
+
+Aha! Shouldn't that quick scan over pte_none()s make sure that it holds
+ptlock in the PVMW_SYNC case? That too might have been responsible for
+BUGs or WARNs in split_huge_page_to_list() or its unmap_page(), though
+I've never seen any.
+
+Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com
+Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
+Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Tested-by: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -271,6 +271,10 @@ next_pte:
+ goto restart;
+ }
+ pvmw->pte++;
++ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
++ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
++ spin_lock(pvmw->ptl);
++ }
+ } while (pte_none(*pvmw->pte));
+
+ if (!pvmw->ptl) {
--- /dev/null
+From 908f7f557f881612724bba31024565890984cd24 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 15 Jun 2021 18:23:45 -0700
+Subject: mm/thp: fix __split_huge_pmd_locked() on shmem migration entry
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 99fa8a48203d62b3743d866fc48ef6abaee682be ]
+
+Patch series "mm/thp: fix THP splitting unmap BUGs and related", v10.
+
+Here is a v2 batch of long-standing THP bug fixes that I had not got
+around to sending before, but prompted now by Wang Yugui's report
+https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
+
+Wang Yugui has tested a rollup of these fixes applied to 5.10.39, and
+they have done no harm, but have *not* fixed that issue: something more
+is needed and I have no idea of what.
+
+This patch (of 7):
+
+Stressing huge tmpfs page migration racing hole punch often crashed on
+the VM_BUG_ON(!pmd_present) in pmdp_huge_clear_flush(), with DEBUG_VM=y
+kernel; or shortly afterwards, on a bad dereference in
+__split_huge_pmd_locked() when DEBUG_VM=n. They forgot to allow for pmd
+migration entries in the non-anonymous case.
+
+Full disclosure: those particular experiments were on a kernel with more
+relaxed mmap_lock and i_mmap_rwsem locking, and were not repeated on the
+vanilla kernel: it is conceivable that stricter locking happens to avoid
+those cases, or makes them less likely; but __split_huge_pmd_locked()
+already allowed for pmd migration entries when handling anonymous THPs,
+so this commit brings the shmem and file THP handling into line.
+
+And while there: use old_pmd rather than _pmd, as in the following
+blocks; and make it clearer to the eye that the !vma_is_anonymous()
+block is self-contained, making an early return after accounting for
+unmapping.
+
+Link: https://lkml.kernel.org/r/af88612-1473-2eaa-903-8d1a448b26@google.com
+Link: https://lkml.kernel.org/r/dd221a99-efb3-cd1d-6256-7e646af29314@google.com
+Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Jue Wang <juew@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: this commit made intervening cleanups in
+pmdp_huge_clear_flush() redundant: here it's rediffed to skip them.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c | 27 ++++++++++++++++++---------
+ mm/pgtable-generic.c | 4 ++--
+ 2 files changed, 20 insertions(+), 11 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2155,7 +2155,7 @@ static void __split_huge_pmd_locked(stru
+ count_vm_event(THP_SPLIT_PMD);
+
+ if (!vma_is_anonymous(vma)) {
+- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
++ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ /*
+ * We are going to unmap this huge page. So
+ * just go ahead and zap it
+@@ -2164,16 +2164,25 @@ static void __split_huge_pmd_locked(stru
+ zap_deposited_table(mm, pmd);
+ if (vma_is_dax(vma))
+ return;
+- page = pmd_page(_pmd);
+- if (!PageDirty(page) && pmd_dirty(_pmd))
+- set_page_dirty(page);
+- if (!PageReferenced(page) && pmd_young(_pmd))
+- SetPageReferenced(page);
+- page_remove_rmap(page, true);
+- put_page(page);
++ if (unlikely(is_pmd_migration_entry(old_pmd))) {
++ swp_entry_t entry;
++
++ entry = pmd_to_swp_entry(old_pmd);
++ page = migration_entry_to_page(entry);
++ } else {
++ page = pmd_page(old_pmd);
++ if (!PageDirty(page) && pmd_dirty(old_pmd))
++ set_page_dirty(page);
++ if (!PageReferenced(page) && pmd_young(old_pmd))
++ SetPageReferenced(page);
++ page_remove_rmap(page, true);
++ put_page(page);
++ }
+ add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
+ return;
+- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
++ }
++
++ if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ /*
+ * FIXME: Do we want to invalidate secondary mmu by calling
+ * mmu_notifier_invalidate_range() see comments below inside
+--- a/mm/pgtable-generic.c
++++ b/mm/pgtable-generic.c
+@@ -126,8 +126,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_ar
+ {
+ pmd_t pmd;
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+- VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+- !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
++ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
++ !pmd_devmap(*pmdp));
+ pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+ flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ return pmd;
--- /dev/null
+From 31657170deaf1d8d2f6a1955fbc6fa9d228be036 Mon Sep 17 00:00:00 2001
+From: Jue Wang <juew@google.com>
+Date: Tue, 15 Jun 2021 18:24:00 -0700
+Subject: mm/thp: fix page_address_in_vma() on file THP tails
+
+From: Jue Wang <juew@google.com>
+
+commit 31657170deaf1d8d2f6a1955fbc6fa9d228be036 upstream.
+
+Anon THP tails were already supported, but memory-failure may need to
+use page_address_in_vma() on file THP tails, which its page->mapping
+check did not permit: fix it.
+
+hughd adds: no current usage is known to hit the issue, but this does
+fix a subtle trap in a general helper: best fixed in stable sooner than
+later.
+
+Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com
+Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
+Signed-off-by: Jue Wang <juew@google.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/rmap.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -696,11 +696,11 @@ unsigned long page_address_in_vma(struct
+ if (!vma->anon_vma || !page__anon_vma ||
+ vma->anon_vma->root != page__anon_vma->root)
+ return -EFAULT;
+- } else if (page->mapping) {
+- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
+- return -EFAULT;
+- } else
++ } else if (!vma->vm_file) {
++ return -EFAULT;
++ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
+ return -EFAULT;
++ }
+
+ return vma_address(page, vma);
+ }
--- /dev/null
+From foo@baz Mon Jun 28 01:59:50 PM CEST 2021
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 24 Jun 2021 18:39:26 -0700
+Subject: mm/thp: fix page_vma_mapped_walk() if THP mapped by ptes
+
+From: Hugh Dickins <hughd@google.com>
+
+Running certain tests with a DEBUG_VM kernel would crash within hours,
+on the total_mapcount BUG() in split_huge_page_to_list(), while trying
+to free up some memory by punching a hole in a shmem huge page: split's
+try_to_unmap() was unable to find all the mappings of the page (which,
+on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
+
+Crash dumps showed two tail pages of a shmem huge page remained mapped
+by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end
+of a long unmapped range; and no page table had yet been allocated for
+the head of the huge page to be mapped into.
+
+Although designed to handle these odd misaligned huge-page-mapped-by-pte
+cases, page_vma_mapped_walk() falls short by returning false prematurely
+when !pmd_present or !pud_present or !p4d_present or !pgd_present: there
+are cases when a huge page may span the boundary, with ptes present in
+the next.
+
+Restructure page_vma_mapped_walk() as a loop to continue in these cases,
+while keeping its layout much as before. Add a step_forward() helper to
+advance pvmw->address across those boundaries: originally I tried to use
+mm's standard p?d_addr_end() macros, but hit the same crash 512 times
+less often: because of the way redundant levels are folded together, but
+folded differently in different configurations, it was just too
+difficult to use them correctly; and step_forward() is simpler anyway.
+
+Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com
+Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_vma_mapped.c | 34 +++++++++++++++++++++++++---------
+ 1 file changed, 25 insertions(+), 9 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -111,6 +111,13 @@ static bool check_pte(struct page_vma_ma
+ return pfn_in_hpage(pvmw->page, pfn);
+ }
+
++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
++{
++ pvmw->address = (pvmw->address + size) & ~(size - 1);
++ if (!pvmw->address)
++ pvmw->address = ULONG_MAX;
++}
++
+ /**
+ * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+ * @pvmw->address
+@@ -178,16 +185,22 @@ bool page_vma_mapped_walk(struct page_vm
+ if (pvmw->pte)
+ goto next_pte;
+ restart:
+- {
++ do {
+ pgd = pgd_offset(mm, pvmw->address);
+- if (!pgd_present(*pgd))
+- return false;
++ if (!pgd_present(*pgd)) {
++ step_forward(pvmw, PGDIR_SIZE);
++ continue;
++ }
+ p4d = p4d_offset(pgd, pvmw->address);
+- if (!p4d_present(*p4d))
+- return false;
++ if (!p4d_present(*p4d)) {
++ step_forward(pvmw, P4D_SIZE);
++ continue;
++ }
+ pud = pud_offset(p4d, pvmw->address);
+- if (!pud_present(*pud))
+- return false;
++ if (!pud_present(*pud)) {
++ step_forward(pvmw, PUD_SIZE);
++ continue;
++ }
+
+ pvmw->pmd = pmd_offset(pud, pvmw->address);
+ /*
+@@ -234,7 +247,8 @@ restart:
+
+ spin_unlock(ptl);
+ }
+- return false;
++ step_forward(pvmw, PMD_SIZE);
++ continue;
+ }
+ if (!map_pte(pvmw))
+ goto next_pte;
+@@ -264,7 +278,9 @@ next_pte:
+ spin_lock(pvmw->ptl);
+ }
+ goto this_pte;
+- }
++ } while (pvmw->address < end);
++
++ return false;
+ }
+
+ /**
-From foo@baz Mon Jun 28 01:37:10 PM CEST 2021
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:23:56 -0700
Subject: mm/thp: fix vma_address() if virtual address below file offset
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
-@@ -1848,6 +1846,7 @@ static void rmap_walk_anon(struct page *
+@@ -1843,6 +1841,7 @@ static void rmap_walk_anon(struct page *
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
-@@ -1902,6 +1901,7 @@ static void rmap_walk_file(struct page *
+@@ -1897,6 +1896,7 @@ static void rmap_walk_file(struct page *
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
--- /dev/null
+From 3b77e8c8cde581dadab9a0f1543a347e24315f11 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Tue, 15 Jun 2021 18:23:49 -0700
+Subject: mm/thp: make is_huge_zero_pmd() safe and quicker
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 3b77e8c8cde581dadab9a0f1543a347e24315f11 upstream.
+
+Most callers of is_huge_zero_pmd() supply a pmd already verified
+present; but a few (notably zap_huge_pmd()) do not - it might be a pmd
+migration entry, in which the pfn is encoded differently from a present
+pmd: which might pass the is_huge_zero_pmd() test (though not on x86,
+since L1TF forced us to protect against that); or perhaps even crash in
+pmd_page() applied to a swap-like entry.
+
+Make it safe by adding pmd_present() check into is_huge_zero_pmd()
+itself; and make it quicker by saving huge_zero_pfn, so that
+is_huge_zero_pmd() will not need to do that pmd_page() lookup each time.
+
+__split_huge_pmd_locked() checked pmd_trans_huge() before: that worked,
+but is unnecessary now that is_huge_zero_pmd() checks present.
+
+Link: https://lkml.kernel.org/r/21ea9ca-a1f5-8b90-5e88-95fb1c49bbfa@google.com
+Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jue Wang <juew@google.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/huge_mm.h | 8 +++++++-
+ mm/huge_memory.c | 5 ++++-
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -259,6 +259,7 @@ struct page *follow_devmap_pud(struct vm
+ extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+
+ extern struct page *huge_zero_page;
++extern unsigned long huge_zero_pfn;
+
+ static inline bool is_huge_zero_page(struct page *page)
+ {
+@@ -267,7 +268,7 @@ static inline bool is_huge_zero_page(str
+
+ static inline bool is_huge_zero_pmd(pmd_t pmd)
+ {
+- return is_huge_zero_page(pmd_page(pmd));
++ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
+ }
+
+ static inline bool is_huge_zero_pud(pud_t pud)
+@@ -397,6 +398,11 @@ static inline bool is_huge_zero_page(str
+ {
+ return false;
+ }
++
++static inline bool is_huge_zero_pmd(pmd_t pmd)
++{
++ return false;
++}
+
+ static inline bool is_huge_zero_pud(pud_t pud)
+ {
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -61,6 +61,7 @@ static struct shrinker deferred_split_sh
+
+ static atomic_t huge_zero_refcount;
+ struct page *huge_zero_page __read_mostly;
++unsigned long huge_zero_pfn __read_mostly = ~0UL;
+
+ bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+ {
+@@ -97,6 +98,7 @@ retry:
+ __free_pages(zero_page, compound_order(zero_page));
+ goto retry;
+ }
++ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
+
+ /* We take additional reference here. It will be put back by shrinker */
+ atomic_set(&huge_zero_refcount, 2);
+@@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_pa
+ if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
+ struct page *zero_page = xchg(&huge_zero_page, NULL);
+ BUG_ON(zero_page == NULL);
++ WRITE_ONCE(huge_zero_pfn, ~0UL);
+ __free_pages(zero_page, compound_order(zero_page));
+ return HPAGE_PMD_NR;
+ }
+@@ -2182,7 +2185,7 @@ static void __split_huge_pmd_locked(stru
+ return;
+ }
+
+- if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
++ if (is_huge_zero_pmd(*pmd)) {
+ /*
+ * FIXME: Do we want to invalidate secondary mmu by calling
+ * mmu_notifier_invalidate_range() see comments below inside
-From foo@baz Mon Jun 28 01:37:10 PM CEST 2021
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
From: Yang Shi <shy828301@gmail.com>
Date: Tue, 15 Jun 2021 18:24:07 -0700
Subject: mm: thp: replace DEBUG_VM BUG with VM_WARN when unmap fails for split
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
-@@ -2450,15 +2450,15 @@ static void unmap_page(struct page *page
+@@ -2462,15 +2462,15 @@ static void unmap_page(struct page *page
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
}
static void remap_page(struct page *page)
-@@ -2737,7 +2737,7 @@ int split_huge_page_to_list(struct page
+@@ -2749,7 +2749,7 @@ int split_huge_page_to_list(struct page
struct deferred_split *ds_queue = get_deferred_split_queue(page);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
bool mlocked;
unsigned long flags;
pgoff_t end;
-@@ -2799,7 +2799,6 @@ int split_huge_page_to_list(struct page
+@@ -2811,7 +2811,6 @@ int split_huge_page_to_list(struct page
mlocked = PageMlocked(page);
unmap_page(head);
/* Make sure the page is not on per-CPU pagevec as it takes pin */
if (mlocked)
-@@ -2822,9 +2821,7 @@ int split_huge_page_to_list(struct page
+@@ -2834,9 +2833,7 @@ int split_huge_page_to_list(struct page
/* Prevent deferred_split_scan() touching ->_refcount */
spin_lock(&ds_queue->split_queue_lock);
if (!list_empty(page_deferred_list(head))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
-@@ -2845,16 +2842,9 @@ int split_huge_page_to_list(struct page
+@@ -2857,16 +2854,9 @@ int split_huge_page_to_list(struct page
} else
ret = 0;
} else {
-From foo@baz Mon Jun 28 01:37:10 PM CEST 2021
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:23:53 -0700
Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting
#ifdef CONFIG_MMU
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
-@@ -2449,7 +2449,7 @@ void vma_adjust_trans_huge(struct vm_are
+@@ -2461,7 +2461,7 @@ void vma_adjust_trans_huge(struct vm_are
static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+ return !page_mapcount(page);
}
- static int page_not_mapped(struct page *page)
+ /**
-From foo@baz Mon Jun 28 01:37:10 PM CEST 2021
+From foo@baz Mon Jun 28 01:55:13 PM CEST 2021
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:24:03 -0700
Subject: mm/thp: unmap_mapping_page() to fix THP truncate_cleanup_page()
--- /dev/null
+From ffc90cbb2970ab88b66ea51dd580469eede57b67 Mon Sep 17 00:00:00 2001
+From: Xu Yu <xuyu@linux.alibaba.com>
+Date: Tue, 15 Jun 2021 18:23:42 -0700
+Subject: mm, thp: use head page in __migration_entry_wait()
+
+From: Xu Yu <xuyu@linux.alibaba.com>
+
+commit ffc90cbb2970ab88b66ea51dd580469eede57b67 upstream.
+
+We noticed that a hung task can happen in a corner-case but practical
+scenario when CONFIG_PREEMPT_NONE is enabled, as follows.
+
+Process 0                       Process 1                     Process 2..Inf
+split_huge_page_to_list
+    unmap_page
+        split_huge_pmd_address
+                                __migration_entry_wait(head)
+                                                              __migration_entry_wait(tail)
+    remap_page (roll back)
+        remove_migration_ptes
+            rmap_walk_anon
+                cond_resched
+
+Where __migration_entry_wait(tail) occurs in kernel space, e.g. from
+copy_to_user in fstat, it immediately faults again without
+rescheduling and thus occupies the cpu fully.
+
+When there are too many processes performing __migration_entry_wait on
+the tail page, remap_page will never be done after cond_resched.
+
+This makes __migration_entry_wait operate on the compound head page,
+thus waiting for remap_page to complete, whether the THP is split
+successfully or rolled back.
+
+Note that put_and_wait_on_page_locked helps to drop the page reference
+acquired with get_page_unless_zero, as soon as the page is on the wait
+queue, before actually waiting. So splitting the THP is only prevented
+for a brief interval.
+
+Link: https://lkml.kernel.org/r/b9836c1dd522e903891760af9f0c86a2cce987eb.1623144009.git.xuyu@linux.alibaba.com
+Fixes: ba98828088ad ("thp: add option to setup migration entries during PMD split")
+Suggested-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Gang Deng <gavin.dg@linux.alibaba.com>
+Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -321,6 +321,7 @@ void __migration_entry_wait(struct mm_st
+ goto out;
+
+ page = migration_entry_to_page(entry);
++ page = compound_head(page);
+
+ /*
+ * Once page cache replacement of page migration started, page_count
i2c-robotfuzz-osif-fix-control-request-directions.patch
kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch
kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
+mm-add-vm_warn_on_once_page-macro.patch
+mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch
+mm-rmap-use-page_not_mapped-in-try_to_unmap.patch
+mm-thp-use-head-page-in-__migration_entry_wait.patch
+mm-thp-fix-__split_huge_pmd_locked-on-shmem-migratio.patch
+mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch
mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch
mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch
+mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch
mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch
mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch
+mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch
+mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch
+mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch
+mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch
+mm-page_vma_mapped_walk-crossing-page-table-boundary.patch
+mm-page_vma_mapped_walk-add-a-level-of-indentation.patch
+mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch
+mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch
+mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch
+mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch