git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.14
author Sasha Levin <sashal@kernel.org>
Thu, 1 Jul 2021 11:27:20 +0000 (07:27 -0400)
committer Sasha Levin <sashal@kernel.org>
Thu, 1 Jul 2021 11:27:20 +0000 (07:27 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
20 files changed:
queue-4.14/include-linux-mmdebug.h-make-vm_warn-non-rvals.patch [new file with mode: 0644]
queue-4.14/mm-add-vm_warn_on_once_page-macro.patch [new file with mode: 0644]
queue-4.14/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch [new file with mode: 0644]
queue-4.14/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch [new file with mode: 0644]
queue-4.14/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch [new file with mode: 0644]
queue-4.14/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch [new file with mode: 0644]
queue-4.14/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch [new file with mode: 0644]
queue-4.14/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch [new file with mode: 0644]
queue-4.14/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch [new file with mode: 0644]
queue-4.14/mm-thp-fix-vma_address-if-virtual-address-below-file.patch [new file with mode: 0644]
queue-4.14/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch [new file with mode: 0644]
queue-4.14/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch [new file with mode: 0644]
queue-4.14/series [new file with mode: 0644]

diff --git a/queue-4.14/include-linux-mmdebug.h-make-vm_warn-non-rvals.patch b/queue-4.14/include-linux-mmdebug.h-make-vm_warn-non-rvals.patch
new file mode 100644 (file)
index 0000000..60aa03b
--- /dev/null
@@ -0,0 +1,55 @@
+From 03af020be6c1b36fcf922898f75111ec55ea13e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Apr 2018 16:25:30 -0700
+Subject: include/linux/mmdebug.h: make VM_WARN* non-rvals
+
+From: Michal Hocko <mhocko@kernel.org>
+
+[ Upstream commit 91241681c62a5a690c88eb2aca027f094125eaac ]
+
+At present the construct
+
+       if (VM_WARN(...))
+
+will compile OK with CONFIG_DEBUG_VM=y and will fail with
+CONFIG_DEBUG_VM=n.  The reason is that VM_{WARN,BUG}* have always been
+special wrt.  {WARN/BUG}* and never generate any code when DEBUG_VM is
+disabled.  So we cannot really use it in conditionals.
+
+We considered changing things so that this construct works in both cases
+but that might cause unwanted code generation with CONFIG_DEBUG_VM=n.
+It is safer and simpler to make the build fail in both cases.
+
+[akpm@linux-foundation.org: changelog]
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mmdebug.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
+index 57b0030d3800..2ad72d2c8cc5 100644
+--- a/include/linux/mmdebug.h
++++ b/include/linux/mmdebug.h
+@@ -37,10 +37,10 @@ void dump_mm(const struct mm_struct *mm);
+                       BUG();                                          \
+               }                                                       \
+       } while (0)
+-#define VM_WARN_ON(cond) WARN_ON(cond)
+-#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
+-#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
+-#define VM_WARN(cond, format...) WARN(cond, format)
++#define VM_WARN_ON(cond) (void)WARN_ON(cond)
++#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
++#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
++#define VM_WARN(cond, format...) (void)WARN(cond, format)
+ #else
+ #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
+ #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+-- 
+2.30.2
+
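
The rvalue problem described above is easy to reproduce outside the kernel. Below is a
minimal userspace sketch, assuming GCC statement expressions; MY_WARN_ON is a made-up
stand-in for the kernel's WARN_ON, not its real definition. It shows why the added
(void) cast stops VM_WARN_ON() from being usable as an rvalue in either configuration:

#include <stdio.h>

/* Pretend CONFIG_DEBUG_VM=y: MY_WARN_ON stands in for the kernel's WARN_ON
 * and, like it, yields the (boolean) condition as its value. */
#define MY_WARN_ON(cond) ({                                     \
        int __ret_warn = !!(cond);                              \
        if (__ret_warn)                                         \
                fprintf(stderr, "WARNING: %s\n", #cond);        \
        __ret_warn;                                             \
})

#define VM_WARN_ON_OLD(cond)    MY_WARN_ON(cond)        /* usable as an rvalue */
#define VM_WARN_ON_NEW(cond)    (void)MY_WARN_ON(cond)  /* statement only */

int main(void)
{
        int x = 2;

        VM_WARN_ON_NEW(x != 2);                 /* fine as a plain statement */
        if (VM_WARN_ON_OLD(x != 2))             /* only the old form builds here */
                return 1;
        /*
         * if (VM_WARN_ON_NEW(x != 2)) ...      would now fail to compile, matching
         * what the CONFIG_DEBUG_VM=n variant (BUILD_BUG_ON_INVALID) always did.
         */
        return 0;
}
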
diff --git a/queue-4.14/mm-add-vm_warn_on_once_page-macro.patch b/queue-4.14/mm-add-vm_warn_on_once_page-macro.patch
new file mode 100644 (file)
index 0000000..0fc1ddc
--- /dev/null
@@ -0,0 +1,64 @@
+From 8dded92b9c871ff31bc86a6988cb2c658a781732 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Dec 2020 14:01:31 -0800
+Subject: mm: add VM_WARN_ON_ONCE_PAGE() macro
+
+From: Alex Shi <alex.shi@linux.alibaba.com>
+
+[ Upstream commit a4055888629bc0467d12d912cd7c90acdf3d9b12 part ]
+
+Add VM_WARN_ON_ONCE_PAGE() macro.
+
+Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com
+Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: original commit was titled
+mm/memcg: warning on !memcg after readahead page charged
+which included uses of this macro in mm/memcontrol.c: here omitted.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mmdebug.h | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
+index 2ad72d2c8cc5..5d0767cb424a 100644
+--- a/include/linux/mmdebug.h
++++ b/include/linux/mmdebug.h
+@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
+                       BUG();                                          \
+               }                                                       \
+       } while (0)
++#define VM_WARN_ON_ONCE_PAGE(cond, page)      ({                      \
++      static bool __section(".data.once") __warned;                   \
++      int __ret_warn_once = !!(cond);                                 \
++                                                                      \
++      if (unlikely(__ret_warn_once && !__warned)) {                   \
++              dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
++              __warned = true;                                        \
++              WARN_ON(1);                                             \
++      }                                                               \
++      unlikely(__ret_warn_once);                                      \
++})
++
+ #define VM_WARN_ON(cond) (void)WARN_ON(cond)
+ #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+ #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
+@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
+ #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
+ #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
++#define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #endif
+-- 
+2.30.2
+
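
For reference, the "warn only once" pattern the new macro follows can be sketched in
plain C; WARN_ON_ONCE_DUMP and its "tag" argument are invented here for illustration
and are not kernel API:

#include <stdio.h>
#include <stdbool.h>

/* a static flag local to the expansion site suppresses repeat dumps while
 * still returning the condition so callers may test it */
#define WARN_ON_ONCE_DUMP(cond, tag) ({                                 \
        static bool __warned;                                           \
        bool __ret_warn_once = (cond);                                  \
                                                                        \
        if (__ret_warn_once && !__warned) {                             \
                __warned = true;                                        \
                fprintf(stderr, "dump %s: %s\n", (tag), #cond);         \
        }                                                               \
        __ret_warn_once;                                                \
})

int main(void)
{
        for (int i = 0; i < 3; i++)
                if (WARN_ON_ONCE_DUMP(i >= 0, "page"))
                        continue;       /* dump printed on the first hit only */
        return 0;
}
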
diff --git a/queue-4.14/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch b/queue-4.14/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch
new file mode 100644 (file)
index 0000000..f7867df
--- /dev/null
@@ -0,0 +1,165 @@
+From 282ef53c0d02a97151131fc9573c122a85de2b59 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:52 -0700
+Subject: mm, futex: fix shared futex pgoff on shmem huge page
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit fe19bd3dae3d15d2fbfdb3de8839a6ea0fe94264 ]
+
+If more than one futex is placed on a shmem huge page, it can happen
+that waking the second wakes the first instead, and leaves the second
+waiting: the key's shared.pgoff is wrong.
+
+When 3.11 commit 13d60f4b6ab5 ("futex: Take hugepages into account when
+generating futex_key") was added, the only shared huge pages came from hugetlbfs,
+and the code added to deal with its exceptional page->index was put into
+hugetlb source.  Then that was missed when 4.8 added shmem huge pages.
+
+page_to_pgoff() is what others use for this nowadays: except that, as
+currently written, it gives the right answer on hugetlbfs head, but
+nonsense on hugetlbfs tails.  Fix that by calling hugetlbfs-specific
+hugetlb_basepage_index() on PageHuge tails as well as on head.
+
+Yes, it's unconventional to declare hugetlb_basepage_index() there in
+pagemap.h, rather than in hugetlb.h; but I do not expect anything but
+page_to_pgoff() ever to need it.
+
+[akpm@linux-foundation.org: give hugetlb_basepage_index() prototype the correct scope]
+
+Link: https://lkml.kernel.org/r/b17d946b-d09-326e-b42a-52884c36df32@google.com
+Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
+Reported-by: Neel Natu <neelnatu@google.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Zhang Yi <wetpzy@gmail.com>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: leave redundant #include <linux/hugetlb.h>
+in kernel/futex.c, to avoid conflict over the header files included.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/hugetlb.h | 16 ----------------
+ include/linux/pagemap.h | 13 +++++++------
+ kernel/futex.c          |  2 +-
+ mm/hugetlb.c            |  5 +----
+ 4 files changed, 9 insertions(+), 27 deletions(-)
+
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index fe0ec0a29db7..d2b5cc8ce54f 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -467,17 +467,6 @@ static inline int hstate_index(struct hstate *h)
+       return h - hstates;
+ }
+-pgoff_t __basepage_index(struct page *page);
+-
+-/* Return page->index in PAGE_SIZE units */
+-static inline pgoff_t basepage_index(struct page *page)
+-{
+-      if (!PageCompound(page))
+-              return page->index;
+-
+-      return __basepage_index(page);
+-}
+-
+ extern int dissolve_free_huge_page(struct page *page);
+ extern int dissolve_free_huge_pages(unsigned long start_pfn,
+                                   unsigned long end_pfn);
+@@ -572,11 +561,6 @@ static inline int hstate_index(struct hstate *h)
+       return 0;
+ }
+-static inline pgoff_t basepage_index(struct page *page)
+-{
+-      return page->index;
+-}
+-
+ static inline int dissolve_free_huge_page(struct page *page)
+ {
+       return 0;
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index e08b5339023c..84c7fc7f63e7 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -399,7 +399,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
+ }
+ /*
+- * Get index of the page with in radix-tree
++ * Get index of the page within radix-tree (but not for hugetlb pages).
+  * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
+  */
+ static inline pgoff_t page_to_index(struct page *page)
+@@ -418,15 +418,16 @@ static inline pgoff_t page_to_index(struct page *page)
+       return pgoff;
+ }
++extern pgoff_t hugetlb_basepage_index(struct page *page);
++
+ /*
+- * Get the offset in PAGE_SIZE.
+- * (TODO: hugepage should have ->index in PAGE_SIZE)
++ * Get the offset in PAGE_SIZE (even for hugetlb pages).
++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
+  */
+ static inline pgoff_t page_to_pgoff(struct page *page)
+ {
+-      if (unlikely(PageHeadHuge(page)))
+-              return page->index << compound_order(page);
+-
++      if (unlikely(PageHuge(page)))
++              return hugetlb_basepage_index(page);
+       return page_to_index(page);
+ }
+diff --git a/kernel/futex.c b/kernel/futex.c
+index af1d9a993988..e282c083df59 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -719,7 +719,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
+               key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+               key->shared.i_seq = get_inode_sequence_number(inode);
+-              key->shared.pgoff = basepage_index(tail);
++              key->shared.pgoff = page_to_pgoff(tail);
+               rcu_read_unlock();
+       }
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 0dc181290d1f..c765fd01f0aa 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1403,15 +1403,12 @@ int PageHeadHuge(struct page *page_head)
+       return get_compound_page_dtor(page_head) == free_huge_page;
+ }
+-pgoff_t __basepage_index(struct page *page)
++pgoff_t hugetlb_basepage_index(struct page *page)
+ {
+       struct page *page_head = compound_head(page);
+       pgoff_t index = page_index(page_head);
+       unsigned long compound_idx;
+-      if (!PageHuge(page_head))
+-              return page_index(page);
+-
+       if (compound_order(page_head) >= MAX_ORDER)
+               compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
+       else
+-- 
+2.30.2
+
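
The pgoff arithmetic the fix depends on can be shown standalone. This is a sketch
only: HPAGE_ORDER and the 2MB huge page size are x86-64 assumptions, and
basepage_index() here merely mirrors what hugetlb_basepage_index() computes for a
tail page; it is not the kernel function:

#include <stdio.h>

#define HPAGE_ORDER     9U              /* 512 base pages per 2MB huge page */

/*
 * hugetlbfs stores the head page's ->index in huge-page-sized units, so the
 * futex key's pgoff for a tail must be rebuilt from the head's index plus the
 * tail's offset inside the compound page.
 */
static unsigned long basepage_index(unsigned long head_index,
                                    unsigned long compound_idx)
{
        return (head_index << HPAGE_ORDER) + compound_idx;
}

int main(void)
{
        /* a futex on the 3rd 4K page of the 2nd huge page of the mapping */
        printf("shared.pgoff = %lu\n", basepage_index(1, 2));   /* 514 */
        return 0;
}
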
diff --git a/queue-4.14/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch b/queue-4.14/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch
new file mode 100644 (file)
index 0000000..e9a3d4b
--- /dev/null
@@ -0,0 +1,160 @@
+From 7d740deb578880c6db6feb6b8f87e7816a295011 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:17 -0700
+Subject: mm: page_vma_mapped_walk(): add a level of indentation
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit b3807a91aca7d21c05d5790612e49969117a72b9 ]
+
+page_vma_mapped_walk() cleanup: add a level of indentation to much of
+the body, making no functional change in this commit, but reducing the
+later diff when this is all converted to a loop.
+
+[hughd@google.com: page_vma_mapped_walk(): add a level of indentation fix]
+  Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com
+
+Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 105 ++++++++++++++++++++++---------------------
+ 1 file changed, 55 insertions(+), 50 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 2463ba78959b..911c6dbe85f9 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -168,62 +168,67 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       if (pvmw->pte)
+               goto next_pte;
+ restart:
+-      pgd = pgd_offset(mm, pvmw->address);
+-      if (!pgd_present(*pgd))
+-              return false;
+-      p4d = p4d_offset(pgd, pvmw->address);
+-      if (!p4d_present(*p4d))
+-              return false;
+-      pud = pud_offset(p4d, pvmw->address);
+-      if (!pud_present(*pud))
+-              return false;
+-      pvmw->pmd = pmd_offset(pud, pvmw->address);
+-      /*
+-       * Make sure the pmd value isn't cached in a register by the
+-       * compiler and used as a stale value after we've observed a
+-       * subsequent update.
+-       */
+-      pmde = READ_ONCE(*pvmw->pmd);
+-      if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+-              pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+-              pmde = *pvmw->pmd;
+-              if (likely(pmd_trans_huge(pmde))) {
+-                      if (pvmw->flags & PVMW_MIGRATION)
+-                              return not_found(pvmw);
+-                      if (pmd_page(pmde) != page)
+-                              return not_found(pvmw);
+-                      return true;
+-              }
+-              if (!pmd_present(pmde)) {
+-                      swp_entry_t entry;
++      {
++              pgd = pgd_offset(mm, pvmw->address);
++              if (!pgd_present(*pgd))
++                      return false;
++              p4d = p4d_offset(pgd, pvmw->address);
++              if (!p4d_present(*p4d))
++                      return false;
++              pud = pud_offset(p4d, pvmw->address);
++              if (!pud_present(*pud))
++                      return false;
+-                      if (!thp_migration_supported() ||
+-                          !(pvmw->flags & PVMW_MIGRATION))
+-                              return not_found(pvmw);
+-                      entry = pmd_to_swp_entry(pmde);
+-                      if (!is_migration_entry(entry) ||
+-                          migration_entry_to_page(entry) != page)
+-                              return not_found(pvmw);
+-                      return true;
+-              }
+-              /* THP pmd was split under us: handle on pte level */
+-              spin_unlock(pvmw->ptl);
+-              pvmw->ptl = NULL;
+-      } else if (!pmd_present(pmde)) {
++              pvmw->pmd = pmd_offset(pud, pvmw->address);
+               /*
+-               * If PVMW_SYNC, take and drop THP pmd lock so that we
+-               * cannot return prematurely, while zap_huge_pmd() has
+-               * cleared *pmd but not decremented compound_mapcount().
++               * Make sure the pmd value isn't cached in a register by the
++               * compiler and used as a stale value after we've observed a
++               * subsequent update.
+                */
+-              if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
+-                      spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++              pmde = READ_ONCE(*pvmw->pmd);
++
++              if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
++                      pvmw->ptl = pmd_lock(mm, pvmw->pmd);
++                      pmde = *pvmw->pmd;
++                      if (likely(pmd_trans_huge(pmde))) {
++                              if (pvmw->flags & PVMW_MIGRATION)
++                                      return not_found(pvmw);
++                              if (pmd_page(pmde) != page)
++                                      return not_found(pvmw);
++                              return true;
++                      }
++                      if (!pmd_present(pmde)) {
++                              swp_entry_t entry;
++
++                              if (!thp_migration_supported() ||
++                                  !(pvmw->flags & PVMW_MIGRATION))
++                                      return not_found(pvmw);
++                              entry = pmd_to_swp_entry(pmde);
++                              if (!is_migration_entry(entry) ||
++                                  migration_entry_to_page(entry) != page)
++                                      return not_found(pvmw);
++                              return true;
++                      }
++                      /* THP pmd was split under us: handle on pte level */
++                      spin_unlock(pvmw->ptl);
++                      pvmw->ptl = NULL;
++              } else if (!pmd_present(pmde)) {
++                      /*
++                       * If PVMW_SYNC, take and drop THP pmd lock so that we
++                       * cannot return prematurely, while zap_huge_pmd() has
++                       * cleared *pmd but not decremented compound_mapcount().
++                       */
++                      if ((pvmw->flags & PVMW_SYNC) &&
++                          PageTransCompound(page)) {
++                              spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+-                      spin_unlock(ptl);
++                              spin_unlock(ptl);
++                      }
++                      return false;
+               }
+-              return false;
++              if (!map_pte(pvmw))
++                      goto next_pte;
+       }
+-      if (!map_pte(pvmw))
+-              goto next_pte;
+       while (1) {
+               unsigned long end;
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch b/queue-4.14/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch
new file mode 100644 (file)
index 0000000..6e3bcf6
--- /dev/null
@@ -0,0 +1,61 @@
+From a3934d9ec623b8dea7f1ecc86c8aac617b990cd8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:14 -0700
+Subject: mm: page_vma_mapped_walk(): crossing page table boundary
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 448282487483d6fa5b2eeeafaa0acc681e544a9c ]
+
+page_vma_mapped_walk() cleanup: adjust the test for crossing page table
+boundary - I believe pvmw->address is always page-aligned, but nothing
+else here assumed that; and remember to reset pvmw->pte to NULL after
+unmapping the page table, though I never saw any bug from that.
+
+Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 92d7f574b8ab..2463ba78959b 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -239,16 +239,16 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       if (pvmw->address >= end)
+                               return not_found(pvmw);
+                       /* Did we cross page table boundary? */
+-                      if (pvmw->address % PMD_SIZE == 0) {
+-                              pte_unmap(pvmw->pte);
++                      if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
+                               if (pvmw->ptl) {
+                                       spin_unlock(pvmw->ptl);
+                                       pvmw->ptl = NULL;
+                               }
++                              pte_unmap(pvmw->pte);
++                              pvmw->pte = NULL;
+                               goto restart;
+-                      } else {
+-                              pvmw->pte++;
+                       }
++                      pvmw->pte++;
+               } while (pte_none(*pvmw->pte));
+               if (!pvmw->ptl) {
+-- 
+2.30.2
+
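
The old and new boundary tests differ only when the address carries sub-page bits,
which the old code silently assumed never happens. A standalone sketch, with
PAGE_SIZE and PMD_SIZE values assumed for x86-64:

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define PMD_SIZE        (512 * PAGE_SIZE)       /* 2MB on x86-64 */

int main(void)
{
        unsigned long aligned = 4 * PMD_SIZE;       /* page-aligned, on a PMD boundary */
        unsigned long odd     = 4 * PMD_SIZE + 8;   /* hypothetical sub-page offset */

        /* old test: demands full PMD alignment, including sub-page bits */
        printf("%% PMD_SIZE == 0 : %d %d\n",
               aligned % PMD_SIZE == 0, odd % PMD_SIZE == 0);

        /* new test: only asks "are we in the first page of this PMD?" */
        printf("& (PMD-PAGE) == 0: %d %d\n",
               (aligned & (PMD_SIZE - PAGE_SIZE)) == 0,
               (odd & (PMD_SIZE - PAGE_SIZE)) == 0);
        return 0;
}
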
diff --git a/queue-4.14/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch b/queue-4.14/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch
new file mode 100644 (file)
index 0000000..b5bc6a1
--- /dev/null
@@ -0,0 +1,68 @@
+From f7be95c11847d4399325bb317b16dc50daeb962a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:23 -0700
+Subject: mm: page_vma_mapped_walk(): get vma_address_end() earlier
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit a765c417d876cc635f628365ec9aa6f09470069a ]
+
+page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the
+start, rather than later at next_pte.
+
+It's a little unnecessary overhead on the first call, but makes for a
+simpler loop in the following commit.
+
+Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index f6c750539a6b..96d4c4738590 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -166,6 +166,15 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+               return true;
+       }
++      /*
++       * Seek to next pte only makes sense for THP.
++       * But more important than that optimization, is to filter out
++       * any PageKsm page: whose page->index misleads vma_address()
++       * and vma_address_end() to disaster.
++       */
++      end = PageTransCompound(page) ?
++              vma_address_end(page, pvmw->vma) :
++              pvmw->address + PAGE_SIZE;
+       if (pvmw->pte)
+               goto next_pte;
+ restart:
+@@ -233,10 +242,6 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+               if (check_pte(pvmw))
+                       return true;
+ next_pte:
+-              /* Seek to next pte only makes sense for THP */
+-              if (!PageTransHuge(page))
+-                      return not_found(pvmw);
+-              end = vma_address_end(page, pvmw->vma);
+               do {
+                       pvmw->address += PAGE_SIZE;
+                       if (pvmw->address >= end)
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch b/queue-4.14/mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch
new file mode 100644 (file)
index 0000000..748a2da
--- /dev/null
@@ -0,0 +1,80 @@
+From d4e20c7bea4b602253be8e27d034a307bc70aa51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:10 -0700
+Subject: mm: page_vma_mapped_walk(): prettify PVMW_MIGRATION block
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit e2e1d4076c77b3671cf8ce702535ae7dee3acf89 ]
+
+page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to
+follow the same "return not_found, return not_found, return true"
+pattern as the block above it (note: returning not_found there is never
+premature, since existence or prior existence of huge pmd guarantees
+good alignment).
+
+Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 30 ++++++++++++++----------------
+ 1 file changed, 14 insertions(+), 16 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 8a6af4007c7e..92d7f574b8ab 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -193,24 +193,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       if (pmd_page(pmde) != page)
+                               return not_found(pvmw);
+                       return true;
+-              } else if (!pmd_present(pmde)) {
+-                      if (thp_migration_supported()) {
+-                              if (!(pvmw->flags & PVMW_MIGRATION))
+-                                      return not_found(pvmw);
+-                              if (is_migration_entry(pmd_to_swp_entry(pmde))) {
+-                                      swp_entry_t entry = pmd_to_swp_entry(pmde);
++              }
++              if (!pmd_present(pmde)) {
++                      swp_entry_t entry;
+-                                      if (migration_entry_to_page(entry) != page)
+-                                              return not_found(pvmw);
+-                                      return true;
+-                              }
+-                      }
+-                      return not_found(pvmw);
+-              } else {
+-                      /* THP pmd was split under us: handle on pte level */
+-                      spin_unlock(pvmw->ptl);
+-                      pvmw->ptl = NULL;
++                      if (!thp_migration_supported() ||
++                          !(pvmw->flags & PVMW_MIGRATION))
++                              return not_found(pvmw);
++                      entry = pmd_to_swp_entry(pmde);
++                      if (!is_migration_entry(entry) ||
++                          migration_entry_to_page(entry) != page)
++                              return not_found(pvmw);
++                      return true;
+               }
++              /* THP pmd was split under us: handle on pte level */
++              spin_unlock(pvmw->ptl);
++              pvmw->ptl = NULL;
+       } else if (!pmd_present(pmde)) {
+               /*
+                * If PVMW_SYNC, take and drop THP pmd lock so that we
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch b/queue-4.14/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch
new file mode 100644 (file)
index 0000000..e9b8967
--- /dev/null
@@ -0,0 +1,73 @@
+From e91dff2640beec5309275d7933848bac8c32c101 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:04 -0700
+Subject: mm: page_vma_mapped_walk(): settle PageHuge on entry
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 6d0fd5987657cb0c9756ce684e3a74c0f6351728 ]
+
+page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of
+the way at the start, so no need to worry about it later.
+
+Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 3cff784019c1..bdb63aafc737 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -147,10 +147,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       if (pvmw->pmd && !pvmw->pte)
+               return not_found(pvmw);
+-      if (pvmw->pte)
+-              goto next_pte;
+-
+       if (unlikely(PageHuge(page))) {
++              /* The only possible mapping was handled on last iteration */
++              if (pvmw->pte)
++                      return not_found(pvmw);
++
+               /* when pud is not present, pte will be NULL */
+               pvmw->pte = huge_pte_offset(mm, pvmw->address,
+                                           PAGE_SIZE << compound_order(page));
+@@ -163,6 +164,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       return not_found(pvmw);
+               return true;
+       }
++
++      if (pvmw->pte)
++              goto next_pte;
+ restart:
+       pgd = pgd_offset(mm, pvmw->address);
+       if (!pgd_present(*pgd))
+@@ -228,7 +232,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       return true;
+ next_pte:
+               /* Seek to next pte only makes sense for THP */
+-              if (!PageTransHuge(page) || PageHuge(page))
++              if (!PageTransHuge(page))
+                       return not_found(pvmw);
+               end = vma_address_end(page, pvmw->vma);
+               do {
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch b/queue-4.14/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch
new file mode 100644 (file)
index 0000000..5d0c949
--- /dev/null
@@ -0,0 +1,66 @@
+From 2aab14fa4f5c330a41c5dfe18bc83d18b6a02063 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:20 -0700
+Subject: mm: page_vma_mapped_walk(): use goto instead of while (1)
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 474466301dfd8b39a10c01db740645f3f7ae9a28 ]
+
+page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte,
+and use "goto this_pte", in place of the "while (1)" loop at the end.
+
+Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 911c6dbe85f9..f6c750539a6b 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -138,6 +138,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+ {
+       struct mm_struct *mm = pvmw->vma->vm_mm;
+       struct page *page = pvmw->page;
++      unsigned long end;
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+@@ -228,10 +229,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+               }
+               if (!map_pte(pvmw))
+                       goto next_pte;
+-      }
+-      while (1) {
+-              unsigned long end;
+-
++this_pte:
+               if (check_pte(pvmw))
+                       return true;
+ next_pte:
+@@ -260,6 +258,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+                       spin_lock(pvmw->ptl);
+               }
++              goto this_pte;
+       }
+ }
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch b/queue-4.14/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch
new file mode 100644 (file)
index 0000000..25981d4
--- /dev/null
@@ -0,0 +1,78 @@
+From aa84a964e28e7feed6e5191876ad4526c9fe6aea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:01 -0700
+Subject: mm: page_vma_mapped_walk(): use page for pvmw->page
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit f003c03bd29e6f46fef1b9a8e8d636ac732286d5 ]
+
+Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes".
+
+I've marked all of these for stable: many are merely cleanups, but I
+think they are much better before the main fix than after.
+
+This patch (of 11):
+
+page_vma_mapped_walk() cleanup: sometimes the local copy of pvmw->page
+was used, sometimes pvmw->page itself: use the local copy "page"
+throughout.
+
+Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com
+Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Alistair Popple <apopple@nvidia.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 340207ba3743..3cff784019c1 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -150,7 +150,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       if (pvmw->pte)
+               goto next_pte;
+-      if (unlikely(PageHuge(pvmw->page))) {
++      if (unlikely(PageHuge(page))) {
+               /* when pud is not present, pte will be NULL */
+               pvmw->pte = huge_pte_offset(mm, pvmw->address,
+                                           PAGE_SIZE << compound_order(page));
+@@ -212,8 +212,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                * cannot return prematurely, while zap_huge_pmd() has
+                * cleared *pmd but not decremented compound_mapcount().
+                */
+-              if ((pvmw->flags & PVMW_SYNC) &&
+-                  PageTransCompound(pvmw->page)) {
++              if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
+                       spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+                       spin_unlock(ptl);
+@@ -229,9 +228,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       return true;
+ next_pte:
+               /* Seek to next pte only makes sense for THP */
+-              if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
++              if (!PageTransHuge(page) || PageHuge(page))
+                       return not_found(pvmw);
+-              end = vma_address_end(pvmw->page, pvmw->vma);
++              end = vma_address_end(page, pvmw->vma);
+               do {
+                       pvmw->address += PAGE_SIZE;
+                       if (pvmw->address >= end)
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch b/queue-4.14/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch
new file mode 100644 (file)
index 0000000..134696e
--- /dev/null
@@ -0,0 +1,63 @@
+From f60a41a44158e5e3a7826a9da9dc2a416dd2f07b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:07 -0700
+Subject: mm: page_vma_mapped_walk(): use pmde for *pvmw->pmd
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 3306d3119ceacc43ea8b141a73e21fea68eec30c ]
+
+page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then
+use it in subsequent tests, instead of repeatedly dereferencing pointer.
+
+Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index bdb63aafc737..8a6af4007c7e 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -186,18 +186,19 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       pmde = READ_ONCE(*pvmw->pmd);
+       if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+               pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+-              if (likely(pmd_trans_huge(*pvmw->pmd))) {
++              pmde = *pvmw->pmd;
++              if (likely(pmd_trans_huge(pmde))) {
+                       if (pvmw->flags & PVMW_MIGRATION)
+                               return not_found(pvmw);
+-                      if (pmd_page(*pvmw->pmd) != page)
++                      if (pmd_page(pmde) != page)
+                               return not_found(pvmw);
+                       return true;
+-              } else if (!pmd_present(*pvmw->pmd)) {
++              } else if (!pmd_present(pmde)) {
+                       if (thp_migration_supported()) {
+                               if (!(pvmw->flags & PVMW_MIGRATION))
+                                       return not_found(pvmw);
+-                              if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
+-                                      swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
++                              if (is_migration_entry(pmd_to_swp_entry(pmde))) {
++                                      swp_entry_t entry = pmd_to_swp_entry(pmde);
+                                       if (migration_entry_to_page(entry) != page)
+                                               return not_found(pvmw);
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch b/queue-4.14/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch
new file mode 100644 (file)
index 0000000..32c1817
--- /dev/null
@@ -0,0 +1,36 @@
+From 4b798265f1b86f30c79f18201638e845803ef085 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Feb 2021 17:17:56 -0800
+Subject: mm/rmap: remove unneeded semicolon in page_not_mapped()
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit e0af87ff7afcde2660be44302836d2d5618185af ]
+
+Remove extra semicolon without any functional change intended.
+
+Link: https://lkml.kernel.org/r/20210127093425.39640-1-linmiaohe@huawei.com
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/rmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 8bd2ddd8febd..e2506b6adb6a 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1671,7 +1671,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+ static int page_not_mapped(struct page *page)
+ {
+       return !page_mapped(page);
+-};
++}
+ /**
+  * try_to_munlock - try to munlock a page
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch b/queue-4.14/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch
new file mode 100644 (file)
index 0000000..51b4d11
--- /dev/null
@@ -0,0 +1,65 @@
+From f77a2f7ab2a465fd416dcd53d23326d776e13131 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Feb 2021 17:18:03 -0800
+Subject: mm/rmap: use page_not_mapped in try_to_unmap()
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit b7e188ec98b1644ff70a6d3624ea16aadc39f5e0 ]
+
+page_mapcount_is_zero() calculates accurately how many mappings a hugepage
+has in order to check against 0 only.  This is a waste of cpu time.  We
+can do this via page_not_mapped() to save some possible atomic_read
+cycles.  Remove the function page_mapcount_is_zero() as it's not used
+anymore and move page_not_mapped() above try_to_unmap() to avoid
+identifier undeclared compilation error.
+
+Link: https://lkml.kernel.org/r/20210130084904.35307-1-linmiaohe@huawei.com
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/rmap.c | 11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+diff --git a/mm/rmap.c b/mm/rmap.c
+index e2506b6adb6a..e6a556fec9d1 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1624,9 +1624,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
+       return is_vma_temporary_stack(vma);
+ }
+-static int page_mapcount_is_zero(struct page *page)
++static int page_not_mapped(struct page *page)
+ {
+-      return !total_mapcount(page);
++      return !page_mapped(page);
+ }
+ /**
+@@ -1644,7 +1644,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)flags,
+-              .done = page_mapcount_is_zero,
++              .done = page_not_mapped,
+               .anon_lock = page_lock_anon_vma_read,
+       };
+@@ -1668,11 +1668,6 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+       return !page_mapcount(page) ? true : false;
+ }
+-static int page_not_mapped(struct page *page)
+-{
+-      return !page_mapped(page);
+-}
+-
+ /**
+  * try_to_munlock - try to munlock a page
+  * @page: the page to be munlocked
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch b/queue-4.14/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
new file mode 100644 (file)
index 0000000..561f7f2
--- /dev/null
@@ -0,0 +1,53 @@
+From 4cd201ad0ceb1facb60176b5df0ddd52869812dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:30 -0700
+Subject: mm/thp: another PVMW_SYNC fix in page_vma_mapped_walk()
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit a7a69d8ba88d8dcee7ef00e91d413a4bd003a814 ]
+
+Aha! Shouldn't that quick scan over pte_none()s make sure that it holds
+ptlock in the PVMW_SYNC case? That too might have been responsible for
+BUGs or WARNs in split_huge_page_to_list() or its unmap_page(), though
+I've never seen any.
+
+Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com
+Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
+Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Tested-by: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 16adeef76d00..a612daef5f00 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -271,6 +271,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                               goto restart;
+                       }
+                       pvmw->pte++;
++                      if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
++                              pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
++                              spin_lock(pvmw->ptl);
++                      }
+               } while (pte_none(*pvmw->pte));
+               if (!pvmw->ptl) {
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch b/queue-4.14/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch
new file mode 100644 (file)
index 0000000..96f8b6a
--- /dev/null
@@ -0,0 +1,66 @@
+From 8ac5455b753eeb27f78f0391626ad9c2f2cb2f37 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 18:24:00 -0700
+Subject: mm/thp: fix page_address_in_vma() on file THP tails
+
+From: Jue Wang <juew@google.com>
+
+[ Upstream commit 31657170deaf1d8d2f6a1955fbc6fa9d228be036 ]
+
+Anon THP tails were already supported, but memory-failure may need to
+use page_address_in_vma() on file THP tails, which its page->mapping
+check did not permit: fix it.
+
+hughd adds: no current usage is known to hit the issue, but this does
+fix a subtle trap in a general helper: best fixed in stable sooner than
+later.
+
+Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com
+Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
+Signed-off-by: Jue Wang <juew@google.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/rmap.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/mm/rmap.c b/mm/rmap.c
+index bebe29a2c5f2..8ed8ec113d5a 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -695,11 +695,11 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+               if (!vma->anon_vma || !page__anon_vma ||
+                   vma->anon_vma->root != page__anon_vma->root)
+                       return -EFAULT;
+-      } else if (page->mapping) {
+-              if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
+-                      return -EFAULT;
+-      } else
++      } else if (!vma->vm_file) {
++              return -EFAULT;
++      } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
+               return -EFAULT;
++      }
+       return vma_address(page, vma);
+ }
+-- 
+2.30.2
+
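
A toy model of why the check must go through the compound head: the struct below is
hypothetical and far simpler than the kernel's struct page, but it shows that
comparing the file's mapping against a tail's own ->mapping cannot match, while the
head's does:

#include <stdio.h>

/* toy model only: in the real kernel a tail's ->mapping holds other state,
 * not the file's address_space, which is why the check must use the head */
struct toy_page {
        const void *mapping;            /* valid on the head page only */
        struct toy_page *head;          /* NULL means "I am the head"  */
};

static struct toy_page *compound_head(struct toy_page *p)
{
        return p->head ? p->head : p;
}

int main(void)
{
        const char f_mapping[] = "file's address_space";
        struct toy_page head = { .mapping = f_mapping, .head = NULL };
        struct toy_page tail = { .mapping = NULL,      .head = &head };

        /* old check: compared f_mapping with the tail's own ->mapping and bailed */
        printf("tail match: %d\n", tail.mapping == f_mapping);
        /* fixed check: compare against the head's mapping instead */
        printf("head match: %d\n", compound_head(&tail)->mapping == f_mapping);
        return 0;
}
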
diff --git a/queue-4.14/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch b/queue-4.14/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch
new file mode 100644 (file)
index 0000000..a5a79ff
--- /dev/null
@@ -0,0 +1,126 @@
+From 8c4e927453029fc06914f8984bc2e4f9c093670f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Jun 2021 18:39:26 -0700
+Subject: mm/thp: fix page_vma_mapped_walk() if THP mapped by ptes
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit a9a7504d9beaf395481faa91e70e2fd08f7a3dde ]
+
+Running certain tests with a DEBUG_VM kernel would crash within hours,
+on the total_mapcount BUG() in split_huge_page_to_list(), while trying
+to free up some memory by punching a hole in a shmem huge page: split's
+try_to_unmap() was unable to find all the mappings of the page (which,
+on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
+
+Crash dumps showed two tail pages of a shmem huge page remained mapped
+by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end
+of a long unmapped range; and no page table had yet been allocated for
+the head of the huge page to be mapped into.
+
+Although designed to handle these odd misaligned huge-page-mapped-by-pte
+cases, page_vma_mapped_walk() falls short by returning false prematurely
+when !pmd_present or !pud_present or !p4d_present or !pgd_present: there
+are cases when a huge page may span the boundary, with ptes present in
+the next.
+
+Restructure page_vma_mapped_walk() as a loop to continue in these cases,
+while keeping its layout much as before.  Add a step_forward() helper to
+advance pvmw->address across those boundaries: originally I tried to use
+mm's standard p?d_addr_end() macros, but hit the same crash 512 times
+less often: because of the way redundant levels are folded together, but
+folded differently in different configurations, it was just too
+difficult to use them correctly; and step_forward() is simpler anyway.
+
+Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com
+Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_vma_mapped.c | 34 +++++++++++++++++++++++++---------
+ 1 file changed, 25 insertions(+), 9 deletions(-)
+
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 96d4c4738590..16adeef76d00 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -110,6 +110,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
+       return true;
+ }
++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
++{
++      pvmw->address = (pvmw->address + size) & ~(size - 1);
++      if (!pvmw->address)
++              pvmw->address = ULONG_MAX;
++}
++
+ /**
+  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+  * @pvmw->address
+@@ -178,16 +185,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       if (pvmw->pte)
+               goto next_pte;
+ restart:
+-      {
++      do {
+               pgd = pgd_offset(mm, pvmw->address);
+-              if (!pgd_present(*pgd))
+-                      return false;
++              if (!pgd_present(*pgd)) {
++                      step_forward(pvmw, PGDIR_SIZE);
++                      continue;
++              }
+               p4d = p4d_offset(pgd, pvmw->address);
+-              if (!p4d_present(*p4d))
+-                      return false;
++              if (!p4d_present(*p4d)) {
++                      step_forward(pvmw, P4D_SIZE);
++                      continue;
++              }
+               pud = pud_offset(p4d, pvmw->address);
+-              if (!pud_present(*pud))
+-                      return false;
++              if (!pud_present(*pud)) {
++                      step_forward(pvmw, PUD_SIZE);
++                      continue;
++              }
+               pvmw->pmd = pmd_offset(pud, pvmw->address);
+               /*
+@@ -234,7 +247,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                               spin_unlock(ptl);
+                       }
+-                      return false;
++                      step_forward(pvmw, PMD_SIZE);
++                      continue;
+               }
+               if (!map_pte(pvmw))
+                       goto next_pte;
+@@ -264,7 +278,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       spin_lock(pvmw->ptl);
+               }
+               goto this_pte;
+-      }
++      } while (pvmw->address < end);
++
++      return false;
+ }
+ /**
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-thp-fix-vma_address-if-virtual-address-below-file.patch b/queue-4.14/mm-thp-fix-vma_address-if-virtual-address-below-file.patch
new file mode 100644 (file)
index 0000000..6ecb2bf
--- /dev/null
@@ -0,0 +1,259 @@
+From 32ee0628da926ae1cadb95196361e702f2a9122c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 18:23:56 -0700
+Subject: mm/thp: fix vma_address() if virtual address below file offset
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 494334e43c16d63b878536a26505397fce6ff3a2 ]
+
+Running certain tests with a DEBUG_VM kernel would crash within hours,
+on the total_mapcount BUG() in split_huge_page_to_list(), while trying
+to free up some memory by punching a hole in a shmem huge page: split's
+try_to_unmap() was unable to find all the mappings of the page (which,
+on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
+
+When that BUG() was changed to a WARN(), it would later crash on the
+VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma) in
+mm/internal.h:vma_address(), used by rmap_walk_file() for
+try_to_unmap().
+
+vma_address() is usually correct, but there's a wraparound case when the
+vm_start address is unusually low but vm_pgoff is not so low:
+vma_address() chooses max(start, vma->vm_start), but that picks the
+wrong address, because start has wrapped around to almost ULONG_MAX.
+
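+A small user-space sketch of that wraparound in the old __vma_address()
+arithmetic (the numbers and PAGE_SHIFT_DEMO are made up purely for
+illustration):
+
+  #include <stdio.h>
+
+  #define PAGE_SHIFT_DEMO 12          /* assume 4kB pages for this sketch */
+
+  int main(void)
+  {
+          /* Hypothetical layout: a vma mapped at a very low address but
+           * at a large file offset, asked about a page whose index lies
+           * below vma->vm_pgoff (e.g. the head of a huge page). */
+          unsigned long vm_start   = 0x10000;
+          unsigned long vm_end     = 0x210000;
+          unsigned long vm_pgoff   = 0x200;   /* vma's file offset, in pages */
+          unsigned long page_pgoff = 0x100;   /* page index, below vm_pgoff */
+
+          /* pgoff - vm_pgoff underflows, so start wraps to nearly ULONG_MAX */
+          unsigned long start = vm_start +
+                  ((page_pgoff - vm_pgoff) << PAGE_SHIFT_DEMO);
+          printf("start  = %#lx\n", start);
+
+          /* and max(start, vm_start) then returns that bogus address,
+           * far beyond vm_end */
+          unsigned long chosen = start > vm_start ? start : vm_start;
+          printf("chosen = %#lx (vm_end = %#lx)\n", chosen, vm_end);
+          return 0;
+  }
+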
+Rewrite vma_address() to be more careful about vm_pgoff; move the
+VM_BUG_ON_VMA() out of it, returning -EFAULT for errors, so that it can
+be safely used from page_mapped_in_vma() and page_address_in_vma() too.
+
+Add vma_address_end() to apply similar care to end address calculation,
+in page_vma_mapped_walk() and page_mkclean_one() and try_to_unmap_one();
+though it raises a question of whether callers would do better to supply
+pvmw->end to page_vma_mapped_walk() - I chose not, for a smaller patch.
+
+An irritation is that their apparent generality breaks down on KSM
+pages, which cannot be located by the page->index that page_to_pgoff()
+uses: as commit 4b0ece6fa016 ("mm: migrate: fix remove_migration_pte()
+for ksm pages") once discovered.  I dithered over the best thing to do
+about that, and have ended up with a VM_BUG_ON_PAGE(PageKsm) in both
+vma_address() and vma_address_end(); though the only place in danger of
+using it on them was try_to_unmap_one().
+
+Sidenote: vma_address() and vma_address_end() now use compound_nr() on a
+head page, instead of thp_size(): to make the right calculation on a
+hugetlbfs page, whether or not THPs are configured.  try_to_unmap() is
+used on hugetlbfs pages, but perhaps the wrong calculation never
+mattered.
+
+Link: https://lkml.kernel.org/r/caf1c1a3-7cfb-7f8f-1beb-ba816e932825@google.com
+Fixes: a8fa41ad2f6f ("mm, rmap: check all VMAs that PTE-mapped THP can be part of")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jue Wang <juew@google.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: fixed up conflicts on intervening thp_size(),
+and mmu_notifier_range initializations; substitute for compound_nr().
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h        | 53 ++++++++++++++++++++++++++++++++------------
+ mm/page_vma_mapped.c | 16 +++++--------
+ mm/rmap.c            | 14 ++++++------
+ 3 files changed, 52 insertions(+), 31 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index a182506242c4..97c8e896cd2f 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -330,27 +330,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+ extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+ /*
+- * At what user virtual address is page expected in @vma?
++ * At what user virtual address is page expected in vma?
++ * Returns -EFAULT if all of the page is outside the range of vma.
++ * If page is a compound head, the entire compound page is considered.
+  */
+ static inline unsigned long
+-__vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address(struct page *page, struct vm_area_struct *vma)
+ {
+-      pgoff_t pgoff = page_to_pgoff(page);
+-      return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++      pgoff_t pgoff;
++      unsigned long address;
++
++      VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
++      pgoff = page_to_pgoff(page);
++      if (pgoff >= vma->vm_pgoff) {
++              address = vma->vm_start +
++                      ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++              /* Check for address beyond vma (or wrapped through 0?) */
++              if (address < vma->vm_start || address >= vma->vm_end)
++                      address = -EFAULT;
++      } else if (PageHead(page) &&
++                 pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) {
++              /* Test above avoids possibility of wrap to 0 on 32-bit */
++              address = vma->vm_start;
++      } else {
++              address = -EFAULT;
++      }
++      return address;
+ }
++/*
++ * Then at what user virtual address will none of the page be found in vma?
++ * Assumes that vma_address() already returned a good starting address.
++ * If page is a compound head, the entire compound page is considered.
++ */
+ static inline unsigned long
+-vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address_end(struct page *page, struct vm_area_struct *vma)
+ {
+-      unsigned long start, end;
+-
+-      start = __vma_address(page, vma);
+-      end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+-
+-      /* page should be within @vma mapping range */
+-      VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
+-
+-      return max(start, vma->vm_start);
++      pgoff_t pgoff;
++      unsigned long address;
++
++      VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
++      pgoff = page_to_pgoff(page) + (1UL << compound_order(page));
++      address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++      /* Check for address beyond vma (or wrapped through 0?) */
++      if (address < vma->vm_start || address > vma->vm_end)
++              address = vma->vm_end;
++      return address;
+ }
+ #else /* !CONFIG_MMU */
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 31879f2175d0..340207ba3743 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -223,18 +223,18 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       if (!map_pte(pvmw))
+               goto next_pte;
+       while (1) {
++              unsigned long end;
++
+               if (check_pte(pvmw))
+                       return true;
+ next_pte:
+               /* Seek to next pte only makes sense for THP */
+               if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+                       return not_found(pvmw);
++              end = vma_address_end(pvmw->page, pvmw->vma);
+               do {
+                       pvmw->address += PAGE_SIZE;
+-                      if (pvmw->address >= pvmw->vma->vm_end ||
+-                          pvmw->address >=
+-                                      __vma_address(pvmw->page, pvmw->vma) +
+-                                      hpage_nr_pages(pvmw->page) * PAGE_SIZE)
++                      if (pvmw->address >= end)
+                               return not_found(pvmw);
+                       /* Did we cross page table boundary? */
+                       if (pvmw->address % PMD_SIZE == 0) {
+@@ -272,14 +272,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+               .vma = vma,
+               .flags = PVMW_SYNC,
+       };
+-      unsigned long start, end;
+-
+-      start = __vma_address(page, vma);
+-      end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+-      if (unlikely(end < vma->vm_start || start >= vma->vm_end))
++      pvmw.address = vma_address(page, vma);
++      if (pvmw.address == -EFAULT)
+               return 0;
+-      pvmw.address = max(start, vma->vm_start);
+       if (!page_vma_mapped_walk(&pvmw))
+               return 0;
+       page_vma_mapped_walk_done(&pvmw);
+diff --git a/mm/rmap.c b/mm/rmap.c
+index b6571c739723..bebe29a2c5f2 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -686,7 +686,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+  */
+ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+ {
+-      unsigned long address;
+       if (PageAnon(page)) {
+               struct anon_vma *page__anon_vma = page_anon_vma(page);
+               /*
+@@ -701,10 +700,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+                       return -EFAULT;
+       } else
+               return -EFAULT;
+-      address = __vma_address(page, vma);
+-      if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+-              return -EFAULT;
+-      return address;
++
++      return vma_address(page, vma);
+ }
+ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
+@@ -896,7 +893,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+        * We have to assume the worse case ie pmd for invalidation. Note that
+        * the page can not be free from this function.
+        */
+-      end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
++      end = vma_address_end(page, vma);
+       mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+       while (page_vma_mapped_walk(&pvmw)) {
+@@ -1374,7 +1371,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+        * Note that the page can not be free in this function as call of
+        * try_to_unmap() must hold a reference on the page.
+        */
+-      end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
++      end = PageKsm(page) ?
++                      address + PAGE_SIZE : vma_address_end(page, vma);
+       if (PageHuge(page)) {
+               /*
+                * If sharing is possible, start and end will be adjusted
+@@ -1777,6 +1775,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
+               struct vm_area_struct *vma = avc->vma;
+               unsigned long address = vma_address(page, vma);
++              VM_BUG_ON_VMA(address == -EFAULT, vma);
+               cond_resched();
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+@@ -1831,6 +1830,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
+                       pgoff_start, pgoff_end) {
+               unsigned long address = vma_address(page, vma);
++              VM_BUG_ON_VMA(address == -EFAULT, vma);
+               cond_resched();
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch b/queue-4.14/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch
new file mode 100644 (file)
index 0000000..fdc95d7
--- /dev/null
@@ -0,0 +1,123 @@
+From 7175ca692c3b7e191de52a2f757d0f0f8336be7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 18:24:07 -0700
+Subject: mm: thp: replace DEBUG_VM BUG with VM_WARN when unmap fails for split
+
+From: Yang Shi <shy828301@gmail.com>
+
+[ Upstream commit 504e070dc08f757bccaed6d05c0f53ecbfac8a23 ]
+
+When debugging the bug reported by Wang Yugui [1], try_to_unmap() may
+fail, but the first VM_BUG_ON_PAGE() only checks page_mapcount(), so it
+may miss the failure when the head page is unmapped but another subpage
+is still mapped.  The second DEBUG_VM BUG(), which checks the total
+mapcount, would then catch it.  This can cause some confusion.
+
+As this is not a fatal issue, consolidate the two DEBUG_VM checks into
+one VM_WARN_ON_ONCE_PAGE().
+
+[1] https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
+
+Link: https://lkml.kernel.org/r/d0f0db68-98b8-ebfb-16dc-f29df24cf012@google.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jue Wang <juew@google.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: fixed up variables, split_queue_lock, tree_lock
+in split_huge_page_to_list(), and conflict on ttu_flags in unmap_page().
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/huge_memory.c | 24 +++++++-----------------
+ 1 file changed, 7 insertions(+), 17 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 5705ccff3e7f..972893908bcd 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2325,15 +2325,15 @@ static void unmap_page(struct page *page)
+ {
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+               TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
+-      bool unmap_success;
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+       if (PageAnon(page))
+               ttu_flags |= TTU_SPLIT_FREEZE;
+-      unmap_success = try_to_unmap(page, ttu_flags);
+-      VM_BUG_ON_PAGE(!unmap_success, page);
++      try_to_unmap(page, ttu_flags);
++
++      VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
+ }
+ static void remap_page(struct page *page)
+@@ -2586,7 +2586,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+       struct anon_vma *anon_vma = NULL;
+       struct address_space *mapping = NULL;
+-      int count, mapcount, extra_pins, ret;
++      int extra_pins, ret;
+       bool mlocked;
+       unsigned long flags;
+       pgoff_t end;
+@@ -2648,7 +2648,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+       mlocked = PageMlocked(page);
+       unmap_page(head);
+-      VM_BUG_ON_PAGE(compound_mapcount(head), head);
+       /* Make sure the page is not on per-CPU pagevec as it takes pin */
+       if (mlocked)
+@@ -2674,9 +2673,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+       /* Prevent deferred_split_scan() touching ->_refcount */
+       spin_lock(&pgdata->split_queue_lock);
+-      count = page_count(head);
+-      mapcount = total_mapcount(head);
+-      if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
++      if (page_ref_freeze(head, 1 + extra_pins)) {
+               if (!list_empty(page_deferred_list(head))) {
+                       pgdata->split_queue_len--;
+                       list_del(page_deferred_list(head));
+@@ -2692,16 +2689,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+               } else
+                       ret = 0;
+       } else {
+-              if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
+-                      pr_alert("total_mapcount: %u, page_count(): %u\n",
+-                                      mapcount, count);
+-                      if (PageTail(page))
+-                              dump_page(head, NULL);
+-                      dump_page(page, "total_mapcount(head) > 0");
+-                      BUG();
+-              }
+               spin_unlock(&pgdata->split_queue_lock);
+-fail:         if (mapping)
++fail:
++              if (mapping)
+                       spin_unlock(&mapping->tree_lock);
+               spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+               remap_page(head);
+-- 
+2.30.2
+
diff --git a/queue-4.14/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch b/queue-4.14/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch
new file mode 100644 (file)
index 0000000..6196ee0
--- /dev/null
@@ -0,0 +1,159 @@
+From dae6673b7a92395057695e7fd2fd3600e7644aba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Jun 2021 18:23:53 -0700
+Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit 732ed55823fc3ad998d43b86bf771887bcc5ec67 ]
+
+Stressing huge tmpfs often crashed on unmap_page()'s VM_BUG_ON_PAGE
+(!unmap_success): with dump_page() showing mapcount:1, but then its raw
+struct page output showing _mapcount ffffffff i.e.  mapcount 0.
+
+And even if that particular VM_BUG_ON_PAGE(!unmap_success) is removed,
+it is immediately followed by a VM_BUG_ON_PAGE(compound_mapcount(head)),
+and further down an IS_ENABLED(CONFIG_DEBUG_VM) total_mapcount BUG():
+all indicative of some mapcount difficulty in development here perhaps.
+But the !CONFIG_DEBUG_VM path handles the failures correctly and
+silently.
+
+I believe the problem is that once a racing unmap has cleared pte or
+pmd, try_to_unmap_one() may skip taking the page table lock, and emerge
+from try_to_unmap() before the racing task has reached decrementing
+mapcount.
+
+Instead of abandoning the unsafe VM_BUG_ON_PAGE(), and the ones that
+follow, use PVMW_SYNC in try_to_unmap_one() in this case: adding
+TTU_SYNC to the options, and passing that from unmap_page().
+
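+Purely as an illustration of why the take-and-drop works (a user-space
+pthread sketch, not kernel code; the names ptl, zapper, pte_present and
+mapcount are invented for the demo): once the walker has already observed
+the cleared entry, briefly acquiring and releasing the same lock cannot
+complete before the racing critical section does, so the mapcount update
+is visible afterwards:
+
+  #include <pthread.h>
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  /* Shared state standing in for the page table entry and the mapcount. */
+  static atomic_int pte_present = 1;
+  static int mapcount = 1;                /* only written under "ptl" */
+  static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;
+
+  /* Stand-in for the racing zap: clears the "pte" and, a little later,
+   * decrements the "mapcount", all inside one critical section. */
+  static void *zapper(void *arg)
+  {
+          pthread_mutex_lock(&ptl);
+          atomic_store(&pte_present, 0);  /* pte cleared ...              */
+          for (volatile int i = 0; i < 1000000; i++)
+                  ;                       /* ... mapcount not yet updated */
+          mapcount = 0;
+          pthread_mutex_unlock(&ptl);
+          return NULL;
+  }
+
+  int main(void)
+  {
+          pthread_t t;
+          pthread_create(&t, NULL, zapper, NULL);
+
+          /* The unsynchronized walk: observe the cleared "pte" without
+           * taking the lock. */
+          while (atomic_load(&pte_present))
+                  ;
+
+          /* Without this take-and-drop, mapcount could still read 1 here.
+           * With it, we cannot proceed until the zapper's critical section
+           * has finished, so its mapcount update is visible: the role the
+           * pmd_lock()/spin_unlock() pair plays under PVMW_SYNC. */
+          pthread_mutex_lock(&ptl);
+          pthread_mutex_unlock(&ptl);
+
+          printf("mapcount = %d\n", mapcount);    /* reliably 0 */
+          pthread_join(t, NULL);
+          return 0;
+  }
+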
+Do this only when CONFIG_DEBUG_VM, or for non-debug builds too?  The
+consensus is to do the same for both: the slight overhead added should
+rarely matter, except perhaps when splitting sparsely-populated
+multiply-mapped shmem.  Once confident
+that bugs are fixed, TTU_SYNC here can be removed, and the race
+tolerated.
+
+Link: https://lkml.kernel.org/r/c1e95853-8bcd-d8fd-55fa-e7f2488e78f@google.com
+Fixes: fec89c109f3a ("thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jue Wang <juew@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Wang Yugui <wangyugui@e16-tech.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+
+Note on stable backport: upstream TTU_SYNC 0x10 takes the value which
+5.11 commit 013339df116c ("mm/rmap: always do TTU_IGNORE_ACCESS") freed.
+It is very tempting to backport that commit (as 5.10 already did) and
+make no change here; but on reflection, good as that commit is, I'm
+reluctant to include any possible side-effect of it in this series.
+
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/rmap.h |  3 ++-
+ mm/huge_memory.c     |  2 +-
+ mm/page_vma_mapped.c | 11 +++++++++++
+ mm/rmap.c            | 17 ++++++++++++++++-
+ 4 files changed, 30 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index d7d6d4eb1794..91ccae946716 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -98,7 +98,8 @@ enum ttu_flags {
+                                        * do a final flush if necessary */
+       TTU_RMAP_LOCKED         = 0x80, /* do not grab rmap lock:
+                                        * caller holds it */
+-      TTU_SPLIT_FREEZE        = 0x100,                /* freeze pte under splitting thp */
++      TTU_SPLIT_FREEZE        = 0x100, /* freeze pte under splitting thp */
++      TTU_SYNC                = 0x200, /* avoid racy checks with PVMW_SYNC */
+ };
+ #ifdef CONFIG_MMU
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 513f0cf173ad..5705ccff3e7f 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2324,7 +2324,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ static void unmap_page(struct page *page)
+ {
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+-              TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
++              TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
+       bool unmap_success;
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index e00d985a51c5..31879f2175d0 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -207,6 +207,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       pvmw->ptl = NULL;
+               }
+       } else if (!pmd_present(pmde)) {
++              /*
++               * If PVMW_SYNC, take and drop THP pmd lock so that we
++               * cannot return prematurely, while zap_huge_pmd() has
++               * cleared *pmd but not decremented compound_mapcount().
++               */
++              if ((pvmw->flags & PVMW_SYNC) &&
++                  PageTransCompound(pvmw->page)) {
++                      spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++
++                      spin_unlock(ptl);
++              }
+               return false;
+       }
+       if (!map_pte(pvmw))
+diff --git a/mm/rmap.c b/mm/rmap.c
+index e6a556fec9d1..b6571c739723 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1344,6 +1344,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+       unsigned long start = address, end;
+       enum ttu_flags flags = (enum ttu_flags)arg;
++      /*
++       * When racing against e.g. zap_pte_range() on another cpu,
++       * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++       * try_to_unmap() may return false when it is about to become true,
++       * if page table locking is skipped: use TTU_SYNC to wait for that.
++       */
++      if (flags & TTU_SYNC)
++              pvmw.flags = PVMW_SYNC;
++
+       /* munlock has nothing to gain from examining un-locked vmas */
+       if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
+               return true;
+@@ -1665,7 +1674,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+       else
+               rmap_walk(page, &rwc);
+-      return !page_mapcount(page) ? true : false;
++      /*
++       * When racing against e.g. zap_pte_range() on another cpu,
++       * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++       * try_to_unmap() may return false when it is about to become true,
++       * if page table locking is skipped: use TTU_SYNC to wait for that.
++       */
++      return !page_mapcount(page);
+ }
+ /**
+-- 
+2.30.2
+
diff --git a/queue-4.14/series b/queue-4.14/series
new file mode 100644 (file)
index 0000000..2bac7bd
--- /dev/null
@@ -0,0 +1,19 @@
+include-linux-mmdebug.h-make-vm_warn-non-rvals.patch
+mm-add-vm_warn_on_once_page-macro.patch
+mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch
+mm-rmap-use-page_not_mapped-in-try_to_unmap.patch
+mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch
+mm-thp-fix-vma_address-if-virtual-address-below-file.patch
+mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch
+mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch
+mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch
+mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch
+mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch
+mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch
+mm-page_vma_mapped_walk-crossing-page-table-boundary.patch
+mm-page_vma_mapped_walk-add-a-level-of-indentation.patch
+mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch
+mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch
+mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch
+mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
+mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch