From: Greg Kroah-Hartman
Date: Mon, 28 Jun 2021 11:29:57 +0000 (+0200)
Subject: 5.12-stable patches
X-Git-Tag: v5.12.14~13
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bd8ef52fbb48b652ecce34c05dbb53ec0781db02;p=thirdparty%2Fkernel%2Fstable-queue.git

5.12-stable patches

added patches:
	mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch
	mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races.patch
	mm-page_vma_mapped_walk-add-a-level-of-indentation.patch
	mm-page_vma_mapped_walk-crossing-page-table-boundary.patch
	mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch
	mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch
	mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch
	mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch
	mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch
	mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch
	mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
	mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry.patch
	mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch
	mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch
	mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch
	mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch
	mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch
	mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch
	mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch
	mm-thp-use-head-page-in-__migration_entry_wait.patch
---

diff --git a/queue-5.12/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch b/queue-5.12/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch
new file mode 100644
index 00000000000..036467280e6
--- /dev/null
+++ b/queue-5.12/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch
@@ -0,0 +1,157 @@
+From fe19bd3dae3d15d2fbfdb3de8839a6ea0fe94264 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:52 -0700
+Subject: mm, futex: fix shared futex pgoff on shmem huge page
+
+From: Hugh Dickins
+
+commit fe19bd3dae3d15d2fbfdb3de8839a6ea0fe94264 upstream.
+
+If more than one futex is placed on a shmem huge page, it can happen
+that waking the second wakes the first instead, and leaves the second
+waiting: the key's shared.pgoff is wrong.
+
+When 3.11 commit 13d60f4b6ab5 ("futex: Take hugepages into account when
+generating futex_key") was merged, the only shared huge pages came from
+hugetlbfs, and the code added to deal with its exceptional page->index
+was put into hugetlb source. Then that was missed when 4.8 added shmem
+huge pages.
+
+page_to_pgoff() is what others use for this nowadays: except that, as
+currently written, it gives the right answer on a hugetlbfs head, but
+nonsense on hugetlbfs tails. Fix that by calling hugetlbfs-specific
+hugetlb_basepage_index() on PageHuge tails as well as on head.
+
+Yes, it's unconventional to declare hugetlb_basepage_index() there in
+pagemap.h, rather than in hugetlb.h; but I do not expect anything but
+page_to_pgoff() ever to need it.
+
+[akpm@linux-foundation.org: give hugetlb_basepage_index() prototype the correct scope]
+
+Link: https://lkml.kernel.org/r/b17d946b-d09-326e-b42a-52884c36df32@google.com
+Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
+Reported-by: Neel Natu
+Signed-off-by: Hugh Dickins
+Reviewed-by: Matthew Wilcox (Oracle)
+Acked-by: Thomas Gleixner
+Cc: "Kirill A. Shutemov"
Shutemov" +Cc: Zhang Yi +Cc: Mel Gorman +Cc: Mike Kravetz +Cc: Ingo Molnar +Cc: Peter Zijlstra +Cc: Darren Hart +Cc: Davidlohr Bueso +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/hugetlb.h | 16 ---------------- + include/linux/pagemap.h | 13 +++++++------ + kernel/futex.c | 3 +-- + mm/hugetlb.c | 5 +---- + 4 files changed, 9 insertions(+), 28 deletions(-) + +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -728,17 +728,6 @@ static inline int hstate_index(struct hs + return h - hstates; + } + +-pgoff_t __basepage_index(struct page *page); +- +-/* Return page->index in PAGE_SIZE units */ +-static inline pgoff_t basepage_index(struct page *page) +-{ +- if (!PageCompound(page)) +- return page->index; +- +- return __basepage_index(page); +-} +- + extern int dissolve_free_huge_page(struct page *page); + extern int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn); +@@ -969,11 +958,6 @@ static inline int hstate_index(struct hs + return 0; + } + +-static inline pgoff_t basepage_index(struct page *page) +-{ +- return page->index; +-} +- + static inline int dissolve_free_huge_page(struct page *page) + { + return 0; +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -501,7 +501,7 @@ static inline struct page *read_mapping_ + } + + /* +- * Get index of the page with in radix-tree ++ * Get index of the page within radix-tree (but not for hugetlb pages). + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) + */ + static inline pgoff_t page_to_index(struct page *page) +@@ -520,15 +520,16 @@ static inline pgoff_t page_to_index(stru + return pgoff; + } + ++extern pgoff_t hugetlb_basepage_index(struct page *page); ++ + /* +- * Get the offset in PAGE_SIZE. +- * (TODO: hugepage should have ->index in PAGE_SIZE) ++ * Get the offset in PAGE_SIZE (even for hugetlb pages). 
++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
+  */
+ static inline pgoff_t page_to_pgoff(struct page *page)
+ {
+-	if (unlikely(PageHeadHuge(page)))
+-		return page->index << compound_order(page);
+-
++	if (unlikely(PageHuge(page)))
++		return hugetlb_basepage_index(page);
+ 	return page_to_index(page);
+ }
+ 
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -35,7 +35,6 @@
+ #include 
+ #include 
+ #include 
+-#include 
+ #include 
+ #include 
+ #include 
+@@ -650,7 +649,7 @@ again:
+ 
+ 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+ 		key->shared.i_seq = get_inode_sequence_number(inode);
+-		key->shared.pgoff = basepage_index(tail);
++		key->shared.pgoff = page_to_pgoff(tail);
+ 		rcu_read_unlock();
+ 	}
+ 
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1584,15 +1584,12 @@ struct address_space *hugetlb_page_mappi
+ 	return NULL;
+ }
+ 
+-pgoff_t __basepage_index(struct page *page)
++pgoff_t hugetlb_basepage_index(struct page *page)
+ {
+ 	struct page *page_head = compound_head(page);
+ 	pgoff_t index = page_index(page_head);
+ 	unsigned long compound_idx;
+ 
+-	if (!PageHuge(page_head))
+-		return page_index(page);
+-
+ 	if (compound_order(page_head) >= MAX_ORDER)
+ 		compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
+ 	else
diff --git a/queue-5.12/mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races.patch b/queue-5.12/mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races.patch
new file mode 100644
index 00000000000..b4afb8d9634
--- /dev/null
+++ b/queue-5.12/mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races.patch
@@ -0,0 +1,167 @@
+From 171936ddaf97e6f4e1264f4128bb5cf15691339c Mon Sep 17 00:00:00 2001
+From: Tony Luck
+Date: Thu, 24 Jun 2021 18:39:55 -0700
+Subject: mm/memory-failure: use a mutex to avoid memory_failure() races
+
+From: Tony Luck
+
+commit 171936ddaf97e6f4e1264f4128bb5cf15691339c upstream.
+
+Patch series "mm,hwpoison: fix sending SIGBUS for Action Required MCE", v5.
+
+I wrote this patchset to materialize what I think is the current
+allowable solution mentioned by the previous discussion [1]. I simply
+borrowed Tony's mutex patch and Aili's return code patch, then I queued
+another one to find error virtual address in the best effort manner. I
+know that this is not a perfect solution, but it should work for some
+typical cases.
+
+[1]: https://lore.kernel.org/linux-mm/20210331192540.2141052f@alex-virtual-machine/
+
+This patch (of 2):
+
+There can be races when multiple CPUs consume poison from the same page.
+The first into memory_failure() atomically sets the HWPoison page flag
+and begins hunting for tasks that map this page. Eventually it
+invalidates those mappings and may send a SIGBUS to the affected tasks.
+
+But while all that work is going on, other CPUs see a "success" return
+code from memory_failure() and so they believe the error has been
+handled and continue executing.
+
+Fix by wrapping most of the internal parts of memory_failure() in a
+mutex.
+
+[akpm@linux-foundation.org: make mf_mutex local to memory_failure()]
+
+Link: https://lkml.kernel.org/r/20210521030156.2612074-1-nao.horiguchi@gmail.com
+Link: https://lkml.kernel.org/r/20210521030156.2612074-2-nao.horiguchi@gmail.com
+Signed-off-by: Tony Luck
+Signed-off-by: Naoya Horiguchi
+Reviewed-by: Borislav Petkov
+Reviewed-by: Oscar Salvador
+Cc: Aili Yao
+Cc: Andy Lutomirski
+Cc: Borislav Petkov
+Cc: David Hildenbrand
+Cc: Jue Wang
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/memory-failure.c |   36 +++++++++++++++++++++++-------------
+ 1 file changed, 23 insertions(+), 13 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1429,9 +1429,10 @@ int memory_failure(unsigned long pfn, in
+ 	struct page *hpage;
+ 	struct page *orig_head;
+ 	struct dev_pagemap *pgmap;
+-	int res;
++	int res = 0;
+ 	unsigned long page_flags;
+ 	bool retry = true;
++	static DEFINE_MUTEX(mf_mutex);
+ 
+ 	if (!sysctl_memory_failure_recovery)
+ 		panic("Memory failure on page %lx", pfn);
+@@ -1449,13 +1450,18 @@ int memory_failure(unsigned long pfn, in
+ 		return -ENXIO;
+ 	}
+ 
++	mutex_lock(&mf_mutex);
++
+ try_again:
+-	if (PageHuge(p))
+-		return memory_failure_hugetlb(pfn, flags);
++	if (PageHuge(p)) {
++		res = memory_failure_hugetlb(pfn, flags);
++		goto unlock_mutex;
++	}
++
+ 	if (TestSetPageHWPoison(p)) {
+ 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
+ 			pfn);
+-		return 0;
++		goto unlock_mutex;
+ 	}
+ 
+ 	orig_head = hpage = compound_head(p);
+@@ -1488,17 +1494,19 @@ try_again:
+ 				res = MF_FAILED;
+ 		}
+ 		action_result(pfn, MF_MSG_BUDDY, res);
+-		return res == MF_RECOVERED ? 0 : -EBUSY;
++		res = res == MF_RECOVERED ? 0 : -EBUSY;
+ 	} else {
+ 		action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
+-		return -EBUSY;
++		res = -EBUSY;
+ 	}
++	goto unlock_mutex;
+ 	}
+ 
+ 	if (PageTransHuge(hpage)) {
+ 		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
+ 			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
+-			return -EBUSY;
++			res = -EBUSY;
++			goto unlock_mutex;
+ 		}
+ 		VM_BUG_ON_PAGE(!page_count(p), p);
+ 	}
+@@ -1522,7 +1530,7 @@ try_again:
+ 	if (PageCompound(p) && compound_head(p) != orig_head) {
+ 		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
+ 		res = -EBUSY;
+-		goto out;
++		goto unlock_page;
+ 	}
+ 
+ 	/*
+@@ -1542,14 +1550,14 @@ try_again:
+ 		num_poisoned_pages_dec();
+ 		unlock_page(p);
+ 		put_page(p);
+-		return 0;
++		goto unlock_mutex;
+ 	}
+ 	if (hwpoison_filter(p)) {
+ 		if (TestClearPageHWPoison(p))
+ 			num_poisoned_pages_dec();
+ 		unlock_page(p);
+ 		put_page(p);
+-		return 0;
++		goto unlock_mutex;
+ 	}
+ 
+ 	/*
+@@ -1573,7 +1581,7 @@ try_again:
+ 	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
+ 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
+ 		res = -EBUSY;
+-		goto out;
++		goto unlock_page;
+ 	}
+ 
+ 	/*
+@@ -1582,13 +1590,15 @@ try_again:
+ 	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+ 		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
+ 		res = -EBUSY;
+-		goto out;
++		goto unlock_page;
+ 	}
+ 
+ identify_page_state:
+ 	res = identify_page_state(pfn, p, page_flags);
+-out:
++unlock_page:
+ 	unlock_page(p);
++unlock_mutex:
++	mutex_unlock(&mf_mutex);
+ 	return res;
+ }
+ EXPORT_SYMBOL_GPL(memory_failure);
diff --git a/queue-5.12/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch b/queue-5.12/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch
new file mode 100644
index 00000000000..7c10047d04c
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch
@@ -0,0 +1,155 @@
+From b3807a91aca7d21c05d5790612e49969117a72b9 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:17 -0700
+Subject: mm: page_vma_mapped_walk(): add a level of indentation
+
+From: Hugh Dickins
+
+commit b3807a91aca7d21c05d5790612e49969117a72b9 upstream.
+
+page_vma_mapped_walk() cleanup: add a level of indentation to much of
+the body, making no functional change in this commit, but reducing the
+later diff when this is all converted to a loop.
+
+[hughd@google.com: page_vma_mapped_walk(): add a level of indentation fix]
+  Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com
+
+Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |  105 ++++++++++++++++++++++++++-------------------------
+ 1 file changed, 55 insertions(+), 50 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -173,62 +173,67 @@ bool page_vma_mapped_walk(struct page_vm
+ 	if (pvmw->pte)
+ 		goto next_pte;
+ restart:
+-	pgd = pgd_offset(mm, pvmw->address);
+-	if (!pgd_present(*pgd))
+-		return false;
+-	p4d = p4d_offset(pgd, pvmw->address);
+-	if (!p4d_present(*p4d))
+-		return false;
+-	pud = pud_offset(p4d, pvmw->address);
+-	if (!pud_present(*pud))
+-		return false;
+-	pvmw->pmd = pmd_offset(pud, pvmw->address);
+-	/*
+-	 * Make sure the pmd value isn't cached in a register by the
+-	 * compiler and used as a stale value after we've observed a
+-	 * subsequent update.
+-	 */
+-	pmde = READ_ONCE(*pvmw->pmd);
+-	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+-		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+-		pmde = *pvmw->pmd;
+-		if (likely(pmd_trans_huge(pmde))) {
+-			if (pvmw->flags & PVMW_MIGRATION)
+-				return not_found(pvmw);
+-			if (pmd_page(pmde) != page)
+-				return not_found(pvmw);
+-			return true;
+-		}
+-		if (!pmd_present(pmde)) {
+-			swp_entry_t entry;
++	{
++		pgd = pgd_offset(mm, pvmw->address);
++		if (!pgd_present(*pgd))
++			return false;
++		p4d = p4d_offset(pgd, pvmw->address);
++		if (!p4d_present(*p4d))
++			return false;
++		pud = pud_offset(p4d, pvmw->address);
++		if (!pud_present(*pud))
++			return false;
+ 
+-		if (!thp_migration_supported() ||
+-		    !(pvmw->flags & PVMW_MIGRATION))
+-			return not_found(pvmw);
+-		entry = pmd_to_swp_entry(pmde);
+-		if (!is_migration_entry(entry) ||
+-		    migration_entry_to_page(entry) != page)
+-			return not_found(pvmw);
+-		return true;
+-	}
+-	/* THP pmd was split under us: handle on pte level */
+-	spin_unlock(pvmw->ptl);
+-	pvmw->ptl = NULL;
+-	} else if (!pmd_present(pmde)) {
++		pvmw->pmd = pmd_offset(pud, pvmw->address);
+ 		/*
+-	 * If PVMW_SYNC, take and drop THP pmd lock so that we
+-	 * cannot return prematurely, while zap_huge_pmd() has
+-	 * cleared *pmd but not decremented compound_mapcount().
++		 * Make sure the pmd value isn't cached in a register by the
++		 * compiler and used as a stale value after we've observed a
++		 * subsequent update.
+ 		 */
+-	if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
+-		spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++		pmde = READ_ONCE(*pvmw->pmd);
++
++		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
++			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
++			pmde = *pvmw->pmd;
++			if (likely(pmd_trans_huge(pmde))) {
++				if (pvmw->flags & PVMW_MIGRATION)
++					return not_found(pvmw);
++				if (pmd_page(pmde) != page)
++					return not_found(pvmw);
++				return true;
++			}
++			if (!pmd_present(pmde)) {
++				swp_entry_t entry;
+ 
+-		spin_unlock(ptl);
++				if (!thp_migration_supported() ||
++				    !(pvmw->flags & PVMW_MIGRATION))
++					return not_found(pvmw);
++				entry = pmd_to_swp_entry(pmde);
++				if (!is_migration_entry(entry) ||
++				    migration_entry_to_page(entry) != page)
++					return not_found(pvmw);
++				return true;
++			}
++			/* THP pmd was split under us: handle on pte level */
++			spin_unlock(pvmw->ptl);
++			pvmw->ptl = NULL;
++		} else if (!pmd_present(pmde)) {
++			/*
++			 * If PVMW_SYNC, take and drop THP pmd lock so that we
++			 * cannot return prematurely, while zap_huge_pmd() has
++			 * cleared *pmd but not decremented compound_mapcount().
++			 */
++			if ((pvmw->flags & PVMW_SYNC) &&
++			    PageTransCompound(page)) {
++				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++
++				spin_unlock(ptl);
++			}
++			return false;
+ 		}
+-		return false;
++		if (!map_pte(pvmw))
++			goto next_pte;
+ 	}
+-	if (!map_pte(pvmw))
+-		goto next_pte;
+ 	while (1) {
+ 		unsigned long end;
+ 
diff --git a/queue-5.12/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch b/queue-5.12/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch
new file mode 100644
index 00000000000..a1a8f09d8ca
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch
@@ -0,0 +1,56 @@
+From 448282487483d6fa5b2eeeafaa0acc681e544a9c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:14 -0700
+Subject: mm: page_vma_mapped_walk(): crossing page table boundary
+
+From: Hugh Dickins
+
+commit 448282487483d6fa5b2eeeafaa0acc681e544a9c upstream.
+
+page_vma_mapped_walk() cleanup: adjust the test for crossing page table
+boundary - I believe pvmw->address is always page-aligned, but nothing
+else here assumed that; and remember to reset pvmw->pte to NULL after
+unmapping the page table, though I never saw any bug from that.
+
+Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -244,16 +244,16 @@ next_pte:
+ 			if (pvmw->address >= end)
+ 				return not_found(pvmw);
+ 			/* Did we cross page table boundary? */
+-			if (pvmw->address % PMD_SIZE == 0) {
+-				pte_unmap(pvmw->pte);
++			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
+ 				if (pvmw->ptl) {
+ 					spin_unlock(pvmw->ptl);
+ 					pvmw->ptl = NULL;
+ 				}
++				pte_unmap(pvmw->pte);
++				pvmw->pte = NULL;
+ 				goto restart;
+-			} else {
+-				pvmw->pte++;
+ 			}
++			pvmw->pte++;
+ 		} while (pte_none(*pvmw->pte));
+ 
+ 		if (!pvmw->ptl) {
diff --git a/queue-5.12/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch b/queue-5.12/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch
new file mode 100644
index 00000000000..393106e9fda
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch
@@ -0,0 +1,63 @@
+From a765c417d876cc635f628365ec9aa6f09470069a Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:23 -0700
+Subject: mm: page_vma_mapped_walk(): get vma_address_end() earlier
+
+From: Hugh Dickins
+
+commit a765c417d876cc635f628365ec9aa6f09470069a upstream.
+
+page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the
+start, rather than later at next_pte.
+
+It's a little unnecessary overhead on the first call, but makes for a
+simpler loop in the following commit.
+
+Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -171,6 +171,15 @@ bool page_vma_mapped_walk(struct page_vm
+ 		return true;
+ 	}
+ 
++	/*
++	 * Seek to next pte only makes sense for THP.
++	 * But more important than that optimization, is to filter out
++	 * any PageKsm page: whose page->index misleads vma_address()
++	 * and vma_address_end() to disaster.
++	 */
++	end = PageTransCompound(page) ?
++		vma_address_end(page, pvmw->vma) :
++		pvmw->address + PAGE_SIZE;
+ 	if (pvmw->pte)
+ 		goto next_pte;
+ restart:
+@@ -238,10 +247,6 @@ this_pte:
+ 	if (check_pte(pvmw))
+ 		return true;
+ next_pte:
+-	/* Seek to next pte only makes sense for THP */
+-	if (!PageTransHuge(page))
+-		return not_found(pvmw);
+-	end = vma_address_end(page, pvmw->vma);
+ 	do {
+ 		pvmw->address += PAGE_SIZE;
+ 		if (pvmw->address >= end)
diff --git a/queue-5.12/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch b/queue-5.12/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch
new file mode 100644
index 00000000000..fcd6f71c056
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch
@@ -0,0 +1,75 @@
+From e2e1d4076c77b3671cf8ce702535ae7dee3acf89 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:10 -0700
+Subject: mm: page_vma_mapped_walk(): prettify PVMW_MIGRATION block
+
+From: Hugh Dickins
+
+commit e2e1d4076c77b3671cf8ce702535ae7dee3acf89 upstream.
+
+page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to
+follow the same "return not_found, return not_found, return true"
+pattern as the block above it (note: returning not_found there is never
+premature, since existence or prior existence of huge pmd guarantees
+good alignment).
+
+Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Reviewed-by: Peter Xu
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |   30 ++++++++++++++----------------
+ 1 file changed, 14 insertions(+), 16 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -198,24 +198,22 @@ restart:
+ 			if (pmd_page(pmde) != page)
+ 				return not_found(pvmw);
+ 			return true;
+-		} else if (!pmd_present(pmde)) {
+-			if (thp_migration_supported()) {
+-				if (!(pvmw->flags & PVMW_MIGRATION))
+-					return not_found(pvmw);
+-				if (is_migration_entry(pmd_to_swp_entry(pmde))) {
+-					swp_entry_t entry = pmd_to_swp_entry(pmde);
++		}
++		if (!pmd_present(pmde)) {
++			swp_entry_t entry;
+ 
+-					if (migration_entry_to_page(entry) != page)
+-						return not_found(pvmw);
+-					return true;
+-				}
+-			}
+-			return not_found(pvmw);
+-		} else {
+-			/* THP pmd was split under us: handle on pte level */
+-			spin_unlock(pvmw->ptl);
+-			pvmw->ptl = NULL;
++			if (!thp_migration_supported() ||
++			    !(pvmw->flags & PVMW_MIGRATION))
++				return not_found(pvmw);
++			entry = pmd_to_swp_entry(pmde);
++			if (!is_migration_entry(entry) ||
++			    migration_entry_to_page(entry) != page)
++				return not_found(pvmw);
++			return true;
+ 		}
++		/* THP pmd was split under us: handle on pte level */
++		spin_unlock(pvmw->ptl);
++		pvmw->ptl = NULL;
+ 	} else if (!pmd_present(pmde)) {
+ 		/*
+ 		 * If PVMW_SYNC, take and drop THP pmd lock so that we
diff --git a/queue-5.12/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch b/queue-5.12/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch
new file mode 100644
index 00000000000..1103bb90811
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch
@@ -0,0 +1,68 @@
+From 6d0fd5987657cb0c9756ce684e3a74c0f6351728 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:04 -0700
+Subject: mm: page_vma_mapped_walk(): settle PageHuge on entry
+
+From: Hugh Dickins
+
+commit 6d0fd5987657cb0c9756ce684e3a74c0f6351728 upstream.
+
+page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of
+the way at the start, so no need to worry about it later.
+
+Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Reviewed-by: Peter Xu
+Cc: Alistair Popple
+Cc: "Kirill A. Shutemov"
Shutemov" +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_vma_mapped.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -153,10 +153,11 @@ bool page_vma_mapped_walk(struct page_vm + if (pvmw->pmd && !pvmw->pte) + return not_found(pvmw); + +- if (pvmw->pte) +- goto next_pte; +- + if (unlikely(PageHuge(page))) { ++ /* The only possible mapping was handled on last iteration */ ++ if (pvmw->pte) ++ return not_found(pvmw); ++ + /* when pud is not present, pte will be NULL */ + pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); + if (!pvmw->pte) +@@ -168,6 +169,9 @@ bool page_vma_mapped_walk(struct page_vm + return not_found(pvmw); + return true; + } ++ ++ if (pvmw->pte) ++ goto next_pte; + restart: + pgd = pgd_offset(mm, pvmw->address); + if (!pgd_present(*pgd)) +@@ -233,7 +237,7 @@ restart: + return true; + next_pte: + /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(page) || PageHuge(page)) ++ if (!PageTransHuge(page)) + return not_found(pvmw); + end = vma_address_end(page, pvmw->vma); + do { diff --git a/queue-5.12/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch b/queue-5.12/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch new file mode 100644 index 00000000000..58c15885c92 --- /dev/null +++ b/queue-5.12/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch @@ -0,0 +1,61 @@ +From 474466301dfd8b39a10c01db740645f3f7ae9a28 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Thu, 24 Jun 2021 18:39:20 -0700 +Subject: mm: page_vma_mapped_walk(): use goto instead of while (1) + +From: Hugh Dickins + +commit 474466301dfd8b39a10c01db740645f3f7ae9a28 upstream. + +page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte, +and use "goto this_pte", in place of the "while (1)" loop at the end. + +Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -144,6 +144,7 @@ bool page_vma_mapped_walk(struct page_vm
+ {
+ 	struct mm_struct *mm = pvmw->vma->vm_mm;
+ 	struct page *page = pvmw->page;
++	unsigned long end;
+ 	pgd_t *pgd;
+ 	p4d_t *p4d;
+ 	pud_t *pud;
+@@ -233,10 +234,7 @@ restart:
+ 	}
+ 	if (!map_pte(pvmw))
+ 		goto next_pte;
+-	}
+-	while (1) {
+-		unsigned long end;
+-
++this_pte:
+ 	if (check_pte(pvmw))
+ 		return true;
+ next_pte:
+@@ -265,6 +263,7 @@ next_pte:
+ 		pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ 		spin_lock(pvmw->ptl);
+ 	}
++	goto this_pte;
+ 	}
+ }
+ 
diff --git a/queue-5.12/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch b/queue-5.12/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch
new file mode 100644
index 00000000000..5f634f69d0d
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch
@@ -0,0 +1,73 @@
+From f003c03bd29e6f46fef1b9a8e8d636ac732286d5 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:01 -0700
+Subject: mm: page_vma_mapped_walk(): use page for pvmw->page
+
+From: Hugh Dickins
+
+commit f003c03bd29e6f46fef1b9a8e8d636ac732286d5 upstream.
+
+Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes".
+
+I've marked all of these for stable: many are merely cleanups, but I
+think they are much better before the main fix than after.
+
+This patch (of 11):
+
+page_vma_mapped_walk() cleanup: sometimes the local copy of pvmw->page
+was used, sometimes pvmw->page itself: use the local copy "page"
+throughout.
+
+Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com
+Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com
+Signed-off-by: Hugh Dickins
+Reviewed-by: Alistair Popple
+Acked-by: Kirill A. Shutemov
+Reviewed-by: Peter Xu
+Cc: Yang Shi
+Cc: Wang Yugui
+Cc: Matthew Wilcox
+Cc: Ralph Campbell
+Cc: Zi Yan
+Cc: Will Deacon
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -156,7 +156,7 @@ bool page_vma_mapped_walk(struct page_vm
+ 	if (pvmw->pte)
+ 		goto next_pte;
+ 
+-	if (unlikely(PageHuge(pvmw->page))) {
++	if (unlikely(PageHuge(page))) {
+ 		/* when pud is not present, pte will be NULL */
+ 		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
+ 		if (!pvmw->pte)
+@@ -217,8 +217,7 @@ restart:
+ 		 * cannot return prematurely, while zap_huge_pmd() has
+ 		 * cleared *pmd but not decremented compound_mapcount().
+ 		 */
+-		if ((pvmw->flags & PVMW_SYNC) &&
+-		    PageTransCompound(pvmw->page)) {
++		if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
+ 			spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+ 
+ 			spin_unlock(ptl);
+@@ -234,9 +233,9 @@ restart:
+ 		return true;
+ next_pte:
+ 	/* Seek to next pte only makes sense for THP */
+-	if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
++	if (!PageTransHuge(page) || PageHuge(page))
+ 		return not_found(pvmw);
+-	end = vma_address_end(pvmw->page, pvmw->vma);
++	end = vma_address_end(page, pvmw->vma);
+ 	do {
+ 		pvmw->address += PAGE_SIZE;
+ 		if (pvmw->address >= end)
diff --git a/queue-5.12/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch b/queue-5.12/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch
new file mode 100644
index 00000000000..179e7e32d4c
--- /dev/null
+++ b/queue-5.12/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch
@@ -0,0 +1,58 @@
+From 3306d3119ceacc43ea8b141a73e21fea68eec30c Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:07 -0700
+Subject: mm: page_vma_mapped_walk(): use pmde for *pvmw->pmd
+
+From: Hugh Dickins
+
+commit 3306d3119ceacc43ea8b141a73e21fea68eec30c upstream.
+
+page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then
+use it in subsequent tests, instead of repeatedly dereferencing pointer.
+
+Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Reviewed-by: Peter Xu
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -191,18 +191,19 @@ restart:
+ 	pmde = READ_ONCE(*pvmw->pmd);
+ 	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+ 		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+-		if (likely(pmd_trans_huge(*pvmw->pmd))) {
++		pmde = *pvmw->pmd;
++		if (likely(pmd_trans_huge(pmde))) {
+ 			if (pvmw->flags & PVMW_MIGRATION)
+ 				return not_found(pvmw);
+-			if (pmd_page(*pvmw->pmd) != page)
++			if (pmd_page(pmde) != page)
+ 				return not_found(pvmw);
+ 			return true;
+-		} else if (!pmd_present(*pvmw->pmd)) {
++		} else if (!pmd_present(pmde)) {
+ 			if (thp_migration_supported()) {
+ 				if (!(pvmw->flags & PVMW_MIGRATION))
+ 					return not_found(pvmw);
+-				if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
+-					swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
++				if (is_migration_entry(pmd_to_swp_entry(pmde))) {
++					swp_entry_t entry = pmd_to_swp_entry(pmde);
+ 
+ 					if (migration_entry_to_page(entry) != page)
+ 						return not_found(pvmw);
diff --git a/queue-5.12/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch b/queue-5.12/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
new file mode 100644
index 00000000000..a3b00267b6a
--- /dev/null
+++ b/queue-5.12/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch
@@ -0,0 +1,48 @@
+From a7a69d8ba88d8dcee7ef00e91d413a4bd003a814 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:30 -0700
+Subject: mm/thp: another PVMW_SYNC fix in page_vma_mapped_walk()
+
+From: Hugh Dickins
+
+commit a7a69d8ba88d8dcee7ef00e91d413a4bd003a814 upstream.
+
+Aha! Shouldn't that quick scan over pte_none()s make sure that it holds
+ptlock in the PVMW_SYNC case?
+That too might have been responsible for BUGs or WARNs in
+split_huge_page_to_list() or its unmap_page(), though I've never seen
+any.
+
+Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com
+Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
+Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Tested-by: Wang Yugui
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -276,6 +276,10 @@ next_pte:
+ 			goto restart;
+ 		}
+ 		pvmw->pte++;
++		if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
++			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
++			spin_lock(pvmw->ptl);
++		}
+ 	} while (pte_none(*pvmw->pte));
+ 
+ 	if (!pvmw->ptl) {
diff --git a/queue-5.12/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry.patch b/queue-5.12/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry.patch
new file mode 100644
index 00000000000..6f87a4679d0
--- /dev/null
+++ b/queue-5.12/mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry.patch
@@ -0,0 +1,126 @@
+From 99fa8a48203d62b3743d866fc48ef6abaee682be Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Tue, 15 Jun 2021 18:23:45 -0700
+Subject: mm/thp: fix __split_huge_pmd_locked() on shmem migration entry
+
+From: Hugh Dickins
+
+commit 99fa8a48203d62b3743d866fc48ef6abaee682be upstream.
+
+Patch series "mm/thp: fix THP splitting unmap BUGs and related", v10.
+
+Here is a v2 batch of long-standing THP bug fixes that I had not got
+around to sending before, but prompted now by Wang Yugui's report
+https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
+
+Wang Yugui has tested a rollup of these fixes applied to 5.10.39, and
+they have done no harm, but have *not* fixed that issue: something more
+is needed and I have no idea of what.
+
+This patch (of 7):
+
+Stressing huge tmpfs page migration racing hole punch often crashed on
+the VM_BUG_ON(!pmd_present) in pmdp_huge_clear_flush(), with DEBUG_VM=y
+kernel; or shortly afterwards, on a bad dereference in
+__split_huge_pmd_locked() when DEBUG_VM=n. They forgot to allow for pmd
+migration entries in the non-anonymous case.
+
+Full disclosure: those particular experiments were on a kernel with more
+relaxed mmap_lock and i_mmap_rwsem locking, and were not repeated on the
+vanilla kernel: it is conceivable that stricter locking happens to avoid
+those cases, or makes them less likely; but __split_huge_pmd_locked()
+already allowed for pmd migration entries when handling anonymous THPs,
+so this commit brings the shmem and file THP handling into line.
+
+And while there: use old_pmd rather than _pmd, as in the following
+blocks; and make it clearer to the eye that the !vma_is_anonymous()
+block is self-contained, making an early return after accounting for
+unmapping.
+
+Link: https://lkml.kernel.org/r/af88612-1473-2eaa-903-8d1a448b26@google.com
+Link: https://lkml.kernel.org/r/dd221a99-efb3-cd1d-6256-7e646af29314@google.com
+Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
+Signed-off-by: Hugh Dickins
+Cc: Kirill A. Shutemov
+Cc: Yang Shi
+Cc: Wang Yugui
+Cc: "Matthew Wilcox (Oracle)"
+Cc: Naoya Horiguchi
+Cc: Alistair Popple
+Cc: Ralph Campbell
+Cc: Zi Yan
+Cc: Miaohe Lin
+Cc: Minchan Kim
+Cc: Jue Wang
+Cc: Peter Xu
+Cc: Jan Kara
+Cc: Shakeel Butt
+Cc: Oscar Salvador
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/huge_memory.c     |   27 ++++++++++++++++++---------
+ mm/pgtable-generic.c |    5 ++---
+ 2 files changed, 20 insertions(+), 12 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2046,7 +2046,7 @@ static void __split_huge_pmd_locked(stru
+ 	count_vm_event(THP_SPLIT_PMD);
+ 
+ 	if (!vma_is_anonymous(vma)) {
+-		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
++		old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ 		/*
+ 		 * We are going to unmap this huge page. So
+ 		 * just go ahead and zap it
+@@ -2055,16 +2055,25 @@ static void __split_huge_pmd_locked(stru
+ 			zap_deposited_table(mm, pmd);
+ 		if (vma_is_special_huge(vma))
+ 			return;
+-		page = pmd_page(_pmd);
+-		if (!PageDirty(page) && pmd_dirty(_pmd))
+-			set_page_dirty(page);
+-		if (!PageReferenced(page) && pmd_young(_pmd))
+-			SetPageReferenced(page);
+-		page_remove_rmap(page, true);
+-		put_page(page);
++		if (unlikely(is_pmd_migration_entry(old_pmd))) {
++			swp_entry_t entry;
++
++			entry = pmd_to_swp_entry(old_pmd);
++			page = migration_entry_to_page(entry);
++		} else {
++			page = pmd_page(old_pmd);
++			if (!PageDirty(page) && pmd_dirty(old_pmd))
++				set_page_dirty(page);
++			if (!PageReferenced(page) && pmd_young(old_pmd))
++				SetPageReferenced(page);
++			page_remove_rmap(page, true);
++			put_page(page);
++		}
+ 		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
+ 		return;
+-	} else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
++	}
++
++	if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ 		/*
+ 		 * FIXME: Do we want to invalidate secondary mmu by calling
+ 		 * mmu_notifier_invalidate_range() see comments below inside
+--- a/mm/pgtable-generic.c
++++ b/mm/pgtable-generic.c
+@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_ar
+ {
+ 	pmd_t pmd;
+ 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+-	VM_BUG_ON(!pmd_present(*pmdp));
+-	/* Below assumes pmd_present() is true */
+-	VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
++	VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
++		  !pmd_devmap(*pmdp));
+ 	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+ 	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ 	return pmd;
diff --git a/queue-5.12/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch b/queue-5.12/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch
new file mode 100644
index 00000000000..2058c1149c0
--- /dev/null
+++ b/queue-5.12/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch
@@ -0,0 +1,61 @@
+From 31657170deaf1d8d2f6a1955fbc6fa9d228be036 Mon Sep 17 00:00:00 2001
+From: Jue Wang
+Date: Tue, 15 Jun 2021 18:24:00 -0700
+Subject: mm/thp: fix page_address_in_vma() on file THP tails
+
+From: Jue Wang
+
+commit 31657170deaf1d8d2f6a1955fbc6fa9d228be036 upstream.
+
+Anon THP tails were already supported, but memory-failure may need to
+use page_address_in_vma() on file THP tails, which its page->mapping
+check did not permit: fix it.
+
+hughd adds: no current usage is known to hit the issue, but this does
+fix a subtle trap in a general helper: best fixed in stable sooner than
+later.
+
+Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com
+Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
+Signed-off-by: Jue Wang
+Signed-off-by: Hugh Dickins
+Reviewed-by: Matthew Wilcox (Oracle)
+Reviewed-by: Yang Shi
+Acked-by: Kirill A. Shutemov
+Cc: Alistair Popple
+Cc: Jan Kara
+Cc: Miaohe Lin
+Cc: Minchan Kim
+Cc: Naoya Horiguchi
+Cc: Oscar Salvador
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Shakeel Butt
+Cc: Wang Yugui
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/rmap.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -716,11 +716,11 @@ unsigned long page_address_in_vma(struct
+ 		if (!vma->anon_vma || !page__anon_vma ||
+ 		    vma->anon_vma->root != page__anon_vma->root)
+ 			return -EFAULT;
+-	} else if (page->mapping) {
+-		if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
+-			return -EFAULT;
+-	} else
++	} else if (!vma->vm_file) {
++		return -EFAULT;
++	} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
+ 		return -EFAULT;
++	}
+ 
+ 	return vma_address(page, vma);
+ }
diff --git a/queue-5.12/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch b/queue-5.12/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch
new file mode 100644
index 00000000000..8789aef4996
--- /dev/null
+++ b/queue-5.12/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch
@@ -0,0 +1,121 @@
+From a9a7504d9beaf395481faa91e70e2fd08f7a3dde Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 24 Jun 2021 18:39:26 -0700
+Subject: mm/thp: fix page_vma_mapped_walk() if THP mapped by ptes
+
+From: Hugh Dickins
+
+commit a9a7504d9beaf395481faa91e70e2fd08f7a3dde upstream.
+
+Running certain tests with a DEBUG_VM kernel would crash within hours,
+on the total_mapcount BUG() in split_huge_page_to_list(), while trying
+to free up some memory by punching a hole in a shmem huge page: split's
+try_to_unmap() was unable to find all the mappings of the page (which,
+on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
+
+Crash dumps showed two tail pages of a shmem huge page remained mapped
+by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end
+of a long unmapped range; and no page table had yet been allocated for
+the head of the huge page to be mapped into.
+
+Although designed to handle these odd misaligned huge-page-mapped-by-pte
+cases, page_vma_mapped_walk() falls short by returning false prematurely
+when !pmd_present or !pud_present or !p4d_present or !pgd_present: there
+are cases when a huge page may span the boundary, with ptes present in
+the next.
+
+Restructure page_vma_mapped_walk() as a loop to continue in these cases,
+while keeping its layout much as before. Add a step_forward() helper to
+advance pvmw->address across those boundaries: originally I tried to use
+mm's standard p?d_addr_end() macros, but hit the same crash 512 times
+less often: because of the way redundant levels are folded together, but
+folded differently in different configurations, it was just too
+difficult to use them correctly; and step_forward() is simpler anyway.
+
+Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com
+Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Alistair Popple
+Cc: Matthew Wilcox
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Wang Yugui
+Cc: Will Deacon
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/page_vma_mapped.c |   34 +++++++++++++++++++++++---------
+ 1 file changed, 25 insertions(+), 9 deletions(-)
+
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -116,6 +116,13 @@ static bool check_pte(struct page_vma_ma
+ 	return pfn_is_match(pvmw->page, pfn);
+ }
+ 
++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
++{
++	pvmw->address = (pvmw->address + size) & ~(size - 1);
++	if (!pvmw->address)
++		pvmw->address = ULONG_MAX;
++}
++
+ /**
+  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+  * @pvmw->address
+@@ -183,16 +190,22 @@ bool page_vma_mapped_walk(struct page_vm
+ 	if (pvmw->pte)
+ 		goto next_pte;
+ restart:
+-	{
++	do {
+ 		pgd = pgd_offset(mm, pvmw->address);
+-		if (!pgd_present(*pgd))
+-			return false;
++		if (!pgd_present(*pgd)) {
++			step_forward(pvmw, PGDIR_SIZE);
++			continue;
++		}
+ 		p4d = p4d_offset(pgd, pvmw->address);
+-		if (!p4d_present(*p4d))
+-			return false;
++		if (!p4d_present(*p4d)) {
++			step_forward(pvmw, P4D_SIZE);
++			continue;
++		}
+ 		pud = pud_offset(p4d, pvmw->address);
+-		if (!pud_present(*pud))
+-			return false;
++		if (!pud_present(*pud)) {
++			step_forward(pvmw, PUD_SIZE);
++			continue;
++		}
+ 
+ 		pvmw->pmd = pmd_offset(pud, pvmw->address);
+ 		/*
+@@ -239,7 +252,8 @@ restart:
+ 
+ 				spin_unlock(ptl);
+ 			}
+-			return false;
++			step_forward(pvmw, PMD_SIZE);
++			continue;
+ 		}
+ 		if (!map_pte(pvmw))
+ 			goto next_pte;
+@@ -269,7 +283,9 @@ next_pte:
+ 			spin_lock(pvmw->ptl);
+ 		}
+ 		goto this_pte;
+-	}
++	} while (pvmw->address < end);
++
++	return false;
+ }
+ 
+ /**
diff --git a/queue-5.12/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch b/queue-5.12/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch
new file mode 100644
index 00000000000..9522237cec5
--- /dev/null
+++ b/queue-5.12/mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch
@@ -0,0 +1,247 @@
+From 494334e43c16d63b878536a26505397fce6ff3a2 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Tue, 15 Jun 2021 18:23:56 -0700
+Subject: mm/thp: fix vma_address() if virtual address below file offset
+
+From: Hugh Dickins
+
+commit 494334e43c16d63b878536a26505397fce6ff3a2 upstream.
+
+Running certain tests with a DEBUG_VM kernel would crash within hours,
+on the total_mapcount BUG() in split_huge_page_to_list(), while trying
+to free up some memory by punching a hole in a shmem huge page: split's
+try_to_unmap() was unable to find all the mappings of the page (which,
+on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
+
+When that BUG() was changed to a WARN(), it would later crash on the
+VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma) in
+mm/internal.h:vma_address(), used by rmap_walk_file() for
+try_to_unmap().
+
+vma_address() is usually correct, but there's a wraparound case when the
+vm_start address is unusually low, but vm_pgoff not so low:
+vma_address() chooses max(start, vma->vm_start), but that decides on the
+wrong address, because start has become almost ULONG_MAX.
+
+Rewrite vma_address() to be more careful about vm_pgoff; move the
+VM_BUG_ON_VMA() out of it, returning -EFAULT for errors, so that it can
+be safely used from page_mapped_in_vma() and page_address_in_vma() too.
+
+Add vma_address_end() to apply similar care to end address calculation,
+in page_vma_mapped_walk() and page_mkclean_one() and try_to_unmap_one();
+though it raises a question of whether callers would do better to supply
+pvmw->end to page_vma_mapped_walk() - I chose not, for a smaller patch.
+
+An irritation is that their apparent generality breaks down on KSM
+pages, which cannot be located by the page->index that page_to_pgoff()
+uses: as commit 4b0ece6fa016 ("mm: migrate: fix remove_migration_pte()
+for ksm pages") once discovered. I dithered over the best thing to do
+about that, and have ended up with a VM_BUG_ON_PAGE(PageKsm) in both
+vma_address() and vma_address_end(); though the only place in danger of
+using it on them was try_to_unmap_one().
+
+Sidenote: vma_address() and vma_address_end() now use compound_nr() on a
+head page, instead of thp_size(): to make the right calculation on a
+hugetlbfs page, whether or not THPs are configured. try_to_unmap() is
+used on hugetlbfs pages, but perhaps the wrong calculation never
+mattered.
+
+Link: https://lkml.kernel.org/r/caf1c1a3-7cfb-7f8f-1beb-ba816e932825@google.com
+Fixes: a8fa41ad2f6f ("mm, rmap: check all VMAs that PTE-mapped THP can be part of")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Cc: Alistair Popple
+Cc: Jan Kara
+Cc: Jue Wang
+Cc: "Matthew Wilcox (Oracle)"
+Cc: Miaohe Lin
+Cc: Minchan Kim
+Cc: Naoya Horiguchi
+Cc: Oscar Salvador
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Shakeel Butt
+Cc: Wang Yugui
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/internal.h        |   51 ++++++++++++++++++++++++++++++++++++++-------------
+ mm/page_vma_mapped.c |   16 ++++++----------
+ mm/rmap.c            |   16 ++++++++--------
+ 3 files changed, 52 insertions(+), 31 deletions(-)
+
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -379,27 +379,52 @@ static inline void mlock_migrate_page(st
+ extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+ 
+ /*
+- * At what user virtual address is page expected in @vma?
++ * At what user virtual address is page expected in vma?
++ * Returns -EFAULT if all of the page is outside the range of vma.
++ * If page is a compound head, the entire compound page is considered.
+  */
+ static inline unsigned long
+-__vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address(struct page *page, struct vm_area_struct *vma)
+ {
+-	pgoff_t pgoff = page_to_pgoff(page);
+-	return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++	pgoff_t pgoff;
++	unsigned long address;
++
++	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
++	pgoff = page_to_pgoff(page);
++	if (pgoff >= vma->vm_pgoff) {
++		address = vma->vm_start +
++			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++		/* Check for address beyond vma (or wrapped through 0?) */
++		if (address < vma->vm_start || address >= vma->vm_end)
++			address = -EFAULT;
++	} else if (PageHead(page) &&
++		   pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
++		/* Test above avoids possibility of wrap to 0 on 32-bit */
++		address = vma->vm_start;
++	} else {
++		address = -EFAULT;
++	}
++	return address;
+ }
+ 
++/*
++ * Then at what user virtual address will none of the page be found in vma?
++ * Assumes that vma_address() already returned a good starting address.
++ * If page is a compound head, the entire compound page is considered.
++ */
+ static inline unsigned long
+-vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address_end(struct page *page, struct vm_area_struct *vma)
+ {
+-	unsigned long start, end;
+-
+-	start = __vma_address(page, vma);
+-	end = start + thp_size(page) - PAGE_SIZE;
+-
+-	/* page should be within @vma mapping range */
+-	VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
++	pgoff_t pgoff;
++	unsigned long address;
+ 
+-	return max(start, vma->vm_start);
++	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
++	pgoff = page_to_pgoff(page) + compound_nr(page);
++	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++	/* Check for address beyond vma (or wrapped through 0?) */
++	if (address < vma->vm_start || address > vma->vm_end)
++		address = vma->vm_end;
++	return address;
+ }
+ 
+ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -228,18 +228,18 @@ restart:
+ 	if (!map_pte(pvmw))
+ 		goto next_pte;
+ 	while (1) {
++		unsigned long end;
++
+ 		if (check_pte(pvmw))
+ 			return true;
+ next_pte:
+ 		/* Seek to next pte only makes sense for THP */
+ 		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+ 			return not_found(pvmw);
++		end = vma_address_end(pvmw->page, pvmw->vma);
+ 		do {
+ 			pvmw->address += PAGE_SIZE;
+-			if (pvmw->address >= pvmw->vma->vm_end ||
+-			    pvmw->address >=
+-					__vma_address(pvmw->page, pvmw->vma) +
+-					thp_size(pvmw->page))
++			if (pvmw->address >= end)
+ 				return not_found(pvmw);
+ 			/* Did we cross page table boundary? */
+ 			if (pvmw->address % PMD_SIZE == 0) {
+@@ -277,14 +277,10 @@ int page_mapped_in_vma(struct page *page
+ 		.vma = vma,
+ 		.flags = PVMW_SYNC,
+ 	};
+-	unsigned long start, end;
+-
+-	start = __vma_address(page, vma);
+-	end = start + thp_size(page) - PAGE_SIZE;
+ 
+-	if (unlikely(end < vma->vm_start || start >= vma->vm_end))
++	pvmw.address = vma_address(page, vma);
++	if (pvmw.address == -EFAULT)
+ 		return 0;
+-	pvmw.address = max(start, vma->vm_start);
+ 	if (!page_vma_mapped_walk(&pvmw))
+ 		return 0;
+ 	page_vma_mapped_walk_done(&pvmw);
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm
+  */
+ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+ {
+-	unsigned long address;
+ 	if (PageAnon(page)) {
+ 		struct anon_vma *page__anon_vma = page_anon_vma(page);
+ 		/*
+@@ -722,10 +721,8 @@ unsigned long page_address_in_vma(struct
+ 			return -EFAULT;
+ 	} else
+ 		return -EFAULT;
+-	address = __vma_address(page, vma);
+-	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+-		return -EFAULT;
+-	return address;
++
++	return vma_address(page, vma);
+ }
+ 
+ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
+@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page
+ 	 */
+ 	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+ 				0, vma, vma->vm_mm, address,
+-				min(vma->vm_end, address + page_size(page)));
++				vma_address_end(page, vma));
+ 	mmu_notifier_invalidate_range_start(&range);
+ 
+ 	while (page_vma_mapped_walk(&pvmw)) {
+@@ -1435,9 +1432,10 @@ static bool try_to_unmap_one(struct page
+ 	 * Note that the page can not be free in this function as call of
+ 	 * try_to_unmap() must hold a reference on the page.
+ 	 */
++	range.end = PageKsm(page) ?
++		address + PAGE_SIZE : vma_address_end(page, vma);
+ 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+-				address,
+-				min(vma->vm_end, address + page_size(page)));
++				address, range.end);
+ 	if (PageHuge(page)) {
+ 		/*
+ 		 * If sharing is possible, start and end will be adjusted
+@@ -1889,6 +1887,7 @@ static void rmap_walk_anon(struct page *
+ 		struct vm_area_struct *vma = avc->vma;
+ 		unsigned long address = vma_address(page, vma);
+ 
++		VM_BUG_ON_VMA(address == -EFAULT, vma);
+ 		cond_resched();
+ 
+ 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+@@ -1943,6 +1942,7 @@ static void rmap_walk_file(struct page *
+ 			pgoff_start, pgoff_end) {
+ 		unsigned long address = vma_address(page, vma);
+ 
++		VM_BUG_ON_VMA(address == -EFAULT, vma);
+ 		cond_resched();
+ 
+ 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
diff --git a/queue-5.12/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch b/queue-5.12/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch
new file mode 100644
index 00000000000..71e763ed853
--- /dev/null
+++ b/queue-5.12/mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch
@@ -0,0 +1,116 @@
+From 3b77e8c8cde581dadab9a0f1543a347e24315f11 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Tue, 15 Jun 2021 18:23:49 -0700
+Subject: mm/thp: make is_huge_zero_pmd() safe and quicker
+
+From: Hugh Dickins
+
+commit 3b77e8c8cde581dadab9a0f1543a347e24315f11 upstream.
+
+Most callers of is_huge_zero_pmd() supply a pmd already verified
+present; but a few (notably zap_huge_pmd()) do not - it might be a pmd
+migration entry, in which the pfn is encoded differently from a present
+pmd: which might pass the is_huge_zero_pmd() test (though not on x86,
+since L1TF forced us to protect against that); or perhaps even crash in
+pmd_page() applied to a swap-like entry.
+
+Make it safe by adding pmd_present() check into is_huge_zero_pmd()
+itself; and make it quicker by saving huge_zero_pfn, so that
+is_huge_zero_pmd() will not need to do that pmd_page() lookup each time.
+
+__split_huge_pmd_locked() checked pmd_trans_huge() before: that worked,
+but is unnecessary now that is_huge_zero_pmd() checks present.
+
+Link: https://lkml.kernel.org/r/21ea9ca-a1f5-8b90-5e88-95fb1c49bbfa@google.com
+Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
+Signed-off-by: Hugh Dickins
+Acked-by: Kirill A. Shutemov
+Reviewed-by: Yang Shi
+Cc: Alistair Popple
+Cc: Jan Kara
+Cc: Jue Wang
+Cc: "Matthew Wilcox (Oracle)"
+Cc: Miaohe Lin
+Cc: Minchan Kim
+Cc: Naoya Horiguchi
+Cc: Oscar Salvador
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Shakeel Butt
+Cc: Wang Yugui
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/huge_mm.h |    8 +++++++-
+ mm/huge_memory.c        |    5 ++++-
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -289,6 +289,7 @@ struct page *follow_devmap_pud(struct vm
+ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+ 
+ extern struct page *huge_zero_page;
++extern unsigned long huge_zero_pfn;
+ 
+ static inline bool is_huge_zero_page(struct page *page)
+ {
+@@ -297,7 +298,7 @@ static inline bool is_huge_zero_page(str
+ }
+ 
+ static inline bool is_huge_zero_pmd(pmd_t pmd)
+ {
+-	return is_huge_zero_page(pmd_page(pmd));
++	return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
+ }
+ 
+ static inline bool is_huge_zero_pud(pud_t pud)
+@@ -442,6 +443,11 @@ static inline bool is_huge_zero_page(str
+ {
+ 	return false;
+ }
++
++static inline bool is_huge_zero_pmd(pmd_t pmd)
++{
++	return false;
++}
+ 
+ static inline bool is_huge_zero_pud(pud_t pud)
+ {
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -61,6 +61,7 @@ static struct shrinker deferred_split_sh
+ 
+ static atomic_t huge_zero_refcount;
+ struct page *huge_zero_page __read_mostly;
++unsigned long huge_zero_pfn __read_mostly = ~0UL;
+ 
+ bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+ {
+@@ -97,6 +98,7 @@ retry:
+ 		__free_pages(zero_page, compound_order(zero_page));
+ 		goto retry;
+ 	}
++	WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
+ 
+ 	/* We take additional reference here. It will be put back by shrinker */
+ 	atomic_set(&huge_zero_refcount, 2);
+@@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_pa
+ 	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
+ 		struct page *zero_page = xchg(&huge_zero_page, NULL);
+ 		BUG_ON(zero_page == NULL);
++		WRITE_ONCE(huge_zero_pfn, ~0UL);
+ 		__free_pages(zero_page, compound_order(zero_page));
+ 		return HPAGE_PMD_NR;
+ 	}
+@@ -2073,7 +2076,7 @@ static void __split_huge_pmd_locked(stru
+ 		return;
+ 	}
+ 
+-	if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
++	if (is_huge_zero_pmd(*pmd)) {
+ 		/*
+ 		 * FIXME: Do we want to invalidate secondary mmu by calling
+ 		 * mmu_notifier_invalidate_range() see comments below inside
diff --git a/queue-5.12/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch b/queue-5.12/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch
new file mode 100644
index 00000000000..8ba8cdd823c
--- /dev/null
+++ b/queue-5.12/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch
@@ -0,0 +1,113 @@
+From 504e070dc08f757bccaed6d05c0f53ecbfac8a23 Mon Sep 17 00:00:00 2001
+From: Yang Shi
+Date: Tue, 15 Jun 2021 18:24:07 -0700
+Subject: mm: thp: replace DEBUG_VM BUG with VM_WARN when unmap fails for split
+
+From: Yang Shi
+
+commit 504e070dc08f757bccaed6d05c0f53ecbfac8a23 upstream.
+
+When debugging the bug reported by Wang Yugui [1], try_to_unmap() may
+fail, but the first VM_BUG_ON_PAGE() just checks page_mapcount();
+however, it may miss the failure when the head page is unmapped but
+another subpage is mapped. Then the second DEBUG_VM BUG(), which checks
+the total mapcount, would catch it. This may incur some confusion.
+ +As this is not a fatal issue, so consolidate the two DEBUG_VM checks +into one VM_WARN_ON_ONCE_PAGE(). + +[1] https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/ + +Link: https://lkml.kernel.org/r/d0f0db68-98b8-ebfb-16dc-f29df24cf012@google.com +Signed-off-by: Yang Shi +Reviewed-by: Zi Yan +Acked-by: Kirill A. Shutemov +Signed-off-by: Hugh Dickins +Cc: Alistair Popple +Cc: Jan Kara +Cc: Jue Wang +Cc: "Matthew Wilcox (Oracle)" +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2360,15 +2360,15 @@ static void unmap_page(struct page *page + { + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; +- bool unmap_success; + + VM_BUG_ON_PAGE(!PageHead(page), page); + + if (PageAnon(page)) + ttu_flags |= TTU_SPLIT_FREEZE; + +- unmap_success = try_to_unmap(page, ttu_flags); +- VM_BUG_ON_PAGE(!unmap_success, page); ++ try_to_unmap(page, ttu_flags); ++ ++ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); + } + + static void remap_page(struct page *page, unsigned int nr) +@@ -2679,7 +2679,7 @@ int split_huge_page_to_list(struct page + struct deferred_split *ds_queue = get_deferred_split_queue(head); + struct anon_vma *anon_vma = NULL; + struct address_space *mapping = NULL; +- int count, mapcount, extra_pins, ret; ++ int extra_pins, ret; + pgoff_t end; + + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); +@@ -2738,7 +2738,6 @@ int split_huge_page_to_list(struct page + } + + unmap_page(head); +- VM_BUG_ON_PAGE(compound_mapcount(head), head); + + /* block interrupt reentry in xa_lock and spinlock */ + local_irq_disable(); +@@ -2756,9 +2755,7 @@ int split_huge_page_to_list(struct page + + /* Prevent deferred_split_scan() touching ->_refcount */ + spin_lock(&ds_queue->split_queue_lock); +- count = page_count(head); +- mapcount = total_mapcount(head); +- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { ++ if (page_ref_freeze(head, 1 + extra_pins)) { + if (!list_empty(page_deferred_list(head))) { + ds_queue->split_queue_len--; + list_del(page_deferred_list(head)); +@@ -2778,16 +2775,9 @@ int split_huge_page_to_list(struct page + __split_huge_page(page, list, end); + ret = 0; + } else { +- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { +- pr_alert("total_mapcount: %u, page_count(): %u\n", +- mapcount, count); +- if (PageTail(page)) +- dump_page(head, NULL); +- dump_page(page, "total_mapcount(head) > 0"); +- BUG(); +- } + spin_unlock(&ds_queue->split_queue_lock); +-fail: if (mapping) ++fail: ++ if (mapping) + xa_unlock(&mapping->i_pages); + local_irq_enable(); + remap_page(head, thp_nr_pages(head)); diff --git a/queue-5.12/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch b/queue-5.12/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch new file mode 100644 index 00000000000..54afc36b5e4 --- /dev/null +++ b/queue-5.12/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch @@ -0,0 +1,138 @@ +From 732ed55823fc3ad998d43b86bf771887bcc5ec67 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Tue, 15 Jun 2021 18:23:53 -0700 +Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting + +From: Hugh Dickins + +commit 732ed55823fc3ad998d43b86bf771887bcc5ec67 upstream. 
+ +Stressing huge tmpfs often crashed on unmap_page()'s VM_BUG_ON_PAGE +(!unmap_success): with dump_page() showing mapcount:1, but then its raw +struct page output showing _mapcount ffffffff i.e. mapcount 0. + +And even if that particular VM_BUG_ON_PAGE(!unmap_success) is removed, +it is immediately followed by a VM_BUG_ON_PAGE(compound_mapcount(head)), +and further down an IS_ENABLED(CONFIG_DEBUG_VM) total_mapcount BUG(): +all indicative of some mapcount difficulty in development here perhaps. +But the !CONFIG_DEBUG_VM path handles the failures correctly and +silently. + +I believe the problem is that once a racing unmap has cleared pte or +pmd, try_to_unmap_one() may skip taking the page table lock, and emerge +from try_to_unmap() before the racing task has reached decrementing +mapcount. + +Instead of abandoning the unsafe VM_BUG_ON_PAGE(), and the ones that +follow, use PVMW_SYNC in try_to_unmap_one() in this case: adding +TTU_SYNC to the options, and passing that from unmap_page(). + +When CONFIG_DEBUG_VM, or for non-debug too? Consensus is to do the same +for both: the slight overhead added should rarely matter, except perhaps +if splitting sparsely-populated multiply-mapped shmem. Once confident +that bugs are fixed, TTU_SYNC here can be removed, and the race +tolerated. + +Link: https://lkml.kernel.org/r/c1e95853-8bcd-d8fd-55fa-e7f2488e78f@google.com +Fixes: fec89c109f3a ("thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers") +Signed-off-by: Hugh Dickins +Cc: Alistair Popple +Cc: Jan Kara +Cc: Jue Wang +Cc: Kirill A. Shutemov +Cc: "Matthew Wilcox (Oracle)" +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rmap.h | 1 + + mm/huge_memory.c | 2 +- + mm/page_vma_mapped.c | 11 +++++++++++ + mm/rmap.c | 17 ++++++++++++++++- + 4 files changed, 29 insertions(+), 2 deletions(-) + +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -91,6 +91,7 @@ enum ttu_flags { + + TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ + TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ ++ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ + TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible + * and caller guarantees they will +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2358,7 +2358,7 @@ void vma_adjust_trans_huge(struct vm_are + + static void unmap_page(struct page *page) + { +- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | ++ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; + bool unmap_success; + +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -212,6 +212,17 @@ restart: + pvmw->ptl = NULL; + } + } else if (!pmd_present(pmde)) { ++ /* ++ * If PVMW_SYNC, take and drop THP pmd lock so that we ++ * cannot return prematurely, while zap_huge_pmd() has ++ * cleared *pmd but not decremented compound_mapcount(). 
++ */ ++ if ((pvmw->flags & PVMW_SYNC) && ++ PageTransCompound(pvmw->page)) { ++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); ++ ++ spin_unlock(ptl); ++ } + return false; + } + if (!map_pte(pvmw)) +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1405,6 +1405,15 @@ static bool try_to_unmap_one(struct page + struct mmu_notifier_range range; + enum ttu_flags flags = (enum ttu_flags)(long)arg; + ++ /* ++ * When racing against e.g. zap_pte_range() on another cpu, ++ * in between its ptep_get_and_clear_full() and page_remove_rmap(), ++ * try_to_unmap() may return false when it is about to become true, ++ * if page table locking is skipped: use TTU_SYNC to wait for that. ++ */ ++ if (flags & TTU_SYNC) ++ pvmw.flags = PVMW_SYNC; ++ + /* munlock has nothing to gain from examining un-locked vmas */ + if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) + return true; +@@ -1777,7 +1786,13 @@ bool try_to_unmap(struct page *page, enu + else + rmap_walk(page, &rwc); + +- return !page_mapcount(page) ? true : false; ++ /* ++ * When racing against e.g. zap_pte_range() on another cpu, ++ * in between its ptep_get_and_clear_full() and page_remove_rmap(), ++ * try_to_unmap() may return false when it is about to become true, ++ * if page table locking is skipped: use TTU_SYNC to wait for that. ++ */ ++ return !page_mapcount(page); + } + + /** diff --git a/queue-5.12/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch b/queue-5.12/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch new file mode 100644 index 00000000000..6ea24c6a461 --- /dev/null +++ b/queue-5.12/mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch @@ -0,0 +1,249 @@ +From 22061a1ffabdb9c3385de159c5db7aac3a4df1cc Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Tue, 15 Jun 2021 18:24:03 -0700 +Subject: mm/thp: unmap_mapping_page() to fix THP truncate_cleanup_page() + +From: Hugh Dickins + +commit 22061a1ffabdb9c3385de159c5db7aac3a4df1cc upstream. + +There is a race between THP unmapping and truncation, when truncate sees +pmd_none() and skips the entry, after munmap's zap_huge_pmd() cleared +it, but before its page_remove_rmap() gets to decrement +compound_mapcount: generating false "BUG: Bad page cache" reports that +the page is still mapped when deleted. This commit fixes that, but not +in the way I hoped. + +The first attempt used try_to_unmap(page, TTU_SYNC|TTU_IGNORE_MLOCK) +instead of unmap_mapping_range() in truncate_cleanup_page(): it has +often been an annoyance that we usually call unmap_mapping_range() with +no pages locked, but there apply it to a single locked page. +try_to_unmap() looks more suitable for a single locked page. + +However, try_to_unmap_one() contains a VM_BUG_ON_PAGE(!pvmw.pte,page): +it is used to insert THP migration entries, but not used to unmap THPs. +Copy zap_huge_pmd() and add THP handling now? Perhaps, but their TLB +needs are different, I'm too ignorant of the DAX cases, and couldn't +decide how far to go for anon+swap. Set that aside. + +The second attempt took a different tack: make no change in truncate.c, +but modify zap_huge_pmd() to insert an invalidated huge pmd instead of +clearing it initially, then pmd_clear() between page_remove_rmap() and +unlocking at the end. Nice. But powerpc blows that approach out of the +water, with its serialize_against_pte_lookup(), and interesting pgtable +usage. It would need serious help to get working on powerpc (with a +minor optimization issue on s390 too). Set that aside. 
+ +Just add an "if (page_mapped(page)) synchronize_rcu();" or other such +delay, after unmapping in truncate_cleanup_page()? Perhaps, but though +that's likely to reduce or eliminate the number of incidents, it would +give less assurance of whether we had identified the problem correctly. + +This successful iteration introduces "unmap_mapping_page(page)" instead +of try_to_unmap(), and goes the usual unmap_mapping_range_tree() route, +with an addition to details. Then zap_pmd_range() watches for this +case, and does spin_unlock(pmd_lock) if so - just like +page_vma_mapped_walk() now does in the PVMW_SYNC case. Not pretty, but +safe. + +Note that unmap_mapping_page() is doing a VM_BUG_ON(!PageLocked) to +assert its interface; but currently that's only used to make sure that +page->mapping is stable, and zap_pmd_range() doesn't care if the page is +locked or not. Along these lines, in invalidate_inode_pages2_range() +move the initial unmap_mapping_range() out from under page lock, before +then calling unmap_mapping_page() under page lock if still mapped. + +Link: https://lkml.kernel.org/r/a2a4a148-cdd8-942c-4ef8-51b77f643dbe@google.com +Fixes: fc127da085c2 ("truncate: handle file thp") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Reviewed-by: Yang Shi +Cc: Alistair Popple +Cc: Jan Kara +Cc: Jue Wang +Cc: "Matthew Wilcox (Oracle)" +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 3 +++ + mm/memory.c | 41 +++++++++++++++++++++++++++++++++++++++++ + mm/truncate.c | 43 +++++++++++++++++++------------------------ + 3 files changed, 63 insertions(+), 24 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1680,6 +1680,7 @@ struct zap_details { + struct address_space *check_mapping; /* Check page->mapping if set */ + pgoff_t first_index; /* Lowest page->index to unmap */ + pgoff_t last_index; /* Highest page->index to unmap */ ++ struct page *single_page; /* Locked page to be unmapped */ + }; + + struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, +@@ -1727,6 +1728,7 @@ extern vm_fault_t handle_mm_fault(struct + extern int fixup_user_fault(struct mm_struct *mm, + unsigned long address, unsigned int fault_flags, + bool *unlocked); ++void unmap_mapping_page(struct page *page); + void unmap_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t nr, bool even_cows); + void unmap_mapping_range(struct address_space *mapping, +@@ -1747,6 +1749,7 @@ static inline int fixup_user_fault(struc + BUG(); + return -EFAULT; + } ++static inline void unmap_mapping_page(struct page *page) { } + static inline void unmap_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t nr, bool even_cows) { } + static inline void unmap_mapping_range(struct address_space *mapping, +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_rang + else if (zap_huge_pmd(tlb, vma, pmd, addr)) + goto next; + /* fall through */ ++ } else if (details && details->single_page && ++ PageTransCompound(details->single_page) && ++ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { ++ spinlock_t *ptl = pmd_lock(tlb->mm, pmd); ++ /* ++ * Take and drop THP pmd lock so that we cannot return ++ * prematurely, while zap_huge_pmd() has cleared *pmd, ++ * but not yet decremented 
compound_mapcount(). ++ */ ++ spin_unlock(ptl); + } ++ + /* + * Here there can be other concurrent MADV_DONTNEED or + * trans huge page faults running, and if the pmd is +@@ -3194,6 +3205,36 @@ static inline void unmap_mapping_range_t + } + + /** ++ * unmap_mapping_page() - Unmap single page from processes. ++ * @page: The locked page to be unmapped. ++ * ++ * Unmap this page from any userspace process which still has it mmaped. ++ * Typically, for efficiency, the range of nearby pages has already been ++ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once ++ * truncation or invalidation holds the lock on a page, it may find that ++ * the page has been remapped again: and then uses unmap_mapping_page() ++ * to unmap it finally. ++ */ ++void unmap_mapping_page(struct page *page) ++{ ++ struct address_space *mapping = page->mapping; ++ struct zap_details details = { }; ++ ++ VM_BUG_ON(!PageLocked(page)); ++ VM_BUG_ON(PageTail(page)); ++ ++ details.check_mapping = mapping; ++ details.first_index = page->index; ++ details.last_index = page->index + thp_nr_pages(page) - 1; ++ details.single_page = page; ++ ++ i_mmap_lock_write(mapping); ++ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) ++ unmap_mapping_range_tree(&mapping->i_mmap, &details); ++ i_mmap_unlock_write(mapping); ++} ++ ++/** + * unmap_mapping_pages() - Unmap pages from processes. + * @mapping: The address space containing pages to be unmapped. + * @start: Index of first page to be unmapped. +--- a/mm/truncate.c ++++ b/mm/truncate.c +@@ -168,13 +168,10 @@ void do_invalidatepage(struct page *page + * its lock, b) when a concurrent invalidate_mapping_pages got there first and + * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. + */ +-static void +-truncate_cleanup_page(struct address_space *mapping, struct page *page) ++static void truncate_cleanup_page(struct page *page) + { +- if (page_mapped(page)) { +- unsigned int nr = thp_nr_pages(page); +- unmap_mapping_pages(mapping, page->index, nr, false); +- } ++ if (page_mapped(page)) ++ unmap_mapping_page(page); + + if (page_has_private(page)) + do_invalidatepage(page, 0, thp_size(page)); +@@ -219,7 +216,7 @@ int truncate_inode_page(struct address_s + if (page->mapping != mapping) + return -EIO; + +- truncate_cleanup_page(mapping, page); ++ truncate_cleanup_page(page); + delete_from_page_cache(page); + return 0; + } +@@ -326,7 +323,7 @@ void truncate_inode_pages_range(struct a + index = indices[pagevec_count(&pvec) - 1] + 1; + truncate_exceptional_pvec_entries(mapping, &pvec, indices); + for (i = 0; i < pagevec_count(&pvec); i++) +- truncate_cleanup_page(mapping, pvec.pages[i]); ++ truncate_cleanup_page(pvec.pages[i]); + delete_from_page_cache_batch(mapping, &pvec); + for (i = 0; i < pagevec_count(&pvec); i++) + unlock_page(pvec.pages[i]); +@@ -652,6 +649,16 @@ int invalidate_inode_pages2_range(struct + continue; + } + ++ if (!did_range_unmap && page_mapped(page)) { ++ /* ++ * If page is mapped, before taking its lock, ++ * zap the rest of the file in one hit. ++ */ ++ unmap_mapping_pages(mapping, index, ++ (1 + end - index), false); ++ did_range_unmap = 1; ++ } ++ + lock_page(page); + WARN_ON(page_to_index(page) != index); + if (page->mapping != mapping) { +@@ -659,23 +666,11 @@ int invalidate_inode_pages2_range(struct + continue; + } + wait_on_page_writeback(page); +- if (page_mapped(page)) { +- if (!did_range_unmap) { +- /* +- * Zap the rest of the file in one hit. 
+- */ +- unmap_mapping_pages(mapping, index, +- (1 + end - index), false); +- did_range_unmap = 1; +- } else { +- /* +- * Just zap this page +- */ +- unmap_mapping_pages(mapping, index, +- 1, false); +- } +- } ++ ++ if (page_mapped(page)) ++ unmap_mapping_page(page); + BUG_ON(page_mapped(page)); ++ + ret2 = do_launder_page(mapping, page); + if (ret2 == 0) { + if (!invalidate_complete_page2(mapping, page)) diff --git a/queue-5.12/mm-thp-use-head-page-in-__migration_entry_wait.patch b/queue-5.12/mm-thp-use-head-page-in-__migration_entry_wait.patch new file mode 100644 index 00000000000..f6deda68218 --- /dev/null +++ b/queue-5.12/mm-thp-use-head-page-in-__migration_entry_wait.patch @@ -0,0 +1,65 @@ +From ffc90cbb2970ab88b66ea51dd580469eede57b67 Mon Sep 17 00:00:00 2001 +From: Xu Yu +Date: Tue, 15 Jun 2021 18:23:42 -0700 +Subject: mm, thp: use head page in __migration_entry_wait() + +From: Xu Yu + +commit ffc90cbb2970ab88b66ea51dd580469eede57b67 upstream. + +We notice that hung task happens in a corner but practical scenario when +CONFIG_PREEMPT_NONE is enabled, as follows. + +Process 0 Process 1 Process 2..Inf +split_huge_page_to_list + unmap_page + split_huge_pmd_address + __migration_entry_wait(head) + __migration_entry_wait(tail) + remap_page (roll back) + remove_migration_ptes + rmap_walk_anon + cond_resched + +Where __migration_entry_wait(tail) is occurred in kernel space, e.g., +copy_to_user in fstat, which will immediately fault again without +rescheduling, and thus occupy the cpu fully. + +When there are too many processes performing __migration_entry_wait on +tail page, remap_page will never be done after cond_resched. + +This makes __migration_entry_wait operate on the compound head page, +thus waits for remap_page to complete, whether the THP is split +successfully or roll back. + +Note that put_and_wait_on_page_locked helps to drop the page reference +acquired with get_page_unless_zero, as soon as the page is on the wait +queue, before actually waiting. So splitting the THP is only prevented +for a brief interval. + +Link: https://lkml.kernel.org/r/b9836c1dd522e903891760af9f0c86a2cce987eb.1623144009.git.xuyu@linux.alibaba.com +Fixes: ba98828088ad ("thp: add option to setup migration entries during PMD split") +Suggested-by: Hugh Dickins +Signed-off-by: Gang Deng +Signed-off-by: Xu Yu +Acked-by: Kirill A. 
Shutemov +Acked-by: Hugh Dickins +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/migrate.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -322,6 +322,7 @@ void __migration_entry_wait(struct mm_st + goto out; + + page = migration_entry_to_page(entry); ++ page = compound_head(page); + + /* + * Once page cache replacement of page migration started, page_count diff --git a/queue-5.12/series b/queue-5.12/series index 4d7f5daa9a6..547facabd6b 100644 --- a/queue-5.12/series +++ b/queue-5.12/series @@ -78,3 +78,23 @@ kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch +mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races.patch +mm-thp-use-head-page-in-__migration_entry_wait.patch +mm-thp-fix-__split_huge_pmd_locked-on-shmem-migration-entry.patch +mm-thp-make-is_huge_zero_pmd-safe-and-quicker.patch +mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch +mm-thp-fix-vma_address-if-virtual-address-below-file-offset.patch +mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch +mm-thp-unmap_mapping_page-to-fix-thp-truncate_cleanup_page.patch +mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-fails-for-split.patch +mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch +mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch +mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch +mm-page_vma_mapped_walk-prettify-pvmw_migration-block.patch +mm-page_vma_mapped_walk-crossing-page-table-boundary.patch +mm-page_vma_mapped_walk-add-a-level-of-indentation.patch +mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch +mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch +mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-ptes.patch +mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch +mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch