--- /dev/null
+From a4a118f2eead1d6c49e00765de89878288d4b890 Mon Sep 17 00:00:00 2001
+From: Nadav Amit <namit@vmware.com>
+Date: Sun, 21 Nov 2021 12:40:07 -0800
+Subject: hugetlbfs: flush TLBs correctly after huge_pmd_unshare
+
+From: Nadav Amit <namit@vmware.com>
+
+commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream.
+
+When a call from __unmap_hugepage_range() to huge_pmd_unshare() succeeds,
+a TLB flush is missing. This TLB flush must be performed before releasing
+the i_mmap_rwsem, in order to prevent the unshared page of PMDs from being
+released and reused before the TLB flush takes place.
+
+Arguably, a comprehensive solution would use the mmu_gather interface to
+batch the TLB flushes and the release of the page of PMDs, but it is not
+an easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also
+call huge_pmd_unshare() and they cannot use the mmu_gather interface; and
+(2) deferring the release of the page reference for the page of PMDs
+until after i_mmap_rwsem is dropped can confuse huge_pmd_unshare() into
+thinking PMDs are shared when they are not.
+
+Fix __unmap_hugepage_range() by adding the missing TLB flush, and
+forcing a flush when unshare is successful.
+
+Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages") # 3.6
+Signed-off-by: Nadav Amit <namit@vmware.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 23 +++++++++++++++++++----
+ 1 file changed, 19 insertions(+), 4 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3589,6 +3589,7 @@ void __unmap_hugepage_range(struct mmu_g
+ struct hstate *h = hstate_vma(vma);
+ unsigned long sz = huge_page_size(h);
+ struct mmu_notifier_range range;
++ bool force_flush = false;
+
+ WARN_ON(!is_vm_hugetlb_page(vma));
+ BUG_ON(start & ~huge_page_mask(h));
+@@ -3617,10 +3618,8 @@ void __unmap_hugepage_range(struct mmu_g
+ ptl = huge_pte_lock(h, mm, ptep);
+ if (huge_pmd_unshare(mm, &address, ptep)) {
+ spin_unlock(ptl);
+- /*
+- * We just unmapped a page of PMDs by clearing a PUD.
+- * The caller's TLB flush range should cover this area.
+- */
++ tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
++ force_flush = true;
+ continue;
+ }
+
+@@ -3677,6 +3676,22 @@ void __unmap_hugepage_range(struct mmu_g
+ }
+ mmu_notifier_invalidate_range_end(&range);
+ tlb_end_vma(tlb, vma);
++
++ /*
++ * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
++ * could defer the flush until now, since by holding i_mmap_rwsem we
++ * guaranteed that the last reference would not be dropped. But we must
++ * do the flushing before we return, as otherwise i_mmap_rwsem will be
++ * dropped and the last reference to the shared PMDs page might be
++ * dropped as well.
++ *
++ * In theory we could defer the freeing of the PMD pages as well, but
++ * huge_pmd_unshare() relies on the exact page_count for the PMD page to
++ * detect sharing, so we cannot defer the release of the page either.
++ * Instead, do the flush now.
++ */
++ if (force_flush)
++ tlb_flush_mmu_tlbonly(tlb);
+ }
+
+ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
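
To make the ordering argument in the patch above concrete, here is a minimal,
self-contained C sketch. It is plain userspace C, not kernel code: struct
gather, record_flush_range() and flush_pending() are illustrative stand-ins
for struct mmu_gather, tlb_flush_pmd_range() and tlb_flush_mmu_tlbonly().
It models the pattern the patch introduces: record the PUD-sized range as
soon as a PMD table is unshared, and force the flush before the lock that
keeps the unshared page of PMDs alive would be released.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct mmu_gather's flush bookkeeping. */
struct gather {
        unsigned long start, end;   /* widest range still needing a TLB flush */
        bool          need_flush;
};

/* Stand-in for tlb_flush_pmd_range(): widen the pending flush range. */
static void record_flush_range(struct gather *g, unsigned long addr,
                               unsigned long size)
{
        if (addr < g->start)
                g->start = addr;
        if (addr + size > g->end)
                g->end = addr + size;
        g->need_flush = true;
}

/* Stand-in for tlb_flush_mmu_tlbonly(): emit the flush, reset the range. */
static void flush_pending(struct gather *g)
{
        if (!g->need_flush)
                return;
        printf("flush TLB for [%#lx, %#lx)\n", g->start, g->end);
        g->start = ~0UL;
        g->end = 0;
        g->need_flush = false;
}

int main(void)
{
        struct gather g = { .start = ~0UL, .end = 0, .need_flush = false };
        bool force_flush = false;

        /* "huge_pmd_unshare() succeeded": record the whole PUD-sized area. */
        record_flush_range(&g, 0x40000000UL,
                           0x40000000UL /* 1 GiB, PUD_SIZE on x86-64 */);
        force_flush = true;

        /*
         * The point of the fix: flush while the lock that keeps the
         * unshared page of PMDs alive is still held, not after the
         * caller has dropped it.
         */
        if (force_flush)
                flush_pending(&g);

        /* ...only now would i_mmap_rwsem be released by the caller. */
        return 0;
}

The printed range corresponds to the PUD-aligned area that the real
tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE) call records.
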
--- /dev/null
+From 2631ed00b0498810f8d5c2163c6b5270d893687b Mon Sep 17 00:00:00 2001
+From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Date: Thu, 25 Jun 2020 16:03:12 +0800
+Subject: tlb: mmu_gather: add tlb_flush_*_range APIs
+
+From: Peter Zijlstra (Intel) <peterz@infradead.org>
+
+commit 2631ed00b0498810f8d5c2163c6b5270d893687b upstream.
+
+tlb_flush_{pte|pmd|pud|p4d}_range() adjust tlb->start and tlb->end,
+then set the corresponding cleared_* flag.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Link: https://lore.kernel.org/r/20200625080314.230-5-yezhenyu2@huawei.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/tlb.h | 55 +++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 40 insertions(+), 15 deletions(-)
+
+--- a/include/asm-generic/tlb.h
++++ b/include/asm-generic/tlb.h
+@@ -495,6 +495,38 @@ static inline void tlb_end_vma(struct mm
+ }
+ #endif
+
++/*
++ * tlb_flush_{pte|pmd|pud|p4d}_range() adjust tlb->start and tlb->end,
++ * and set the corresponding cleared_* flag.
++ */
++static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
++ unsigned long address, unsigned long size)
++{
++ __tlb_adjust_range(tlb, address, size);
++ tlb->cleared_ptes = 1;
++}
++
++static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
++ unsigned long address, unsigned long size)
++{
++ __tlb_adjust_range(tlb, address, size);
++ tlb->cleared_pmds = 1;
++}
++
++static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
++ unsigned long address, unsigned long size)
++{
++ __tlb_adjust_range(tlb, address, size);
++ tlb->cleared_puds = 1;
++}
++
++static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
++ unsigned long address, unsigned long size)
++{
++ __tlb_adjust_range(tlb, address, size);
++ tlb->cleared_p4ds = 1;
++}
++
+ #ifndef __tlb_remove_tlb_entry
+ #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+ #endif
+@@ -508,19 +540,17 @@ static inline void tlb_end_vma(struct mm
+ */
+ #define tlb_remove_tlb_entry(tlb, ptep, address) \
+ do { \
+- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
+- tlb->cleared_ptes = 1; \
++ tlb_flush_pte_range(tlb, address, PAGE_SIZE); \
+ __tlb_remove_tlb_entry(tlb, ptep, address); \
+ } while (0)
+
+ #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
+ do { \
+ unsigned long _sz = huge_page_size(h); \
+- __tlb_adjust_range(tlb, address, _sz); \
+ if (_sz == PMD_SIZE) \
+- tlb->cleared_pmds = 1; \
++ tlb_flush_pmd_range(tlb, address, _sz); \
+ else if (_sz == PUD_SIZE) \
+- tlb->cleared_puds = 1; \
++ tlb_flush_pud_range(tlb, address, _sz); \
+ __tlb_remove_tlb_entry(tlb, ptep, address); \
+ } while (0)
+
+@@ -534,8 +564,7 @@ static inline void tlb_end_vma(struct mm
+
+ #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
+ do { \
+- __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \
+- tlb->cleared_pmds = 1; \
++ tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \
+ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
+ } while (0)
+
+@@ -549,8 +578,7 @@ static inline void tlb_end_vma(struct mm
+
+ #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
+ do { \
+- __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
+- tlb->cleared_puds = 1; \
++ tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \
+ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \
+ } while (0)
+
+@@ -575,9 +603,8 @@ static inline void tlb_end_vma(struct mm
+ #ifndef pte_free_tlb
+ #define pte_free_tlb(tlb, ptep, address) \
+ do { \
+- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
++ tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \
+ tlb->freed_tables = 1; \
+- tlb->cleared_pmds = 1; \
+ __pte_free_tlb(tlb, ptep, address); \
+ } while (0)
+ #endif
+@@ -585,9 +612,8 @@ static inline void tlb_end_vma(struct mm
+ #ifndef pmd_free_tlb
+ #define pmd_free_tlb(tlb, pmdp, address) \
+ do { \
+- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
++ tlb_flush_pud_range(tlb, address, PAGE_SIZE); \
+ tlb->freed_tables = 1; \
+- tlb->cleared_puds = 1; \
+ __pmd_free_tlb(tlb, pmdp, address); \
+ } while (0)
+ #endif
+@@ -596,9 +622,8 @@ static inline void tlb_end_vma(struct mm
+ #ifndef pud_free_tlb
+ #define pud_free_tlb(tlb, pudp, address) \
+ do { \
+- __tlb_adjust_range(tlb, address, PAGE_SIZE); \
++ tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \
+ tlb->freed_tables = 1; \
+- tlb->cleared_p4ds = 1; \
+ __pud_free_tlb(tlb, pudp, address); \
+ } while (0)
+ #endif
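
As a quick illustration of what the new helpers do, the following
self-contained C model may help. It is userspace-only: struct
mmu_gather_model, adjust_range() and the flush_*_range() functions merely
mimic the fields and helpers in include/asm-generic/tlb.h. It shows the two
effects of each tlb_flush_*_range() call: the pending range [start, end) is
widened, as __tlb_adjust_range() does, and the matching cleared_* flag is
set so the architecture code can later pick a suitable flush granularity.

#include <stdbool.h>
#include <stdio.h>

/* Simplified model of the fields that tlb_flush_*_range() touch. */
struct mmu_gather_model {
        unsigned long start, end;
        bool cleared_ptes, cleared_pmds, cleared_puds, cleared_p4ds;
};

/* Equivalent of __tlb_adjust_range(): grow [start, end) to cover the range. */
static void adjust_range(struct mmu_gather_model *tlb,
                         unsigned long address, unsigned long size)
{
        tlb->start = address < tlb->start ? address : tlb->start;
        tlb->end   = address + size > tlb->end ? address + size : tlb->end;
}

static void flush_pte_range(struct mmu_gather_model *tlb,
                            unsigned long address, unsigned long size)
{
        adjust_range(tlb, address, size);
        tlb->cleared_ptes = true;
}

static void flush_pmd_range(struct mmu_gather_model *tlb,
                            unsigned long address, unsigned long size)
{
        adjust_range(tlb, address, size);
        tlb->cleared_pmds = true;
}

int main(void)
{
        struct mmu_gather_model tlb = { .start = ~0UL, .end = 0 };

        /* One 4 KiB PTE and one 2 MiB PMD mapping torn down in one batch. */
        flush_pte_range(&tlb, 0x7f0000001000UL, 0x1000UL);
        flush_pmd_range(&tlb, 0x7f0000200000UL, 0x200000UL);

        printf("flush range [%#lx, %#lx), ptes=%d pmds=%d\n",
               tlb.start, tlb.end, tlb.cleared_ptes, tlb.cleared_pmds);
        return 0;
}

Running it prints a single range covering both the 4 KiB and the 2 MiB
unmap, with cleared_ptes and cleared_pmds both set, which mirrors the state
an architecture's tlb_flush() can consult when deciding how to invalidate.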