hugetlbfs: flush TLBs correctly after huge_pmd_unshare

author Nadav Amit <namit@vmware.com>

Sun, 21 Nov 2021 20:40:07 +0000 (12:40 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 22 Nov 2021 19:36:46 +0000 (11:36 -0800)
author Nadav Amit <namit@vmware.com>
Sun, 21 Nov 2021 20:40:07 +0000 (12:40 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 22 Nov 2021 19:36:46 +0000 (11:36 -0800)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index f025d234522fbfd8c419e74e14cf691d958e0637..2ccebe1ca9f41ba4f096320c0fd813a0a9e0320f 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4939,6 +4939,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
         struct hstate *h = hstate_vma(vma);
         unsigned long sz = huge_page_size(h);
         struct mmu_notifier_range range;
+       bool force_flush = false;
  
         WARN_ON(!is_vm_hugetlb_page(vma));
         BUG_ON(start & ~huge_page_mask(h));
@@ -4967,10 +4968,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                 ptl = huge_pte_lock(h, mm, ptep);
                 if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                         spin_unlock(ptl);
-                       /*
-                        * We just unmapped a page of PMDs by clearing a PUD.
-                        * The caller's TLB flush range should cover this area.
-                        */
+                       tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
+                       force_flush = true;
                         continue;
                 }
  
@@ -5027,6 +5026,22 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
         }
         mmu_notifier_invalidate_range_end(&range);
         tlb_end_vma(tlb, vma);
+
+       /*
+        * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
+        * could defer the flush until now, since by holding i_mmap_rwsem we
+        * guaranteed that the last reference would not be dropped. But we must
+        * do the flushing before we return, as otherwise i_mmap_rwsem will be
+        * dropped and the last reference to the shared PMDs page might be
+        * dropped as well.
+        *
+        * In theory we could defer the freeing of the PMD pages as well, but
+        * huge_pmd_unshare() relies on the exact page_count for the PMD page to
+        * detect sharing, so we cannot defer the release of the page either.
+        * Instead, do the flush now.
+        */
+       if (force_flush)
+               tlb_flush_mmu_tlbonly(tlb);
  }
  
  void __unmap_hugepage_range_final(struct mmu_gather *tlb,
author	Nadav Amit <namit@vmware.com>
	Sun, 21 Nov 2021 20:40:07 +0000 (12:40 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 22 Nov 2021 19:36:46 +0000 (11:36 -0800)