*
* The mmu_gather API consists of:
*
- * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
+ * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_gather_mmu_vma() /
+ * tlb_finish_mmu()
*
* start and finish a mmu_gather
*
unsigned int vma_huge : 1;
unsigned int vma_pfn : 1;
+ /*
+ * Did we unshare (unmap) any shared page tables? For now only
+ * used for hugetlb PMD table sharing.
+ */
+ unsigned int unshared_tables : 1;
+
+ /*
+ * Did we unshare any page tables such that they are now exclusive
+ * and could get reused+modified by the new owner? When setting this
+ * flag, "unshared_tables" will be set as well. For now only used
+ * for hugetlb PMD table sharing.
+ */
+ unsigned int fully_unshared_tables : 1;
+
unsigned int batch_count;
#ifndef CONFIG_MMU_GATHER_NO_GATHER
tlb->cleared_pmds = 0;
tlb->cleared_puds = 0;
tlb->cleared_p4ds = 0;
+ tlb->unshared_tables = 0;
/*
* Do not reset mmu_gather::vma_* fields here, we do not
* call into tlb_start_vma() again to set them if there is an
* intermediate flush.
*/

/*
* Anything calling __tlb_adjust_range() also sets at least one of
* these bits.
*/
if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds ||
- tlb->cleared_puds || tlb->cleared_p4ds))
+ tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables))
return;
tlb_flush(tlb);
}
#endif
+#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+static inline void tlb_unshare_pmd_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt,
+ unsigned long addr)
+{
+ /*
+ * The caller must make sure that concurrent unsharing + exclusive
+ * reuse is impossible until tlb_flush_unshared_tables() has been called.
+ */
+ VM_WARN_ON_ONCE(!ptdesc_pmd_is_shared(pt));
+ ptdesc_pmd_pts_dec(pt);
+
+ /* Clearing a PUD pointing at a PMD table with PMD leaves. */
+ tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE);
+
+ /*
+ * If the page table is now exclusively owned, we fully unshared
+ * a page table.
+ */
+ if (!ptdesc_pmd_is_shared(pt))
+ tlb->fully_unshared_tables = true;
+ tlb->unshared_tables = true;
+}
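
/*
 * For reference: ptdesc_pmd_is_shared() is not part of this excerpt. A
 * sketch of its expected behaviour, assuming the existing
 * ptdesc_pmd_pts_count() share-count accounting (the real definition
 * may differ):
 */
static inline bool ptdesc_pmd_is_shared_sketch(struct ptdesc *pt)
{
	/* Non-zero while other users still map this PMD table. */
	return ptdesc_pmd_pts_count(pt) != 0;
}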
+
+static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb)
+{
+ /*
+ * As soon as the caller drops locks to allow for reuse of
+ * previously-shared tables, these tables could get modified and
+ * even reused outside of hugetlb context, so we have to make sure that
+ * any page table walkers (incl. TLB, GUP-fast) are aware of that
+ * change.
+ *
+ * Even if we are not fully unsharing a PMD table, we must
+ * flush the TLB for the unsharer now.
+ */
+ if (tlb->unshared_tables)
+ tlb_flush_mmu_tlbonly(tlb);
+
+ /*
+ * Similarly, we must make sure that concurrent GUP-fast will not
+ * walk previously-shared page tables that are getting modified+reused
+ * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast.
+ *
+ * We only perform this when we are the last sharer of a page table,
+ * as the IPI will reach all CPUs: once it has been processed, any
+ * concurrent GUP-fast that could still be walking the table has
+ * completed.
+ *
+ * Note that on configs where tlb_remove_table_sync_one() is a NOP,
+ * the expectation is that tlb_flush_mmu_tlbonly() will already have
+ * issued the required IPIs for us.
+ */
+ if (tlb->fully_unshared_tables) {
+ tlb_remove_table_sync_one();
+ tlb->fully_unshared_tables = false;
+ }
+}
+#endif /* CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
+
#endif /* CONFIG_MMU */
#endif /* _ASM_GENERIC__TLB_H */
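
/*
 * A condensed, hypothetical caller illustrating the sequence the new
 * helpers expect (modeled on the hugetlb call sites below; acquiring
 * and dropping the i_mmap_rwsem and the hugetlb VMA lock is omitted):
 */
static void unshare_one_pmd_example(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep)
{
	struct mmu_gather tlb;
	spinlock_t *ptl;

	tlb_gather_mmu_vma(&tlb, vma);

	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
	/* May set tlb.unshared_tables / tlb.fully_unshared_tables. */
	huge_pmd_unshare(&tlb, vma, addr, ptep);
	spin_unlock(ptl);

	/* Must run before the i_mmap_rwsem is dropped. */
	huge_pmd_unshare_flush(&tlb, vma);
	tlb_finish_mmu(&tlb);
}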
unsigned long last_addr_mask;
pte_t *src_pte, *dst_pte;
struct mmu_notifier_range range;
- bool shared_pmd = false;
+ struct mmu_gather tlb;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, old_addr,
old_end);
* range.
*/
flush_cache_range(vma, range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
if (huge_pte_none(huge_ptep_get(mm, old_addr, src_pte)))
continue;
- if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
- shared_pmd = true;
+ if (huge_pmd_unshare(&tlb, vma, old_addr, src_pte)) {
old_addr |= last_addr_mask;
new_addr |= last_addr_mask;
continue;
break;
move_huge_pte(vma, old_addr, new_addr, src_pte, dst_pte, sz);
+ tlb_remove_huge_tlb_entry(h, &tlb, src_pte, old_addr);
}
- if (shared_pmd)
- flush_hugetlb_tlb_range(vma, range.start, range.end);
- else
- flush_hugetlb_tlb_range(vma, old_end - len, old_end);
+ tlb_flush_mmu_tlbonly(&tlb);
+ huge_pmd_unshare_flush(&tlb, vma);
+
mmu_notifier_invalidate_range_end(&range);
i_mmap_unlock_write(mapping);
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
return len + old_addr - old_end;
}
unsigned long sz = huge_page_size(h);
bool adjust_reservation;
unsigned long last_addr_mask;
- bool force_flush = false;
WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
}
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, address, ptep)) {
+ if (huge_pmd_unshare(tlb, vma, address, ptep)) {
spin_unlock(ptl);
- tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
- force_flush = true;
address |= last_addr_mask;
continue;
}
}
tlb_end_vma(tlb, vma);
- /*
- * There is nothing protecting a previously-shared page table that we
- * unshared through huge_pmd_unshare() from getting freed after we
- * release i_mmap_rwsem, so flush the TLB now. If huge_pmd_unshare()
- * succeeded, flush the range corresponding to the pud.
- */
- if (force_flush)
- tlb_flush_mmu_tlbonly(tlb);
+ huge_pmd_unshare_flush(tlb, vma);
}
void __hugetlb_zap_begin(struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
long pages = 0, psize = huge_page_size(h);
- bool shared_pmd = false;
struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+ struct mmu_gather tlb;
/*
* In the case of shared PMDs, the area to flush could be beyond
BUG_ON(address >= end);
flush_cache_range(vma, range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
mmu_notifier_invalidate_range_start(&range);
hugetlb_vma_lock_write(vma);
}
}
ptl = huge_pte_lock(h, mm, ptep);
- if (huge_pmd_unshare(mm, vma, address, ptep)) {
+ if (huge_pmd_unshare(&tlb, vma, address, ptep)) {
/*
* When uffd-wp is enabled on the vma, unshare
* shouldn't happen at all. Warn about it if it
WARN_ON_ONCE(uffd_wp || uffd_wp_resolve);
pages++;
spin_unlock(ptl);
- shared_pmd = true;
address |= last_addr_mask;
continue;
}
pte = huge_pte_clear_uffd_wp(pte);
huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
pages++;
+ tlb_remove_huge_tlb_entry(h, &tlb, ptep, address);
}
next:
spin_unlock(ptl);
cond_resched();
}
- /*
- * There is nothing protecting a previously-shared page table that we
- * unshared through huge_pmd_unshare() from getting freed after we
- * release i_mmap_rwsem, so flush the TLB now. If huge_pmd_unshare()
- * succeeded, flush the range corresponding to the pud.
- */
- if (shared_pmd)
- flush_hugetlb_tlb_range(vma, range.start, range.end);
- else
- flush_hugetlb_tlb_range(vma, start, end);
+
+ tlb_flush_mmu_tlbonly(&tlb);
+ huge_pmd_unshare_flush(&tlb, vma);
/*
* No need to call mmu_notifier_arch_invalidate_secondary_tlbs() we are
* downgrading page table protection not changing it to point to a new
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
return pages > 0 ? (pages << h->order) : pages;
}
return pte;
}
-/*
- * unmap huge page backed by shared pte.
+/**
+ * huge_pmd_unshare - Unmap a pmd table if it is shared by multiple users
+ * @tlb: the current mmu_gather.
+ * @vma: the vma covering the pmd table.
+ * @addr: the address we are trying to unshare.
+ * @ptep: pointer into the (pmd) page table.
+ *
+ * Called with the page table lock held, the i_mmap_rwsem held in write mode
+ * and the hugetlb vma lock held in write mode.
*
- * Called with page table lock held.
+ * Note: The caller must call huge_pmd_unshare_flush() before dropping the
+ * i_mmap_rwsem.
*
- * returns: 1 successfully unmapped a shared pte page
- * 0 the underlying pte page is not shared, or it is the last user
+ * Returns: 1 if it was a shared PMD table and it got unmapped, or 0 if it
+ * was not a shared PMD table.
*/
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
unsigned long sz = huge_page_size(hstate_vma(vma));
+ struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd = pgd_offset(mm, addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
i_mmap_assert_write_locked(vma->vm_file->f_mapping);
hugetlb_vma_assert_locked(vma);
pud_clear(pud);
- /*
- * Once our caller drops the rmap lock, some other process might be
- * using this page table as a normal, non-hugetlb page table.
- * Wait for pending gup_fast() in other threads to finish before letting
- * that happen.
- */
- tlb_remove_table_sync_one();
- ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
+
+ tlb_unshare_pmd_ptdesc(tlb, virt_to_ptdesc(ptep), addr);
+
mm_dec_nr_pmds(mm);
return 1;
}
+/**
+ * huge_pmd_unshare_flush - Complete a sequence of huge_pmd_unshare() calls
+ * @tlb: the current mmu_gather.
+ * @vma: the vma covering the pmd table.
+ *
+ * Perform necessary TLB flushes or IPI broadcasts to synchronize PMD table
+ * unsharing with concurrent page table walkers.
+ *
+ * This function must be called after a sequence of huge_pmd_unshare()
+ * calls while still holding the i_mmap_rwsem.
+ */
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ /*
+ * We must synchronize page table unsharing such that nobody will
+ * try reusing a previously-shared page table while it might still
+ * be in use by previous sharers (TLB, GUP-fast).
+ */
+ i_mmap_assert_write_locked(vma->vm_file->f_mapping);
+
+ tlb_flush_unshared_tables(tlb);
+}
+
#else /* !CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING */
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
}
-int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
return 0;
}
+void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+}
+
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
unsigned long sz = huge_page_size(h);
struct mm_struct *mm = vma->vm_mm;
struct mmu_notifier_range range;
+ struct mmu_gather tlb;
unsigned long address;
spinlock_t *ptl;
pte_t *ptep;
return;
flush_cache_range(vma, start, end);
+ tlb_gather_mmu_vma(&tlb, vma);
+
/*
* No need to call adjust_range_if_pmd_sharing_possible(), because
* we have already done the PUD_SIZE alignment.
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
- huge_pmd_unshare(mm, vma, address, ptep);
+ huge_pmd_unshare(&tlb, vma, address, ptep);
spin_unlock(ptl);
}
- flush_hugetlb_tlb_range(vma, start, end);
+ huge_pmd_unshare_flush(&tlb, vma);
if (take_locks) {
i_mmap_unlock_write(vma->vm_file->f_mapping);
hugetlb_vma_unlock_write(vma);
* Documentation/mm/mmu_notifier.rst.
*/
mmu_notifier_invalidate_range_end(&range);
+ tlb_finish_mmu(&tlb);
}
/*
#include <linux/mm_inline.h>
#include <linux/oom.h>
-#include <asm/tlbflush.h>
+#include <asm/tlb.h>
#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>
* if unsuccessful.
*/
if (!anon) {
+ struct mmu_gather tlb;
+
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma))
goto walk_abort;
- if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
+
+ tlb_gather_mmu_vma(&tlb, vma);
+ if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
hugetlb_vma_unlock_write(vma);
- flush_tlb_range(vma,
- range.start, range.end);
+ huge_pmd_unshare_flush(&tlb, vma);
+ tlb_finish_mmu(&tlb);
/*
* The PMD table was unmapped,
* consequently unmapping the folio.
goto walk_done;
}
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
}
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
if (pte_dirty(pteval))
* fail if unsuccessful.
*/
if (!anon) {
+ struct mmu_gather tlb;
+
VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
if (!hugetlb_vma_trylock_write(vma)) {
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
}
- if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
- hugetlb_vma_unlock_write(vma);
- flush_tlb_range(vma,
- range.start, range.end);
+ tlb_gather_mmu_vma(&tlb, vma);
+ if (huge_pmd_unshare(&tlb, vma, address, pvmw.pte)) {
+ hugetlb_vma_unlock_write(vma);
+ huge_pmd_unshare_flush(&tlb, vma);
+ tlb_finish_mmu(&tlb);
/*
* The PMD table was unmapped,
* consequently unmapping the folio.
break;
}
hugetlb_vma_unlock_write(vma);
+ tlb_finish_mmu(&tlb);
}
/* Nuke the hugetlb page table entry */
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);