mm: rmap: support batched checks of the references for large folios

author Baolin Wang <baolin.wang@linux.alibaba.com>

Mon, 9 Feb 2026 14:07:24 +0000 (22:07 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Thu, 12 Feb 2026 23:43:00 +0000 (15:43 -0800)
author Baolin Wang <baolin.wang@linux.alibaba.com>
Mon, 9 Feb 2026 14:07:24 +0000 (22:07 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 12 Feb 2026 23:43:00 +0000 (15:43 -0800)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h

index d1094c2d5fb61e5e39dcabb48072964f1f64aee3..07a2bbaf86e905133bd80c9bd211be6e16bfa30f 100644 (file)
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -515,16 +515,17 @@ static inline void mmu_notifier_range_init_owner(
         range->owner = owner;
  }
  
-#define ptep_clear_flush_young_notify(__vma, __address, __ptep)                \
+#define clear_flush_young_ptes_notify(__vma, __address, __ptep, __nr)  \
  ({                                                                     \
         int __young;                                                    \
         struct vm_area_struct *___vma = __vma;                          \
         unsigned long ___address = __address;                           \
-       __young = ptep_clear_flush_young(___vma, ___address, __ptep);   \
+       unsigned int ___nr = __nr;                                      \
+       __young = clear_flush_young_ptes(___vma, ___address, __ptep, ___nr);    \
         __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
                                                   ___address,           \
                                                   ___address +          \
-                                                       PAGE_SIZE);     \
+                                                 ___nr * PAGE_SIZE);   \
         __young;                                                        \
  })
  
@@ -650,7 +651,7 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
  
  #define mmu_notifier_range_update_to_read_only(r) false
  
-#define ptep_clear_flush_young_notify ptep_clear_flush_young
+#define clear_flush_young_ptes_notify clear_flush_young_ptes
  #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
  #define ptep_clear_young_notify ptep_test_and_clear_young
  #define pmdp_clear_young_notify pmdp_test_and_clear_young
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h

index 21b67d9375558e205842f403a28b2b5ad33efbbf..a50df42a893fb53b32b20cadbbea745702fb2aeb 100644 (file)
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1068,6 +1068,41 @@ static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
  }
  #endif
  
+#ifndef clear_flush_young_ptes
+/**
+ * clear_flush_young_ptes - Mark PTEs that map consecutive pages of the same
+ *                         folio as old and flush the TLB.
+ * @vma: The virtual memory area the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries whose access bit is cleared; must be at least 1.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_clear_flush_young() that ORs the per-entry results together.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock.  The PTEs map consecutive
+ * pages that belong to the same folio.  The PTEs are all in the same PMD.
+ */
+static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
+               unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+       int young = 0;
+
+       for (;;) {
+               young |= ptep_clear_flush_young(vma, addr, ptep);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               addr += PAGE_SIZE;
+       }
+
+       return young;
+}
+#endif
+
  /*
   * On some architectures hardware does not set page access bit when accessing
   * memory page, it is responsibility of software setting this bit. It brings
diff --git a/mm/rmap.c b/mm/rmap.c

index ab099405151ff91c6281665f083e51ebcc731d0e..3dbc2c4e02dc88c794c0ac6f95276daf0c481a8a 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -913,9 +913,11 @@ static bool folio_referenced_one(struct folio *folio,
         struct folio_referenced_arg *pra = arg;
         DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
         int ptes = 0, referenced = 0;
+       unsigned int nr;
  
         while (page_vma_mapped_walk(&pvmw)) {
                 address = pvmw.address;
+               nr = 1;
  
                 if (vma->vm_flags & VM_LOCKED) {
                         ptes++;
@@ -960,9 +962,21 @@ static bool folio_referenced_one(struct folio *folio,
                         if (lru_gen_look_around(&pvmw))
                                 referenced++;
                 } else if (pvmw.pte) {
-                       if (ptep_clear_flush_young_notify(vma, address,
-                                               pvmw.pte))
+                       if (folio_test_large(folio)) {
+                               unsigned long end_addr = pmd_addr_end(address, vma->vm_end);
+                               unsigned int max_nr = (end_addr - address) >> PAGE_SHIFT;
+                               pte_t pteval = ptep_get(pvmw.pte);
+
+                               nr = folio_pte_batch(folio, pvmw.pte,
+                                                    pteval, max_nr);
+                       }
+
+                       ptes += nr;
+                       if (clear_flush_young_ptes_notify(vma, address, pvmw.pte, nr))
                                 referenced++;
+                       /* Skip the batched PTEs */
+                       pvmw.pte += nr - 1;
+                       pvmw.address += (nr - 1) * PAGE_SIZE;
                 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
                         if (pmdp_clear_flush_young_notify(vma, address,
                                                 pvmw.pmd))
@@ -972,7 +986,15 @@ static bool folio_referenced_one(struct folio *folio,
                         WARN_ON_ONCE(1);
                 }
  
-               pra->mapcount--;
+               pra->mapcount -= nr;
+               /*
+                * If we are sure that we batched the entire folio,
+                * we can just optimize and stop right here.
+                */
+               if (ptes == pvmw.nr_pages) {
+                       page_vma_mapped_walk_done(&pvmw);
+                       break;
+               }
         }
  
         if (referenced)
author	Baolin Wang <baolin.wang@linux.alibaba.com>
	Mon, 9 Feb 2026 14:07:24 +0000 (22:07 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Thu, 12 Feb 2026 23:43:00 +0000 (15:43 -0800)
include/linux/mmu_notifier.h		patch \| blob \| blame \| history
include/linux/pgtable.h		patch \| blob \| blame \| history
mm/rmap.c		patch \| blob \| blame \| history