mm: support batched unmap for lazyfree large folios during reclamation
author    Barry Song <v-songbaohua@oppo.com>
          Fri, 14 Feb 2025 09:30:14 +0000 (22:30 +1300)
committer Andrew Morton <akpm@linux-foundation.org>
          Mon, 17 Mar 2025 05:06:16 +0000 (22:06 -0700)
Currently, the PTEs and rmap of a large folio are removed one at a time.
This is not only slow but also causes the large folio to be unnecessarily
added to deferred_split, which can lead to races between the
deferred_split shrinker callback and memory reclamation.  This patch
releases all PTEs and rmap entries in a batch.  Currently, it only handles
lazyfree large folios.
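
The deferred_split activity described above can be observed directly through the per-size mTHP sysfs stats; the counter below is the one the benchmark reads (the hugepages-64kB directory is only present when the kernel provides 64KiB mTHP support):

 cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/split_deferred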

The microbenchmark below tries to reclaim 128MB of lazyfree large folios,
each 64KiB in size:

 #include <stdio.h>
 #include <sys/mman.h>
 #include <string.h>
 #include <time.h>

 #define SIZE 128*1024*1024  // 128 MB

 /* Read the 64KiB mTHP split_deferred counter from sysfs */
 unsigned long read_split_deferred()
 {
 	FILE *file = fopen("/sys/kernel/mm/transparent_hugepage"
 			   "/hugepages-64kB/stats/split_deferred", "r");
 	if (!file) {
 		perror("Error opening file");
 		return 0;
 	}

 	unsigned long value;
 	if (fscanf(file, "%lu", &value) != 1) {
 		perror("Error reading value");
 		fclose(file);
 		return 0;
 	}

 	fclose(file);
 	return value;
 }

 int main(int argc, char *argv[])
 {
 	while (1) {
 		volatile int *p = mmap(0, SIZE, PROT_READ | PROT_WRITE,
 				       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

 		if (p == MAP_FAILED) {
 			perror("mmap");
 			return 1;
 		}

 		/* Fault the range in, then mark it lazyfree */
 		memset((void *)p, 1, SIZE);
 		madvise((void *)p, SIZE, MADV_FREE);

 		/* Time reclamation and count the deferred splits it triggers */
 		clock_t start_time = clock();
 		unsigned long start_split = read_split_deferred();
 		madvise((void *)p, SIZE, MADV_PAGEOUT);
 		clock_t end_time = clock();
 		unsigned long end_split = read_split_deferred();

 		double elapsed_time = (double)(end_time - start_time) / CLOCKS_PER_SEC;
 		printf("Time taken by reclamation: %f seconds, split_deferred: %lu\n",
 		       elapsed_time, end_split - start_split);

 		munmap((void *)p, SIZE);
 	}
 	return 0;
 }
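
To reproduce the numbers below, the program can be compiled with gcc and run
with 64KiB mTHP enabled. A sketch follows; the source file name, compiler
flags and the mTHP policy value are assumptions and depend on the test setup:

 # build the benchmark (file name is hypothetical)
 gcc -O2 lazyfree_bench.c -o a.out
 # allow the kernel to allocate 64KiB anonymous mTHP
 echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled
 ./a.out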

w/o patch:
~ # ./a.out
Time taken by reclamation: 0.177418 seconds, split_deferred: 2048
Time taken by reclamation: 0.178348 seconds, split_deferred: 2048
Time taken by reclamation: 0.174525 seconds, split_deferred: 2048
Time taken by reclamation: 0.171620 seconds, split_deferred: 2048
Time taken by reclamation: 0.172241 seconds, split_deferred: 2048
Time taken by reclamation: 0.174003 seconds, split_deferred: 2048
Time taken by reclamation: 0.171058 seconds, split_deferred: 2048
Time taken by reclamation: 0.171993 seconds, split_deferred: 2048
Time taken by reclamation: 0.169829 seconds, split_deferred: 2048
Time taken by reclamation: 0.172895 seconds, split_deferred: 2048
Time taken by reclamation: 0.176063 seconds, split_deferred: 2048
Time taken by reclamation: 0.172568 seconds, split_deferred: 2048
Time taken by reclamation: 0.171185 seconds, split_deferred: 2048
Time taken by reclamation: 0.170632 seconds, split_deferred: 2048
Time taken by reclamation: 0.170208 seconds, split_deferred: 2048
Time taken by reclamation: 0.174192 seconds, split_deferred: 2048
...

w/ patch:
~ # ./a.out
Time taken by reclamation: 0.074231 seconds, split_deferred: 0
Time taken by reclamation: 0.071026 seconds, split_deferred: 0
Time taken by reclamation: 0.072029 seconds, split_deferred: 0
Time taken by reclamation: 0.071873 seconds, split_deferred: 0
Time taken by reclamation: 0.073573 seconds, split_deferred: 0
Time taken by reclamation: 0.071906 seconds, split_deferred: 0
Time taken by reclamation: 0.073604 seconds, split_deferred: 0
Time taken by reclamation: 0.075903 seconds, split_deferred: 0
Time taken by reclamation: 0.073191 seconds, split_deferred: 0
Time taken by reclamation: 0.071228 seconds, split_deferred: 0
Time taken by reclamation: 0.071391 seconds, split_deferred: 0
Time taken by reclamation: 0.071468 seconds, split_deferred: 0
Time taken by reclamation: 0.071896 seconds, split_deferred: 0
Time taken by reclamation: 0.072508 seconds, split_deferred: 0
Time taken by reclamation: 0.071884 seconds, split_deferred: 0
Time taken by reclamation: 0.072433 seconds, split_deferred: 0
Time taken by reclamation: 0.071939 seconds, split_deferred: 0
...

Link: https://lkml.kernel.org/r/20250214093015.51024-4-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mauricio Faria de Oliveira <mfo@canonical.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shaoqin Huang <shahuang@redhat.com>
Cc: Tangquan Zheng <zhengtangquan@oppo.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/rmap.c

diff --git a/mm/rmap.c b/mm/rmap.c
index 765e541ac9be864887a1f3154ee6971f7a4fed87..7a93a7cd2c6468213a4a4484982b55c7c78ec614 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1722,6 +1722,25 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
 #endif
 }
 
+/* We support batch unmapping of PTEs for lazyfree large folios */
+static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
+                       struct folio *folio, pte_t *ptep)
+{
+       const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
+       int max_nr = folio_nr_pages(folio);
+       pte_t pte = ptep_get(ptep);
+
+       if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
+               return false;
+       if (pte_unused(pte))
+               return false;
+       if (pte_pfn(pte) != folio_pfn(folio))
+               return false;
+
+       return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
+                              NULL, NULL) == max_nr;
+}
+
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
@@ -1735,6 +1754,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
        struct page *subpage;
        struct mmu_notifier_range range;
        enum ttu_flags flags = (enum ttu_flags)(long)arg;
+       unsigned long nr_pages = 1, end_addr;
        unsigned long pfn;
        unsigned long hsz = 0;
 
@@ -1874,23 +1894,26 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                        if (pte_dirty(pteval))
                                folio_mark_dirty(folio);
                } else if (likely(pte_present(pteval))) {
-                       flush_cache_page(vma, address, pfn);
-                       /* Nuke the page table entry. */
-                       if (should_defer_flush(mm, flags)) {
-                               /*
-                                * We clear the PTE but do not flush so potentially
-                                * a remote CPU could still be writing to the folio.
-                                * If the entry was previously clean then the
-                                * architecture must guarantee that a clear->dirty
-                                * transition on a cached TLB entry is written through
-                                * and traps if the PTE is unmapped.
-                                */
-                               pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+                       if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
+                           can_batch_unmap_folio_ptes(address, folio, pvmw.pte))
+                               nr_pages = folio_nr_pages(folio);
+                       end_addr = address + nr_pages * PAGE_SIZE;
+                       flush_cache_range(vma, address, end_addr);
 
-                               set_tlb_ubc_flush_pending(mm, pteval, address, address + PAGE_SIZE);
-                       } else {
-                               pteval = ptep_clear_flush(vma, address, pvmw.pte);
-                       }
+                       /* Nuke the page table entry. */
+                       pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
+                       /*
+                        * We clear the PTE but do not flush so potentially
+                        * a remote CPU could still be writing to the folio.
+                        * If the entry was previously clean then the
+                        * architecture must guarantee that a clear->dirty
+                        * transition on a cached TLB entry is written through
+                        * and traps if the PTE is unmapped.
+                        */
+                       if (should_defer_flush(mm, flags))
+                               set_tlb_ubc_flush_pending(mm, pteval, address, end_addr);
+                       else
+                               flush_tlb_range(vma, address, end_addr);
                        if (pte_dirty(pteval))
                                folio_mark_dirty(folio);
                } else {
@@ -1968,7 +1991,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                                         * redirtied either using the page table or a previously
                                         * obtained GUP reference.
                                         */
-                                       set_pte_at(mm, address, pvmw.pte, pteval);
+                                       set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
                                        folio_set_swapbacked(folio);
                                        goto walk_abort;
                                } else if (ref_count != 1 + map_count) {
@@ -1981,10 +2004,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                                         * We'll come back here later and detect if the folio was
                                         * dirtied when the additional reference is gone.
                                         */
-                                       set_pte_at(mm, address, pvmw.pte, pteval);
+                                       set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
                                        goto walk_abort;
                                }
-                               dec_mm_counter(mm, MM_ANONPAGES);
+                               add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
                                goto discard;
                        }
 
@@ -2049,13 +2072,18 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                        dec_mm_counter(mm, mm_counter_file(folio));
                }
 discard:
-               if (unlikely(folio_test_hugetlb(folio)))
+               if (unlikely(folio_test_hugetlb(folio))) {
                        hugetlb_remove_rmap(folio);
-               else
-                       folio_remove_rmap_pte(folio, subpage, vma);
+               } else {
+                       folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
+                       folio_ref_sub(folio, nr_pages - 1);
+               }
                if (vma->vm_flags & VM_LOCKED)
                        mlock_drain_local();
                folio_put(folio);
+               /* We have already batched the entire folio */
+               if (nr_pages > 1)
+                       goto walk_done;
                continue;
 walk_abort:
                ret = false;