ksm: perform a range-walk in break_ksm
author		Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
		Wed, 5 Nov 2025 18:49:11 +0000 (15:49 -0300)
committer	Andrew Morton <akpm@linux-foundation.org>
		Mon, 17 Nov 2025 01:28:28 +0000 (17:28 -0800)
Make break_ksm() receive an address range and change break_ksm_pmd_entry()
to perform a range-walk and return the address of the first ksm page
found.
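
For context: the rework leans on a standard pagewalk property, namely that a
positive return value from a .pmd_entry callback stops the walk and is
propagated unchanged by walk_page_range_vma() to its caller.  A minimal
sketch of that pattern (illustrative only, not part of the patch; the names
are made up):

	#include <linux/pagewalk.h>

	/*
	 * Illustrative sketch: report the first "interesting" address
	 * through walk->private and return a positive value to end the
	 * walk early.
	 */
	static int first_hit_pmd_entry(pmd_t *pmdp, unsigned long addr,
				       unsigned long end, struct mm_walk *walk)
	{
		unsigned long *found_addr = walk->private;

		/* ... scan the PTEs covered by this pmd, as the real
		 * break_ksm_pmd_entry() below does ... */
		*found_addr = addr;	/* hand the address back to the caller */
		return 1;		/* >0: stop walking, bubbles up as-is */
	}

	static const struct mm_walk_ops first_hit_ops = {
		.pmd_entry	= first_hit_pmd_entry,
	};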

This change allows break_ksm() to skip unmapped regions instead of
iterating every page address.  When unmerging large sparse VMAs, this
significantly reduces runtime.
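
For scale (back-of-the-envelope, assuming 4 KiB base pages and x86-64
4-level page tables): 32 TiB / 4 KiB = 2^45 / 2^12 = 2^33, i.e. roughly
8.6 billion per-page break_ksm() calls under the old loop, whereas a
page-table walk can dismiss an empty PMD (512 pages) or an empty PUD
(512 * 512 pages) with a single non-present check.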

In a benchmark unmerging a 32 TiB sparse virtual address space where only
one page was populated, the runtime dropped from 9 minutes to less than 5
seconds.
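
The behaviour should be reproducible from userspace along these lines
(hypothetical test program, not taken from the patch; assumes CONFIG_KSM
and a 64-bit address space roomy enough for the MAP_NORESERVE reservation):

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <time.h>

	int main(void)
	{
		size_t len = 32UL << 40;	/* 32 TiB of address space */
		struct timespec t0, t1;
		char *p;

		p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		if (madvise(p, len, MADV_MERGEABLE)) {	/* register with KSM */
			perror("madvise(MADV_MERGEABLE)");
			return 1;
		}
		memset(p, 0x5a, 4096);			/* populate one page */

		clock_gettime(CLOCK_MONOTONIC, &t0);
		/* unmerge path: calls unmerge_ksm_pages() -> break_ksm() */
		if (madvise(p, len, MADV_UNMERGEABLE))
			perror("madvise(MADV_UNMERGEABLE)");
		clock_gettime(CLOCK_MONOTONIC, &t1);

		printf("unmerge took %.3f s\n", (t1.tv_sec - t0.tv_sec) +
		       (t1.tv_nsec - t0.tv_nsec) / 1e9);
		return 0;
	}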

Link: https://lkml.kernel.org/r/20251105184912.186329-3-pedrodemargomes@gmail.com
Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/ksm.c

index 9f74baf01e465f9418595374121c08e662261e61..43be57a6a3fd6514eec24715401628c121bf4f6e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -607,35 +607,50 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
        return atomic_read(&mm->mm_users) == 0;
 }
 
-static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
+static int break_ksm_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
                        struct mm_walk *walk)
 {
-       struct folio *folio = NULL;
+       unsigned long *found_addr = (unsigned long *) walk->private;
+       struct mm_struct *mm = walk->mm;
+       pte_t *start_ptep, *ptep;
        spinlock_t *ptl;
-       pte_t *pte;
-       pte_t ptent;
-       int found;
+       int found = 0;
 
-       pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-       if (!pte)
+       if (ksm_test_exit(walk->mm))
                return 0;
-       ptent = ptep_get(pte);
-       if (pte_present(ptent)) {
-               folio = vm_normal_folio(walk->vma, addr, ptent);
-       } else if (!pte_none(ptent)) {
-               swp_entry_t entry = pte_to_swp_entry(ptent);
+       if (signal_pending(current))
+               return -ERESTARTSYS;
 
-               /*
-                * As KSM pages remain KSM pages until freed, no need to wait
-                * here for migration to end.
-                */
-               if (is_migration_entry(entry))
-                       folio = pfn_swap_entry_folio(entry);
+       start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+       if (!start_ptep)
+               return 0;
+
+       for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
+               pte_t pte = ptep_get(ptep);
+               struct folio *folio = NULL;
+
+               if (pte_present(pte)) {
+                       folio = vm_normal_folio(walk->vma, addr, pte);
+               } else if (!pte_none(pte)) {
+                       swp_entry_t entry = pte_to_swp_entry(pte);
+
+                       /*
+                        * As KSM pages remain KSM pages until freed, no need to wait
+                        * here for migration to end.
+                        */
+                       if (is_migration_entry(entry))
+                               folio = pfn_swap_entry_folio(entry);
+               }
+               /* return 1 if the page is a normal ksm page or KSM-placed zero page */
+               found = (folio && folio_test_ksm(folio)) ||
+                       (pte_present(pte) && is_ksm_zero_pte(pte));
+               if (found) {
+                       *found_addr = addr;
+                       goto out_unlock;
+               }
        }
-       /* return 1 if the page is a normal ksm page or KSM-placed zero page */
-       found = (folio && folio_test_ksm(folio)) ||
-               (pte_present(ptent) && is_ksm_zero_pte(ptent));
-       pte_unmap_unlock(pte, ptl);
+out_unlock:
+       pte_unmap_unlock(ptep, ptl);
        return found;
 }
 
@@ -662,7 +677,8 @@ static const struct mm_walk_ops break_ksm_lock_vma_ops = {
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
+               unsigned long end, bool lock_vma)
 {
        vm_fault_t ret = 0;
        const struct mm_walk_ops *ops = lock_vma ?
@@ -672,11 +688,9 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
                int ksm_page;
 
                cond_resched();
-               ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
-               if (WARN_ON_ONCE(ksm_page < 0))
+               ksm_page = walk_page_range_vma(vma, addr, end, ops, &addr);
+               if (ksm_page <= 0)
                        return ksm_page;
-               if (!ksm_page)
-                       return 0;
                ret = handle_mm_fault(vma, addr,
                                      FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
                                      NULL);
@@ -762,7 +776,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
        mmap_read_lock(mm);
        vma = find_mergeable_vma(mm, addr);
        if (vma)
-               break_ksm(vma, addr, false);
+               break_ksm(vma, addr, addr + PAGE_SIZE, false);
        mmap_read_unlock(mm);
 }
 
@@ -1073,18 +1087,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
                             unsigned long start, unsigned long end, bool lock_vma)
 {
-       unsigned long addr;
-       int err = 0;
-
-       for (addr = start; addr < end && !err; addr += PAGE_SIZE) {
-               if (ksm_test_exit(vma->vm_mm))
-                       break;
-               if (signal_pending(current))
-                       err = -ERESTARTSYS;
-               else
-                       err = break_ksm(vma, addr, lock_vma);
-       }
-       return err;
+       return break_ksm(vma, start, end, lock_vma);
 }
 
 static inline