]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
mm: khugepaged: skip lazy-free folios
author Vernon Yang <yanglincheng@kylinos.cn>
Sat, 21 Feb 2026 09:39:18 +0000 (17:39 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Sun, 5 Apr 2026 20:53:03 +0000 (13:53 -0700)
For example, create three tasks: hot1 -> cold -> hot2.  After all three
tasks are created, each allocates 128 MB of memory.  The hot1 and hot2
tasks continuously access their 128 MB of memory, while the cold task only
accesses its memory briefly and then calls madvise(MADV_FREE).  However,
khugepaged still prioritizes scanning the cold task and only scans the hot2
task after completing the scan of the cold task.

All folios in a VM_DROPPABLE vma are lazyfree.  Collapsing maintains that
property, so we can just collapse and memory pressure in the future will
free it up.  In contrast, collapsing in a !VM_DROPPABLE vma does not
maintain that property: the collapsed folio will not be lazyfree and memory
pressure in the future will not be able to free it up.

So if the user has explicitly informed us via MADV_FREE that this memory
will be freed, and this vma does not have the VM_DROPPABLE flag, it is
appropriate for khugepaged to simply skip it, thereby avoiding unnecessary
scan and collapse operations and reducing wasted CPU time.

Here are the performance test results:
(For Throughput, bigger is better; for the other metrics, smaller is better)

Testing on x86_64 machine:

| task hot2           | without patch | with patch    |  delta  |
|---------------------|---------------|---------------|---------|
| total accesses time |  3.14 sec     |  2.93 sec     | -6.69%  |
| cycles per access   |  4.96         |  2.21         | -55.44% |
| Throughput          |  104.38 M/sec |  111.89 M/sec | +7.19%  |
| dTLB-load-misses    |  284814532    |  69597236     | -75.56% |

Testing on qemu-system-x86_64 -enable-kvm:

| task hot2           | without patch | with patch    |  delta  |
|---------------------|---------------|---------------|---------|
| total accesses time |  3.35 sec     |  2.96 sec     | -11.64% |
| cycles per access   |  7.29         |  2.07         | -71.60% |
| Throughput          |  97.67 M/sec  |  110.77 M/sec | +13.41% |
| dTLB-load-misses    |  241600871    |  3216108      | -98.67% |

[vernon2gm@gmail.com: add comment about VM_DROPPABLE in code, make it clearer]
Link: https://lkml.kernel.org/r/i4uowkt4h2ev47obm5h2vtd4zbk6fyw5g364up7kkjn2vmcikq@auepvqethj5r
Link: https://lkml.kernel.org/r/20260221093918.1456187-5-vernon2gm@gmail.com
Signed-off-by: Vernon Yang <yanglincheng@kylinos.cn>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/trace/events/huge_memory.h
mm/khugepaged.c

index 384e29f6bef0fa1dcdcd92ed3ba0e618ff018371..bcdc57eea2701b09efe133853af501e9b6021204 100644 (file)
@@ -25,6 +25,7 @@
        EM( SCAN_PAGE_LRU,              "page_not_in_lru")              \
        EM( SCAN_PAGE_LOCK,             "page_locked")                  \
        EM( SCAN_PAGE_ANON,             "page_not_anon")                \
+       EM( SCAN_PAGE_LAZYFREE,         "page_lazyfree")                \
        EM( SCAN_PAGE_COMPOUND,         "page_compound")                \
        EM( SCAN_ANY_PROCESS,           "no_process_for_page")          \
        EM( SCAN_VMA_NULL,              "vma_null")                     \
index 8ee3c44bc851025c69893598ede5ac317fdb0192..13b0fe50dfc5652ee322bf45cd82b3792d9da037 100644 (file)
@@ -46,6 +46,7 @@ enum scan_result {
        SCAN_PAGE_LRU,
        SCAN_PAGE_LOCK,
        SCAN_PAGE_ANON,
+       SCAN_PAGE_LAZYFREE,
        SCAN_PAGE_COMPOUND,
        SCAN_ANY_PROCESS,
        SCAN_VMA_NULL,
@@ -577,6 +578,16 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
                folio = page_folio(page);
                VM_BUG_ON_FOLIO(!folio_test_anon(folio), folio);
 
+               /*
+                * If the vma has the VM_DROPPABLE flag, the collapse will
+                * preserve the lazyfree property without needing to skip.
+                */
+               if (cc->is_khugepaged && !(vma->vm_flags & VM_DROPPABLE) &&
+                   folio_test_lazyfree(folio) && !pte_dirty(pteval)) {
+                       result = SCAN_PAGE_LAZYFREE;
+                       goto out;
+               }
+
                /* See hpage_collapse_scan_pmd(). */
                if (folio_maybe_mapped_shared(folio)) {
                        ++shared;
@@ -1325,6 +1336,16 @@ static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
                }
                folio = page_folio(page);
 
+               /*
+                * If the vma has the VM_DROPPABLE flag, the collapse will
+                * preserve the lazyfree property without needing to skip.
+                */
+               if (cc->is_khugepaged && !(vma->vm_flags & VM_DROPPABLE) &&
+                   folio_test_lazyfree(folio) && !pte_dirty(pteval)) {
+                       result = SCAN_PAGE_LAZYFREE;
+                       goto out_unmap;
+               }
+
                if (!folio_test_anon(folio)) {
                        result = SCAN_PAGE_ANON;
                        goto out_unmap;