git.ipfire.org Git - thirdparty/linux.git/commitdiff
mm/mglru: use folio_mark_accessed to replace folio_set_active
author Barry Song (Xiaomi) <baohua@kernel.org>
Tue, 26 May 2026 13:09:38 +0000 (21:09 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 4 Jun 2026 21:45:02 +0000 (14:45 -0700)
MGLRU gives high priority to folios mapped in page tables.  As a result,
folio_set_active() is invoked for all folios read during page faults.  In
practice, however, readahead can bring in many folios that are never
accessed via page tables.

A previous attempt by Lei Liu proposed introducing a separate LRU for
readahead[1] to make readahead pages easier to reclaim, but that approach
is likely over-engineered.

Before commit 4d5d14a01e2c ("mm/mglru: rework workingset protection"),
folios with PG_active were always placed in the youngest generation,
leading to over-protection and increased refaults.  After that commit,
PG_active folios are placed in the second youngest generation, which is
still too optimistic given the presence of readahead.  In contrast, the
classic active/inactive scheme is more conservative.

This patch switches to using folio_mark_accessed() and starts
prefaulted file folios in the second oldest generation instead of
the active generations.
The following are adjusted accordingly:
- WORKINGSET_ACTIVATE: now counted only for refaulted workingset
  folios, matching where PG_active is set;
- lru_gen_folio_seq(): place (pre)faulted file folios into the second
  oldest generation;
- promote second-scanned folios to workingset in
  folio_check_references(): we now have to depend on
  folio_lru_refs() > 1, since we previously relied on PG_referenced
  being set during the first scan, but PG_referenced is now set
  earlier.

Tested on x86 by running a kernel build with 20 threads inside a memcg
with a 1GB memory limit:

w/o patch:
real 1m50.764s
user 25m32.305s
sys 4m0.012s
pswpin: 1333245
pswpout: 4366443
pgpgin: 6962592
pgpgout: 17780712
swpout_zero: 1019603
swpin_zero: 14764
refault_file: 287794
refault_anon: 1347963

w/ patch:
real 1m48.879s
user 25m29.224s
sys 3m37.421s
pswpin: 568480
pswpout: 2322657
pgpgin: 4073416
pgpgout: 9613408
swpout_zero: 593275
swpin_zero: 9118
refault_file: 262505
refault_anon: 577550

active/inactive LRU:

real 1m49.928s
user 25m28.196s
sys 3m40.740s
pswpin: 463452
pswpout: 2309119
pgpgin: 4438856
pgpgout: 9568628
swpout_zero: 743704
swpin_zero: 7244
refault_file: 562555
refault_anon: 470694

Lance and Xueyuan made a huge contribution to this patch through testing.

Link: https://lore.kernel.org/20260526130938.66253-1-baohua@kernel.org
Link: https://lore.kernel.org/linux-mm/20250916072226.220426-1-liulei.rjpt@vivo.com/ [1]
Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
Tested-by: Lance Yang <lance.yang@linux.dev>
Tested-by: Xueyuan Chen <xueyuan.chen21@gmail.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Kairui Song <kasong@tencent.com>
Cc: Qi Zheng <qi.zheng@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: wangzicheng <wangzicheng@honor.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Lei Liu <liulei.rjpt@vivo.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Yuanchu Xie <yuanchu@google.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Kalesh Singh <kaleshsingh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm_inline.h
mm/swap.c
mm/vmscan.c
mm/workingset.c

index a171070e15f05c34f37bec04345f98bc8bc60719..a8430a7ae05447ab69233e427366c4e40e8dddb9 100644 (file)
@@ -247,7 +247,7 @@ static inline unsigned long lru_gen_folio_seq(const struct lruvec *lruvec,
                  (folio_test_dirty(folio) || folio_test_writeback(folio))))
                gen = MIN_NR_GENS;
        else
-               gen = MAX_NR_GENS - folio_test_workingset(folio);
+               gen = MAX_NR_GENS - (folio_test_workingset(folio) || folio_test_referenced(folio));
 
        return max(READ_ONCE(lrugen->max_seq) - gen + 1, READ_ONCE(lrugen->min_seq[type]));
 }
index 2dd84813f4ddec2f731fec424964e917cfa42a41..588f50d8f1a8c991b870b3cbc7ed7c24bf32c3ea 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -544,10 +544,20 @@ void folio_add_lru(struct folio *folio)
                        folio_test_unevictable(folio), folio);
        VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 
-       /* see the comment in lru_gen_folio_seq() */
+       /*
+        * For refaulted workingset folios, set PG_active so they
+        * can be added to active generations.
+        * For prefaulted file folios, folio_mark_accessed() sets
+        * PG_referenced so lru_gen_folio_seq() places them into
+        * the second oldest generation.
+        */
        if (lru_gen_enabled() && !folio_test_unevictable(folio) &&
-           lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
-               folio_set_active(folio);
+           lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) {
+               if (folio_test_workingset(folio))
+                       folio_set_active(folio);
+               else if (!folio_test_referenced(folio))
+                       folio_mark_accessed(folio);
+       }
 
        folio_batch_add_and_move(folio, lru_add);
 }
index 3c856a78c0a59eba4f4b62e532aec73b6e07a5d4..76193a84a2afcdd49bf49374444b135c6c0da4ed 100644 (file)
@@ -850,7 +850,11 @@ static bool lru_gen_set_refs(struct folio *folio)
                return false;
        }
 
-       set_mask_bits(&folio->flags.f, LRU_REFS_FLAGS, BIT(PG_workingset));
+       /* Promote on second access */
+       if (folio_lru_refs(folio) > 1)
+               set_mask_bits(&folio->flags.f, LRU_REFS_FLAGS, BIT(PG_workingset));
+       else
+               folio_mark_accessed(folio);
        return true;
 }
 #else
index 07e6836d05020bdebab6c73c0fffb15796d896ca..f351798e723acd69ec383d0b0cfec5796f52144e 100644 (file)
@@ -319,11 +319,13 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
 
        atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]);
 
-       /* see folio_add_lru() where folio_set_active() will be called */
-       if (lru_gen_in_fault())
-               mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
-
        if (workingset) {
+               /*
+                * see folio_add_lru(), where folio_set_active() is
+                * called for workingset folios
+                */
+               if (lru_gen_in_fault())
+                       mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
                folio_set_workingset(folio);
                mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta);
        } else