From 1db4b3609aa13efceddeae2e58749acb62d42d71 Mon Sep 17 00:00:00 2001
From: Yongpeng Yang
Date: Tue, 13 Jan 2026 23:23:15 +0800
Subject: [PATCH] f2fs: optimize NAT block loading during checkpoint write

Under stress tests with frequent metadata operations, checkpoint write
time can become excessively long. Analysis shows that the slowdown is
caused by synchronous, one-by-one reads of NAT blocks during checkpoint
processing.

The issue can be reproduced with the following workload:

1. seq 1 650000 | xargs -P 16 -n 1 touch
2. sync		# avoid checkpoint write during deletion
3. delete 1 file every 455 files
4. echo 3 > /proc/sys/vm/drop_caches
5. sync		# trigger checkpoint write

This patch submits read I/O in advance for all NAT blocks required in
the __flush_nat_entry_set() phase, reducing the overhead of
synchronously waiting for individual NAT block reads. Sets whose
entries still fit into the checkpoint journal are skipped, since they
are flushed to the journal and do not need their on-disk NAT blocks.

The NAT block flush latency before and after the change is shown below:

|             |NAT blocks accessed|NAT blocks read|Flush time (ms)|
|-------------|-------------------|---------------|---------------|
|Before change|1205               |1191           |158            |
|After change |1264               |1242           |11             |

With a similar number of NAT blocks accessed and read from disk, adding
NAT block readahead reduces the total NAT block flush time by more than
90%.

Signed-off-by: Yongpeng Yang
Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/node.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 63252ff1e5c3..74992fd9c9b6 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -3179,7 +3179,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	struct f2fs_journal *journal = curseg->journal;
 	struct nat_entry_set *setvec[NAT_VEC_SIZE];
 	struct nat_entry_set *set, *tmp;
-	unsigned int found;
+	unsigned int found, entry_count = 0;
 	nid_t set_idx = 0;
 	LIST_HEAD(sets);
 	int err = 0;
@@ -3219,6 +3219,18 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 					MAX_NAT_JENTRIES(sbi, journal));
 	}
 
+	/*
+	 * Readahead the NAT blocks to be flushed, to prevent read
+	 * requests from being issued and waited on one by one.
+	 */
+	list_for_each_entry(set, &sets, set_list) {
+		entry_count += set->entry_cnt;
+		if (!enabled_nat_bits(sbi, cpc) &&
+			__has_cursum_space(sbi, journal,
+				entry_count, NAT_JOURNAL))
+			continue;
+		f2fs_ra_meta_pages(sbi, set->set, 1, META_NAT, true);
+	}
 	/* flush dirty nats in nat entry set */
 	list_for_each_entry_safe(set, tmp, &sets, set_list) {
 		err = __flush_nat_entry_set(sbi, set, cpc);
-- 
2.47.3
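
Note: the reproduction steps in the commit message can be scripted as
below. This is a minimal sketch, not part of the patch; the mount point
/mnt/f2fs is a hypothetical example, and the step-455 deletion over the
numerically named files created by touch is an assumption drawn from
the steps listed above. Steps 4 and 5 require root.

	#!/bin/sh
	# Run on an f2fs mount (path is an assumption).
	cd /mnt/f2fs || exit 1

	# 1. Create 650000 files with 16 parallel touch processes.
	seq 1 650000 | xargs -P 16 -n 1 touch

	# 2. Sync so the deletions below do not race a checkpoint write.
	sync

	# 3. Delete 1 file every 455 files (touch named them 1..650000).
	seq 1 455 650000 | xargs rm -f

	# 4. Drop clean caches so NAT blocks must be re-read from disk.
	echo 3 > /proc/sys/vm/drop_caches

	# 5. Trigger the checkpoint write whose NAT flush time is measured.
	sync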