From 2abd9e1c58d46e4d5b528a83f75c392ca5700b92 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jun 2025 17:19:05 +0100 Subject: [PATCH] btrfs: cache if we are using free space bitmaps for a block group Every time we add free space to the free space tree or we remove free space from the free space tree, we do a lookup for the block group's free space info item in the free space tree. This takes time, navigating the btree and we may block either on IO when reading extent buffers from disk or on extent buffer lock contention due to concurrency. Instead of doing this lookup every time, cache the result in the block group structure and use it after the first lookup. This adds two boolean members to the block group structure but doesn't increase the structure's size. The following script that runs fs_mark was used to measure the time spent on run_delayed_tree_ref(), since down that call chain we have calls to add and remove free space to/from the free space tree (calls to btrfs_add_to_free_space_tree() and btrfs_remove_from_free_space_tree()): $ cat test.sh #!/bin/bash DEV=/dev/nullb0 MNT=/mnt FILES=100000 THREADS=$(nproc --all) echo "performance" | \ tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor umount $DEV &> /dev/null mkfs.btrfs -f $DEV mount -o ssd $DEV $MNT OPTS="-S 0 -L 5 -n $FILES -s 0 -t $THREADS -k" for ((i = 1; i <= $THREADS; i++)); do OPTS="$OPTS -d $MNT/d$i" done fs_mark $OPTS umount $MNT This is a heavy metadata test as it's exercising only file creation, so a lot of allocations of metadata extents, creating delayed refs for adding new metadata extents and dropping existing ones due to COW. The results of the times it took to execute run_delayed_tree_ref(), in nanoseconds, are the following. 
Before this change: Range: 1868.000 - 6482857.000; Mean: 10231.430; Median: 7005.000; Stddev: 27993.173 Percentiles: 90th: 13342.000; 95th: 23279.000; 99th: 82448.000 1868.000 - 4222.038: 270696 ############ 4222.038 - 9541.029: 1201327 ##################################################### 9541.029 - 21559.383: 385436 ################# 21559.383 - 48715.063: 64942 ### 48715.063 - 110073.800: 31454 # 110073.800 - 248714.944: 8218 | 248714.944 - 561977.042: 1030 | 561977.042 - 1269798.254: 295 | 1269798.254 - 2869132.711: 116 | 2869132.711 - 6482857.000: 28 | After this change: Range: 1554.000 - 4557014.000; Mean: 9168.164; Median: 6391.000; Stddev: 21467.060 Percentiles: 90th: 12478.000; 95th: 20964.000; 99th: 72234.000 1554.000 - 3453.820: 219004 ############ 3453.820 - 7674.743: 980645 ##################################################### 7674.743 - 17052.574: 552486 ############################## 17052.574 - 37887.762: 68558 #### 37887.762 - 84178.322: 31557 ## 84178.322 - 187024.331: 12102 # 187024.331 - 415522.355: 1364 | 415522.355 - 923187.626: 256 | 923187.626 - 2051092.468: 125 | 2051092.468 - 4557014.000: 21 | Approximate improvement in the first four buckets is about 20%. Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.h | 5 +++++ fs/btrfs/free-space-tree.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index aa176cc9a3249..a8bb8429c9663 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -246,6 +246,11 @@ struct btrfs_block_group { /* Lock for free space tree operations. */ struct mutex free_space_lock; + /* Protected by @free_space_lock. */ + bool using_free_space_bitmaps; + /* Protected by @free_space_lock. */ + bool using_free_space_bitmaps_cached; + /* * Number of extents in this block group used for swap files. * All accesses protected by the spinlock 'lock'. 
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 222ec3132f0f5..eba7f22ae49c6 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -287,6 +287,8 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; flags = btrfs_free_space_flags(leaf, info); flags |= BTRFS_FREE_SPACE_USING_BITMAPS; + block_group->using_free_space_bitmaps = true; + block_group->using_free_space_bitmaps_cached = true; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); btrfs_release_path(path); @@ -434,6 +436,8 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; flags = btrfs_free_space_flags(leaf, info); flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; + block_group->using_free_space_bitmaps = false; + block_group->using_free_space_bitmaps_cached = true; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); btrfs_release_path(path); @@ -796,13 +800,19 @@ static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) struct btrfs_free_space_info *info; u32 flags; + if (bg->using_free_space_bitmaps_cached) + return bg->using_free_space_bitmaps; + info = btrfs_search_free_space_info(NULL, bg, path, 0); if (IS_ERR(info)) return PTR_ERR(info); flags = btrfs_free_space_flags(path->nodes[0], info); btrfs_release_path(path); - return (flags & BTRFS_FREE_SPACE_USING_BITMAPS) ? 1 : 0; + bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); + bg->using_free_space_bitmaps_cached = true; + + return bg->using_free_space_bitmaps; } EXPORT_FOR_TESTS -- 2.47.2