From d1a020a8d72731b80a01e1abdb8ff965ee278f69 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 20 Dec 2025 13:07:40 +1030 Subject: [PATCH] btrfs: add mount time auto fix for orphan fst entries [BUG] Before btrfs-progs v6.16.1 release, mkfs.btrfs can leave free space tree entries for deleted chunks: # mkfs.btrfs -f -O fst $dev # btrfs ins dump-tree -t chunk $dev btrfs-progs v6.16 chunk tree leaf 22036480 items 4 free space 15781 generation 8 owner CHUNK_TREE leaf 22036480 flags 0x1(WRITTEN) backref revision 1 item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98 item 1 key (FIRST_CHUNK_TREE CHUNK_ITEM 13631488) itemoff 16105 itemsize 80 ^^^ The first chunk is at 13631488 item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 22020096) itemoff 15993 itemsize 112 item 3 key (FIRST_CHUNK_TREE CHUNK_ITEM 30408704) itemoff 15881 itemsize 112 # btrfs ins dump-tree -t free-space-tree $dev btrfs-progs v6.16 free space tree key (FREE_SPACE_TREE ROOT_ITEM 0) leaf 30556160 items 13 free space 15918 generation 8 owner FREE_SPACE_TREE leaf 30556160 flags 0x1(WRITTEN) backref revision 1 item 0 key (1048576 FREE_SPACE_INFO 4194304) itemoff 16275 itemsize 8 free space info extent count 1 flags 0 item 1 key (1048576 FREE_SPACE_EXTENT 4194304) itemoff 16275 itemsize 0 free space extent item 2 key (5242880 FREE_SPACE_INFO 8388608) itemoff 16267 itemsize 8 free space info extent count 1 flags 0 item 3 key (5242880 FREE_SPACE_EXTENT 8388608) itemoff 16267 itemsize 0 free space extent ^^^ Above 4 items are all before the first chunk. item 4 key (13631488 FREE_SPACE_INFO 8388608) itemoff 16259 itemsize 8 free space info extent count 1 flags 0 item 5 key (13631488 FREE_SPACE_EXTENT 8388608) itemoff 16259 itemsize 0 free space extent ... This can trigger btrfs check errors. [CAUSE] It's a bug in free space tree implementation of btrfs-progs, which doesn't delete involved fst entries for the to-be-deleted chunk/block group. 
[ENHANCEMENT] The most common fix is to clear the space cache and rebuild it, but that requires a ro->rw remount which may not be possible for rootfs, and also relies on users to use the "clear_cache" mount option manually. Introduce a kernel fix for it, which deletes any entries that are before the first block group automatically at the first RW mount. For filesystems without such a problem, the overhead is just a single tree search and no modification to the free space tree, thus the overhead should be minimal. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 ++++ fs/btrfs/free-space-tree.c | 103 +++++++++++++++++++++++++++++++++++++ fs/btrfs/free-space-tree.h | 1 + 3 files changed, 113 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7dea5615bd8ff..9bb5d65219a78 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3013,6 +3013,15 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } + /* + * Before btrfs-progs v6.16.1 mkfs.btrfs can leave free space entries + * for deleted temporary chunks. Delete them if they exist. 
+ */ + ret = btrfs_delete_orphan_free_space_entries(fs_info); + if (ret < 0) { + btrfs_err(fs_info, "failed to delete orphan free space tree entries: %d", ret); + goto out; + } /* * btrfs_find_orphan_roots() is responsible for finding all the dead * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a66ce9ef3affb..776b6467dfada 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1710,3 +1710,106 @@ int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) else return load_free_space_extents(caching_ctl, path, extent_count); } + +static int delete_orphan_free_space_entries(struct btrfs_root *fst_root, + struct btrfs_path *path, + u64 first_bg_bytenr) +{ + struct btrfs_trans_handle *trans; + int ret; + + trans = btrfs_start_transaction(fst_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + while (true) { + struct btrfs_key key = { 0 }; + int i; + + ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1); + if (ret < 0) + break; + ASSERT(ret > 0); + ret = 0; + for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) { + btrfs_item_key_to_cpu(path->nodes[0], &key, i); + if (key.objectid >= first_bg_bytenr) { + /* + * First key at or past the first block group: stop + * scanning, slots [0, i) are the orphans to delete. + */ + break; + } + } + /* i == 0: the leaf is empty or its first key is not an orphan, so we are done. */ + if (i == 0) + break; + + ret = btrfs_del_items(trans, fst_root, path, 0, i); /* drop the i leading items, starting at slot 0 */ + if (ret < 0) + break; + btrfs_release_path(path); /* the next iteration searches again from the all-zero key */ + } + btrfs_release_path(path); + btrfs_end_transaction(trans); /* NOTE(review): runs on both success and error paths (no abort on error) and its return value is not checked - confirm intended */ + if (ret == 0) + btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries"); + return ret; +} + +/* Remove any free space entry before the first block group. 
*/ +int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info) +{ + BTRFS_PATH_AUTO_RELEASE(path); /* on-stack path (used as path.slots and &path below); presumably released automatically at scope exit - confirm against the macro */ + struct btrfs_key key = { + .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, + .type = BTRFS_ROOT_ITEM_KEY, + .offset = 0, + }; /* first used to look up the free space tree root, then reused as the search key */ + struct btrfs_root *root; + struct btrfs_block_group *bg; + u64 first_bg_bytenr; + int ret; /* 0 when there is nothing to do or the cleanup succeeded, negative errno on failure */ + + /* + * Extent tree v2 has multiple global roots based on the block group. + * This means we cannot easily grab the global free space tree and locate + * orphan items. Furthermore this is still experimental, all users + * should use the latest btrfs-progs anyway. + */ + if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) + return 0; + if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + return 0; /* FREE_SPACE_TREE compat_ro flag not set: nothing to clean up */ + root = btrfs_global_root(fs_info, &key); + if (!root) + return 0; /* no free space tree root found: treated as nothing to do, not as an error */ + + key.objectid = 0; + key.type = 0; + key.offset = 0; /* all-zero key: search for the smallest possible key */ + + bg = btrfs_lookup_first_block_group(fs_info, 0); + if (unlikely(!bg)) { + btrfs_err(fs_info, "no block group found"); + return -EUCLEAN; + } + first_bg_bytenr = bg->start; + btrfs_put_block_group(bg); /* only the start offset is needed, drop the reference right away */ + + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); /* lookup only: no transaction handle, ins_len 0, cow 0 */ + if (ret < 0) + return ret; + /* There should not be an all-zero key in fst, so an exact match (ret == 0) is unexpected. */ + ASSERT(ret > 0); + + /* Empty free space tree: the slot is not below the item count of the leaf. 
*/ + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) + return 0; + + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); + if (key.objectid >= first_bg_bytenr) + return 0; /* first entry is not before the first block group: no orphans, no transaction needed */ + btrfs_release_path(&path); /* release the lookup path before the helper searches again inside a transaction */ + return delete_orphan_free_space_entries(root, &path, first_bg_bytenr); +} diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h index 3d9a5d4477fc2..ca04fc7cf29e1 100644 --- a/fs/btrfs/free-space-tree.h +++ b/fs/btrfs/free-space-tree.h @@ -35,6 +35,7 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, u64 start, u64 size); int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, u64 start, u64 size); +int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_free_space_info * -- 2.47.3