]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
btrfs: zoned: fix data relocation block group reservation
author: Naohiro Aota <naohiro.aota@wdc.com>
Wed, 16 Jul 2025 07:59:53 +0000 (16:59 +0900)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Aug 2025 14:34:38 +0000 (16:34 +0200)
[ Upstream commit daa0fde322350b467bc62bc1b141bf62df6123f8 ]

btrfs_zoned_reserve_data_reloc_bg() is called on mount and at that point,
all data block groups belong to the primary data space_info. So, we don't
find anything in the data relocation space_info.

Also, the condition "bg->used > 0" can select a block group that is full of
zone_unusable bytes as the candidate. Since we cannot allocate from such a
block group, it is useless to reserve it as the data relocation block group.

Furthermore, because of the space_info separation, we need to migrate the
selected block group to the data relocation space_info. If not, the extent
allocator cannot use the block group to do the allocation.

This commit fixes these three issues.

Fixes: e606ff985ec7 ("btrfs: zoned: reserve data_reloc block group on mount")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/btrfs/zoned.c

index 936448b1f71696dffca2761dad01a3b6eb7ca20e..af5ba3ad2eb8330442d653b0688653e94e14eec5 100644 (file)
@@ -18,6 +18,7 @@
 #include "accessors.h"
 #include "bio.h"
 #include "transaction.h"
+#include "sysfs.h"
 
 /* Maximum number of zones to report per blkdev_report_zones() call */
 #define BTRFS_REPORT_NR_ZONES   4096
@@ -2510,12 +2511,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
 void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
-       struct btrfs_space_info *space_info = data_sinfo->sub_group[0];
+       struct btrfs_space_info *space_info = data_sinfo;
        struct btrfs_trans_handle *trans;
        struct btrfs_block_group *bg;
        struct list_head *bg_list;
        u64 alloc_flags;
-       bool initial = false;
+       bool first = true;
        bool did_chunk_alloc = false;
        int index;
        int ret;
@@ -2529,21 +2530,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
        if (sb_rdonly(fs_info->sb))
                return;
 
-       ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
        alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
        index = btrfs_bg_flags_to_raid_index(alloc_flags);
 
-       bg_list = &data_sinfo->block_groups[index];
+       /* Scan the data space_info to find empty block groups. Take the second one. */
 again:
+       bg_list = &space_info->block_groups[index];
        list_for_each_entry(bg, bg_list, list) {
-               if (bg->used > 0)
+               if (bg->alloc_offset != 0)
                        continue;
 
-               if (!initial) {
-                       initial = true;
+               if (first) {
+                       first = false;
                        continue;
                }
 
+               if (space_info == data_sinfo) {
+                       /* Migrate the block group to the data relocation space_info. */
+                       struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
+                       int factor;
+
+                       ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+                       factor = btrfs_bg_type_to_factor(bg->flags);
+
+                       down_write(&space_info->groups_sem);
+                       list_del_init(&bg->list);
+                       /* We can assume this as we choose the second empty one. */
+                       ASSERT(!list_empty(&space_info->block_groups[index]));
+                       up_write(&space_info->groups_sem);
+
+                       spin_lock(&space_info->lock);
+                       space_info->total_bytes -= bg->length;
+                       space_info->disk_total -= bg->length * factor;
+                       /* There is no allocation ever happened. */
+                       ASSERT(bg->used == 0);
+                       ASSERT(bg->zone_unusable == 0);
+                       /* No super block in a block group on the zoned setup. */
+                       ASSERT(bg->bytes_super == 0);
+                       spin_unlock(&space_info->lock);
+
+                       bg->space_info = reloc_sinfo;
+                       if (reloc_sinfo->block_group_kobjs[index] == NULL)
+                               btrfs_sysfs_add_block_group_type(bg);
+
+                       btrfs_add_bg_to_space_info(fs_info, bg);
+               }
+
                fs_info->data_reloc_bg = bg->start;
                set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
                btrfs_zone_activate(bg);
@@ -2558,11 +2590,18 @@ again:
        if (IS_ERR(trans))
                return;
 
+       /* Allocate new BG in the data relocation space_info. */
+       space_info = data_sinfo->sub_group[0];
+       ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
        ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
        btrfs_end_transaction(trans);
        if (ret == 1) {
+               /*
+                * We allocated a new block group in the data relocation space_info. We
+                * can take that one.
+                */
+               first = false;
                did_chunk_alloc = true;
-               bg_list = &space_info->block_groups[index];
                goto again;
        }
 }