From 81e5a4551c32b454468f5aa3fe45dabb6bccb854 Mon Sep 17 00:00:00 2001
From: Mark Harmstone
Date: Wed, 7 Jan 2026 14:09:15 +0000
Subject: [PATCH] btrfs: allow balancing remap tree

Balancing the METADATA_REMAP chunk, i.e. the chunk in which the remap
tree lives, is a special case. We can't use the remap tree itself for
this, as then we'd have no way to bootstrap it on mount. And we can't
use the pre-remap tree code for this as it relies on walking the extent
tree, and we're not creating backrefs for METADATA_REMAP chunks.

So instead, if a balance would relocate any METADATA_REMAP block
groups, mark those block groups as readonly and COW every leaf of the
remap tree. There are more sophisticated ways of doing this, such as
only COWing nodes within a block group that's to be relocated, but
they're fiddly, with lots of edge cases. Plus it's not anticipated that
a) the number of METADATA_REMAP chunks will be particularly large, or
b) users will want to relocate only some of these chunks - the main use
case here is to unbreak RAID conversion and device removal.

Reviewed-by: Boris Burkov
Signed-off-by: Mark Harmstone
Signed-off-by: David Sterba
---
 fs/btrfs/volumes.c | 152 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 148 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d42b8d50aea23..af0197b242a7d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3990,8 +3990,11 @@ static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk
 	struct btrfs_balance_args *bargs = NULL;
 	u64 chunk_type = btrfs_chunk_type(leaf, chunk);
 
-	if (chunk_type & BTRFS_BLOCK_GROUP_METADATA_REMAP)
-		return false;
+	/* Treat METADATA_REMAP chunks as METADATA. */
+	if (chunk_type & BTRFS_BLOCK_GROUP_METADATA_REMAP) {
+		chunk_type &= ~BTRFS_BLOCK_GROUP_METADATA_REMAP;
+		chunk_type |= BTRFS_BLOCK_GROUP_METADATA;
+	}
 
 	/* type filter */
 	if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
@@ -4074,6 +4077,107 @@ static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk
 	return true;
 }
 
+struct remap_chunk_info {
+	struct list_head list;
+	u64 offset;
+	struct btrfs_block_group *bg;
+	bool made_ro;
+};
+
+static int cow_remap_tree(struct btrfs_trans_handle *trans, struct btrfs_path *path)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_key key = { 0 };
+	int ret;
+
+	ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, 0, 1);
+	if (ret < 0)
+		return ret;
+
+	while (true) {
+		ret = btrfs_next_leaf(fs_info->remap_root, path);
+		if (ret < 0) {
+			return ret;
+		} else if (ret > 0) {
+			ret = 0;
+			break;
+		}
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+		btrfs_release_path(path);
+
+		ret = btrfs_search_slot(trans, fs_info->remap_root, &key, path, 0, 1);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
+static int balance_remap_chunks(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+				struct list_head *chunks)
+{
+	struct remap_chunk_info *rci, *tmp;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	list_for_each_entry_safe(rci, tmp, chunks, list) {
+		rci->bg = btrfs_lookup_block_group(fs_info, rci->offset);
+		if (!rci->bg) {
+			list_del(&rci->list);
+			kfree(rci);
+			continue;
+		}
+
+		ret = btrfs_inc_block_group_ro(rci->bg, false);
+		if (ret)
+			goto end;
+
+		rci->made_ro = true;
+	}
+
+	if (list_empty(chunks))
+		return 0;
+
+	trans = btrfs_start_transaction(fs_info->remap_root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto end;
+	}
+
+	mutex_lock(&fs_info->remap_mutex);
+	ret = cow_remap_tree(trans, path);
+	mutex_unlock(&fs_info->remap_mutex);
+
+	btrfs_release_path(path);
+	btrfs_commit_transaction(trans);
+
+end:
+	while (!list_empty(chunks)) {
+		bool is_unused;
+
+		rci = list_first_entry(chunks, struct remap_chunk_info, list);
+
+		spin_lock(&rci->bg->lock);
+		is_unused = !btrfs_is_block_group_used(rci->bg);
+		spin_unlock(&rci->bg->lock);
+
+		if (is_unused)
+			btrfs_mark_bg_unused(rci->bg);
+
+		if (rci->made_ro)
+			btrfs_dec_block_group_ro(rci->bg);
+
+		btrfs_put_block_group(rci->bg);
+
+		list_del(&rci->list);
+		kfree(rci);
+	}
+
+	return ret;
+}
+
 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -4096,6 +4200,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	u32 count_meta = 0;
 	u32 count_sys = 0;
 	int chunk_reserved = 0;
+	struct remap_chunk_info *rci;
+	unsigned int num_remap_chunks = 0;
+	LIST_HEAD(remap_chunks);
 
 	path = btrfs_alloc_path();
 	if (!path) {
@@ -4194,7 +4301,8 @@ again:
 				count_data++;
 			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
 				count_sys++;
-			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
+			else if (chunk_type & (BTRFS_BLOCK_GROUP_METADATA |
+					       BTRFS_BLOCK_GROUP_METADATA_REMAP))
 				count_meta++;
 
 			goto loop;
@@ -4214,6 +4322,29 @@ again:
 			goto loop;
 		}
 
+		/*
+		 * Balancing METADATA_REMAP chunks takes place separately - add
+		 * the details to a list so it can be processed later.
+		 */
+		if (chunk_type & BTRFS_BLOCK_GROUP_METADATA_REMAP) {
+			mutex_unlock(&fs_info->reclaim_bgs_lock);
+
+			rci = kmalloc(sizeof(struct remap_chunk_info), GFP_NOFS);
+			if (!rci) {
+				ret = -ENOMEM;
+				goto error;
+			}
+
+			rci->offset = found_key.offset;
+			rci->bg = NULL;
+			rci->made_ro = false;
+			list_add_tail(&rci->list, &remap_chunks);
+
+			num_remap_chunks++;
+
+			goto loop;
+		}
+
 		if (!chunk_reserved) {
 			/*
 			 * We may be relocating the only data chunk we have,
@@ -4253,11 +4384,24 @@ loop:
 			key.offset = found_key.offset - 1;
 	}
 
+	btrfs_release_path(path);
+
 	if (counting) {
-		btrfs_release_path(path);
 		counting = false;
 		goto again;
 	}
+
+	if (!list_empty(&remap_chunks)) {
+		ret = balance_remap_chunks(fs_info, path, &remap_chunks);
+		if (ret == -ENOSPC)
+			enospc_errors++;
+
+		if (!ret) {
+			spin_lock(&fs_info->balance_lock);
+			bctl->stat.completed += num_remap_chunks;
+			spin_unlock(&fs_info->balance_lock);
+		}
+	}
 error:
 	if (enospc_errors) {
 		btrfs_info(fs_info, "%d enospc errors during balance",
-- 
2.47.3
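
For reference, a minimal userspace sketch of driving this path - not part of
the patch itself. Because should_balance_chunk() now treats METADATA_REMAP
chunks as METADATA, an ordinary metadata balance (here with a convert filter,
matching the RAID-conversion use case described above) is what ends up calling
balance_remap_chunks(). The mount point and the RAID1 target are assumptions
for illustration; the fallback flag values mirror the kernel's definitions in
case the installed uapi headers don't expose them.

/*
 * Illustration only: start a metadata balance with a convert filter.
 * With the patch applied, this also relocates the remap tree's
 * METADATA_REMAP chunks; without it they were skipped.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/btrfs.h>	/* BTRFS_IOC_BALANCE_V2, struct btrfs_ioctl_balance_args */
#include <linux/btrfs_tree.h>	/* BTRFS_BLOCK_GROUP_* profile bits */

/* Fallbacks mirroring the kernel's balance flag values (assumption: the
 * uapi headers on this system may not define them). */
#ifndef BTRFS_BALANCE_METADATA
#define BTRFS_BALANCE_METADATA		(1ULL << 2)
#endif
#ifndef BTRFS_BALANCE_ARGS_CONVERT
#define BTRFS_BALANCE_ARGS_CONVERT	(1ULL << 8)
#endif

int main(void)
{
	struct btrfs_ioctl_balance_args args;
	int fd, ret;

	fd = open("/mnt/btrfs", O_RDONLY);	/* assumed mount point */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.flags = BTRFS_BALANCE_METADATA;		/* balance metadata (and remap) chunks */
	args.meta.flags = BTRFS_BALANCE_ARGS_CONVERT;	/* convert them to a new profile... */
	args.meta.target = BTRFS_BLOCK_GROUP_RAID1;	/* ...RAID1, as an example */

	ret = ioctl(fd, BTRFS_IOC_BALANCE_V2, &args);
	if (ret < 0)
		perror("BTRFS_IOC_BALANCE_V2");

	close(fd);
	return ret < 0 ? 1 : 0;
}

The roughly equivalent CLI is "btrfs balance start -mconvert=raid1 <mount>";
before this patch, should_balance_chunk() simply returned false for
METADATA_REMAP chunks and the remap tree's chunk stayed where it was.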