]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
btrfs: add METADATA_REMAP chunk type
author: Mark Harmstone <mark@harmstone.com>
Wed, 7 Jan 2026 14:09:02 +0000 (14:09 +0000)
committer: David Sterba <dsterba@suse.com>
Tue, 3 Feb 2026 06:54:27 +0000 (07:54 +0100)
Add a new METADATA_REMAP chunk type, which is a metadata chunk that holds the
remap tree.

This is needed for bootstrapping purposes: the remap tree can't itself
be remapped, and must be relocated the existing way, by COWing every
leaf. The remap tree can't go in the SYSTEM chunk as space there is
limited, because a copy of the chunk item gets placed in the superblock.

The changes in fs/btrfs/volumes.h are needed because we're adding a new
block group type bit after the profile bits, and so can no longer rely on
the const_ilog2 trick.

The sizing of 32MB per chunk, matching the SYSTEM chunk, is an estimate;
we can adjust it later if it proves to be too big or too small.
This works out to be ~500,000 remap items, which for a 4KB block size
covers ~2GB of remapped data in the worst case (each item covering a single
4KB block) and ~500TB in the best case (each item covering ~1GB).

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/block-rsv.c
fs/btrfs/block-rsv.h
fs/btrfs/disk-io.c
fs/btrfs/fs.h
fs/btrfs/space-info.c
fs/btrfs/sysfs.c
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
include/uapi/linux/btrfs_tree.h

index 96cf7a1629870f11d2cf320db4832b16d4fc9274..e823230c09b7cb1cba0895439ca050c60e95284b 100644 (file)
@@ -419,6 +419,9 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
        case BTRFS_TREE_LOG_OBJECTID:
                root->block_rsv = &fs_info->treelog_rsv;
                break;
+       case BTRFS_REMAP_TREE_OBJECTID:
+               root->block_rsv = &fs_info->remap_block_rsv;
+               break;
        default:
                root->block_rsv = NULL;
                break;
@@ -432,6 +435,9 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
        space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
        fs_info->chunk_block_rsv.space_info = space_info;
 
+       space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA_REMAP);
+       fs_info->remap_block_rsv.space_info = space_info;
+
        space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
        fs_info->global_block_rsv.space_info = space_info;
        fs_info->trans_block_rsv.space_info = space_info;
@@ -458,6 +464,8 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
        WARN_ON(fs_info->trans_block_rsv.reserved > 0);
        WARN_ON(fs_info->chunk_block_rsv.size > 0);
        WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+       WARN_ON(fs_info->remap_block_rsv.size > 0);
+       WARN_ON(fs_info->remap_block_rsv.reserved > 0);
        WARN_ON(fs_info->delayed_block_rsv.size > 0);
        WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
        WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
index 79ae9d05cd9141ccb3f9207c833e4eac1d6b3a24..8359fb96bc3c7aa18e1423b4dad165b1132bf623 100644 (file)
@@ -22,6 +22,7 @@ enum btrfs_rsv_type {
        BTRFS_BLOCK_RSV_DELALLOC,
        BTRFS_BLOCK_RSV_TRANS,
        BTRFS_BLOCK_RSV_CHUNK,
+       BTRFS_BLOCK_RSV_REMAP,
        BTRFS_BLOCK_RSV_DELOPS,
        BTRFS_BLOCK_RSV_DELREFS,
        BTRFS_BLOCK_RSV_TREELOG,
index faa1c2c20ecd3ee55b655d3c4755a55666bc536c..922e69038d8106afcb634c275b86a525810b7441 100644 (file)
@@ -2751,6 +2751,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
                             BTRFS_BLOCK_RSV_GLOBAL);
        btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
        btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
+       btrfs_init_block_rsv(&fs_info->remap_block_rsv, BTRFS_BLOCK_RSV_REMAP);
        btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG);
        btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
        btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
index e3e5e52e97a2a2e74e5a3e0d67fd8b9673e3c471..195428ecfd75d7f624295361b5383b346fe63f15 100644 (file)
@@ -509,6 +509,8 @@ struct btrfs_fs_info {
        struct btrfs_block_rsv trans_block_rsv;
        /* Block reservation for chunk tree */
        struct btrfs_block_rsv chunk_block_rsv;
+       /* Block reservation for remap tree. */
+       struct btrfs_block_rsv remap_block_rsv;
        /* Block reservation for delayed operations */
        struct btrfs_block_rsv delayed_block_rsv;
        /* Block reservation for delayed refs */
index 1d76242f5e0df1335201e3e82116c392be9db07d..2c9cf1ab232bf6bcd2b791544d320c3af04dd3f0 100644 (file)
@@ -215,7 +215,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
 
        if (flags & BTRFS_BLOCK_GROUP_DATA)
                return BTRFS_MAX_DATA_CHUNK_SIZE;
-       else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+       else if (flags & (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA_REMAP))
                return SZ_32M;
 
        /* Handle BTRFS_BLOCK_GROUP_METADATA */
@@ -348,6 +348,8 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
        if (mixed) {
                flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
                ret = create_space_info(fs_info, flags);
+               if (ret)
+                       goto out;
        } else {
                flags = BTRFS_BLOCK_GROUP_METADATA;
                ret = create_space_info(fs_info, flags);
@@ -356,7 +358,15 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 
                flags = BTRFS_BLOCK_GROUP_DATA;
                ret = create_space_info(fs_info, flags);
+               if (ret)
+                       goto out;
+       }
+
+       if (features & BTRFS_FEATURE_INCOMPAT_REMAP_TREE) {
+               flags = BTRFS_BLOCK_GROUP_METADATA_REMAP;
+               ret = create_space_info(fs_info, flags);
        }
+
 out:
        return ret;
 }
@@ -611,6 +621,7 @@ static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
        DUMP_BLOCK_RSV(fs_info, global_block_rsv);
        DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
        DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
+       DUMP_BLOCK_RSV(fs_info, remap_block_rsv);
        DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
        DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
 }
index 8834a1dd499c9bdf9140049d4fed9e13af338b30..27bfb7b55ec4136efa5dd9e60d8dcd24aaff0c84 100644 (file)
@@ -1929,6 +1929,8 @@ static const char *alloc_name(struct btrfs_space_info *space_info)
        case BTRFS_BLOCK_GROUP_SYSTEM:
                ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
                return "system";
+       case BTRFS_BLOCK_GROUP_METADATA_REMAP:
+               return "metadata-remap";
        default:
                WARN_ON(1);
                return "invalid-combination";
index aedc208a95b83a252ad405f3719e0fee05a5b309..a6c158cd8fcd2a098c0a499585e0e26059f368a0 100644 (file)
@@ -748,17 +748,26 @@ static int check_block_group_item(struct extent_buffer *leaf,
                return -EUCLEAN;
        }
 
+       if (unlikely(flags & BTRFS_BLOCK_GROUP_METADATA_REMAP &&
+                    !btrfs_fs_incompat(fs_info, REMAP_TREE))) {
+               block_group_err(leaf, slot,
+"invalid flags, have 0x%llx (METADATA_REMAP flag set) but no remap-tree incompat flag",
+                               flags);
+               return -EUCLEAN;
+       }
+
        type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
        if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
                     type != BTRFS_BLOCK_GROUP_METADATA &&
                     type != BTRFS_BLOCK_GROUP_SYSTEM &&
+                    type != BTRFS_BLOCK_GROUP_METADATA_REMAP &&
                     type != (BTRFS_BLOCK_GROUP_METADATA |
                              BTRFS_BLOCK_GROUP_DATA))) {
                block_group_err(leaf, slot,
-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx, 0x%llx or 0x%llx",
                        type, hweight64(type),
                        BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
-                       BTRFS_BLOCK_GROUP_SYSTEM,
+                       BTRFS_BLOCK_GROUP_SYSTEM, BTRFS_BLOCK_GROUP_METADATA_REMAP,
                        BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
                return -EUCLEAN;
        }
index d2b7352eb7cbeb80d45e1d6d2633db80ea925066..eda6505f3ee59e2004f772b8ad21d164e13087bd 100644 (file)
@@ -231,6 +231,9 @@ void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
        DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
        DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
        DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");
+       /* Block groups containing the remap tree. */
+       DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA_REMAP, "metadata-remap");
+       /* Block group that has been remapped. */
        DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_REMAPPED, "remapped");
 
        DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
index 59347a4bb1852d3e9703956a6b052d766653bcab..e4b3cb50f94a7ae4724f9761babb4d0ecee02004 100644 (file)
@@ -58,7 +58,6 @@ static_assert(ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
  */
 static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
              const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
-static_assert(ilog2(BTRFS_BLOCK_GROUP_RAID0) > ilog2(BTRFS_BLOCK_GROUP_TYPE_MASK));
 
 /* ilog2() can handle both constants and variables */
 #define BTRFS_BG_FLAG_TO_INDEX(profile)                                        \
@@ -80,6 +79,15 @@ enum btrfs_raid_types {
        BTRFS_NR_RAID_TYPES
 };
 
+static_assert(BTRFS_RAID_RAID0 == 1);
+static_assert(BTRFS_RAID_RAID1 == 2);
+static_assert(BTRFS_RAID_DUP == 3);
+static_assert(BTRFS_RAID_RAID10 == 4);
+static_assert(BTRFS_RAID_RAID5 == 5);
+static_assert(BTRFS_RAID_RAID6 == 6);
+static_assert(BTRFS_RAID_RAID1C3 == 7);
+static_assert(BTRFS_RAID_RAID1C4 == 8);
+
 /*
  * Use sequence counter to get consistent device stat data on
  * 32-bit processors.
index f011d34cb699b335dff4f7f438ba1f2b81f5fe2e..76578426671c753f96095bd5ceea649cba23ba71 100644 (file)
@@ -1169,12 +1169,14 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
 #define BTRFS_BLOCK_GROUP_RAID1C4       (1ULL << 10)
 #define BTRFS_BLOCK_GROUP_REMAPPED      (1ULL << 11)
+#define BTRFS_BLOCK_GROUP_METADATA_REMAP (1ULL << 12)
 #define BTRFS_BLOCK_GROUP_RESERVED     (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
                                         BTRFS_SPACE_INFO_GLOBAL_RSV)
 
 #define BTRFS_BLOCK_GROUP_TYPE_MASK    (BTRFS_BLOCK_GROUP_DATA |    \
                                         BTRFS_BLOCK_GROUP_SYSTEM |  \
-                                        BTRFS_BLOCK_GROUP_METADATA)
+                                        BTRFS_BLOCK_GROUP_METADATA | \
+                                        BTRFS_BLOCK_GROUP_METADATA_REMAP)
 
 #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 |   \
                                         BTRFS_BLOCK_GROUP_RAID1 |   \