]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
btrfs: introduce btrfs_space_info sub-group
author Naohiro Aota <naohiro.aota@wdc.com>
Wed, 23 Apr 2025 02:43:48 +0000 (11:43 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 6 Feb 2026 15:43:55 +0000 (16:43 +0100)
[ Upstream commit f92ee31e031c7819126d2febdda0c3e91f5d2eb9 ]

The current code assumes we have only one space_info for each block group
type (DATA, METADATA, and SYSTEM). We sometimes need multiple space_infos to
manage special block groups.

One example is handling the data relocation block group in zoned mode.
That block group is dedicated to writing relocated data, and we cannot
allocate any regular extent from it, which is implemented in
the zoned extent allocator. This block group still belongs to the normal
data space_info. So, when all the normal data block groups are full and
there is some free space in the dedicated block group, the space_info
appears to have some free space, while it cannot allocate normal extents
anymore. That results in a strange ENOSPC error. We need to have a
space_info for the relocation data block group to represent the situation
properly.

Add basic infrastructure for having a "sub-group" of a space_info:
creation and removal. A sub-group space_info belongs to one of the
primary space_infos and has the same flags as its parent.

This commit first introduces the relocation data sub-space_info, and the
next commit will introduce the tree-log sub-space_info. In the future, it
could be useful to implement tiered storage for btrfs, e.g. by implementing
a sub-group space_info for block groups residing on fast storage.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Stable-dep-of: a11224a016d6 ("btrfs: fix memory leaks in create_space_info() error paths")
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/btrfs/block-group.c
fs/btrfs/space-info.c
fs/btrfs/space-info.h
fs/btrfs/sysfs.c

index 797df5ddbcd128b5282e0d72366b970bccfe2917..2338d42b8f4e6832c83840aca8aa0c3e085654c0 100644 (file)
@@ -4149,6 +4149,17 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
 {
        struct btrfs_fs_info *info = space_info->fs_info;
 
+       if (space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY) {
+               /* This is a top space_info, proceed with its children first. */
+               for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
+                       if (space_info->sub_group[i]) {
+                               check_removing_space_info(space_info->sub_group[i]);
+                               kfree(space_info->sub_group[i]);
+                               space_info->sub_group[i] = NULL;
+                       }
+               }
+       }
+
        /*
         * Do not hide this behind enospc_debug, this is actually important and
         * indicates a real bug if this happens.
index 88cd37a13c0ee309a453a556601745b60c846ab8..15c578f49caab25e999a0c49a09907d848c7ee0a 100644 (file)
@@ -234,16 +234,44 @@ static void init_space_info(struct btrfs_fs_info *info,
        INIT_LIST_HEAD(&space_info->priority_tickets);
        space_info->clamp = 1;
        btrfs_update_space_info_chunk_size(space_info, calc_chunk_size(info, flags));
+       space_info->subgroup_id = BTRFS_SUB_GROUP_PRIMARY;
 
        if (btrfs_is_zoned(info))
                space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
 }
 
+static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
+                                      enum btrfs_space_info_sub_group id, int index)
+{
+       struct btrfs_fs_info *fs_info = parent->fs_info;
+       struct btrfs_space_info *sub_group;
+       int ret;
+
+       ASSERT(parent->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
+       ASSERT(id != BTRFS_SUB_GROUP_PRIMARY);
+
+       sub_group = kzalloc(sizeof(*sub_group), GFP_NOFS);
+       if (!sub_group)
+               return -ENOMEM;
+
+       init_space_info(fs_info, sub_group, flags);
+       parent->sub_group[index] = sub_group;
+       sub_group->parent = parent;
+       sub_group->subgroup_id = id;
+
+       ret = btrfs_sysfs_add_space_info_type(fs_info, sub_group);
+       if (ret) {
+               kfree(sub_group);
+               parent->sub_group[index] = NULL;
+       }
+       return ret;
+}
+
 static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 {
 
        struct btrfs_space_info *space_info;
-       int ret;
+       int ret = 0;
 
        space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
        if (!space_info)
@@ -251,6 +279,15 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 
        init_space_info(info, space_info, flags);
 
+       if (btrfs_is_zoned(info)) {
+               if (flags & BTRFS_BLOCK_GROUP_DATA)
+                       ret = create_space_info_sub_group(space_info, flags,
+                                                         BTRFS_SUB_GROUP_DATA_RELOC,
+                                                         0);
+               if (ret)
+                       return ret;
+       }
+
        ret = btrfs_sysfs_add_space_info_type(info, space_info);
        if (ret)
                return ret;
@@ -511,8 +548,9 @@ static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
        lockdep_assert_held(&info->lock);
 
        /* The free space could be negative in case of overcommit */
-       btrfs_info(fs_info, "space_info %s has %lld free, is %sfull",
-                  flag_str,
+       btrfs_info(fs_info,
+                  "space_info %s (sub-group id %d) has %lld free, is %sfull",
+                  flag_str, info->subgroup_id,
                   (s64)(info->total_bytes - btrfs_space_info_used(info, true)),
                   info->full ? "" : "not ");
        btrfs_info(fs_info,
index d6b34f2738b53a1e5fd2b138dbfa4c360fe2ea99..dc69138f3de172c925185c7b054c36956f879245 100644 (file)
@@ -64,8 +64,17 @@ enum btrfs_flush_state {
        COMMIT_TRANS            = 11,
 };
 
+enum btrfs_space_info_sub_group {
+       BTRFS_SUB_GROUP_PRIMARY,
+       BTRFS_SUB_GROUP_DATA_RELOC,
+};
+
+#define BTRFS_SPACE_INFO_SUB_GROUP_MAX 1
 struct btrfs_space_info {
        struct btrfs_fs_info *fs_info;
+       struct btrfs_space_info *parent;
+       struct btrfs_space_info *sub_group[BTRFS_SPACE_INFO_SUB_GROUP_MAX];
+       int subgroup_id;
        spinlock_t lock;
 
        u64 total_bytes;        /* total bytes in the space,
index 44a94ac21e2fac374aff8b5f2baec86cf6f75180..693ae787056844b6e4c48ce8cc36104b785406c9 100644 (file)
@@ -1585,16 +1585,28 @@ void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info)
        kobject_put(&space_info->kobj);
 }
 
-static const char *alloc_name(u64 flags)
+static const char *alloc_name(struct btrfs_space_info *space_info)
 {
+       u64 flags = space_info->flags;
+
        switch (flags) {
        case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA:
                return "mixed";
        case BTRFS_BLOCK_GROUP_METADATA:
+               ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
                return "metadata";
        case BTRFS_BLOCK_GROUP_DATA:
-               return "data";
+               switch (space_info->subgroup_id) {
+               case BTRFS_SUB_GROUP_PRIMARY:
+                       return "data";
+               case BTRFS_SUB_GROUP_DATA_RELOC:
+                       return "data-reloc";
+               default:
+                       WARN_ON_ONCE(1);
+                       return "data (unknown sub-group)";
+               }
        case BTRFS_BLOCK_GROUP_SYSTEM:
+               ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY);
                return "system";
        default:
                WARN_ON(1);
@@ -1613,7 +1625,7 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 
        ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
                                   fs_info->space_info_kobj, "%s",
-                                  alloc_name(space_info->flags));
+                                  alloc_name(space_info));
        if (ret) {
                kobject_put(&space_info->kobj);
                return ret;