]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
btrfs: balance: fix potential bg lookup failure in chunk_usage_filter()
author: ZhengYuan Huang <gality369@gmail.com>
Wed, 25 Mar 2026 00:43:36 +0000 (08:43 +0800)
committer: David Sterba <dsterba@suse.com>
Mon, 8 Jun 2026 13:53:30 +0000 (15:53 +0200)
[BUG]
Running btrfs balance with a usage filter (-dusage=N) can trigger a
null-ptr-deref when metadata corruption causes a chunk to have no
corresponding block group in the in-memory cache:

  KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077]
  RIP: 0010:chunk_usage_filter fs/btrfs/volumes.c:3874 [inline]
  RIP: 0010:should_balance_chunk fs/btrfs/volumes.c:4018 [inline]
  RIP: 0010:__btrfs_balance fs/btrfs/volumes.c:4172 [inline]
  RIP: 0010:btrfs_balance+0x2024/0x42b0 fs/btrfs/volumes.c:4604
  ...
  Call Trace:
    btrfs_ioctl_balance fs/btrfs/ioctl.c:3577 [inline]
    btrfs_ioctl+0x25cf/0x5b90 fs/btrfs/ioctl.c:5313
    vfs_ioctl fs/ioctl.c:51 [inline]
    ...

The bug is reproducible on the current development branch.

[CAUSE]
Two separate data structures are involved:

1. The on-disk chunk tree, which records every chunk (logical address
   space region) and is iterated by __btrfs_balance().

2. The in-memory block group cache (fs_info->block_group_cache_tree),
   which is built at mount time by btrfs_read_block_groups() and holds
   a struct btrfs_block_group for each chunk. This cache is what the
   usage filter queries.

On a well-formed filesystem, these two are kept in 1:1 correspondence.
However, btrfs_read_block_groups() builds the cache from block group
items in the extent tree, not directly from the chunk tree. A corrupted
image can therefore contain a chunk item in the chunk tree whose
corresponding block group item is absent from the extent tree; that
chunk's block group is then never inserted into the in-memory cache.

When balance iterates the chunk tree and reaches such an orphaned chunk,
should_balance_chunk() calls chunk_usage_filter(), which queries the block
group cache:

  cache = btrfs_lookup_block_group(fs_info, chunk_offset);
  chunk_used = cache->used;   /* cache may be NULL */

btrfs_lookup_block_group() returns NULL silently when no cached entry
covers chunk_offset. chunk_usage_filter() does not check the return value,
so the immediately following dereference of cache->used triggers the crash.

[FIX]
Add a NULL check after btrfs_lookup_block_group() in chunk_usage_filter().
When the lookup fails, emit a btrfs_err() message identifying the
affected bytenr and return -EUCLEAN to indicate filesystem corruption.

Since chunk_usage_filter() now has an error path, change its return type
from bool to int: a negative errno on error, 0 if the chunk passes the
usage filter, and 1 if it should be skipped.

Update should_balance_chunk() accordingly to propagate negative errors
from the usage filter.

Signed-off-by: ZhengYuan Huang <gality369@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/volumes.c

index a88e68f9056468e801d622cad506cae2c000cea4..cb29bf616e183ebebbd9ae91d3772f48f1c5e051 100644 (file)
@@ -3988,14 +3988,19 @@ static bool chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_of
        return ret;
 }
 
-static bool chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
-                              struct btrfs_balance_args *bargs)
+static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+                             struct btrfs_balance_args *bargs)
 {
        struct btrfs_block_group *cache;
        u64 chunk_used, user_thresh;
-       bool ret = true;
+       int ret = 1;
 
        cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+       if (unlikely(!cache)) {
+               btrfs_err(fs_info, "balance: chunk at bytenr %llu has no corresponding block group",
+                         chunk_offset);
+               return -EUCLEAN;
+       }
        chunk_used = cache->used;
 
        if (bargs->usage_min == 0)
@@ -4006,7 +4011,7 @@ static bool chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
                user_thresh = mult_perc(cache->length, bargs->usage);
 
        if (chunk_used < user_thresh)
-               ret = false;
+               ret = 0;
 
        btrfs_put_block_group(cache);
        return ret;
@@ -4111,8 +4116,8 @@ static bool chunk_soft_convert_filter(u64 chunk_type, struct btrfs_balance_args
        return false;
 }
 
-static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
-                                u64 chunk_offset)
+static int should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk *chunk,
+                               u64 chunk_offset)
 {
        struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -4145,9 +4150,14 @@ static bool should_balance_chunk(struct extent_buffer *leaf, struct btrfs_chunk
        }
 
        /* usage filter */
-       if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
-           chunk_usage_filter(fs_info, chunk_offset, bargs)) {
-               return false;
+       if (bargs->flags & BTRFS_BALANCE_ARGS_USAGE) {
+               int ret2;
+
+               ret2 = chunk_usage_filter(fs_info, chunk_offset, bargs);
+               if (ret2 < 0)
+                       return ret2;
+               if (ret2)
+                       return false;
        } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
            chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
                return false;
@@ -4430,6 +4440,10 @@ again:
                ret = should_balance_chunk(leaf, chunk, found_key.offset);
 
                btrfs_release_path(path);
+               if (ret < 0) {
+                       mutex_unlock(&fs_info->reclaim_bgs_lock);
+                       goto error;
+               }
                if (!ret) {
                        mutex_unlock(&fs_info->reclaim_bgs_lock);
                        goto loop;