From f7f93c4da8447d3adf7d9ec73ea31456bab3622d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 9 Feb 2026 13:37:09 +0100
Subject: [PATCH] 6.6-stable patches

added patches:
	btrfs-fix-racy-bitfield-write-in-btrfs_clear_space_info_full.patch

---
 ...write-in-btrfs_clear_space_info_full.patch | 250 ++++++++++++++++++
 queue-6.6/series                              |   1 +
 2 files changed, 251 insertions(+)
 create mode 100644 queue-6.6/btrfs-fix-racy-bitfield-write-in-btrfs_clear_space_info_full.patch

diff --git a/queue-6.6/btrfs-fix-racy-bitfield-write-in-btrfs_clear_space_info_full.patch b/queue-6.6/btrfs-fix-racy-bitfield-write-in-btrfs_clear_space_info_full.patch
new file mode 100644
index 0000000000..4ff8937437
--- /dev/null
+++ b/queue-6.6/btrfs-fix-racy-bitfield-write-in-btrfs_clear_space_info_full.patch
@@ -0,0 +1,250 @@
+From 38e818718c5e04961eea0fa8feff3f100ce40408 Mon Sep 17 00:00:00 2001
+From: Boris Burkov
+Date: Wed, 1 Oct 2025 17:20:22 -0700
+Subject: btrfs: fix racy bitfield write in btrfs_clear_space_info_full()
+
+From: Boris Burkov
+
+commit 38e818718c5e04961eea0fa8feff3f100ce40408 upstream.
+
+From the memory-barriers.txt document regarding memory barrier ordering
+guarantees:
+
+ (*) These guarantees do not apply to bitfields, because compilers often
+     generate code to modify these using non-atomic read-modify-write
+     sequences. Do not attempt to use bitfields to synchronize parallel
+     algorithms.
+
+ (*) Even in cases where bitfields are protected by locks, all fields
+     in a given bitfield must be protected by one lock. If two fields
+     in a given bitfield are protected by different locks, the compiler's
+     non-atomic read-modify-write sequences can cause an update to one
+     field to corrupt the value of an adjacent field.
+
+btrfs_space_info has a bitfield sharing an underlying word, consisting
+of the fields full, chunk_alloc, and flush:
+
+struct btrfs_space_info {
+	struct btrfs_fs_info *      fs_info;          /*     0     8 */
+	struct btrfs_space_info *   parent;           /*     8     8 */
+	...
+	int                         clamp;            /*   172     4 */
+	unsigned int                full:1;           /*   176: 0  4 */
+	unsigned int                chunk_alloc:1;    /*   176: 1  4 */
+	unsigned int                flush:1;          /*   176: 2  4 */
+	...
+
+Therefore, to be safe from parallel read-modify-writes losing a write to
+one of the bitfield members protected by a lock, all writes to all the
+bitfields must use the lock. They almost universally do, except for
+btrfs_clear_space_info_full(), which iterates over the space_infos and
+writes out found->full = 0 without a lock.
+
+Imagine that we have one thread completing a transaction in which we
+finished deleting a block_group and are thus calling
+btrfs_clear_space_info_full(), while simultaneously the data reclaim
+ticket infrastructure is running do_async_reclaim_data_space():
+
+T1                                       T2
+btrfs_commit_transaction
+  btrfs_clear_space_info_full
+    data_sinfo->full = 0
+      READ: full:0, chunk_alloc:0, flush:1
+                                         do_async_reclaim_data_space(data_sinfo)
+                                           spin_lock(&space_info->lock);
+                                           if (list_empty(tickets))
+                                             space_info->flush = 0;
+                                               READ: full:0, chunk_alloc:0, flush:1
+                                               MOD/WRITE: full:0, chunk_alloc:0, flush:0
+                                           spin_unlock(&space_info->lock);
+                                           return;
+      MOD/WRITE: full:0, chunk_alloc:0, flush:1
+
+and now data_sinfo->flush is 1 but the reclaim worker has exited. This
+breaks the invariant that flush is 0 iff there is no work queued or
+running. Once this invariant is violated, future allocations that go
+into __reserve_bytes() will add tickets to space_info->tickets but will
+see space_info->flush is set to 1 and not queue the work. After this,
+they will block forever on the resulting ticket, as it is now impossible
+to kick the worker again.
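+
+To make the lost update concrete, here is a minimal userspace sketch
+(an editorial illustration, not kernel code; the file name and the
+exact bit masks are hypothetical, mirroring the diagram above) that
+replays the two compiler-generated read-modify-write sequences by hand:
+
+	/* race_sketch.c - deterministic replay of the interleaving above */
+	#include <stdio.h>
+
+	#define FULL  (1u << 0)		/* bit 0: space_info->full  */
+	#define FLUSH (1u << 2)		/* bit 2: space_info->flush */
+
+	int main(void)
+	{
+		unsigned int word = FLUSH;	/* full:0, chunk_alloc:0, flush:1 */
+
+		unsigned int t1 = word;		/* T1 READ (unlocked full = 0)  */
+		unsigned int t2 = word;		/* T2 READ (locked flush = 0)   */
+
+		t2 &= ~FLUSH;			/* T2 MOD                       */
+		word = t2;			/* T2 WRITE: flush:0, worker exits */
+
+		t1 &= ~FULL;			/* T1 MOD, using its stale read */
+		word = t1;			/* T1 WRITE: flush is 1 again   */
+
+		/* Prints "flush = 1": the flag is set, no worker running. */
+		printf("flush = %u\n", (word & FLUSH) ? 1u : 0u);
+		return 0;
+	}
+
+Because all three flags share one word, T1's whole-word write replays
+its stale copy of flush, exactly as in the diagram (the real compiler
+uses a byte-wide andb, but full and flush share that byte, so the
+effect is the same).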
+
+I also confirmed by looking at the assembly of the affected kernel that
+it is doing RMW operations. For example, to set the flush (3rd) bit to 0,
+the assembly is:
+	andb $0xfb,0x60(%rbx)
+and similarly for setting the full (1st) bit to 0:
+	andb $0xfe,-0x20(%rax)
+
+So I think this is really a bug on practical systems. I have observed
+a number of systems in this exact state, but am currently unable to
+reproduce it.
+
+Rather than leaving this footgun lying around for the future, take
+advantage of the fact that there is room in the struct anyway and that
+it is already quite large, and simply change the three bitfield members
+to bools. This avoids writes to space_info->full having any effect on
+writes to space_info->flush, regardless of locking.
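+
+A matching sketch of why the fix is sufficient (again an editorial
+illustration, not kernel code): once the flags are separate bools, each
+one is its own memory location per C11 and the kernel memory model, so
+a plain byte store to one member cannot rewrite a neighbour:
+
+	/* bool_sketch.c - the same two writes, bitfields replaced by bools */
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	struct flags {
+		bool full;		/* own byte: a distinct memory location */
+		bool chunk_alloc;
+		bool flush;
+	};
+
+	int main(void)
+	{
+		struct flags f = { .flush = true };
+
+		f.flush = false;	/* T2's locked write, a byte store    */
+		f.full  = false;	/* T1's unlocked write, another byte  */
+
+		/* Prints "flush = 0": no shared word, no read-modify-write,
+		 * so the unlocked store cannot resurrect flush. */
+		printf("flush = %d\n", (int)f.flush);
+		return 0;
+	}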
+
+Fixes: 957780eb2788 ("Btrfs: introduce ticketed enospc infrastructure")
+Reviewed-by: Qu Wenruo
+Signed-off-by: Boris Burkov
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+[ The context change is due to the commit cc0517fe779f
+  ("btrfs: tweak extent/chunk allocation for space_info sub-space")
+  in v6.16, which is irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/block-group.c |  6 +++---
+ fs/btrfs/space-info.c  | 22 +++++++++++-----------
+ fs/btrfs/space-info.h  |  6 +++---
+ 3 files changed, 17 insertions(+), 17 deletions(-)
+
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -4156,7 +4156,7 @@ int btrfs_chunk_alloc(struct btrfs_trans
+ 			mutex_unlock(&fs_info->chunk_mutex);
+ 		} else {
+ 			/* Proceed with allocation */
+-			space_info->chunk_alloc = 1;
++			space_info->chunk_alloc = true;
+ 			wait_for_alloc = false;
+ 			spin_unlock(&space_info->lock);
+ 		}
+@@ -4205,7 +4205,7 @@ int btrfs_chunk_alloc(struct btrfs_trans
+ 	spin_lock(&space_info->lock);
+ 	if (ret < 0) {
+ 		if (ret == -ENOSPC)
+-			space_info->full = 1;
++			space_info->full = true;
+ 		else
+ 			goto out;
+ 	} else {
+@@ -4215,7 +4215,7 @@ int btrfs_chunk_alloc(struct btrfs_trans
+ 
+ 	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+ out:
+-	space_info->chunk_alloc = 0;
++	space_info->chunk_alloc = false;
+ 	spin_unlock(&space_info->lock);
+ 	mutex_unlock(&fs_info->chunk_mutex);
+ 
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -182,7 +182,7 @@ void btrfs_clear_space_info_full(struct 
+ 	struct btrfs_space_info *found;
+ 
+ 	list_for_each_entry(found, head, list)
+-		found->full = 0;
++		found->full = false;
+ }
+ 
+ /*
+@@ -361,7 +361,7 @@ void btrfs_add_bg_to_space_info(struct b
+ 	found->bytes_readonly += block_group->bytes_super;
+ 	btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable);
+ 	if (block_group->length > 0)
+-		found->full = 0;
++		found->full = false;
+ 	btrfs_try_granting_tickets(info, found);
+ 	spin_unlock(&found->lock);
+ 
+@@ -1103,7 +1103,7 @@ static void btrfs_async_reclaim_metadata
+ 	spin_lock(&space_info->lock);
+ 	to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
+ 	if (!to_reclaim) {
+-		space_info->flush = 0;
++		space_info->flush = false;
+ 		spin_unlock(&space_info->lock);
+ 		return;
+ 	}
+@@ -1115,7 +1115,7 @@ static void btrfs_async_reclaim_metadata
+ 		flush_space(fs_info, space_info, to_reclaim, flush_state, false);
+ 		spin_lock(&space_info->lock);
+ 		if (list_empty(&space_info->tickets)) {
+-			space_info->flush = 0;
++			space_info->flush = false;
+ 			spin_unlock(&space_info->lock);
+ 			return;
+ 		}
+@@ -1158,7 +1158,7 @@ static void btrfs_async_reclaim_metadata
+ 				flush_state = FLUSH_DELAYED_ITEMS_NR;
+ 				commit_cycles--;
+ 			} else {
+-				space_info->flush = 0;
++				space_info->flush = false;
+ 			}
+ 		} else {
+ 			flush_state = FLUSH_DELAYED_ITEMS_NR;
+@@ -1320,7 +1320,7 @@ static void btrfs_async_reclaim_data_spa
+ 
+ 	spin_lock(&space_info->lock);
+ 	if (list_empty(&space_info->tickets)) {
+-		space_info->flush = 0;
++		space_info->flush = false;
+ 		spin_unlock(&space_info->lock);
+ 		return;
+ 	}
+@@ -1331,7 +1331,7 @@ static void btrfs_async_reclaim_data_spa
+ 		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE, false);
+ 		spin_lock(&space_info->lock);
+ 		if (list_empty(&space_info->tickets)) {
+-			space_info->flush = 0;
++			space_info->flush = false;
+ 			spin_unlock(&space_info->lock);
+ 			return;
+ 		}
+@@ -1348,7 +1348,7 @@ static void btrfs_async_reclaim_data_spa
+ 				    data_flush_states[flush_state], false);
+ 		spin_lock(&space_info->lock);
+ 		if (list_empty(&space_info->tickets)) {
+-			space_info->flush = 0;
++			space_info->flush = false;
+ 			spin_unlock(&space_info->lock);
+ 			return;
+ 		}
+@@ -1365,7 +1365,7 @@ static void btrfs_async_reclaim_data_spa
+ 		if (maybe_fail_all_tickets(fs_info, space_info))
+ 			flush_state = 0;
+ 		else
+-			space_info->flush = 0;
++			space_info->flush = false;
+ 	} else {
+ 		flush_state = 0;
+ 	}
+@@ -1381,7 +1381,7 @@ static void btrfs_async_reclaim_data_spa
+ 
+ aborted_fs:
+ 	maybe_fail_all_tickets(fs_info, space_info);
+-	space_info->flush = 0;
++	space_info->flush = false;
+ 	spin_unlock(&space_info->lock);
+ }
+ 
+@@ -1750,7 +1750,7 @@ static int __reserve_bytes(struct btrfs_
+ 	 */
+ 	maybe_clamp_preempt(fs_info, space_info);
+ 
+-	space_info->flush = 1;
++	space_info->flush = true;
+ 	trace_btrfs_trigger_flush(fs_info,
+ 				  space_info->flags,
+ 				  orig_bytes, flush,
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -126,11 +126,11 @@ struct btrfs_space_info {
+ 				   flushing. The value is >> clamp, so turns
+ 				   out to be a 2^clamp divisor. */
+ 
+-	unsigned int full:1;	/* indicates that we cannot allocate any more
++	bool full;		/* indicates that we cannot allocate any more
+ 				   chunks for this space */
+-	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
++	bool chunk_alloc;	/* set if we are allocating a chunk */
+ 
+-	unsigned int flush:1;	/* set if we are trying to make space */
++	bool flush;		/* set if we are trying to make space */
+ 
+ 	unsigned int force_alloc;	/* set if we need to force a chunk
+ 					   alloc for this space */
diff --git a/queue-6.6/series b/queue-6.6/series
index d82dd5502f..12699294af 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -74,3 +74,4 @@ hwmon-occ-mark-occ_init_attribute-as-__printf.patch
 netfilter-nf_tables-fix-inverted-genmask-check-in-nf.patch
 ipv6-fix-ecmp-sibling-count-mismatch-when-clearing-r.patch
 asoc-amd-fix-memory-leak-in-acp3x-pdm-dma-ops.patch
+btrfs-fix-racy-bitfield-write-in-btrfs_clear_space_info_full.patch
-- 
2.47.3