--- /dev/null
+From stable+bounces-171688-greg=kroah.com@vger.kernel.org Tue Aug 19 02:39:42 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 20:38:40 -0400
+Subject: btrfs: abort transaction on unexpected eb generation at btrfs_copy_root()
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Daniel Vacek <neelx@suse.com>, Qu Wenruo <wqu@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819003840.226789-1-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 ]
+
+If we find an unexpected generation for the extent buffer we are cloning
+at btrfs_copy_root(), we just WARN_ON() and neither error out nor abort
+the transaction, meaning we allow metadata with an unexpected generation
+to be persisted. Instead of only warning, abort the transaction and return
+-EUCLEAN.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Daniel Vacek <neelx@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_h
+
+ write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
+
+- WARN_ON(btrfs_header_generation(buf) > trans->transid);
++ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
++ btrfs_tree_unlock(cow);
++ free_extent_buffer(cow);
++ ret = -EUCLEAN;
++ btrfs_abort_transaction(trans, ret);
++ return ret;
++ }
++
+ if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+ ret = btrfs_inc_ref(trans, root, cow, 1);
+ else
--- /dev/null
+From stable+bounces-171698-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:43 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:15:29 -0400
+Subject: btrfs: add comment for optimization in free_extent_buffer()
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011531.242846-2-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 2697b6159744e5afae0f7715da9f830ba6f9e45a ]
+
+There's this special atomic compare and exchange logic which serves to
+avoid locking the extent buffer's refs_lock spinlock and therefore reduce
+lock contention, so add a comment to make it more obvious.
+
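+For illustration only, the same lock-free decrement pattern can be sketched
+in userspace with C11 atomics (this is a toy, not the kernel code; the
+threshold and the helper name are made up to mirror the shape of
+free_extent_buffer()):
+
+  #include <stdatomic.h>
+  #include <stdio.h>
+
+  static _Atomic int refs = 5;    /* toy stand-in for eb->refs */
+
+  /* Fast path: drop one reference without a lock while refs stays above
+   * the threshold that would require the locked slow path. Returns 1 on
+   * success, 0 if the caller must fall back to the (imaginary) slow path. */
+  static int put_ref_fast(void)
+  {
+      int cur = atomic_load(&refs);
+
+      while (cur > 3) {
+          /* On failure, cur is refreshed with the current value. */
+          if (atomic_compare_exchange_weak(&refs, &cur, cur - 1))
+              return 1;
+      }
+      return 0;
+  }
+
+  int main(void)
+  {
+      while (put_ref_fast())
+          ;
+      printf("remaining refs: %d\n", atomic_load(&refs));
+      return 0;
+  }
+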
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: ad580dfa388f ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3493,6 +3493,7 @@ void free_extent_buffer(struct extent_bu
+ break;
+ }
+
++ /* Optimization to avoid locking eb->refs_lock. */
+ if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1))
+ return;
+ }
--- /dev/null
+From stable+bounces-172264-greg=kroah.com@vger.kernel.org Fri Aug 22 05:36:17 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Aug 2025 23:35:15 -0400
+Subject: btrfs: add comments on the extra btrfs specific subpage bitmaps
+To: stable@vger.kernel.org
+Cc: Qu Wenruo <wqu@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250822033527.1065200-1-sashal@kernel.org>
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 1e17738d6b76cdc76d240d64de87fa66ba2365f7 ]
+
+Unlike the iomap_folio_state structure, the btrfs_subpage structure has a
+lot of extra sub-bitmaps, namely:
+
+- writeback sub-bitmap
+- locked sub-bitmap
+ iomap_folio_state uses an atomic for writeback tracking, while it has
+ no per-block locked tracking.
+
+ This is because iomap always locks a single folio, and submits dirty
+ blocks with that folio locked.
+
+ But btrfs has async delalloc ranges (for compression), which are queued
+ with their range locked until the compression is done, and only then is
+ the involved range marked writeback and unlocked.
+
+ This means a range can be unlocked and marked writeback at seemingly
+ random timing, thus it needs the extra tracking.
+
+ This needs a huge rework on the lifespan of async delalloc range
+ before we can remove/simplify these two sub-bitmaps.
+
+- ordered sub-bitmap
+- checked sub-bitmap
+ These are for COW-fixup, but as I mentioned in the past, the COW-fixup
+ is not really needed anymore and these two flags are already marked
+ deprecated, and will be removed in the near future after comprehensive
+ tests.
+
+Add related comments to indicate we're actively trying to align the
+sub-bitmaps to the iomap ones.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: b1511360c8ac ("btrfs: subpage: keep TOWRITE tag until folio is cleaned")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/subpage.h | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/fs/btrfs/subpage.h
++++ b/fs/btrfs/subpage.h
+@@ -33,8 +33,22 @@ enum {
+ btrfs_bitmap_nr_uptodate = 0,
+ btrfs_bitmap_nr_dirty,
+ btrfs_bitmap_nr_writeback,
++ /*
++ * The ordered and checked flags are for COW fixup, already marked
++ * deprecated, and will be removed eventually.
++ */
+ btrfs_bitmap_nr_ordered,
+ btrfs_bitmap_nr_checked,
++
++ /*
++ * The locked bit is for async delalloc range (compression), currently
++ * async extent is queued with the range locked, until the compression
++ * is done.
++ * So an async extent can unlock the range at any random timing.
++ *
++ * This will need a rework on the async extent lifespan (mark writeback
++ * and do compression) before deprecating this flag.
++ */
+ btrfs_bitmap_nr_locked,
+ btrfs_bitmap_nr_max
+ };
--- /dev/null
+From stable+bounces-171684-greg=kroah.com@vger.kernel.org Tue Aug 19 02:16:51 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 20:16:39 -0400
+Subject: btrfs: always abort transaction on failure to add block group to free space tree
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819001639.204027-2-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 1f06c942aa709d397cf6bed577a0d10a61509667 ]
+
+Only one of the callers of __add_block_group_free_space() aborts the
+transaction if the call fails, while the others don't do it and it's
+either never done up the call chain or much higher in the call chain.
+
+So make sure we abort the transaction at __add_block_group_free_space()
+if it fails, which brings a couple benefits:
+
+1) If some call chain never aborts the transaction, we avoid having some
+ metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is
+ cleared when we enter __add_block_group_free_space() and therefore
+ __add_block_group_free_space() is never called again to add the block
+ group items to the free space tree, since the function is only called
+ when that flag is set in a block group;
+
+2) If the call chain already aborts the transaction, then we get a better
+ trace that points to the exact step from __add_block_group_free_space()
+ which failed, which is better for analysis.
+
+So abort the transaction at __add_block_group_free_space() if any of its
+steps fails.
+
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-tree.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -1431,12 +1431,17 @@ static int __add_block_group_free_space(
+ set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+
+ ret = add_new_free_space_info(trans, block_group, path);
+- if (ret)
++ if (ret) {
++ btrfs_abort_transaction(trans, ret);
+ return ret;
++ }
+
+- return __add_to_free_space_tree(trans, block_group, path,
+- block_group->start,
+- block_group->length);
++ ret = __add_to_free_space_tree(trans, block_group, path,
++ block_group->start, block_group->length);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
++
++ return ret;
+ }
+
+ int add_block_group_free_space(struct btrfs_trans_handle *trans,
+@@ -1461,9 +1466,6 @@ int add_block_group_free_space(struct bt
+ }
+
+ ret = __add_block_group_free_space(trans, block_group, path);
+- if (ret)
+- btrfs_abort_transaction(trans, ret);
+-
+ out:
+ btrfs_free_path(path);
+ mutex_unlock(&block_group->free_space_lock);
--- /dev/null
+From stable+bounces-171701-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:44 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:15:31 -0400
+Subject: btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()
+To: stable@vger.kernel.org
+Cc: Leo Martins <loemra.dev@gmail.com>, Boris Burkov <boris@bur.io>, Qu Wenruo <wqu@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011531.242846-4-sashal@kernel.org>
+
+From: Leo Martins <loemra.dev@gmail.com>
+
+[ Upstream commit ad580dfa388fabb52af033e3f8cc5d04be985e54 ]
+
+There is a potential deadlock that can happen in
+try_release_subpage_extent_buffer() because the irq-safe xarray spin
+lock fs_info->buffer_tree is being acquired before the irq-unsafe
+eb->refs_lock.
+
+This leads to the potential race:
+// T1 (random eb->refs user)           // T2 (release folio)
+
+spin_lock(&eb->refs_lock);
+// interrupt
+end_bbio_meta_write()
+  btrfs_meta_folio_clear_writeback()
+                                        btree_release_folio()
+                                          folio_test_writeback() //false
+                                          try_release_extent_buffer()
+                                          try_release_subpage_extent_buffer()
+                                            xa_lock_irq(&fs_info->buffer_tree)
+                                            spin_lock(&eb->refs_lock); // blocked; held by T1
+    buffer_tree_clear_mark()
+      xas_lock_irqsave() // blocked; held by T2
+
+I believe that the spin lock can safely be replaced by an rcu_read_lock.
+The xa_for_each loop does not need the spin lock as it's already
+internally protected by the rcu_read_lock. The extent buffer is also
+protected by the rcu_read_lock so it won't be freed before we take the
+eb->refs_lock and check the ref count.
+
+The rcu_read_lock is taken and released every iteration, just like the
+spin lock, which means we're not protected against concurrent
+insertions into the xarray. This is fine because we rely on
+folio->private to detect if there are any ebs remaining in the folio.
+
+There is already some precedent for this with find_extent_buffer_nolock,
+which loads an extent buffer from the xarray with only rcu_read_lock.
+
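+As an aside, the rule the report below trips over can be sketched with a
+tiny single-threaded toy checker (the lock structs and the checker are
+invented for illustration, this is not lockdep): acquiring an IRQ-unsafe
+lock while an IRQ-safe lock is held is what creates the deadlock potential.
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  struct lk {
+      const char *name;
+      bool irq_safe;    /* taken with IRQs disabled somewhere */
+  };
+
+  static const struct lk *held[8];
+  static int nheld;
+
+  static void acquire(const struct lk *l)
+  {
+      /* Flag the same inversion the lockdep report complains about. */
+      for (int i = 0; i < nheld; i++)
+          if (held[i]->irq_safe && !l->irq_safe)
+              printf("warning: %s (irq-unsafe) taken while holding %s (irq-safe)\n",
+                     l->name, held[i]->name);
+      held[nheld++] = l;
+  }
+
+  static void release(void) { nheld--; }
+
+  int main(void)
+  {
+      const struct lk buffer_tree = { "fs_info->buffer_tree", true };
+      const struct lk refs_lock   = { "eb->refs_lock", false };
+
+      acquire(&buffer_tree);    /* xa_lock_irq() in the old code */
+      acquire(&refs_lock);      /* nested spin_lock() -> warning */
+      release();
+      release();
+      return 0;
+  }
+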
+lockdep warning:
+
+ =====================================================
+ WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
+ 6.16.0-0_fbk701_debug_rc0_123_g4c06e63b9203 #1 Tainted: G E N
+ -----------------------------------------------------
+ kswapd0/66 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
+ ffff000011ffd600 (&eb->refs_lock){+.+.}-{3:3}, at: try_release_extent_buffer+0x18c/0x560
+
+and this task is already holding:
+ ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560
+ which would create a new lock dependency:
+ (&buffer_xa_class){-.-.}-{3:3} -> (&eb->refs_lock){+.+.}-{3:3}
+
+but this new dependency connects a HARDIRQ-irq-safe lock:
+ (&buffer_xa_class){-.-.}-{3:3}
+
+... which became HARDIRQ-irq-safe at:
+ lock_acquire+0x178/0x358
+ _raw_spin_lock_irqsave+0x60/0x88
+ buffer_tree_clear_mark+0xc4/0x160
+ end_bbio_meta_write+0x238/0x398
+ btrfs_bio_end_io+0x1f8/0x330
+ btrfs_orig_write_end_io+0x1c4/0x2c0
+ bio_endio+0x63c/0x678
+ blk_update_request+0x1c4/0xa00
+ blk_mq_end_request+0x54/0x88
+ virtblk_request_done+0x124/0x1d0
+ blk_mq_complete_request+0x84/0xa0
+ virtblk_done+0x130/0x238
+ vring_interrupt+0x130/0x288
+ __handle_irq_event_percpu+0x1e8/0x708
+ handle_irq_event+0x98/0x1b0
+ handle_fasteoi_irq+0x264/0x7c0
+ generic_handle_domain_irq+0xa4/0x108
+ gic_handle_irq+0x7c/0x1a0
+ do_interrupt_handler+0xe4/0x148
+ el1_interrupt+0x30/0x50
+ el1h_64_irq_handler+0x14/0x20
+ el1h_64_irq+0x6c/0x70
+ _raw_spin_unlock_irq+0x38/0x70
+ __run_timer_base+0xdc/0x5e0
+ run_timer_softirq+0xa0/0x138
+ handle_softirqs.llvm.13542289750107964195+0x32c/0xbd0
+ ____do_softirq.llvm.17674514681856217165+0x18/0x28
+ call_on_irq_stack+0x24/0x30
+ __irq_exit_rcu+0x164/0x430
+ irq_exit_rcu+0x18/0x88
+ el1_interrupt+0x34/0x50
+ el1h_64_irq_handler+0x14/0x20
+ el1h_64_irq+0x6c/0x70
+ arch_local_irq_enable+0x4/0x8
+ do_idle+0x1a0/0x3b8
+ cpu_startup_entry+0x60/0x80
+ rest_init+0x204/0x228
+ start_kernel+0x394/0x3f0
+ __primary_switched+0x8c/0x8958
+
+to a HARDIRQ-irq-unsafe lock:
+ (&eb->refs_lock){+.+.}-{3:3}
+
+... which became HARDIRQ-irq-unsafe at:
+ ...
+ lock_acquire+0x178/0x358
+ _raw_spin_lock+0x4c/0x68
+ free_extent_buffer_stale+0x2c/0x170
+ btrfs_read_sys_array+0x1b0/0x338
+ open_ctree+0xeb0/0x1df8
+ btrfs_get_tree+0xb60/0x1110
+ vfs_get_tree+0x8c/0x250
+ fc_mount+0x20/0x98
+ btrfs_get_tree+0x4a4/0x1110
+ vfs_get_tree+0x8c/0x250
+ do_new_mount+0x1e0/0x6c0
+ path_mount+0x4ec/0xa58
+ __arm64_sys_mount+0x370/0x490
+ invoke_syscall+0x6c/0x208
+ el0_svc_common+0x14c/0x1b8
+ do_el0_svc+0x4c/0x60
+ el0_svc+0x4c/0x160
+ el0t_64_sync_handler+0x70/0x100
+ el0t_64_sync+0x168/0x170
+
+other info that might help us debug this:
+ Possible interrupt unsafe locking scenario:
+        CPU0                    CPU1
+        ----                    ----
+   lock(&eb->refs_lock);
+                                local_irq_disable();
+                                lock(&buffer_xa_class);
+                                lock(&eb->refs_lock);
+   <Interrupt>
+     lock(&buffer_xa_class);
+
+ *** DEADLOCK ***
+ 2 locks held by kswapd0/66:
+ #0: ffff800085506e40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0xe8/0xe50
+ #1: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560
+
+Link: https://www.kernel.org/doc/Documentation/locking/lockdep-design.rst#:~:text=Multi%2Dlock%20dependency%20rules%3A
+Fixes: 19d7f65f032f ("btrfs: convert the buffer_radix to an xarray")
+CC: stable@vger.kernel.org # 6.16+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Leo Martins <loemra.dev@gmail.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4304,15 +4304,18 @@ static int try_release_subpage_extent_bu
+ unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1;
+ int ret;
+
+- xa_lock_irq(&fs_info->buffer_tree);
++ rcu_read_lock();
+ xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) {
+ /*
+ * The same as try_release_extent_buffer(), to ensure the eb
+ * won't disappear out from under us.
+ */
+ spin_lock(&eb->refs_lock);
++ rcu_read_unlock();
++
+ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
+ spin_unlock(&eb->refs_lock);
++ rcu_read_lock();
+ continue;
+ }
+
+@@ -4331,11 +4334,10 @@ static int try_release_subpage_extent_bu
+ * check the folio private at the end. And
+ * release_extent_buffer() will release the refs_lock.
+ */
+- xa_unlock_irq(&fs_info->buffer_tree);
+ release_extent_buffer(eb);
+- xa_lock_irq(&fs_info->buffer_tree);
++ rcu_read_lock();
+ }
+- xa_unlock_irq(&fs_info->buffer_tree);
++ rcu_read_unlock();
+
+ /*
+ * Finally to check if we have cleared folio private, as if we have
+@@ -4348,7 +4350,6 @@ static int try_release_subpage_extent_bu
+ ret = 0;
+ spin_unlock(&folio->mapping->i_private_lock);
+ return ret;
+-
+ }
+
+ int try_release_extent_buffer(struct folio *folio)
--- /dev/null
+From stable+bounces-171683-greg=kroah.com@vger.kernel.org Tue Aug 19 02:16:52 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 20:16:38 -0400
+Subject: btrfs: move transaction aborts to the error site in add_block_group_free_space()
+To: stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>, Filipe Manana <fdmanana@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819001639.204027-1-sashal@kernel.org>
+
+From: David Sterba <dsterba@suse.com>
+
+[ Upstream commit b63c8c1ede4407835cb8c8bed2014d96619389f3 ]
+
+Transaction aborts should be done next to the place the error happens,
+which was not done in add_block_group_free_space().
+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 1f06c942aa70 ("btrfs: always abort transaction on failure to add block group to free space tree")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-tree.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -1456,16 +1456,17 @@ int add_block_group_free_space(struct bt
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
++ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = __add_block_group_free_space(trans, block_group, path);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
+
+ out:
+ btrfs_free_path(path);
+ mutex_unlock(&block_group->free_space_lock);
+- if (ret)
+- btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
--- /dev/null
+From stable+bounces-172265-greg=kroah.com@vger.kernel.org Fri Aug 22 05:35:38 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Aug 2025 23:35:16 -0400
+Subject: btrfs: rename btrfs_subpage structure
+To: stable@vger.kernel.org
+Cc: Qu Wenruo <wqu@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250822033527.1065200-2-sashal@kernel.org>
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 582cd4bad4332cca95c578e99442eb148366eb82 ]
+
+With the incoming large data folios support, the structure name
+btrfs_subpage is no longer correct, as we can have multiple blocks
+inside a large folio while the block size is still the page size.
+
+So to follow the schema of iomap, rename btrfs_subpage to
+btrfs_folio_state, along with involved enums.
+
+There are still exported functions with "btrfs_subpage_" prefix, and I
+believe for metadata the name "subpage" will stay forever as we will
+never allocate a folio larger than nodesize anyway.
+
+The full cleanup of the word "subpage" will happen in much smaller steps
+in the future.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: b1511360c8ac ("btrfs: subpage: keep TOWRITE tag until folio is cleaned")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 28 ++---
+ fs/btrfs/inode.c | 8 -
+ fs/btrfs/subpage.c | 239 ++++++++++++++++++++++++---------------------------
+ fs/btrfs/subpage.h | 31 ++++--
+ 4 files changed, 156 insertions(+), 150 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -782,7 +782,7 @@ static void submit_extent_folio(struct b
+
+ static int attach_extent_buffer_folio(struct extent_buffer *eb,
+ struct folio *folio,
+- struct btrfs_subpage *prealloc)
++ struct btrfs_folio_state *prealloc)
+ {
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ int ret = 0;
+@@ -806,7 +806,7 @@ static int attach_extent_buffer_folio(st
+
+ /* Already mapped, just free prealloc */
+ if (folio_test_private(folio)) {
+- btrfs_free_subpage(prealloc);
++ btrfs_free_folio_state(prealloc);
+ return 0;
+ }
+
+@@ -815,7 +815,7 @@ static int attach_extent_buffer_folio(st
+ folio_attach_private(folio, prealloc);
+ else
+ /* Do new allocation to attach subpage */
+- ret = btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
++ ret = btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA);
+ return ret;
+ }
+
+@@ -831,7 +831,7 @@ int set_folio_extent_mapped(struct folio
+ fs_info = folio_to_fs_info(folio);
+
+ if (btrfs_is_subpage(fs_info, folio))
+- return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
++ return btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA);
+
+ folio_attach_private(folio, (void *)EXTENT_FOLIO_PRIVATE);
+ return 0;
+@@ -848,7 +848,7 @@ void clear_folio_extent_mapped(struct fo
+
+ fs_info = folio_to_fs_info(folio);
+ if (btrfs_is_subpage(fs_info, folio))
+- return btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
++ return btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA);
+
+ folio_detach_private(folio);
+ }
+@@ -2731,13 +2731,13 @@ static int extent_buffer_under_io(const
+
+ static bool folio_range_has_eb(struct folio *folio)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+
+ lockdep_assert_held(&folio->mapping->i_private_lock);
+
+ if (folio_test_private(folio)) {
+- subpage = folio_get_private(folio);
+- if (atomic_read(&subpage->eb_refs))
++ bfs = folio_get_private(folio);
++ if (atomic_read(&bfs->eb_refs))
+ return true;
+ }
+ return false;
+@@ -2787,7 +2787,7 @@ static void detach_extent_buffer_folio(c
+ * attached to one dummy eb, no sharing.
+ */
+ if (!mapped) {
+- btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
++ btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA);
+ return;
+ }
+
+@@ -2798,7 +2798,7 @@ static void detach_extent_buffer_folio(c
+ * page range and no unfinished IO.
+ */
+ if (!folio_range_has_eb(folio))
+- btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA);
++ btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA);
+
+ spin_unlock(&mapping->i_private_lock);
+ }
+@@ -3141,7 +3141,7 @@ static bool check_eb_alignment(struct bt
+ * The caller needs to free the existing folios and retry using the same order.
+ */
+ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
+- struct btrfs_subpage *prealloc,
++ struct btrfs_folio_state *prealloc,
+ struct extent_buffer **found_eb_ret)
+ {
+
+@@ -3224,7 +3224,7 @@ struct extent_buffer *alloc_extent_buffe
+ int attached = 0;
+ struct extent_buffer *eb;
+ struct extent_buffer *existing_eb = NULL;
+- struct btrfs_subpage *prealloc = NULL;
++ struct btrfs_folio_state *prealloc = NULL;
+ u64 lockdep_owner = owner_root;
+ bool page_contig = true;
+ int uptodate = 1;
+@@ -3269,7 +3269,7 @@ struct extent_buffer *alloc_extent_buffe
+ * manually if we exit earlier.
+ */
+ if (btrfs_meta_is_subpage(fs_info)) {
+- prealloc = btrfs_alloc_subpage(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA);
++ prealloc = btrfs_alloc_folio_state(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA);
+ if (IS_ERR(prealloc)) {
+ ret = PTR_ERR(prealloc);
+ goto out;
+@@ -3280,7 +3280,7 @@ reallocate:
+ /* Allocate all pages first. */
+ ret = alloc_eb_folio_array(eb, true);
+ if (ret < 0) {
+- btrfs_free_subpage(prealloc);
++ btrfs_free_folio_state(prealloc);
+ goto out;
+ }
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7364,13 +7364,13 @@ struct extent_map *btrfs_create_io_em(st
+ static void wait_subpage_spinlock(struct folio *folio)
+ {
+ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+
+ if (!btrfs_is_subpage(fs_info, folio))
+ return;
+
+ ASSERT(folio_test_private(folio) && folio_get_private(folio));
+- subpage = folio_get_private(folio);
++ bfs = folio_get_private(folio);
+
+ /*
+ * This may look insane as we just acquire the spinlock and release it,
+@@ -7383,8 +7383,8 @@ static void wait_subpage_spinlock(struct
+ * Here we just acquire the spinlock so that all existing callers
+ * should exit and we're safe to release/invalidate the page.
+ */
+- spin_lock_irq(&subpage->lock);
+- spin_unlock_irq(&subpage->lock);
++ spin_lock_irq(&bfs->lock);
++ spin_unlock_irq(&bfs->lock);
+ }
+
+ static int btrfs_launder_folio(struct folio *folio)
+--- a/fs/btrfs/subpage.c
++++ b/fs/btrfs/subpage.c
+@@ -49,7 +49,7 @@
+ * Implementation:
+ *
+ * - Common
+- * Both metadata and data will use a new structure, btrfs_subpage, to
++ * Both metadata and data will use a new structure, btrfs_folio_state, to
+ * record the status of each sector inside a page. This provides the extra
+ * granularity needed.
+ *
+@@ -63,10 +63,10 @@
+ * This means a slightly higher tree locking latency.
+ */
+
+-int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
+- struct folio *folio, enum btrfs_subpage_type type)
++int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
++ struct folio *folio, enum btrfs_folio_type type)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+
+ /* For metadata we don't support large folio yet. */
+ if (type == BTRFS_SUBPAGE_METADATA)
+@@ -87,18 +87,18 @@ int btrfs_attach_subpage(const struct bt
+ if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
+ return 0;
+
+- subpage = btrfs_alloc_subpage(fs_info, folio_size(folio), type);
+- if (IS_ERR(subpage))
+- return PTR_ERR(subpage);
++ bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type);
++ if (IS_ERR(bfs))
++ return PTR_ERR(bfs);
+
+- folio_attach_private(folio, subpage);
++ folio_attach_private(folio, bfs);
+ return 0;
+ }
+
+-void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio,
+- enum btrfs_subpage_type type)
++void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
++ enum btrfs_folio_type type)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+
+ /* Either not subpage, or the folio already has private attached. */
+ if (!folio_test_private(folio))
+@@ -108,15 +108,15 @@ void btrfs_detach_subpage(const struct b
+ if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
+ return;
+
+- subpage = folio_detach_private(folio);
+- ASSERT(subpage);
+- btrfs_free_subpage(subpage);
++ bfs = folio_detach_private(folio);
++ ASSERT(bfs);
++ btrfs_free_folio_state(bfs);
+ }
+
+-struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
+- size_t fsize, enum btrfs_subpage_type type)
++struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
++ size_t fsize, enum btrfs_folio_type type)
+ {
+- struct btrfs_subpage *ret;
++ struct btrfs_folio_state *ret;
+ unsigned int real_size;
+
+ ASSERT(fs_info->sectorsize < fsize);
+@@ -136,11 +136,6 @@ struct btrfs_subpage *btrfs_alloc_subpag
+ return ret;
+ }
+
+-void btrfs_free_subpage(struct btrfs_subpage *subpage)
+-{
+- kfree(subpage);
+-}
+-
+ /*
+ * Increase the eb_refs of current subpage.
+ *
+@@ -152,7 +147,7 @@ void btrfs_free_subpage(struct btrfs_sub
+ */
+ void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+
+ if (!btrfs_meta_is_subpage(fs_info))
+ return;
+@@ -160,13 +155,13 @@ void btrfs_folio_inc_eb_refs(const struc
+ ASSERT(folio_test_private(folio) && folio->mapping);
+ lockdep_assert_held(&folio->mapping->i_private_lock);
+
+- subpage = folio_get_private(folio);
+- atomic_inc(&subpage->eb_refs);
++ bfs = folio_get_private(folio);
++ atomic_inc(&bfs->eb_refs);
+ }
+
+ void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+
+ if (!btrfs_meta_is_subpage(fs_info))
+ return;
+@@ -174,9 +169,9 @@ void btrfs_folio_dec_eb_refs(const struc
+ ASSERT(folio_test_private(folio) && folio->mapping);
+ lockdep_assert_held(&folio->mapping->i_private_lock);
+
+- subpage = folio_get_private(folio);
+- ASSERT(atomic_read(&subpage->eb_refs));
+- atomic_dec(&subpage->eb_refs);
++ bfs = folio_get_private(folio);
++ ASSERT(atomic_read(&bfs->eb_refs));
++ atomic_dec(&bfs->eb_refs);
+ }
+
+ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
+@@ -228,7 +223,7 @@ static void btrfs_subpage_clamp_range(st
+ static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
+ const int nbits = (len >> fs_info->sectorsize_bits);
+ unsigned long flags;
+@@ -238,7 +233,7 @@ static bool btrfs_subpage_end_and_test_l
+
+ btrfs_subpage_assert(fs_info, folio, start, len);
+
+- spin_lock_irqsave(&subpage->lock, flags);
++ spin_lock_irqsave(&bfs->lock, flags);
+ /*
+ * We have call sites passing @lock_page into
+ * extent_clear_unlock_delalloc() for compression path.
+@@ -246,18 +241,18 @@ static bool btrfs_subpage_end_and_test_l
+ * This @locked_page is locked by plain lock_page(), thus its
+ * subpage::locked is 0. Handle them in a special way.
+ */
+- if (atomic_read(&subpage->nr_locked) == 0) {
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ if (atomic_read(&bfs->nr_locked) == 0) {
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ return true;
+ }
+
+- for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) {
+- clear_bit(bit, subpage->bitmaps);
++ for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) {
++ clear_bit(bit, bfs->bitmaps);
+ cleared++;
+ }
+- ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
+- last = atomic_sub_and_test(cleared, &subpage->nr_locked);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
++ last = atomic_sub_and_test(cleared, &bfs->nr_locked);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ return last;
+ }
+
+@@ -280,7 +275,7 @@ static bool btrfs_subpage_end_and_test_l
+ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+
+ ASSERT(folio_test_locked(folio));
+
+@@ -296,7 +291,7 @@ void btrfs_folio_end_lock(const struct b
+ * Since we own the page lock, no one else could touch subpage::locked
+ * and we are safe to do several atomic operations without spinlock.
+ */
+- if (atomic_read(&subpage->nr_locked) == 0) {
++ if (atomic_read(&bfs->nr_locked) == 0) {
+ /* No subpage lock, locked by plain lock_page(). */
+ folio_unlock(folio);
+ return;
+@@ -310,7 +305,7 @@ void btrfs_folio_end_lock(const struct b
+ void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, unsigned long bitmap)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
+ const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked;
+ unsigned long flags;
+@@ -323,42 +318,42 @@ void btrfs_folio_end_lock_bitmap(const s
+ return;
+ }
+
+- if (atomic_read(&subpage->nr_locked) == 0) {
++ if (atomic_read(&bfs->nr_locked) == 0) {
+ /* No subpage lock, locked by plain lock_page(). */
+ folio_unlock(folio);
+ return;
+ }
+
+- spin_lock_irqsave(&subpage->lock, flags);
++ spin_lock_irqsave(&bfs->lock, flags);
+ for_each_set_bit(bit, &bitmap, blocks_per_folio) {
+- if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
++ if (test_and_clear_bit(bit + start_bit, bfs->bitmaps))
+ cleared++;
+ }
+- ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
+- last = atomic_sub_and_test(cleared, &subpage->nr_locked);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
++ last = atomic_sub_and_test(cleared, &bfs->nr_locked);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ if (last)
+ folio_unlock(folio);
+ }
+
+ #define subpage_test_bitmap_all_set(fs_info, folio, name) \
+ ({ \
+- struct btrfs_subpage *subpage = folio_get_private(folio); \
++ struct btrfs_folio_state *bfs = folio_get_private(folio); \
+ const unsigned int blocks_per_folio = \
+ btrfs_blocks_per_folio(fs_info, folio); \
+ \
+- bitmap_test_range_all_set(subpage->bitmaps, \
++ bitmap_test_range_all_set(bfs->bitmaps, \
+ blocks_per_folio * btrfs_bitmap_nr_##name, \
+ blocks_per_folio); \
+ })
+
+ #define subpage_test_bitmap_all_zero(fs_info, folio, name) \
+ ({ \
+- struct btrfs_subpage *subpage = folio_get_private(folio); \
++ struct btrfs_folio_state *bfs = folio_get_private(folio); \
+ const unsigned int blocks_per_folio = \
+ btrfs_blocks_per_folio(fs_info, folio); \
+ \
+- bitmap_test_range_all_zero(subpage->bitmaps, \
++ bitmap_test_range_all_zero(bfs->bitmaps, \
+ blocks_per_folio * btrfs_bitmap_nr_##name, \
+ blocks_per_folio); \
+ })
+@@ -366,43 +361,43 @@ void btrfs_folio_end_lock_bitmap(const s
+ void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ uptodate, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ if (subpage_test_bitmap_all_set(fs_info, folio, uptodate))
+ folio_mark_uptodate(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ uptodate, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ folio_clear_uptodate(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ dirty, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ folio_mark_dirty(folio);
+ }
+
+@@ -419,17 +414,17 @@ void btrfs_subpage_set_dirty(const struc
+ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ dirty, start, len);
+ unsigned long flags;
+ bool last = false;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
+ last = true;
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ return last;
+ }
+
+@@ -446,91 +441,91 @@ void btrfs_subpage_clear_dirty(const str
+ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ writeback, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ if (!folio_test_writeback(folio))
+ folio_start_writeback(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ writeback, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
+ ASSERT(folio_test_writeback(folio));
+ folio_end_writeback(folio);
+ }
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ ordered, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ folio_set_ordered(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ ordered, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ if (subpage_test_bitmap_all_zero(fs_info, folio, ordered))
+ folio_clear_ordered(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ checked, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ if (subpage_test_bitmap_all_set(fs_info, folio, checked))
+ folio_set_checked(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage = folio_get_private(folio);
++ struct btrfs_folio_state *bfs = folio_get_private(folio);
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
+ checked, start, len);
+ unsigned long flags;
+
+- spin_lock_irqsave(&subpage->lock, flags);
+- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++ spin_lock_irqsave(&bfs->lock, flags);
++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ folio_clear_checked(folio);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ /*
+@@ -541,16 +536,16 @@ void btrfs_subpage_clear_checked(const s
+ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
+ struct folio *folio, u64 start, u32 len) \
+ { \
+- struct btrfs_subpage *subpage = folio_get_private(folio); \
++ struct btrfs_folio_state *bfs = folio_get_private(folio); \
+ unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \
+ name, start, len); \
+ unsigned long flags; \
+ bool ret; \
+ \
+- spin_lock_irqsave(&subpage->lock, flags); \
+- ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \
++ spin_lock_irqsave(&bfs->lock, flags); \
++ ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \
+ len >> fs_info->sectorsize_bits); \
+- spin_unlock_irqrestore(&subpage->lock, flags); \
++ spin_unlock_irqrestore(&bfs->lock, flags); \
+ return ret; \
+ }
+ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
+@@ -662,10 +657,10 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_
+ { \
+ const unsigned int blocks_per_folio = \
+ btrfs_blocks_per_folio(fs_info, folio); \
+- const struct btrfs_subpage *subpage = folio_get_private(folio); \
++ const struct btrfs_folio_state *bfs = folio_get_private(folio); \
+ \
+ ASSERT(blocks_per_folio <= BITS_PER_LONG); \
+- *dst = bitmap_read(subpage->bitmaps, \
++ *dst = bitmap_read(bfs->bitmaps, \
+ blocks_per_folio * btrfs_bitmap_nr_##name, \
+ blocks_per_folio); \
+ }
+@@ -690,7 +685,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_
+ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+ unsigned int start_bit;
+ unsigned int nbits;
+ unsigned long flags;
+@@ -705,15 +700,15 @@ void btrfs_folio_assert_not_dirty(const
+
+ start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
+ nbits = len >> fs_info->sectorsize_bits;
+- subpage = folio_get_private(folio);
+- ASSERT(subpage);
+- spin_lock_irqsave(&subpage->lock, flags);
+- if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
++ bfs = folio_get_private(folio);
++ ASSERT(bfs);
++ spin_lock_irqsave(&bfs->lock, flags);
++ if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
+ SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
+- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
++ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
+ }
+- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ /*
+@@ -726,7 +721,7 @@ void btrfs_folio_assert_not_dirty(const
+ void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+ unsigned long flags;
+ unsigned int start_bit;
+ unsigned int nbits;
+@@ -736,19 +731,19 @@ void btrfs_folio_set_lock(const struct b
+ if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
+ return;
+
+- subpage = folio_get_private(folio);
++ bfs = folio_get_private(folio);
+ start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
+ nbits = len >> fs_info->sectorsize_bits;
+- spin_lock_irqsave(&subpage->lock, flags);
++ spin_lock_irqsave(&bfs->lock, flags);
+ /* Target range should not yet be locked. */
+- if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
++ if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
+ SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
+- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
++ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
+ }
+- bitmap_set(subpage->bitmaps, start_bit, nbits);
+- ret = atomic_add_return(nbits, &subpage->nr_locked);
++ bitmap_set(bfs->bitmaps, start_bit, nbits);
++ ret = atomic_add_return(nbits, &bfs->nr_locked);
+ ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
+ /*
+@@ -776,7 +771,7 @@ bool btrfs_meta_folio_clear_and_test_dir
+ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
+ struct folio *folio, u64 start, u32 len)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+ const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
+ unsigned long uptodate_bitmap;
+ unsigned long dirty_bitmap;
+@@ -788,18 +783,18 @@ void __cold btrfs_subpage_dump_bitmap(co
+
+ ASSERT(folio_test_private(folio) && folio_get_private(folio));
+ ASSERT(blocks_per_folio > 1);
+- subpage = folio_get_private(folio);
++ bfs = folio_get_private(folio);
+
+- spin_lock_irqsave(&subpage->lock, flags);
++ spin_lock_irqsave(&bfs->lock, flags);
+ GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap);
+ GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap);
+ GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap);
+ GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap);
+ GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap);
+ GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+
+- dump_page(folio_page(folio, 0), "btrfs subpage dump");
++ dump_page(folio_page(folio, 0), "btrfs folio state dump");
+ btrfs_warn(fs_info,
+ "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
+ start, len, folio_pos(folio),
+@@ -815,14 +810,14 @@ void btrfs_get_subpage_dirty_bitmap(stru
+ struct folio *folio,
+ unsigned long *ret_bitmap)
+ {
+- struct btrfs_subpage *subpage;
++ struct btrfs_folio_state *bfs;
+ unsigned long flags;
+
+ ASSERT(folio_test_private(folio) && folio_get_private(folio));
+ ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1);
+- subpage = folio_get_private(folio);
++ bfs = folio_get_private(folio);
+
+- spin_lock_irqsave(&subpage->lock, flags);
++ spin_lock_irqsave(&bfs->lock, flags);
+ GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap);
+- spin_unlock_irqrestore(&subpage->lock, flags);
++ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+--- a/fs/btrfs/subpage.h
++++ b/fs/btrfs/subpage.h
+@@ -32,7 +32,15 @@ struct folio;
+ enum {
+ btrfs_bitmap_nr_uptodate = 0,
+ btrfs_bitmap_nr_dirty,
++
++ /*
++ * This can be changed to atomic eventually. But this change will rely
++ * on the async delalloc range rework for locked bitmap. As async
++ * delalloc can unlock its range and mark blocks writeback at random
++ * timing.
++ */
+ btrfs_bitmap_nr_writeback,
++
+ /*
+ * The ordered and checked flags are for COW fixup, already marked
+ * deprecated, and will be removed eventually.
+@@ -57,7 +65,7 @@ enum {
+ * Structure to trace status of each sector inside a page, attached to
+ * page::private for both data and metadata inodes.
+ */
+-struct btrfs_subpage {
++struct btrfs_folio_state {
+ /* Common members for both data and metadata pages */
+ spinlock_t lock;
+ union {
+@@ -65,7 +73,7 @@ struct btrfs_subpage {
+ * Structures only used by metadata
+ *
+ * @eb_refs should only be operated under private_lock, as it
+- * manages whether the subpage can be detached.
++ * manages whether the btrfs_folio_state can be detached.
+ */
+ atomic_t eb_refs;
+
+@@ -79,7 +87,7 @@ struct btrfs_subpage {
+ unsigned long bitmaps[];
+ };
+
+-enum btrfs_subpage_type {
++enum btrfs_folio_type {
+ BTRFS_SUBPAGE_METADATA,
+ BTRFS_SUBPAGE_DATA,
+ };
+@@ -119,15 +127,18 @@ static inline bool btrfs_is_subpage(cons
+ }
+ #endif
+
+-int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
+- struct folio *folio, enum btrfs_subpage_type type);
+-void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio,
+- enum btrfs_subpage_type type);
++int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
++ struct folio *folio, enum btrfs_folio_type type);
++void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
++ enum btrfs_folio_type type);
+
+ /* Allocate additional data where page represents more than one sector */
+-struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
+- size_t fsize, enum btrfs_subpage_type type);
+-void btrfs_free_subpage(struct btrfs_subpage *subpage);
++struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
++ size_t fsize, enum btrfs_folio_type type);
++static inline void btrfs_free_folio_state(struct btrfs_folio_state *bfs)
++{
++ kfree(bfs);
++}
+
+ void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
+ void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
--- /dev/null
+From stable+bounces-171699-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:41 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:15:28 -0400
+Subject: btrfs: reorganize logic at free_extent_buffer() for better readability
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011531.242846-1-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 71c086b30d4373a01bd5627f54516a72891a026a ]
+
+It's hard to read the logic to break out of the while loop since it's a
+very long expression consisting of a logical or of two composite
+expressions, each one composed of a logical and. Further, each one also
+tests the EXTENT_BUFFER_UNMAPPED bit, making it more verbose than
+necessary.
+
+So change from this:
+
+ if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
+ || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
+ refs == 1))
+ break;
+
+To this:
+
+ if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) {
+ if (refs == 1)
+ break;
+ } else if (refs <= 3) {
+ break;
+ }
+
+At least on x86_64 using gcc 9.3.0, this doesn't change the object size.
+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: ad580dfa388f ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3486,10 +3486,13 @@ void free_extent_buffer(struct extent_bu
+
+ refs = atomic_read(&eb->refs);
+ while (1) {
+- if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
+- || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
+- refs == 1))
++ if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) {
++ if (refs == 1)
++ break;
++ } else if (refs <= 3) {
+ break;
++ }
++
+ if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1))
+ return;
+ }
--- /dev/null
+From stable+bounces-172266-greg=kroah.com@vger.kernel.org Fri Aug 22 05:35:39 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Aug 2025 23:35:17 -0400
+Subject: btrfs: subpage: keep TOWRITE tag until folio is cleaned
+To: stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Qu Wenruo <wqu@suse.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250822033527.1065200-3-sashal@kernel.org>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit b1511360c8ac882b0c52caa263620538e8d73220 ]
+
+btrfs_subpage_set_writeback() calls folio_start_writeback() the first time
+a folio is written back, and it also clears the PAGECACHE_TAG_TOWRITE tag
+even if there are still dirty blocks in the folio. This can break ordering
+guarantees, such as those required by btrfs_wait_ordered_extents().
+
+That ordering breakage leads to a real failure. For example, running
+generic/464 on a zoned setup will hit the following ASSERT. This happens
+because the broken ordering fails to flush existing dirty pages before the
+file size is truncated.
+
+ assertion failed: !list_empty(&ordered->list) :: 0, in fs/btrfs/zoned.c:1899
+ ------------[ cut here ]------------
+ kernel BUG at fs/btrfs/zoned.c:1899!
+ Oops: invalid opcode: 0000 [#1] SMP NOPTI
+ CPU: 2 UID: 0 PID: 1906169 Comm: kworker/u130:2 Kdump: loaded Not tainted 6.16.0-rc6-BTRFS-ZNS+ #554 PREEMPT(voluntary)
+ Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021
+ Workqueue: btrfs-endio-write btrfs_work_helper [btrfs]
+ RIP: 0010:btrfs_finish_ordered_zoned.cold+0x50/0x52 [btrfs]
+ RSP: 0018:ffffc9002efdbd60 EFLAGS: 00010246
+ RAX: 000000000000004c RBX: ffff88811923c4e0 RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: ffffffff827e38b1 RDI: 00000000ffffffff
+ RBP: ffff88810005d000 R08: 00000000ffffdfff R09: ffffffff831051c8
+ R10: ffffffff83055220 R11: 0000000000000000 R12: ffff8881c2458c00
+ R13: ffff88811923c540 R14: ffff88811923c5e8 R15: ffff8881c1bd9680
+ FS: 0000000000000000(0000) GS:ffff88a04acd0000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007f907c7a918c CR3: 0000000004024000 CR4: 0000000000350ef0
+ Call Trace:
+ <TASK>
+ ? srso_return_thunk+0x5/0x5f
+ btrfs_finish_ordered_io+0x4a/0x60 [btrfs]
+ btrfs_work_helper+0xf9/0x490 [btrfs]
+ process_one_work+0x204/0x590
+ ? srso_return_thunk+0x5/0x5f
+ worker_thread+0x1d6/0x3d0
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0x118/0x230
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x205/0x260
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+
+Consider process A calling writepages() with WB_SYNC_NONE. In zoned mode or
+for compressed writes, it locks several folios for delalloc and starts
+writing them out. Let's call the last locked folio folio X. Suppose the
+write range only partially covers folio X, leaving some pages dirty.
+Process A calls btrfs_subpage_set_writeback() when building a bio. This
+function call clears the TOWRITE tag of folio X, whose size = 8K and
+the block size = 4K. It is in the following state.
+
+  0     4K    8K
+  |/////|/////|  (flag: DIRTY, tag: DIRTY)
+  <----->  Process A will write this range.
+
+Now suppose process B concurrently calls writepages() with WB_SYNC_ALL. It
+calls tag_pages_for_writeback() to tag dirty folios with
+PAGECACHE_TAG_TOWRITE. Since folio X is still dirty, it gets tagged. Then,
+B collects tagged folios using filemap_get_folios_tag() and must wait for
+folio X to be written before returning from writepages().
+
+  0     4K    8K
+  |/////|/////|  (flag: DIRTY, tag: DIRTY|TOWRITE)
+
+However, between tagging and collecting, process A may call
+btrfs_subpage_set_writeback() and clear folio X's TOWRITE tag.
+  0     4K    8K
+  |     |/////|  (flag: DIRTY|WRITEBACK, tag: DIRTY)
+
+As a result, process B won't see folio X in its batch, and returns without
+waiting for it. This breaks the WB_SYNC_ALL ordering requirement.
+
+Fix this by starting writeback with __folio_start_writeback(folio, true),
+which keeps the TOWRITE tag, and manually clear the tag only after the
+folio becomes clean, via the xas operation.
+
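+A sequential toy model of the old and fixed behaviour (plain C with invented
+structs and helpers; it only mimics the DIRTY/TOWRITE bookkeeping, not the
+real page cache or xarray):
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  /* One folio's state: DIRTY flag plus the TOWRITE xarray tag. */
+  struct folio_model {
+      bool dirty;
+      bool towrite;
+      bool writeback;
+  };
+
+  /* Old behaviour: starting writeback dropped TOWRITE even though part
+   * of the folio was still dirty. */
+  static void start_writeback_old(struct folio_model *f)
+  {
+      f->writeback = true;
+      f->towrite = false;
+  }
+
+  /* Fixed behaviour: keep TOWRITE until the folio is fully clean. */
+  static void start_writeback_fixed(struct folio_model *f)
+  {
+      f->writeback = true;
+      if (!f->dirty)
+          f->towrite = false;
+  }
+
+  int main(void)
+  {
+      struct folio_model x = { .dirty = true, .towrite = true };
+
+      start_writeback_old(&x);
+      printf("old:   WB_SYNC_ALL still sees TOWRITE? %s\n",
+             x.towrite ? "yes" : "no");
+
+      x = (struct folio_model){ .dirty = true, .towrite = true };
+      start_writeback_fixed(&x);
+      printf("fixed: WB_SYNC_ALL still sees TOWRITE? %s\n",
+             x.towrite ? "yes" : "no");
+      return 0;
+  }
+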
+Fixes: 3470da3b7d87 ("btrfs: subpage: introduce helpers for writeback status")
+CC: stable@vger.kernel.org # 6.12+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/subpage.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/subpage.c
++++ b/fs/btrfs/subpage.c
+@@ -448,8 +448,25 @@ void btrfs_subpage_set_writeback(const s
+
+ spin_lock_irqsave(&bfs->lock, flags);
+ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
++
++ /*
++ * Don't clear the TOWRITE tag when starting writeback on a still-dirty
++ * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it,
++ * assume writeback is complete, and exit too early — violating sync
++ * ordering guarantees.
++ */
+ if (!folio_test_writeback(folio))
+- folio_start_writeback(folio);
++ __folio_start_writeback(folio, true);
++ if (!folio_test_dirty(folio)) {
++ struct address_space *mapping = folio_mapping(folio);
++ XA_STATE(xas, &mapping->i_pages, folio->index);
++ unsigned long flags;
++
++ xas_lock_irqsave(&xas, flags);
++ xas_load(&xas);
++ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
++ xas_unlock_irqrestore(&xas, flags);
++ }
+ spin_unlock_irqrestore(&bfs->lock, flags);
+ }
+
--- /dev/null
+From stable+bounces-171700-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:45 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:15:30 -0400
+Subject: btrfs: use refcount_t type for the extent buffer reference counter
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011531.242846-3-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit b769777d927af168b1389388392bfd7dc4e38399 ]
+
+Instead of using a bare atomic, use the refcount_t type, which, despite
+being a structure that contains only an atomic, has an API that checks
+for underflows and other hazards. This doesn't change the size of the
+extent_buffer structure.
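+
+For context, refcount_t (from <linux/refcount.h>) is a thin wrapper around
+a single atomic_t, which is why the structure size stays the same:
+
+  typedef struct refcount_struct {
+          atomic_t refs;
+  } refcount_t;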
+
+This removes the need to do things like this:
+
+ WARN_ON(atomic_read(&eb->refs) == 0);
+ if (atomic_dec_and_test(&eb->refs)) {
+ (...)
+ }
+
+And do just:
+
+ if (refcount_dec_and_test(&eb->refs)) {
+ (...)
+ }
+
+This is safe because refcount_dec_and_test() already triggers a warning
+when we decrement a ref count that is already 0 (or below).
+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: ad580dfa388f ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.c | 14 ++++++-------
+ fs/btrfs/extent-tree.c | 2 -
+ fs/btrfs/extent_io.c | 45 +++++++++++++++++++++----------------------
+ fs/btrfs/extent_io.h | 2 -
+ fs/btrfs/fiemap.c | 2 -
+ fs/btrfs/print-tree.c | 2 -
+ fs/btrfs/qgroup.c | 6 ++---
+ fs/btrfs/relocation.c | 4 +--
+ fs/btrfs/tree-log.c | 4 +--
+ fs/btrfs/zoned.c | 2 -
+ include/trace/events/btrfs.h | 2 -
+ 11 files changed, 42 insertions(+), 43 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -198,7 +198,7 @@ struct extent_buffer *btrfs_root_node(st
+ * the inc_not_zero dance and if it doesn't work then
+ * synchronize_rcu and try again.
+ */
+- if (atomic_inc_not_zero(&eb->refs)) {
++ if (refcount_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ break;
+ }
+@@ -556,7 +556,7 @@ int btrfs_force_cow_block(struct btrfs_t
+ btrfs_abort_transaction(trans, ret);
+ goto error_unlock_cow;
+ }
+- atomic_inc(&cow->refs);
++ refcount_inc(&cow->refs);
+ rcu_assign_pointer(root->node, cow);
+
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
+@@ -1088,7 +1088,7 @@ static noinline int balance_level(struct
+ /* update the path */
+ if (left) {
+ if (btrfs_header_nritems(left) > orig_slot) {
+- atomic_inc(&left->refs);
++ refcount_inc(&left->refs);
+ /* left was locked after cow */
+ path->nodes[level] = left;
+ path->slots[level + 1] -= 1;
+@@ -1692,7 +1692,7 @@ static struct extent_buffer *btrfs_searc
+
+ if (p->search_commit_root) {
+ b = root->commit_root;
+- atomic_inc(&b->refs);
++ refcount_inc(&b->refs);
+ level = btrfs_header_level(b);
+ /*
+ * Ensure that all callers have set skip_locking when
+@@ -2893,7 +2893,7 @@ static noinline int insert_new_root(stru
+ free_extent_buffer(old);
+
+ add_root_to_dirty_list(root);
+- atomic_inc(&c->refs);
++ refcount_inc(&c->refs);
+ path->nodes[level] = c;
+ path->locks[level] = BTRFS_WRITE_LOCK;
+ path->slots[level] = 0;
+@@ -4450,7 +4450,7 @@ static noinline int btrfs_del_leaf(struc
+
+ root_sub_used_bytes(root);
+
+- atomic_inc(&leaf->refs);
++ refcount_inc(&leaf->refs);
+ ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
+ free_extent_buffer_stale(leaf);
+ if (ret < 0)
+@@ -4535,7 +4535,7 @@ int btrfs_del_items(struct btrfs_trans_h
+ * for possible call to btrfs_del_ptr below
+ */
+ slot = path->slots[1];
+- atomic_inc(&leaf->refs);
++ refcount_inc(&leaf->refs);
+ /*
+ * We want to be able to at least push one item to the
+ * left neighbour leaf, and that's the first item.
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -6342,7 +6342,7 @@ int btrfs_drop_subtree(struct btrfs_tran
+
+ btrfs_assert_tree_write_locked(parent);
+ parent_level = btrfs_header_level(parent);
+- atomic_inc(&parent->refs);
++ refcount_inc(&parent->refs);
+ path->nodes[parent_level] = parent;
+ path->slots[parent_level] = btrfs_header_nritems(parent);
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -77,7 +77,7 @@ void btrfs_extent_buffer_leak_debug_chec
+ struct extent_buffer, leak_list);
+ pr_err(
+ "BTRFS: buffer leak start %llu len %u refs %d bflags %lu owner %llu\n",
+- eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
++ eb->start, eb->len, refcount_read(&eb->refs), eb->bflags,
+ btrfs_header_owner(eb));
+ list_del(&eb->leak_list);
+ WARN_ON_ONCE(1);
+@@ -1961,7 +1961,7 @@ retry:
+ if (!eb)
+ return NULL;
+
+- if (!atomic_inc_not_zero(&eb->refs)) {
++ if (!refcount_inc_not_zero(&eb->refs)) {
+ xas_reset(xas);
+ goto retry;
+ }
+@@ -2012,7 +2012,7 @@ static struct extent_buffer *find_extent
+
+ rcu_read_lock();
+ eb = xa_load(&fs_info->buffer_tree, index);
+- if (eb && !atomic_inc_not_zero(&eb->refs))
++ if (eb && !refcount_inc_not_zero(&eb->refs))
+ eb = NULL;
+ rcu_read_unlock();
+ return eb;
+@@ -2842,7 +2842,7 @@ static struct extent_buffer *__alloc_ext
+ btrfs_leak_debug_add_eb(eb);
+
+ spin_lock_init(&eb->refs_lock);
+- atomic_set(&eb->refs, 1);
++ refcount_set(&eb->refs, 1);
+
+ ASSERT(eb->len <= BTRFS_MAX_METADATA_BLOCKSIZE);
+
+@@ -2975,13 +2975,13 @@ static void check_buffer_tree_ref(struct
+ * once io is initiated, TREE_REF can no longer be cleared, so that is
+ * the moment at which any such race is best fixed.
+ */
+- refs = atomic_read(&eb->refs);
++ refs = refcount_read(&eb->refs);
+ if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ return;
+
+ spin_lock(&eb->refs_lock);
+ if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+- atomic_inc(&eb->refs);
++ refcount_inc(&eb->refs);
+ spin_unlock(&eb->refs_lock);
+ }
+
+@@ -3047,7 +3047,7 @@ again:
+ return ERR_PTR(ret);
+ }
+ if (exists) {
+- if (!atomic_inc_not_zero(&exists->refs)) {
++ if (!refcount_inc_not_zero(&exists->refs)) {
+ /* The extent buffer is being freed, retry. */
+ xa_unlock_irq(&fs_info->buffer_tree);
+ goto again;
+@@ -3092,7 +3092,7 @@ static struct extent_buffer *grab_extent
+ * just overwrite folio private.
+ */
+ exists = folio_get_private(folio);
+- if (atomic_inc_not_zero(&exists->refs))
++ if (refcount_inc_not_zero(&exists->refs))
+ return exists;
+
+ WARN_ON(folio_test_dirty(folio));
+@@ -3362,7 +3362,7 @@ again:
+ goto out;
+ }
+ if (existing_eb) {
+- if (!atomic_inc_not_zero(&existing_eb->refs)) {
++ if (!refcount_inc_not_zero(&existing_eb->refs)) {
+ xa_unlock_irq(&fs_info->buffer_tree);
+ goto again;
+ }
+@@ -3391,7 +3391,7 @@ again:
+ return eb;
+
+ out:
+- WARN_ON(!atomic_dec_and_test(&eb->refs));
++ WARN_ON(!refcount_dec_and_test(&eb->refs));
+
+ /*
+ * Any attached folios need to be detached before we unlock them. This
+@@ -3437,8 +3437,7 @@ static int release_extent_buffer(struct
+ {
+ lockdep_assert_held(&eb->refs_lock);
+
+- WARN_ON(atomic_read(&eb->refs) == 0);
+- if (atomic_dec_and_test(&eb->refs)) {
++ if (refcount_dec_and_test(&eb->refs)) {
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+
+ spin_unlock(&eb->refs_lock);
+@@ -3484,7 +3483,7 @@ void free_extent_buffer(struct extent_bu
+ if (!eb)
+ return;
+
+- refs = atomic_read(&eb->refs);
++ refs = refcount_read(&eb->refs);
+ while (1) {
+ if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) {
+ if (refs == 1)
+@@ -3494,16 +3493,16 @@ void free_extent_buffer(struct extent_bu
+ }
+
+ /* Optimization to avoid locking eb->refs_lock. */
+- if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1))
++ if (atomic_try_cmpxchg(&eb->refs.refs, &refs, refs - 1))
+ return;
+ }
+
+ spin_lock(&eb->refs_lock);
+- if (atomic_read(&eb->refs) == 2 &&
++ if (refcount_read(&eb->refs) == 2 &&
+ test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
+ !extent_buffer_under_io(eb) &&
+ test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+- atomic_dec(&eb->refs);
++ refcount_dec(&eb->refs);
+
+ /*
+ * I know this is terrible, but it's temporary until we stop tracking
+@@ -3520,9 +3519,9 @@ void free_extent_buffer_stale(struct ext
+ spin_lock(&eb->refs_lock);
+ set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
+
+- if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
++ if (refcount_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
+ test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+- atomic_dec(&eb->refs);
++ refcount_dec(&eb->refs);
+ release_extent_buffer(eb);
+ }
+
+@@ -3580,7 +3579,7 @@ void btrfs_clear_buffer_dirty(struct btr
+ btree_clear_folio_dirty_tag(folio);
+ folio_unlock(folio);
+ }
+- WARN_ON(atomic_read(&eb->refs) == 0);
++ WARN_ON(refcount_read(&eb->refs) == 0);
+ }
+
+ void set_extent_buffer_dirty(struct extent_buffer *eb)
+@@ -3591,7 +3590,7 @@ void set_extent_buffer_dirty(struct exte
+
+ was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+
+- WARN_ON(atomic_read(&eb->refs) == 0);
++ WARN_ON(refcount_read(&eb->refs) == 0);
+ WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+ WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
+
+@@ -3717,7 +3716,7 @@ int read_extent_buffer_pages_nowait(stru
+
+ eb->read_mirror = 0;
+ check_buffer_tree_ref(eb);
+- atomic_inc(&eb->refs);
++ refcount_inc(&eb->refs);
+
+ bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES,
+ REQ_OP_READ | REQ_META, eb->fs_info,
+@@ -4312,7 +4311,7 @@ static int try_release_subpage_extent_bu
+ * won't disappear out from under us.
+ */
+ spin_lock(&eb->refs_lock);
+- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
++ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
+ spin_unlock(&eb->refs_lock);
+ continue;
+ }
+@@ -4378,7 +4377,7 @@ int try_release_extent_buffer(struct fol
+ * this page.
+ */
+ spin_lock(&eb->refs_lock);
+- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
++ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
+ spin_unlock(&eb->refs_lock);
+ spin_unlock(&folio->mapping->i_private_lock);
+ return 0;
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -98,7 +98,7 @@ struct extent_buffer {
+ void *addr;
+
+ spinlock_t refs_lock;
+- atomic_t refs;
++ refcount_t refs;
+ int read_mirror;
+ /* >= 0 if eb belongs to a log tree, -1 otherwise */
+ s8 log_index;
+--- a/fs/btrfs/fiemap.c
++++ b/fs/btrfs/fiemap.c
+@@ -320,7 +320,7 @@ static int fiemap_next_leaf_item(struct
+ * the cost of allocating a new one.
+ */
+ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
+- atomic_inc(&clone->refs);
++ refcount_inc(&clone->refs);
+
+ ret = btrfs_next_leaf(inode->root, path);
+ if (ret != 0)
+--- a/fs/btrfs/print-tree.c
++++ b/fs/btrfs/print-tree.c
+@@ -223,7 +223,7 @@ static void print_eb_refs_lock(const str
+ {
+ #ifdef CONFIG_BTRFS_DEBUG
+ btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u",
+- atomic_read(&eb->refs), eb->lock_owner, current->pid);
++ refcount_read(&eb->refs), eb->lock_owner, current->pid);
+ #endif
+ }
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -2348,7 +2348,7 @@ static int qgroup_trace_extent_swap(stru
+ btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
+
+ /* For src_path */
+- atomic_inc(&src_eb->refs);
++ refcount_inc(&src_eb->refs);
+ src_path->nodes[root_level] = src_eb;
+ src_path->slots[root_level] = dst_path->slots[root_level];
+ src_path->locks[root_level] = 0;
+@@ -2581,7 +2581,7 @@ static int qgroup_trace_subtree_swap(str
+ goto out;
+ }
+ /* For dst_path */
+- atomic_inc(&dst_eb->refs);
++ refcount_inc(&dst_eb->refs);
+ dst_path->nodes[level] = dst_eb;
+ dst_path->slots[level] = 0;
+ dst_path->locks[level] = 0;
+@@ -2673,7 +2673,7 @@ int btrfs_qgroup_trace_subtree(struct bt
+ * walk back up the tree (adjusting slot pointers as we go)
+ * and restart the search process.
+ */
+- atomic_inc(&root_eb->refs); /* For path */
++ refcount_inc(&root_eb->refs); /* For path */
+ path->nodes[root_level] = root_eb;
+ path->slots[root_level] = 0;
+ path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1535,7 +1535,7 @@ static noinline_for_stack int merge_relo
+
+ if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
+ level = btrfs_root_level(root_item);
+- atomic_inc(&reloc_root->node->refs);
++ refcount_inc(&reloc_root->node->refs);
+ path->nodes[level] = reloc_root->node;
+ path->slots[level] = 0;
+ } else {
+@@ -4358,7 +4358,7 @@ int btrfs_reloc_cow_block(struct btrfs_t
+ }
+
+ btrfs_backref_drop_node_buffer(node);
+- atomic_inc(&cow->refs);
++ refcount_inc(&cow->refs);
+ node->eb = cow;
+ node->new_bytenr = cow->start;
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -2747,7 +2747,7 @@ static int walk_log_tree(struct btrfs_tr
+ level = btrfs_header_level(log->node);
+ orig_level = level;
+ path->nodes[level] = log->node;
+- atomic_inc(&log->node->refs);
++ refcount_inc(&log->node->refs);
+ path->slots[level] = 0;
+
+ while (1) {
+@@ -3711,7 +3711,7 @@ static int clone_leaf(struct btrfs_path
+ * Add extra ref to scratch eb so that it is not freed when callers
+ * release the path, so we can reuse it later if needed.
+ */
+- atomic_inc(&ctx->scratch_eb->refs);
++ refcount_inc(&ctx->scratch_eb->refs);
+
+ return 0;
+ }
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2491,7 +2491,7 @@ void btrfs_schedule_zone_finish_bg(struc
+
+ /* For the work */
+ btrfs_get_block_group(bg);
+- atomic_inc(&eb->refs);
++ refcount_inc(&eb->refs);
+ bg->last_eb = eb;
+ INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
+ queue_work(system_unbound_wq, &bg->zone_finish_work);
+--- a/include/trace/events/btrfs.h
++++ b/include/trace/events/btrfs.h
+@@ -1095,7 +1095,7 @@ TRACE_EVENT(btrfs_cow_block,
+ TP_fast_assign_btrfs(root->fs_info,
+ __entry->root_objectid = btrfs_root_id(root);
+ __entry->buf_start = buf->start;
+- __entry->refs = atomic_read(&buf->refs);
++ __entry->refs = refcount_read(&buf->refs);
+ __entry->cow_start = cow->start;
+ __entry->buf_level = btrfs_header_level(buf);
+ __entry->cow_level = btrfs_header_level(cow);
--- /dev/null
+From stable+bounces-172225-greg=kroah.com@vger.kernel.org Thu Aug 21 21:21:50 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Aug 2025 15:21:31 -0400
+Subject: crypto: acomp - Fix CFI failure due to type punning
+To: stable@vger.kernel.org
+Cc: Eric Biggers <ebiggers@kernel.org>, Giovanni Cabiddu <giovanni.cabiddu@intel.com>, Herbert Xu <herbert@gondor.apana.org.au>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250821192131.923831-2-sashal@kernel.org>
+
+From: Eric Biggers <ebiggers@kernel.org>
+
+[ Upstream commit 962ddc5a7a4b04c007bba0f3e7298cda13c62efd ]
+
+To avoid a crash when control flow integrity is enabled, make the
+workspace ("stream") free function use a consistent type, and call it
+through a function pointer that has that same type.
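+
+The mismatch, in short: kvfree() has type void (*)(const void *), but it
+was being called through the free_ctx slot typed void (*)(void *), which
+kCFI rejects at the indirect call site. A small wrapper with the exact
+expected type (as done below for deflate and zstd) avoids the punning:
+
+  static void deflate_free_stream(void *ctx)
+  {
+          kvfree(ctx);
+  }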
+
+Fixes: 42d9f6c77479 ("crypto: acomp - Move scomp stream allocation code into acomp")
+Cc: stable@vger.kernel.org
+Signed-off-by: Eric Biggers <ebiggers@kernel.org>
+Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ crypto/deflate.c | 7 ++++++-
+ crypto/zstd.c | 7 ++++++-
+ include/crypto/internal/acompress.h | 5 +----
+ 3 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/crypto/deflate.c
++++ b/crypto/deflate.c
+@@ -48,9 +48,14 @@ static void *deflate_alloc_stream(void)
+ return ctx;
+ }
+
++static void deflate_free_stream(void *ctx)
++{
++ kvfree(ctx);
++}
++
+ static struct crypto_acomp_streams deflate_streams = {
+ .alloc_ctx = deflate_alloc_stream,
+- .cfree_ctx = kvfree,
++ .free_ctx = deflate_free_stream,
+ };
+
+ static int deflate_compress_one(struct acomp_req *req,
+--- a/crypto/zstd.c
++++ b/crypto/zstd.c
+@@ -54,9 +54,14 @@ static void *zstd_alloc_stream(void)
+ return ctx;
+ }
+
++static void zstd_free_stream(void *ctx)
++{
++ kvfree(ctx);
++}
++
+ static struct crypto_acomp_streams zstd_streams = {
+ .alloc_ctx = zstd_alloc_stream,
+- .cfree_ctx = kvfree,
++ .free_ctx = zstd_free_stream,
+ };
+
+ static int zstd_init(struct crypto_acomp *acomp_tfm)
+--- a/include/crypto/internal/acompress.h
++++ b/include/crypto/internal/acompress.h
+@@ -63,10 +63,7 @@ struct crypto_acomp_stream {
+ struct crypto_acomp_streams {
+ /* These must come first because of struct scomp_alg. */
+ void *(*alloc_ctx)(void);
+- union {
+- void (*free_ctx)(void *);
+- void (*cfree_ctx)(const void *);
+- };
++ void (*free_ctx)(void *);
+
+ struct crypto_acomp_stream __percpu *streams;
+ struct work_struct stream_work;
--- /dev/null
+From stable+bounces-172224-greg=kroah.com@vger.kernel.org Thu Aug 21 21:21:43 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Aug 2025 15:21:30 -0400
+Subject: crypto: zstd - convert to acomp
+To: stable@vger.kernel.org
+Cc: Suman Kumar Chakraborty <suman.kumar.chakraborty@intel.com>, Giovanni Cabiddu <giovanni.cabiddu@intel.com>, Herbert Xu <herbert@gondor.apana.org.au>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250821192131.923831-1-sashal@kernel.org>
+
+From: Suman Kumar Chakraborty <suman.kumar.chakraborty@intel.com>
+
+[ Upstream commit f5ad93ffb54119a8dc5e18f070624d4ead586969 ]
+
+Convert the implementation to a native acomp interface using zstd
+streaming APIs, eliminating the need for buffer linearization.
+
+This includes:
+ - Removal of the scomp interface in favor of acomp
+ - Refactoring of stream allocation, initialization, and handling for
+ both compression and decompression using Zstandard streaming APIs
+ - Replacement of crypto_register_scomp() with crypto_register_acomp()
+ for module registration
+
+Signed-off-by: Suman Kumar Chakraborty <suman.kumar.chakraborty@intel.com>
+Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Stable-dep-of: 962ddc5a7a4b ("crypto: acomp - Fix CFI failure due to type punning")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ crypto/zstd.c | 354 ++++++++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 233 insertions(+), 121 deletions(-)
+
+--- a/crypto/zstd.c
++++ b/crypto/zstd.c
+@@ -12,188 +12,300 @@
+ #include <linux/net.h>
+ #include <linux/vmalloc.h>
+ #include <linux/zstd.h>
+-#include <crypto/internal/scompress.h>
++#include <crypto/internal/acompress.h>
++#include <crypto/scatterwalk.h>
+
+
+-#define ZSTD_DEF_LEVEL 3
++#define ZSTD_DEF_LEVEL 3
++#define ZSTD_MAX_WINDOWLOG 18
++#define ZSTD_MAX_SIZE BIT(ZSTD_MAX_WINDOWLOG)
+
+ struct zstd_ctx {
+ zstd_cctx *cctx;
+ zstd_dctx *dctx;
+- void *cwksp;
+- void *dwksp;
++ size_t wksp_size;
++ zstd_parameters params;
++ u8 wksp[0] __aligned(8);
+ };
+
+-static zstd_parameters zstd_params(void)
+-{
+- return zstd_get_params(ZSTD_DEF_LEVEL, 0);
+-}
++static DEFINE_MUTEX(zstd_stream_lock);
+
+-static int zstd_comp_init(struct zstd_ctx *ctx)
++static void *zstd_alloc_stream(void)
+ {
+- int ret = 0;
+- const zstd_parameters params = zstd_params();
+- const size_t wksp_size = zstd_cctx_workspace_bound(¶ms.cParams);
++ zstd_parameters params;
++ struct zstd_ctx *ctx;
++ size_t wksp_size;
+
+- ctx->cwksp = vzalloc(wksp_size);
+- if (!ctx->cwksp) {
+- ret = -ENOMEM;
+- goto out;
+- }
++ params = zstd_get_params(ZSTD_DEF_LEVEL, ZSTD_MAX_SIZE);
+
+- ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size);
+- if (!ctx->cctx) {
+- ret = -EINVAL;
+- goto out_free;
+- }
+-out:
+- return ret;
+-out_free:
+- vfree(ctx->cwksp);
+- goto out;
++ wksp_size = max_t(size_t,
++ zstd_cstream_workspace_bound(¶ms.cParams),
++ zstd_dstream_workspace_bound(ZSTD_MAX_SIZE));
++ if (!wksp_size)
++ return ERR_PTR(-EINVAL);
++
++ ctx = kvmalloc(sizeof(*ctx) + wksp_size, GFP_KERNEL);
++ if (!ctx)
++ return ERR_PTR(-ENOMEM);
++
++ ctx->params = params;
++ ctx->wksp_size = wksp_size;
++
++ return ctx;
+ }
+
+-static int zstd_decomp_init(struct zstd_ctx *ctx)
++static struct crypto_acomp_streams zstd_streams = {
++ .alloc_ctx = zstd_alloc_stream,
++ .cfree_ctx = kvfree,
++};
++
++static int zstd_init(struct crypto_acomp *acomp_tfm)
+ {
+ int ret = 0;
+- const size_t wksp_size = zstd_dctx_workspace_bound();
+
+- ctx->dwksp = vzalloc(wksp_size);
+- if (!ctx->dwksp) {
+- ret = -ENOMEM;
+- goto out;
+- }
++ mutex_lock(&zstd_stream_lock);
++ ret = crypto_acomp_alloc_streams(&zstd_streams);
++ mutex_unlock(&zstd_stream_lock);
+
+- ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size);
+- if (!ctx->dctx) {
+- ret = -EINVAL;
+- goto out_free;
+- }
+-out:
+ return ret;
+-out_free:
+- vfree(ctx->dwksp);
+- goto out;
+ }
+
+-static void zstd_comp_exit(struct zstd_ctx *ctx)
++static void zstd_exit(struct crypto_acomp *acomp_tfm)
+ {
+- vfree(ctx->cwksp);
+- ctx->cwksp = NULL;
+- ctx->cctx = NULL;
++ crypto_acomp_free_streams(&zstd_streams);
+ }
+
+-static void zstd_decomp_exit(struct zstd_ctx *ctx)
++static int zstd_compress_one(struct acomp_req *req, struct zstd_ctx *ctx,
++ const void *src, void *dst, unsigned int *dlen)
+ {
+- vfree(ctx->dwksp);
+- ctx->dwksp = NULL;
+- ctx->dctx = NULL;
+-}
++ unsigned int out_len;
+
+-static int __zstd_init(void *ctx)
+-{
+- int ret;
++ ctx->cctx = zstd_init_cctx(ctx->wksp, ctx->wksp_size);
++ if (!ctx->cctx)
++ return -EINVAL;
+
+- ret = zstd_comp_init(ctx);
+- if (ret)
+- return ret;
+- ret = zstd_decomp_init(ctx);
+- if (ret)
+- zstd_comp_exit(ctx);
+- return ret;
++ out_len = zstd_compress_cctx(ctx->cctx, dst, req->dlen, src, req->slen,
++ &ctx->params);
++ if (zstd_is_error(out_len))
++ return -EINVAL;
++
++ *dlen = out_len;
++
++ return 0;
+ }
+
+-static void *zstd_alloc_ctx(void)
++static int zstd_compress(struct acomp_req *req)
+ {
+- int ret;
++ struct crypto_acomp_stream *s;
++ unsigned int pos, scur, dcur;
++ unsigned int total_out = 0;
++ bool data_available = true;
++ zstd_out_buffer outbuf;
++ struct acomp_walk walk;
++ zstd_in_buffer inbuf;
+ struct zstd_ctx *ctx;
++ size_t pending_bytes;
++ size_t num_bytes;
++ int ret;
+
+- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+- if (!ctx)
+- return ERR_PTR(-ENOMEM);
++ s = crypto_acomp_lock_stream_bh(&zstd_streams);
++ ctx = s->ctx;
+
+- ret = __zstd_init(ctx);
+- if (ret) {
+- kfree(ctx);
+- return ERR_PTR(ret);
++ ret = acomp_walk_virt(&walk, req, true);
++ if (ret)
++ goto out;
++
++ ctx->cctx = zstd_init_cstream(&ctx->params, 0, ctx->wksp, ctx->wksp_size);
++ if (!ctx->cctx) {
++ ret = -EINVAL;
++ goto out;
+ }
+
+- return ctx;
+-}
++ do {
++ dcur = acomp_walk_next_dst(&walk);
++ if (!dcur) {
++ ret = -ENOSPC;
++ goto out;
++ }
++
++ outbuf.pos = 0;
++ outbuf.dst = (u8 *)walk.dst.virt.addr;
++ outbuf.size = dcur;
++
++ do {
++ scur = acomp_walk_next_src(&walk);
++ if (dcur == req->dlen && scur == req->slen) {
++ ret = zstd_compress_one(req, ctx, walk.src.virt.addr,
++ walk.dst.virt.addr, &total_out);
++ acomp_walk_done_src(&walk, scur);
++ acomp_walk_done_dst(&walk, dcur);
++ goto out;
++ }
++
++ if (scur) {
++ inbuf.pos = 0;
++ inbuf.src = walk.src.virt.addr;
++ inbuf.size = scur;
++ } else {
++ data_available = false;
++ break;
++ }
++
++ num_bytes = zstd_compress_stream(ctx->cctx, &outbuf, &inbuf);
++ if (ZSTD_isError(num_bytes)) {
++ ret = -EIO;
++ goto out;
++ }
++
++ pending_bytes = zstd_flush_stream(ctx->cctx, &outbuf);
++ if (ZSTD_isError(pending_bytes)) {
++ ret = -EIO;
++ goto out;
++ }
++ acomp_walk_done_src(&walk, inbuf.pos);
++ } while (dcur != outbuf.pos);
++
++ total_out += outbuf.pos;
++ acomp_walk_done_dst(&walk, dcur);
++ } while (data_available);
++
++ pos = outbuf.pos;
++ num_bytes = zstd_end_stream(ctx->cctx, &outbuf);
++ if (ZSTD_isError(num_bytes))
++ ret = -EIO;
++ else
++ total_out += (outbuf.pos - pos);
+
+-static void __zstd_exit(void *ctx)
+-{
+- zstd_comp_exit(ctx);
+- zstd_decomp_exit(ctx);
+-}
++out:
++ if (ret)
++ req->dlen = 0;
++ else
++ req->dlen = total_out;
+
+-static void zstd_free_ctx(void *ctx)
+-{
+- __zstd_exit(ctx);
+- kfree_sensitive(ctx);
++ crypto_acomp_unlock_stream_bh(s);
++
++ return ret;
+ }
+
+-static int __zstd_compress(const u8 *src, unsigned int slen,
+- u8 *dst, unsigned int *dlen, void *ctx)
++static int zstd_decompress_one(struct acomp_req *req, struct zstd_ctx *ctx,
++ const void *src, void *dst, unsigned int *dlen)
+ {
+ size_t out_len;
+- struct zstd_ctx *zctx = ctx;
+- const zstd_parameters params = zstd_params();
+
+- out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, ¶ms);
++ ctx->dctx = zstd_init_dctx(ctx->wksp, ctx->wksp_size);
++ if (!ctx->dctx)
++ return -EINVAL;
++
++ out_len = zstd_decompress_dctx(ctx->dctx, dst, req->dlen, src, req->slen);
+ if (zstd_is_error(out_len))
+ return -EINVAL;
++
+ *dlen = out_len;
++
+ return 0;
+ }
+
+-static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src,
+- unsigned int slen, u8 *dst, unsigned int *dlen,
+- void *ctx)
++static int zstd_decompress(struct acomp_req *req)
+ {
+- return __zstd_compress(src, slen, dst, dlen, ctx);
+-}
++ struct crypto_acomp_stream *s;
++ unsigned int total_out = 0;
++ unsigned int scur, dcur;
++ zstd_out_buffer outbuf;
++ struct acomp_walk walk;
++ zstd_in_buffer inbuf;
++ struct zstd_ctx *ctx;
++ size_t pending_bytes;
++ int ret;
+
+-static int __zstd_decompress(const u8 *src, unsigned int slen,
+- u8 *dst, unsigned int *dlen, void *ctx)
+-{
+- size_t out_len;
+- struct zstd_ctx *zctx = ctx;
++ s = crypto_acomp_lock_stream_bh(&zstd_streams);
++ ctx = s->ctx;
+
+- out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen);
+- if (zstd_is_error(out_len))
+- return -EINVAL;
+- *dlen = out_len;
+- return 0;
+-}
++ ret = acomp_walk_virt(&walk, req, true);
++ if (ret)
++ goto out;
+
+-static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+- unsigned int slen, u8 *dst, unsigned int *dlen,
+- void *ctx)
+-{
+- return __zstd_decompress(src, slen, dst, dlen, ctx);
+-}
+-
+-static struct scomp_alg scomp = {
+- .alloc_ctx = zstd_alloc_ctx,
+- .free_ctx = zstd_free_ctx,
+- .compress = zstd_scompress,
+- .decompress = zstd_sdecompress,
+- .base = {
+- .cra_name = "zstd",
+- .cra_driver_name = "zstd-scomp",
+- .cra_module = THIS_MODULE,
++ ctx->dctx = zstd_init_dstream(ZSTD_MAX_SIZE, ctx->wksp, ctx->wksp_size);
++ if (!ctx->dctx) {
++ ret = -EINVAL;
++ goto out;
+ }
++
++ do {
++ scur = acomp_walk_next_src(&walk);
++ if (scur) {
++ inbuf.pos = 0;
++ inbuf.size = scur;
++ inbuf.src = walk.src.virt.addr;
++ } else {
++ break;
++ }
++
++ do {
++ dcur = acomp_walk_next_dst(&walk);
++ if (dcur == req->dlen && scur == req->slen) {
++ ret = zstd_decompress_one(req, ctx, walk.src.virt.addr,
++ walk.dst.virt.addr, &total_out);
++ acomp_walk_done_dst(&walk, dcur);
++ acomp_walk_done_src(&walk, scur);
++ goto out;
++ }
++
++ if (!dcur) {
++ ret = -ENOSPC;
++ goto out;
++ }
++
++ outbuf.pos = 0;
++ outbuf.dst = (u8 *)walk.dst.virt.addr;
++ outbuf.size = dcur;
++
++ pending_bytes = zstd_decompress_stream(ctx->dctx, &outbuf, &inbuf);
++ if (ZSTD_isError(pending_bytes)) {
++ ret = -EIO;
++ goto out;
++ }
++
++ total_out += outbuf.pos;
++
++ acomp_walk_done_dst(&walk, outbuf.pos);
++ } while (scur != inbuf.pos);
++
++ if (scur)
++ acomp_walk_done_src(&walk, scur);
++ } while (ret == 0);
++
++out:
++ if (ret)
++ req->dlen = 0;
++ else
++ req->dlen = total_out;
++
++ crypto_acomp_unlock_stream_bh(s);
++
++ return ret;
++}
++
++static struct acomp_alg zstd_acomp = {
++ .base = {
++ .cra_name = "zstd",
++ .cra_driver_name = "zstd-generic",
++ .cra_flags = CRYPTO_ALG_REQ_VIRT,
++ .cra_module = THIS_MODULE,
++ },
++ .init = zstd_init,
++ .exit = zstd_exit,
++ .compress = zstd_compress,
++ .decompress = zstd_decompress,
+ };
+
+ static int __init zstd_mod_init(void)
+ {
+- return crypto_register_scomp(&scomp);
++ return crypto_register_acomp(&zstd_acomp);
+ }
+
+ static void __exit zstd_mod_fini(void)
+ {
+- crypto_unregister_scomp(&scomp);
++ crypto_unregister_acomp(&zstd_acomp);
+ }
+
+ module_init(zstd_mod_init);
--- /dev/null
+From 7e6c3130690a01076efdf45aa02ba5d5c16849a0 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sun, 20 Jul 2025 11:58:22 -0700
+Subject: mm/damon/ops-common: ignore migration request to invalid nodes
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 7e6c3130690a01076efdf45aa02ba5d5c16849a0 upstream.
+
+damon_migrate_pages() tries migration even if the target node is invalid.
+If users mistakenly make such invalid requests via
+DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen.
+
+ [ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48
+ [ 7831.884160] #PF: supervisor read access in kernel mode
+ [ 7831.884681] #PF: error_code(0x0000) - not-present page
+ [ 7831.885203] PGD 0 P4D 0
+ [ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI
+ [ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary)
+ [ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014
+ [ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137)
+ [...]
+ [ 7831.895953] Call Trace:
+ [ 7831.896195] <TASK>
+ [ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192)
+ [ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851)
+ [ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
+ [ 7831.897735] migrate_pages (mm/migrate.c:2078)
+ [ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137)
+ [ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354)
+ [ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405)
+ [...]
+
+Add a target node validity check in damon_migrate_pages(). The validity
+check is stolen from that of do_pages_move(), which is being used for the
+move_pages() system call.
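+
+For reference, the borrowed check in do_pages_move() (mm/migrate.c) has
+roughly this shape:
+
+  if (node < 0 || node >= MAX_NUMNODES)
+          goto out_flush;
+  if (!node_state(node, N_MEMORY))
+          goto out_flush;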
+
+Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org
+Fixes: b51820ebea65 ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x]
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
+Cc: Honggyu Kim <honggyu.kim@sk.com>
+Cc: Hyeongtak Ji <hyeongtak.ji@sk.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/paddr.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/damon/paddr.c
++++ b/mm/damon/paddr.c
+@@ -476,6 +476,10 @@ static unsigned long damon_pa_migrate_pa
+ if (list_empty(folio_list))
+ return nr_migrated;
+
++ if (target_nid < 0 || target_nid >= MAX_NUMNODES ||
++ !node_state(target_nid, N_MEMORY))
++ return nr_migrated;
++
+ noreclaim_flag = memalloc_noreclaim_save();
+
+ nid = folio_nid(lru_to_folio(folio_list));
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
- tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++--
- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++--
+ tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++--
+ tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
-diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
-index 3cf1e2a612ce..f3bcaa48df8f 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
-@@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service,
+@@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *nod
struct addrinfo *hints,
struct addrinfo **res)
{
if (err) {
const char *errstr;
-diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
-index 9934a68df237..e934dd26a59d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
-@@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service,
+@@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *nod
struct addrinfo *hints,
struct addrinfo **res)
{
if (err) {
const char *errstr;
---
-2.50.1
-
selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch
selftests-mptcp-connect-fix-c23-extension-warning.patch
selftests-mptcp-sockopt-fix-c23-extension-warning.patch
+mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch
+btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch
+btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch
+btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch
+crypto-zstd-convert-to-acomp.patch
+crypto-acomp-fix-cfi-failure-due-to-type-punning.patch
+btrfs-reorganize-logic-at-free_extent_buffer-for-better-readability.patch
+btrfs-add-comment-for-optimization-in-free_extent_buffer.patch
+btrfs-use-refcount_t-type-for-the-extent-buffer-reference-counter.patch
+btrfs-fix-subpage-deadlock-in-try_release_subpage_extent_buffer.patch
+btrfs-add-comments-on-the-extra-btrfs-specific-subpage-bitmaps.patch
+btrfs-rename-btrfs_subpage-structure.patch
+btrfs-subpage-keep-towrite-tag-until-folio-is-cleaned.patch
+xfs-decouple-xfs_trans_alloc_empty-from-xfs_trans_alloc.patch
+xfs-return-the-allocated-transaction-from-xfs_trans_alloc_empty.patch
+xfs-improve-the-comments-in-xfs_select_zone_nowait.patch
+xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch
+xfs-remove-unused-label-in-xfs_dax_notify_dev_failure.patch
--- /dev/null
+From stable+bounces-171702-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:11 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:19:55 -0400
+Subject: xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011959.244870-1-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 83a80e95e797a2a6d14bf7983e5e6eecf8f5facb ]
+
+xfs_trans_alloc_empty only shares the very basic transaction structure
+allocation and initialization with xfs_trans_alloc.
+
+Split out a new __xfs_trans_alloc helper for that and otherwise decouple
+xfs_trans_alloc_empty from xfs_trans_alloc.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Stable-dep-of: d2845519b072 ("xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_trans.c | 52 ++++++++++++++++++++++++++++------------------------
+ 1 file changed, 28 insertions(+), 24 deletions(-)
+
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -241,6 +241,28 @@ undo_blocks:
+ return error;
+ }
+
++static struct xfs_trans *
++__xfs_trans_alloc(
++ struct xfs_mount *mp,
++ uint flags)
++{
++ struct xfs_trans *tp;
++
++ ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp));
++
++ tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
++ if (!(flags & XFS_TRANS_NO_WRITECOUNT))
++ sb_start_intwrite(mp->m_super);
++ xfs_trans_set_context(tp);
++ tp->t_flags = flags;
++ tp->t_mountp = mp;
++ INIT_LIST_HEAD(&tp->t_items);
++ INIT_LIST_HEAD(&tp->t_busy);
++ INIT_LIST_HEAD(&tp->t_dfops);
++ tp->t_highest_agno = NULLAGNUMBER;
++ return tp;
++}
++
+ int
+ xfs_trans_alloc(
+ struct xfs_mount *mp,
+@@ -254,33 +276,16 @@ xfs_trans_alloc(
+ bool want_retry = true;
+ int error;
+
++ ASSERT(resp->tr_logres > 0);
++
+ /*
+ * Allocate the handle before we do our freeze accounting and setting up
+ * GFP_NOFS allocation context so that we avoid lockdep false positives
+ * by doing GFP_KERNEL allocations inside sb_start_intwrite().
+ */
+ retry:
+- tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
+- if (!(flags & XFS_TRANS_NO_WRITECOUNT))
+- sb_start_intwrite(mp->m_super);
+- xfs_trans_set_context(tp);
+-
+- /*
+- * Zero-reservation ("empty") transactions can't modify anything, so
+- * they're allowed to run while we're frozen.
+- */
+- WARN_ON(resp->tr_logres > 0 &&
+- mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+- ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
+- xfs_has_lazysbcount(mp));
+-
+- tp->t_flags = flags;
+- tp->t_mountp = mp;
+- INIT_LIST_HEAD(&tp->t_items);
+- INIT_LIST_HEAD(&tp->t_busy);
+- INIT_LIST_HEAD(&tp->t_dfops);
+- tp->t_highest_agno = NULLAGNUMBER;
+-
++ WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
++ tp = __xfs_trans_alloc(mp, flags);
+ error = xfs_trans_reserve(tp, resp, blocks, rtextents);
+ if (error == -ENOSPC && want_retry) {
+ xfs_trans_cancel(tp);
+@@ -329,9 +334,8 @@ xfs_trans_alloc_empty(
+ struct xfs_mount *mp,
+ struct xfs_trans **tpp)
+ {
+- struct xfs_trans_res resv = {0};
+-
+- return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
++ *tpp = __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT);
++ return 0;
+ }
+
+ /*
--- /dev/null
+From stable+bounces-171705-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:18 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:19:58 -0400
+Subject: xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, cen zhang <zzzccc427@gmail.com>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011959.244870-4-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit d2845519b0723c5d5a0266cbf410495f9b8fd65c ]
+
+Fix up xfs_inumbers to not pass the XFS_IBULK* flags into the flags
+argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
+
+Currently passing the wrong flags works for non-debug builds because
+the only XFS_IWALK* flag has the same encoding as the corresponding
+XFS_IBULK* flag, but in debug builds it can trigger an assert that no
+incorrect flag is passed. Instead just extract the relevant flag.
+
+Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
+Cc: <stable@vger.kernel.org> # v5.19
+Reported-by: cen zhang <zzzccc427@gmail.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_itable.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_itable.c
++++ b/fs/xfs/xfs_itable.c
+@@ -447,17 +447,21 @@ xfs_inumbers(
+ .breq = breq,
+ };
+ struct xfs_trans *tp;
++ unsigned int iwalk_flags = 0;
+ int error = 0;
+
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
+
++ if (breq->flags & XFS_IBULK_SAME_AG)
++ iwalk_flags |= XFS_IWALK_SAME_AG;
++
+ /*
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+ */
+ tp = xfs_trans_alloc_empty(breq->mp);
+- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
++ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
+ xfs_inumbers_walk, breq->icount, &ic);
+ xfs_trans_cancel(tp);
+
--- /dev/null
+From stable+bounces-171704-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:11 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:19:57 -0400
+Subject: xfs: improve the comments in xfs_select_zone_nowait
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011959.244870-3-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 60e02f956d77af31b85ed4e73abf85d5f12d0a98 ]
+
+The comment at the top of the function is outdated, and the parts that are
+still correct duplicate information in a comment inside the function.
+Remove the top-of-function comment and instead improve the comment inside
+the function.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Stable-dep-of: d2845519b072 ("xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_zone_alloc.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+--- a/fs/xfs/xfs_zone_alloc.c
++++ b/fs/xfs/xfs_zone_alloc.c
+@@ -654,13 +654,6 @@ static inline bool xfs_zoned_pack_tight(
+ !(ip->i_diflags & XFS_DIFLAG_APPEND);
+ }
+
+-/*
+- * Pick a new zone for writes.
+- *
+- * If we aren't using up our budget of open zones just open a new one from the
+- * freelist. Else try to find one that matches the expected data lifetime. If
+- * we don't find one that is good pick any zone that is available.
+- */
+ static struct xfs_open_zone *
+ xfs_select_zone_nowait(
+ struct xfs_mount *mp,
+@@ -688,7 +681,8 @@ xfs_select_zone_nowait(
+ goto out_unlock;
+
+ /*
+- * See if we can open a new zone and use that.
++ * See if we can open a new zone and use that so that data for different
++ * files is mixed as little as possible.
+ */
+ oz = xfs_try_open_zone(mp, write_hint);
+ if (oz)
--- /dev/null
+From stable+bounces-171706-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:16 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:19:59 -0400
+Subject: xfs: Remove unused label in xfs_dax_notify_dev_failure
+To: stable@vger.kernel.org
+Cc: Alan Huang <mmpgouride@gmail.com>, Christoph Hellwig <hch@lst.de>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011959.244870-5-sashal@kernel.org>
+
+From: Alan Huang <mmpgouride@gmail.com>
+
+[ Upstream commit 8c10b04f9fc1760cb79068073686d8866e59d40f ]
+
+Fixes: e967dc40d501 ("xfs: return the allocated transaction from xfs_trans_alloc_empty")
+Signed-off-by: Alan Huang <mmpgouride@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_notify_failure.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/xfs/xfs_notify_failure.c
++++ b/fs/xfs/xfs_notify_failure.c
+@@ -350,7 +350,6 @@ xfs_dax_notify_dev_failure(
+ error = -EFSCORRUPTED;
+ }
+
+-out:
+ /* Thaw the fs if it has been frozen before. */
+ if (mf_flags & MF_MEM_PRE_REMOVE)
+ xfs_dax_notify_failure_thaw(mp, kernel_frozen);
--- /dev/null
+From stable+bounces-171703-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:09 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:19:56 -0400
+Subject: xfs: return the allocated transaction from xfs_trans_alloc_empty
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011959.244870-2-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit d8e1ea43e5a314bc01ec059ce93396639dcf9112 ]
+
+xfs_trans_alloc_empty can't return errors, so return the allocated
+transaction directly instead of an output double pointer argument.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Stable-dep-of: d2845519b072 ("xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_refcount.c | 4 +---
+ fs/xfs/scrub/common.c | 3 ++-
+ fs/xfs/scrub/repair.c | 12 ++----------
+ fs/xfs/scrub/scrub.c | 5 +----
+ fs/xfs/xfs_attr_item.c | 5 +----
+ fs/xfs/xfs_discard.c | 12 +++---------
+ fs/xfs/xfs_fsmap.c | 4 +---
+ fs/xfs/xfs_icache.c | 5 +----
+ fs/xfs/xfs_inode.c | 7 ++-----
+ fs/xfs/xfs_itable.c | 18 +++---------------
+ fs/xfs/xfs_iwalk.c | 11 +++--------
+ fs/xfs/xfs_notify_failure.c | 5 +----
+ fs/xfs/xfs_qm.c | 10 ++--------
+ fs/xfs/xfs_rtalloc.c | 13 +++----------
+ fs/xfs/xfs_trans.c | 8 +++-----
+ fs/xfs/xfs_trans.h | 3 +--
+ fs/xfs/xfs_zone_gc.c | 5 +----
+ 17 files changed, 31 insertions(+), 99 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_refcount.c
++++ b/fs/xfs/libxfs/xfs_refcount.c
+@@ -2099,9 +2099,7 @@ xfs_refcount_recover_cow_leftovers(
+ * recording the CoW debris we cancel the (empty) transaction
+ * and everything goes away cleanly.
+ */
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
++ tp = xfs_trans_alloc_empty(mp);
+
+ if (isrt) {
+ xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT);
+--- a/fs/xfs/scrub/common.c
++++ b/fs/xfs/scrub/common.c
+@@ -870,7 +870,8 @@ int
+ xchk_trans_alloc_empty(
+ struct xfs_scrub *sc)
+ {
+- return xfs_trans_alloc_empty(sc->mp, &sc->tp);
++ sc->tp = xfs_trans_alloc_empty(sc->mp);
++ return 0;
+ }
+
+ /*
+--- a/fs/xfs/scrub/repair.c
++++ b/fs/xfs/scrub/repair.c
+@@ -1279,18 +1279,10 @@ xrep_trans_alloc_hook_dummy(
+ void **cookiep,
+ struct xfs_trans **tpp)
+ {
+- int error;
+-
+ *cookiep = current->journal_info;
+ current->journal_info = NULL;
+-
+- error = xfs_trans_alloc_empty(mp, tpp);
+- if (!error)
+- return 0;
+-
+- current->journal_info = *cookiep;
+- *cookiep = NULL;
+- return error;
++ *tpp = xfs_trans_alloc_empty(mp);
++ return 0;
+ }
+
+ /* Cancel a dummy transaction used by a live update hook function. */
+--- a/fs/xfs/scrub/scrub.c
++++ b/fs/xfs/scrub/scrub.c
+@@ -876,10 +876,7 @@ xchk_scrubv_open_by_handle(
+ struct xfs_inode *ip;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return NULL;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ error = xfs_iget(mp, tp, head->svh_ino, XCHK_IGET_FLAGS, 0, &ip);
+ xfs_trans_cancel(tp);
+ if (error)
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -616,10 +616,7 @@ xfs_attri_iread_extents(
+ struct xfs_trans *tp;
+ int error;
+
+- error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+- if (error)
+- return error;
+-
++ tp = xfs_trans_alloc_empty(ip->i_mount);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+--- a/fs/xfs/xfs_discard.c
++++ b/fs/xfs/xfs_discard.c
+@@ -189,9 +189,7 @@ xfs_trim_gather_extents(
+ */
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
++ tp = xfs_trans_alloc_empty(mp);
+
+ error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
+ if (error)
+@@ -583,9 +581,7 @@ xfs_trim_rtextents(
+ struct xfs_trans *tp;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
++ tp = xfs_trans_alloc_empty(mp);
+
+ /*
+ * Walk the free ranges between low and high. The query_range function
+@@ -701,9 +697,7 @@ xfs_trim_rtgroup_extents(
+ struct xfs_trans *tp;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
++ tp = xfs_trans_alloc_empty(mp);
+
+ /*
+ * Walk the free ranges between low and high. The query_range function
+--- a/fs/xfs/xfs_fsmap.c
++++ b/fs/xfs/xfs_fsmap.c
+@@ -1270,9 +1270,7 @@ xfs_getfsmap(
+ * buffer locking abilities to detect cycles in the rmapbt
+ * without deadlocking.
+ */
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- break;
++ tp = xfs_trans_alloc_empty(mp);
+
+ info.dev = handlers[i].dev;
+ info.last = false;
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -893,10 +893,7 @@ xfs_metafile_iget(
+ struct xfs_trans *tp;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
+ xfs_trans_cancel(tp);
+ return error;
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2932,12 +2932,9 @@ xfs_inode_reload_unlinked(
+ struct xfs_inode *ip)
+ {
+ struct xfs_trans *tp;
+- int error;
+-
+- error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+- if (error)
+- return error;
++ int error = 0;
+
++ tp = xfs_trans_alloc_empty(ip->i_mount);
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_inode_unlinked_incomplete(ip))
+ error = xfs_inode_reload_unlinked_bucket(tp, ip);
+--- a/fs/xfs/xfs_itable.c
++++ b/fs/xfs/xfs_itable.c
+@@ -239,14 +239,10 @@ xfs_bulkstat_one(
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+ */
+- error = xfs_trans_alloc_empty(breq->mp, &tp);
+- if (error)
+- goto out;
+-
++ tp = xfs_trans_alloc_empty(breq->mp);
+ error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp,
+ breq->startino, &bc);
+ xfs_trans_cancel(tp);
+-out:
+ kfree(bc.buf);
+
+ /*
+@@ -331,17 +327,13 @@ xfs_bulkstat(
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+ */
+- error = xfs_trans_alloc_empty(breq->mp, &tp);
+- if (error)
+- goto out;
+-
++ tp = xfs_trans_alloc_empty(breq->mp);
+ if (breq->flags & XFS_IBULK_SAME_AG)
+ iwalk_flags |= XFS_IWALK_SAME_AG;
+
+ error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags,
+ xfs_bulkstat_iwalk, breq->icount, &bc);
+ xfs_trans_cancel(tp);
+-out:
+ kfree(bc.buf);
+
+ /*
+@@ -464,14 +456,10 @@ xfs_inumbers(
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+ */
+- error = xfs_trans_alloc_empty(breq->mp, &tp);
+- if (error)
+- goto out;
+-
++ tp = xfs_trans_alloc_empty(breq->mp);
+ error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
+ xfs_inumbers_walk, breq->icount, &ic);
+ xfs_trans_cancel(tp);
+-out:
+
+ /*
+ * We found some inode groups, so clear the error status and return
+--- a/fs/xfs/xfs_iwalk.c
++++ b/fs/xfs/xfs_iwalk.c
+@@ -377,11 +377,8 @@ xfs_iwalk_run_callbacks(
+ if (!has_more)
+ return 0;
+
+- if (iwag->drop_trans) {
+- error = xfs_trans_alloc_empty(mp, &iwag->tp);
+- if (error)
+- return error;
+- }
++ if (iwag->drop_trans)
++ iwag->tp = xfs_trans_alloc_empty(mp);
+
+ /* ...and recreate the cursor just past where we left off. */
+ error = xfs_ialloc_read_agi(iwag->pag, iwag->tp, 0, agi_bpp);
+@@ -617,9 +614,7 @@ xfs_iwalk_ag_work(
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+ */
+- error = xfs_trans_alloc_empty(mp, &iwag->tp);
+- if (error)
+- goto out;
++ iwag->tp = xfs_trans_alloc_empty(mp);
+ iwag->drop_trans = 1;
+
+ error = xfs_iwalk_ag(iwag);
+--- a/fs/xfs/xfs_notify_failure.c
++++ b/fs/xfs/xfs_notify_failure.c
+@@ -279,10 +279,7 @@ xfs_dax_notify_dev_failure(
+ kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0;
+ }
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- goto out;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ start_gno = xfs_fsb_to_gno(mp, start_bno, type);
+ end_gno = xfs_fsb_to_gno(mp, end_bno, type);
+ while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -660,10 +660,7 @@ xfs_qm_load_metadir_qinos(
+ struct xfs_trans *tp;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ error = xfs_dqinode_load_parent(tp, &qi->qi_dirip);
+ if (error == -ENOENT) {
+ /* no quota dir directory, but we'll create one later */
+@@ -1755,10 +1752,7 @@ xfs_qm_qino_load(
+ struct xfs_inode *dp = NULL;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ if (xfs_has_metadir(mp)) {
+ error = xfs_dqinode_load_parent(tp, &dp);
+ if (error)
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -729,9 +729,7 @@ xfs_rtginode_ensure(
+ if (rtg->rtg_inodes[type])
+ return 0;
+
+- error = xfs_trans_alloc_empty(rtg_mount(rtg), &tp);
+- if (error)
+- return error;
++ tp = xfs_trans_alloc_empty(rtg_mount(rtg));
+ error = xfs_rtginode_load(rtg, type, tp);
+ xfs_trans_cancel(tp);
+
+@@ -1305,9 +1303,7 @@ xfs_growfs_rt_prep_groups(
+ if (!mp->m_rtdirip) {
+ struct xfs_trans *tp;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
++ tp = xfs_trans_alloc_empty(mp);
+ error = xfs_rtginode_load_parent(tp);
+ xfs_trans_cancel(tp);
+
+@@ -1674,10 +1670,7 @@ xfs_rtmount_inodes(
+ struct xfs_rtgroup *rtg = NULL;
+ int error;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ if (xfs_has_rtgroups(mp) && mp->m_sb.sb_rgcount > 0) {
+ error = xfs_rtginode_load_parent(tp);
+ if (error)
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -329,13 +329,11 @@ retry:
+ * where we can be grabbing buffers at the same time that freeze is trying to
+ * drain the buffer LRU list.
+ */
+-int
++struct xfs_trans *
+ xfs_trans_alloc_empty(
+- struct xfs_mount *mp,
+- struct xfs_trans **tpp)
++ struct xfs_mount *mp)
+ {
+- *tpp = __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT);
+- return 0;
++ return __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT);
+ }
+
+ /*
+--- a/fs/xfs/xfs_trans.h
++++ b/fs/xfs/xfs_trans.h
+@@ -168,8 +168,7 @@ int xfs_trans_alloc(struct xfs_mount *m
+ struct xfs_trans **tpp);
+ int xfs_trans_reserve_more(struct xfs_trans *tp,
+ unsigned int blocks, unsigned int rtextents);
+-int xfs_trans_alloc_empty(struct xfs_mount *mp,
+- struct xfs_trans **tpp);
++struct xfs_trans *xfs_trans_alloc_empty(struct xfs_mount *mp);
+ void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
+
+ int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target,
+--- a/fs/xfs/xfs_zone_gc.c
++++ b/fs/xfs/xfs_zone_gc.c
+@@ -328,10 +328,7 @@ xfs_zone_gc_query(
+ iter->rec_idx = 0;
+ iter->rec_count = 0;
+
+- error = xfs_trans_alloc_empty(mp, &tp);
+- if (error)
+- return error;
+-
++ tp = xfs_trans_alloc_empty(mp);
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ cur = xfs_rtrmapbt_init_cursor(tp, rtg);
+ error = xfs_rmap_query_range(cur, &ri_low, &ri_high,