From: Greg Kroah-Hartman Date: Fri, 22 Aug 2025 06:20:35 +0000 (+0200) Subject: 6.16-stable patches X-Git-Tag: v6.16.3~65 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7795c8c17d110015e05401fcde54c9e994be423a;p=thirdparty%2Fkernel%2Fstable-queue.git 6.16-stable patches added patches: btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch btrfs-add-comment-for-optimization-in-free_extent_buffer.patch btrfs-add-comments-on-the-extra-btrfs-specific-subpage-bitmaps.patch btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch btrfs-fix-subpage-deadlock-in-try_release_subpage_extent_buffer.patch btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch btrfs-rename-btrfs_subpage-structure.patch btrfs-reorganize-logic-at-free_extent_buffer-for-better-readability.patch btrfs-subpage-keep-towrite-tag-until-folio-is-cleaned.patch btrfs-use-refcount_t-type-for-the-extent-buffer-reference-counter.patch crypto-acomp-fix-cfi-failure-due-to-type-punning.patch crypto-zstd-convert-to-acomp.patch mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch xfs-decouple-xfs_trans_alloc_empty-from-xfs_trans_alloc.patch xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch xfs-improve-the-comments-in-xfs_select_zone_nowait.patch xfs-remove-unused-label-in-xfs_dax_notify_dev_failure.patch xfs-return-the-allocated-transaction-from-xfs_trans_alloc_empty.patch --- diff --git a/queue-6.16/btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch b/queue-6.16/btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch new file mode 100644 index 0000000000..5e6afbbeb2 --- /dev/null +++ b/queue-6.16/btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch @@ -0,0 +1,48 @@ +From stable+bounces-171688-greg=kroah.com@vger.kernel.org Tue Aug 19 02:39:42 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 20:38:40 -0400 +Subject: btrfs: 
abort transaction on unexpected eb generation at btrfs_copy_root() +To: stable@vger.kernel.org +Cc: Filipe Manana , Daniel Vacek , Qu Wenruo , David Sterba , Sasha Levin +Message-ID: <20250819003840.226789-1-sashal@kernel.org> + +From: Filipe Manana + +[ Upstream commit 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 ] + +If we find an unexpected generation for the extent buffer we are cloning +at btrfs_copy_root(), we just WARN_ON() and don't error out and abort the +transaction, meaning we allow to persist metadata with an unexpected +generation. Instead of warning only, abort the transaction and return +-EUCLEAN. + +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Daniel Vacek +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -283,7 +283,14 @@ int btrfs_copy_root(struct btrfs_trans_h + + write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); + +- WARN_ON(btrfs_header_generation(buf) > trans->transid); ++ if (unlikely(btrfs_header_generation(buf) > trans->transid)) { ++ btrfs_tree_unlock(cow); ++ free_extent_buffer(cow); ++ ret = -EUCLEAN; ++ btrfs_abort_transaction(trans, ret); ++ return ret; ++ } ++ + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else diff --git a/queue-6.16/btrfs-add-comment-for-optimization-in-free_extent_buffer.patch b/queue-6.16/btrfs-add-comment-for-optimization-in-free_extent_buffer.patch new file mode 100644 index 0000000000..997307b556 --- /dev/null +++ b/queue-6.16/btrfs-add-comment-for-optimization-in-free_extent_buffer.patch @@ -0,0 +1,37 @@ +From stable+bounces-171698-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:43 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:15:29 -0400 +Subject: btrfs: add comment for 
optimization in free_extent_buffer() +To: stable@vger.kernel.org +Cc: Filipe Manana , Boris Burkov , David Sterba , Sasha Levin +Message-ID: <20250819011531.242846-2-sashal@kernel.org> + +From: Filipe Manana + +[ Upstream commit 2697b6159744e5afae0f7715da9f830ba6f9e45a ] + +There's this special atomic compare and exchange logic which serves to +avoid locking the extent buffers refs_lock spinlock and therefore reduce +lock contention, so add a comment to make it more obvious. + +Reviewed-by: Boris Burkov +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: ad580dfa388f ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -3493,6 +3493,7 @@ void free_extent_buffer(struct extent_bu + break; + } + ++ /* Optimization to avoid locking eb->refs_lock. 
*/ + if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1)) + return; + } diff --git a/queue-6.16/btrfs-add-comments-on-the-extra-btrfs-specific-subpage-bitmaps.patch b/queue-6.16/btrfs-add-comments-on-the-extra-btrfs-specific-subpage-bitmaps.patch new file mode 100644 index 0000000000..ca4975cc44 --- /dev/null +++ b/queue-6.16/btrfs-add-comments-on-the-extra-btrfs-specific-subpage-bitmaps.patch @@ -0,0 +1,78 @@ +From stable+bounces-172264-greg=kroah.com@vger.kernel.org Fri Aug 22 05:36:17 2025 +From: Sasha Levin +Date: Thu, 21 Aug 2025 23:35:15 -0400 +Subject: btrfs: add comments on the extra btrfs specific subpage bitmaps +To: stable@vger.kernel.org +Cc: Qu Wenruo , David Sterba , Sasha Levin +Message-ID: <20250822033527.1065200-1-sashal@kernel.org> + +From: Qu Wenruo + +[ Upstream commit 1e17738d6b76cdc76d240d64de87fa66ba2365f7 ] + +Unlike the iomap_folio_state structure, the btrfs_subpage structure has a +lot of extra sub-bitmaps, namely: + +- writeback sub-bitmap +- locked sub-bitmap + iomap_folio_state uses an atomic for writeback tracking, while it has + no per-block locked tracking. + + This is because iomap always locks a single folio, and submits dirty + blocks with that folio locked. + + But btrfs has async delalloc ranges (for compression), which are queued + with their range locked, until the compression is done, then marks the + involved range writeback and unlocked. + + This means a range can be unlocked and marked writeback at seemingly + random timing, thus it needs the extra tracking. + + This needs a huge rework on the lifespan of async delalloc range + before we can remove/simplify these two sub-bitmaps. + +- ordered sub-bitmap +- checked sub-bitmap + These are for COW-fixup, but as I mentioned in the past, the COW-fixup + is not really needed anymore and these two flags are already marked + deprecated, and will be removed in the near future after comprehensive + tests. 
+ +Add related comments to indicate we're actively trying to align the +sub-bitmaps to the iomap ones. + +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: b1511360c8ac ("btrfs: subpage: keep TOWRITE tag until folio is cleaned") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/subpage.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/fs/btrfs/subpage.h ++++ b/fs/btrfs/subpage.h +@@ -33,8 +33,22 @@ enum { + btrfs_bitmap_nr_uptodate = 0, + btrfs_bitmap_nr_dirty, + btrfs_bitmap_nr_writeback, ++ /* ++ * The ordered and checked flags are for COW fixup, already marked ++ * deprecated, and will be removed eventually. ++ */ + btrfs_bitmap_nr_ordered, + btrfs_bitmap_nr_checked, ++ ++ /* ++ * The locked bit is for async delalloc range (compression), currently ++ * async extent is queued with the range locked, until the compression ++ * is done. ++ * So an async extent can unlock the range at any random timing. ++ * ++ * This will need a rework on the async extent lifespan (mark writeback ++ * and do compression) before deprecating this flag. 
++ */ + btrfs_bitmap_nr_locked, + btrfs_bitmap_nr_max + }; diff --git a/queue-6.16/btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch b/queue-6.16/btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch new file mode 100644 index 0000000000..2bf9922590 --- /dev/null +++ b/queue-6.16/btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch @@ -0,0 +1,78 @@ +From stable+bounces-171684-greg=kroah.com@vger.kernel.org Tue Aug 19 02:16:51 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 20:16:39 -0400 +Subject: btrfs: always abort transaction on failure to add block group to free space tree +To: stable@vger.kernel.org +Cc: Filipe Manana , Boris Burkov , David Sterba , Sasha Levin +Message-ID: <20250819001639.204027-2-sashal@kernel.org> + +From: Filipe Manana + +[ Upstream commit 1f06c942aa709d397cf6bed577a0d10a61509667 ] + +Only one of the callers of __add_block_group_free_space() aborts the +transaction if the call fails, while the others don't do it and it's +either never done up the call chain or much higher in the call chain. + +So make sure we abort the transaction at __add_block_group_free_space() +if it fails, which brings a couple benefits: + +1) If some call chain never aborts the transaction, we avoid having some + metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is + cleared when we enter __add_block_group_free_space() and therefore + __add_block_group_free_space() is never called again to add the block + group items to the free space tree, since the function is only called + when that flag is set in a block group; + +2) If the call chain already aborts the transaction, then we get a better + trace that points to the exact step from __add_block_group_free_space() + which failed, which is better for analysis. + +So abort the transaction at __add_block_group_free_space() if any of its +steps fails. 
+
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Boris Burkov 
+Signed-off-by: Filipe Manana 
+Reviewed-by: David Sterba 
+Signed-off-by: David Sterba 
+Signed-off-by: Sasha Levin 
+Signed-off-by: Greg Kroah-Hartman 
+---
+ fs/btrfs/free-space-tree.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -1431,12 +1431,17 @@ static int __add_block_group_free_space(
+ 	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
+ 
+ 	ret = add_new_free_space_info(trans, block_group, path);
+-	if (ret)
++	if (ret) {
++		btrfs_abort_transaction(trans, ret);
+ 		return ret;
++	}
+ 
+-	return __add_to_free_space_tree(trans, block_group, path,
+-					block_group->start,
+-					block_group->length);
++	ret = __add_to_free_space_tree(trans, block_group, path,
++				       block_group->start, block_group->length);
++	if (ret)
++		btrfs_abort_transaction(trans, ret);
++
++	return ret;
+ }
+ 
+ int add_block_group_free_space(struct btrfs_trans_handle *trans,
+@@ -1461,9 +1466,6 @@ int add_block_group_free_space(struct bt
+ 	}
+ 
+ 	ret = __add_block_group_free_space(trans, block_group, path);
+-	if (ret)
+-		btrfs_abort_transaction(trans, ret);
+-
+ out:
+ 	btrfs_free_path(path);
+ 	mutex_unlock(&block_group->free_space_lock);
diff --git a/queue-6.16/btrfs-fix-subpage-deadlock-in-try_release_subpage_extent_buffer.patch b/queue-6.16/btrfs-fix-subpage-deadlock-in-try_release_subpage_extent_buffer.patch
new file mode 100644
index 0000000000..f0c982e6a6
--- /dev/null
+++ b/queue-6.16/btrfs-fix-subpage-deadlock-in-try_release_subpage_extent_buffer.patch
@@ -0,0 +1,203 @@
+From stable+bounces-171701-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:44 2025
+From: Sasha Levin 
+Date: Mon, 18 Aug 2025 21:15:31 -0400
+Subject: btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()
+To: stable@vger.kernel.org
+Cc: Leo Martins , Boris Burkov , Qu Wenruo , David Sterba , Sasha Levin 
+Message-ID: 
<20250819011531.242846-4-sashal@kernel.org> + +From: Leo Martins + +[ Upstream commit ad580dfa388fabb52af033e3f8cc5d04be985e54 ] + +There is a potential deadlock that can happen in +try_release_subpage_extent_buffer() because the irq-safe xarray spin +lock fs_info->buffer_tree is being acquired before the irq-unsafe +eb->refs_lock. + +This leads to the potential race: +// T1 (random eb->refs user) // T2 (release folio) + +spin_lock(&eb->refs_lock); +// interrupt +end_bbio_meta_write() + btrfs_meta_folio_clear_writeback() + btree_release_folio() + folio_test_writeback() //false + try_release_extent_buffer() + try_release_subpage_extent_buffer() + xa_lock_irq(&fs_info->buffer_tree) + spin_lock(&eb->refs_lock); // blocked; held by T1 + buffer_tree_clear_mark() + xas_lock_irqsave() // blocked; held by T2 + +I believe that the spin lock can safely be replaced by an rcu_read_lock. +The xa_for_each loop does not need the spin lock as it's already +internally protected by the rcu_read_lock. The extent buffer is also +protected by the rcu_read_lock so it won't be freed before we take the +eb->refs_lock and check the ref count. + +The rcu_read_lock is taken and released every iteration, just like the +spin lock, which means we're not protected against concurrent +insertions into the xarray. This is fine because we rely on +folio->private to detect if there are any ebs remaining in the folio. + +There is already some precedent for this with find_extent_buffer_nolock, +which loads an extent buffer from the xarray with only rcu_read_lock. 
+ +lockdep warning: + + ===================================================== + WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected + 6.16.0-0_fbk701_debug_rc0_123_g4c06e63b9203 #1 Tainted: G E N + ----------------------------------------------------- + kswapd0/66 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: + ffff000011ffd600 (&eb->refs_lock){+.+.}-{3:3}, at: try_release_extent_buffer+0x18c/0x560 + +and this task is already holding: + ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 + which would create a new lock dependency: + (&buffer_xa_class){-.-.}-{3:3} -> (&eb->refs_lock){+.+.}-{3:3} + +but this new dependency connects a HARDIRQ-irq-safe lock: + (&buffer_xa_class){-.-.}-{3:3} + +... which became HARDIRQ-irq-safe at: + lock_acquire+0x178/0x358 + _raw_spin_lock_irqsave+0x60/0x88 + buffer_tree_clear_mark+0xc4/0x160 + end_bbio_meta_write+0x238/0x398 + btrfs_bio_end_io+0x1f8/0x330 + btrfs_orig_write_end_io+0x1c4/0x2c0 + bio_endio+0x63c/0x678 + blk_update_request+0x1c4/0xa00 + blk_mq_end_request+0x54/0x88 + virtblk_request_done+0x124/0x1d0 + blk_mq_complete_request+0x84/0xa0 + virtblk_done+0x130/0x238 + vring_interrupt+0x130/0x288 + __handle_irq_event_percpu+0x1e8/0x708 + handle_irq_event+0x98/0x1b0 + handle_fasteoi_irq+0x264/0x7c0 + generic_handle_domain_irq+0xa4/0x108 + gic_handle_irq+0x7c/0x1a0 + do_interrupt_handler+0xe4/0x148 + el1_interrupt+0x30/0x50 + el1h_64_irq_handler+0x14/0x20 + el1h_64_irq+0x6c/0x70 + _raw_spin_unlock_irq+0x38/0x70 + __run_timer_base+0xdc/0x5e0 + run_timer_softirq+0xa0/0x138 + handle_softirqs.llvm.13542289750107964195+0x32c/0xbd0 + ____do_softirq.llvm.17674514681856217165+0x18/0x28 + call_on_irq_stack+0x24/0x30 + __irq_exit_rcu+0x164/0x430 + irq_exit_rcu+0x18/0x88 + el1_interrupt+0x34/0x50 + el1h_64_irq_handler+0x14/0x20 + el1h_64_irq+0x6c/0x70 + arch_local_irq_enable+0x4/0x8 + do_idle+0x1a0/0x3b8 + cpu_startup_entry+0x60/0x80 + rest_init+0x204/0x228 + start_kernel+0x394/0x3f0 + 
__primary_switched+0x8c/0x8958 + +to a HARDIRQ-irq-unsafe lock: + (&eb->refs_lock){+.+.}-{3:3} + +... which became HARDIRQ-irq-unsafe at: + ... + lock_acquire+0x178/0x358 + _raw_spin_lock+0x4c/0x68 + free_extent_buffer_stale+0x2c/0x170 + btrfs_read_sys_array+0x1b0/0x338 + open_ctree+0xeb0/0x1df8 + btrfs_get_tree+0xb60/0x1110 + vfs_get_tree+0x8c/0x250 + fc_mount+0x20/0x98 + btrfs_get_tree+0x4a4/0x1110 + vfs_get_tree+0x8c/0x250 + do_new_mount+0x1e0/0x6c0 + path_mount+0x4ec/0xa58 + __arm64_sys_mount+0x370/0x490 + invoke_syscall+0x6c/0x208 + el0_svc_common+0x14c/0x1b8 + do_el0_svc+0x4c/0x60 + el0_svc+0x4c/0x160 + el0t_64_sync_handler+0x70/0x100 + el0t_64_sync+0x168/0x170 + +other info that might help us debug this: + Possible interrupt unsafe locking scenario: + CPU0 CPU1 + ---- ---- + lock(&eb->refs_lock); + local_irq_disable(); + lock(&buffer_xa_class); + lock(&eb->refs_lock); + + lock(&buffer_xa_class); + + *** DEADLOCK *** + 2 locks held by kswapd0/66: + #0: ffff800085506e40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0xe8/0xe50 + #1: ffff0000c1d91b88 (&buffer_xa_class){-.-.}-{3:3}, at: try_release_extent_buffer+0x13c/0x560 + +Link: https://www.kernel.org/doc/Documentation/locking/lockdep-design.rst#:~:text=Multi%2Dlock%20dependency%20rules%3A +Fixes: 19d7f65f032f ("btrfs: convert the buffer_radix to an xarray") +CC: stable@vger.kernel.org # 6.16+ +Reviewed-by: Boris Burkov +Reviewed-by: Qu Wenruo +Signed-off-by: Leo Martins +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -4304,15 +4304,18 @@ static int try_release_subpage_extent_bu + unsigned long end = index + (PAGE_SIZE >> fs_info->sectorsize_bits) - 1; + int ret; + +- xa_lock_irq(&fs_info->buffer_tree); ++ rcu_read_lock(); + xa_for_each_range(&fs_info->buffer_tree, index, eb, start, end) { + /* + * The same as 
try_release_extent_buffer(), to ensure the eb + * won't disappear out from under us. + */ + spin_lock(&eb->refs_lock); ++ rcu_read_unlock(); ++ + if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { + spin_unlock(&eb->refs_lock); ++ rcu_read_lock(); + continue; + } + +@@ -4331,11 +4334,10 @@ static int try_release_subpage_extent_bu + * check the folio private at the end. And + * release_extent_buffer() will release the refs_lock. + */ +- xa_unlock_irq(&fs_info->buffer_tree); + release_extent_buffer(eb); +- xa_lock_irq(&fs_info->buffer_tree); ++ rcu_read_lock(); + } +- xa_unlock_irq(&fs_info->buffer_tree); ++ rcu_read_unlock(); + + /* + * Finally to check if we have cleared folio private, as if we have +@@ -4348,7 +4350,6 @@ static int try_release_subpage_extent_bu + ret = 0; + spin_unlock(&folio->mapping->i_private_lock); + return ret; +- + } + + int try_release_extent_buffer(struct folio *folio) diff --git a/queue-6.16/btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch b/queue-6.16/btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch new file mode 100644 index 0000000000..1808933b32 --- /dev/null +++ b/queue-6.16/btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch @@ -0,0 +1,46 @@ +From stable+bounces-171683-greg=kroah.com@vger.kernel.org Tue Aug 19 02:16:52 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 20:16:38 -0400 +Subject: btrfs: move transaction aborts to the error site in add_block_group_free_space() +To: stable@vger.kernel.org +Cc: David Sterba , Filipe Manana , Sasha Levin +Message-ID: <20250819001639.204027-1-sashal@kernel.org> + +From: David Sterba + +[ Upstream commit b63c8c1ede4407835cb8c8bed2014d96619389f3 ] + +Transaction aborts should be done next to the place the error happens, +which was not done in add_block_group_free_space(). 
+ +Reviewed-by: Filipe Manana +Signed-off-by: David Sterba +Stable-dep-of: 1f06c942aa70 ("btrfs: always abort transaction on failure to add block group to free space tree") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/free-space-tree.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/free-space-tree.c ++++ b/fs/btrfs/free-space-tree.c +@@ -1456,16 +1456,17 @@ int add_block_group_free_space(struct bt + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; ++ btrfs_abort_transaction(trans, ret); + goto out; + } + + ret = __add_block_group_free_space(trans, block_group, path); ++ if (ret) ++ btrfs_abort_transaction(trans, ret); + + out: + btrfs_free_path(path); + mutex_unlock(&block_group->free_space_lock); +- if (ret) +- btrfs_abort_transaction(trans, ret); + return ret; + } + diff --git a/queue-6.16/btrfs-rename-btrfs_subpage-structure.patch b/queue-6.16/btrfs-rename-btrfs_subpage-structure.patch new file mode 100644 index 0000000000..19c0bc61a2 --- /dev/null +++ b/queue-6.16/btrfs-rename-btrfs_subpage-structure.patch @@ -0,0 +1,852 @@ +From stable+bounces-172265-greg=kroah.com@vger.kernel.org Fri Aug 22 05:35:38 2025 +From: Sasha Levin +Date: Thu, 21 Aug 2025 23:35:16 -0400 +Subject: btrfs: rename btrfs_subpage structure +To: stable@vger.kernel.org +Cc: Qu Wenruo , David Sterba , Sasha Levin +Message-ID: <20250822033527.1065200-2-sashal@kernel.org> + +From: Qu Wenruo + +[ Upstream commit 582cd4bad4332cca95c578e99442eb148366eb82 ] + +With the incoming large data folios support, the structure name +btrfs_subpage is no longer correct, as for we can have multiple blocks +inside a large folio, and the block size is still page size. + +So to follow the schema of iomap, rename btrfs_subpage to +btrfs_folio_state, along with involved enums. 
+ +There are still exported functions with "btrfs_subpage_" prefix, and I +believe for metadata the name "subpage" will stay forever as we will +never allocate a folio larger than nodesize anyway. + +The full cleanup of the word "subpage" will happen in much smaller steps +in the future. + +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: b1511360c8ac ("btrfs: subpage: keep TOWRITE tag until folio is cleaned") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 28 ++--- + fs/btrfs/inode.c | 8 - + fs/btrfs/subpage.c | 239 ++++++++++++++++++++++++--------------------------- + fs/btrfs/subpage.h | 31 ++++-- + 4 files changed, 156 insertions(+), 150 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -782,7 +782,7 @@ static void submit_extent_folio(struct b + + static int attach_extent_buffer_folio(struct extent_buffer *eb, + struct folio *folio, +- struct btrfs_subpage *prealloc) ++ struct btrfs_folio_state *prealloc) + { + struct btrfs_fs_info *fs_info = eb->fs_info; + int ret = 0; +@@ -806,7 +806,7 @@ static int attach_extent_buffer_folio(st + + /* Already mapped, just free prealloc */ + if (folio_test_private(folio)) { +- btrfs_free_subpage(prealloc); ++ btrfs_free_folio_state(prealloc); + return 0; + } + +@@ -815,7 +815,7 @@ static int attach_extent_buffer_folio(st + folio_attach_private(folio, prealloc); + else + /* Do new allocation to attach subpage */ +- ret = btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); ++ ret = btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA); + return ret; + } + +@@ -831,7 +831,7 @@ int set_folio_extent_mapped(struct folio + fs_info = folio_to_fs_info(folio); + + if (btrfs_is_subpage(fs_info, folio)) +- return btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA); ++ return btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA); + + folio_attach_private(folio, (void 
*)EXTENT_FOLIO_PRIVATE); + return 0; +@@ -848,7 +848,7 @@ void clear_folio_extent_mapped(struct fo + + fs_info = folio_to_fs_info(folio); + if (btrfs_is_subpage(fs_info, folio)) +- return btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA); ++ return btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA); + + folio_detach_private(folio); + } +@@ -2731,13 +2731,13 @@ static int extent_buffer_under_io(const + + static bool folio_range_has_eb(struct folio *folio) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + + lockdep_assert_held(&folio->mapping->i_private_lock); + + if (folio_test_private(folio)) { +- subpage = folio_get_private(folio); +- if (atomic_read(&subpage->eb_refs)) ++ bfs = folio_get_private(folio); ++ if (atomic_read(&bfs->eb_refs)) + return true; + } + return false; +@@ -2787,7 +2787,7 @@ static void detach_extent_buffer_folio(c + * attached to one dummy eb, no sharing. + */ + if (!mapped) { +- btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); ++ btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA); + return; + } + +@@ -2798,7 +2798,7 @@ static void detach_extent_buffer_folio(c + * page range and no unfinished IO. + */ + if (!folio_range_has_eb(folio)) +- btrfs_detach_subpage(fs_info, folio, BTRFS_SUBPAGE_METADATA); ++ btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_METADATA); + + spin_unlock(&mapping->i_private_lock); + } +@@ -3141,7 +3141,7 @@ static bool check_eb_alignment(struct bt + * The caller needs to free the existing folios and retry using the same order. 
+ */ + static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i, +- struct btrfs_subpage *prealloc, ++ struct btrfs_folio_state *prealloc, + struct extent_buffer **found_eb_ret) + { + +@@ -3224,7 +3224,7 @@ struct extent_buffer *alloc_extent_buffe + int attached = 0; + struct extent_buffer *eb; + struct extent_buffer *existing_eb = NULL; +- struct btrfs_subpage *prealloc = NULL; ++ struct btrfs_folio_state *prealloc = NULL; + u64 lockdep_owner = owner_root; + bool page_contig = true; + int uptodate = 1; +@@ -3269,7 +3269,7 @@ struct extent_buffer *alloc_extent_buffe + * manually if we exit earlier. + */ + if (btrfs_meta_is_subpage(fs_info)) { +- prealloc = btrfs_alloc_subpage(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA); ++ prealloc = btrfs_alloc_folio_state(fs_info, PAGE_SIZE, BTRFS_SUBPAGE_METADATA); + if (IS_ERR(prealloc)) { + ret = PTR_ERR(prealloc); + goto out; +@@ -3280,7 +3280,7 @@ reallocate: + /* Allocate all pages first. */ + ret = alloc_eb_folio_array(eb, true); + if (ret < 0) { +- btrfs_free_subpage(prealloc); ++ btrfs_free_folio_state(prealloc); + goto out; + } + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -7364,13 +7364,13 @@ struct extent_map *btrfs_create_io_em(st + static void wait_subpage_spinlock(struct folio *folio) + { + struct btrfs_fs_info *fs_info = folio_to_fs_info(folio); +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + + if (!btrfs_is_subpage(fs_info, folio)) + return; + + ASSERT(folio_test_private(folio) && folio_get_private(folio)); +- subpage = folio_get_private(folio); ++ bfs = folio_get_private(folio); + + /* + * This may look insane as we just acquire the spinlock and release it, +@@ -7383,8 +7383,8 @@ static void wait_subpage_spinlock(struct + * Here we just acquire the spinlock so that all existing callers + * should exit and we're safe to release/invalidate the page. 
+ */ +- spin_lock_irq(&subpage->lock); +- spin_unlock_irq(&subpage->lock); ++ spin_lock_irq(&bfs->lock); ++ spin_unlock_irq(&bfs->lock); + } + + static int btrfs_launder_folio(struct folio *folio) +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -49,7 +49,7 @@ + * Implementation: + * + * - Common +- * Both metadata and data will use a new structure, btrfs_subpage, to ++ * Both metadata and data will use a new structure, btrfs_folio_state, to + * record the status of each sector inside a page. This provides the extra + * granularity needed. + * +@@ -63,10 +63,10 @@ + * This means a slightly higher tree locking latency. + */ + +-int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, +- struct folio *folio, enum btrfs_subpage_type type) ++int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, enum btrfs_folio_type type) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + + /* For metadata we don't support large folio yet. */ + if (type == BTRFS_SUBPAGE_METADATA) +@@ -87,18 +87,18 @@ int btrfs_attach_subpage(const struct bt + if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) + return 0; + +- subpage = btrfs_alloc_subpage(fs_info, folio_size(folio), type); +- if (IS_ERR(subpage)) +- return PTR_ERR(subpage); ++ bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type); ++ if (IS_ERR(bfs)) ++ return PTR_ERR(bfs); + +- folio_attach_private(folio, subpage); ++ folio_attach_private(folio, bfs); + return 0; + } + +-void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio, +- enum btrfs_subpage_type type) ++void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, ++ enum btrfs_folio_type type) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + + /* Either not subpage, or the folio already has private attached. 
*/ + if (!folio_test_private(folio)) +@@ -108,15 +108,15 @@ void btrfs_detach_subpage(const struct b + if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) + return; + +- subpage = folio_detach_private(folio); +- ASSERT(subpage); +- btrfs_free_subpage(subpage); ++ bfs = folio_detach_private(folio); ++ ASSERT(bfs); ++ btrfs_free_folio_state(bfs); + } + +-struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, +- size_t fsize, enum btrfs_subpage_type type) ++struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info, ++ size_t fsize, enum btrfs_folio_type type) + { +- struct btrfs_subpage *ret; ++ struct btrfs_folio_state *ret; + unsigned int real_size; + + ASSERT(fs_info->sectorsize < fsize); +@@ -136,11 +136,6 @@ struct btrfs_subpage *btrfs_alloc_subpag + return ret; + } + +-void btrfs_free_subpage(struct btrfs_subpage *subpage) +-{ +- kfree(subpage); +-} +- + /* + * Increase the eb_refs of current subpage. + * +@@ -152,7 +147,7 @@ void btrfs_free_subpage(struct btrfs_sub + */ + void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + + if (!btrfs_meta_is_subpage(fs_info)) + return; +@@ -160,13 +155,13 @@ void btrfs_folio_inc_eb_refs(const struc + ASSERT(folio_test_private(folio) && folio->mapping); + lockdep_assert_held(&folio->mapping->i_private_lock); + +- subpage = folio_get_private(folio); +- atomic_inc(&subpage->eb_refs); ++ bfs = folio_get_private(folio); ++ atomic_inc(&bfs->eb_refs); + } + + void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + + if (!btrfs_meta_is_subpage(fs_info)) + return; +@@ -174,9 +169,9 @@ void btrfs_folio_dec_eb_refs(const struc + ASSERT(folio_test_private(folio) && folio->mapping); + lockdep_assert_held(&folio->mapping->i_private_lock); + +- subpage = 
folio_get_private(folio); +- ASSERT(atomic_read(&subpage->eb_refs)); +- atomic_dec(&subpage->eb_refs); ++ bfs = folio_get_private(folio); ++ ASSERT(atomic_read(&bfs->eb_refs)); ++ atomic_dec(&bfs->eb_refs); + } + + static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, +@@ -228,7 +223,7 @@ static void btrfs_subpage_clamp_range(st + static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); + const int nbits = (len >> fs_info->sectorsize_bits); + unsigned long flags; +@@ -238,7 +233,7 @@ static bool btrfs_subpage_end_and_test_l + + btrfs_subpage_assert(fs_info, folio, start, len); + +- spin_lock_irqsave(&subpage->lock, flags); ++ spin_lock_irqsave(&bfs->lock, flags); + /* + * We have call sites passing @lock_page into + * extent_clear_unlock_delalloc() for compression path. +@@ -246,18 +241,18 @@ static bool btrfs_subpage_end_and_test_l + * This @locked_page is locked by plain lock_page(), thus its + * subpage::locked is 0. Handle them in a special way. 
+ */ +- if (atomic_read(&subpage->nr_locked) == 0) { +- spin_unlock_irqrestore(&subpage->lock, flags); ++ if (atomic_read(&bfs->nr_locked) == 0) { ++ spin_unlock_irqrestore(&bfs->lock, flags); + return true; + } + +- for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) { +- clear_bit(bit, subpage->bitmaps); ++ for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) { ++ clear_bit(bit, bfs->bitmaps); + cleared++; + } +- ASSERT(atomic_read(&subpage->nr_locked) >= cleared); +- last = atomic_sub_and_test(cleared, &subpage->nr_locked); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ ASSERT(atomic_read(&bfs->nr_locked) >= cleared); ++ last = atomic_sub_and_test(cleared, &bfs->nr_locked); ++ spin_unlock_irqrestore(&bfs->lock, flags); + return last; + } + +@@ -280,7 +275,7 @@ static bool btrfs_subpage_end_and_test_l + void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + + ASSERT(folio_test_locked(folio)); + +@@ -296,7 +291,7 @@ void btrfs_folio_end_lock(const struct b + * Since we own the page lock, no one else could touch subpage::locked + * and we are safe to do several atomic operations without spinlock. + */ +- if (atomic_read(&subpage->nr_locked) == 0) { ++ if (atomic_read(&bfs->nr_locked) == 0) { + /* No subpage lock, locked by plain lock_page(). 
*/ + folio_unlock(folio); + return; +@@ -310,7 +305,7 @@ void btrfs_folio_end_lock(const struct b + void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, + struct folio *folio, unsigned long bitmap) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); + const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked; + unsigned long flags; +@@ -323,42 +318,42 @@ void btrfs_folio_end_lock_bitmap(const s + return; + } + +- if (atomic_read(&subpage->nr_locked) == 0) { ++ if (atomic_read(&bfs->nr_locked) == 0) { + /* No subpage lock, locked by plain lock_page(). */ + folio_unlock(folio); + return; + } + +- spin_lock_irqsave(&subpage->lock, flags); ++ spin_lock_irqsave(&bfs->lock, flags); + for_each_set_bit(bit, &bitmap, blocks_per_folio) { +- if (test_and_clear_bit(bit + start_bit, subpage->bitmaps)) ++ if (test_and_clear_bit(bit + start_bit, bfs->bitmaps)) + cleared++; + } +- ASSERT(atomic_read(&subpage->nr_locked) >= cleared); +- last = atomic_sub_and_test(cleared, &subpage->nr_locked); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ ASSERT(atomic_read(&bfs->nr_locked) >= cleared); ++ last = atomic_sub_and_test(cleared, &bfs->nr_locked); ++ spin_unlock_irqrestore(&bfs->lock, flags); + if (last) + folio_unlock(folio); + } + + #define subpage_test_bitmap_all_set(fs_info, folio, name) \ + ({ \ +- struct btrfs_subpage *subpage = folio_get_private(folio); \ ++ struct btrfs_folio_state *bfs = folio_get_private(folio); \ + const unsigned int blocks_per_folio = \ + btrfs_blocks_per_folio(fs_info, folio); \ + \ +- bitmap_test_range_all_set(subpage->bitmaps, \ ++ bitmap_test_range_all_set(bfs->bitmaps, \ + blocks_per_folio * btrfs_bitmap_nr_##name, \ + blocks_per_folio); \ + }) + + #define subpage_test_bitmap_all_zero(fs_info, folio, name) \ + ({ \ +- struct btrfs_subpage *subpage = 
folio_get_private(folio); \ ++ struct btrfs_folio_state *bfs = folio_get_private(folio); \ + const unsigned int blocks_per_folio = \ + btrfs_blocks_per_folio(fs_info, folio); \ + \ +- bitmap_test_range_all_zero(subpage->bitmaps, \ ++ bitmap_test_range_all_zero(bfs->bitmaps, \ + blocks_per_folio * btrfs_bitmap_nr_##name, \ + blocks_per_folio); \ + }) +@@ -366,43 +361,43 @@ void btrfs_folio_end_lock_bitmap(const s + void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + uptodate, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_set(fs_info, folio, uptodate)) + folio_mark_uptodate(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + uptodate, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + folio_clear_uptodate(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, 
+ struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + dirty, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_unlock_irqrestore(&bfs->lock, flags); + folio_mark_dirty(folio); + } + +@@ -419,17 +414,17 @@ void btrfs_subpage_set_dirty(const struc + bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + dirty, start, len); + unsigned long flags; + bool last = false; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, folio, dirty)) + last = true; +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + return last; + } + +@@ -446,91 +441,91 @@ void btrfs_subpage_clear_dirty(const str + void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + writeback, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- 
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (!folio_test_writeback(folio)) + folio_start_writeback(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + writeback, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) { + ASSERT(folio_test_writeback(folio)); + folio_end_writeback(folio); + } +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + ordered, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + folio_set_ordered(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, + struct 
folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + ordered, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, folio, ordered)) + folio_clear_ordered(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + checked, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_set(fs_info, folio, checked)) + folio_set_checked(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage = folio_get_private(folio); ++ struct btrfs_folio_state *bfs = folio_get_private(folio); + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, + checked, start, len); + unsigned long flags; + +- spin_lock_irqsave(&subpage->lock, flags); +- bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ 
spin_lock_irqsave(&bfs->lock, flags); ++ bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + folio_clear_checked(folio); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + /* +@@ -541,16 +536,16 @@ void btrfs_subpage_clear_checked(const s + bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ + struct folio *folio, u64 start, u32 len) \ + { \ +- struct btrfs_subpage *subpage = folio_get_private(folio); \ ++ struct btrfs_folio_state *bfs = folio_get_private(folio); \ + unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \ + name, start, len); \ + unsigned long flags; \ + bool ret; \ + \ +- spin_lock_irqsave(&subpage->lock, flags); \ +- ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \ ++ spin_lock_irqsave(&bfs->lock, flags); \ ++ ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \ + len >> fs_info->sectorsize_bits); \ +- spin_unlock_irqrestore(&subpage->lock, flags); \ ++ spin_unlock_irqrestore(&bfs->lock, flags); \ + return ret; \ + } + IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); +@@ -662,10 +657,10 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_ + { \ + const unsigned int blocks_per_folio = \ + btrfs_blocks_per_folio(fs_info, folio); \ +- const struct btrfs_subpage *subpage = folio_get_private(folio); \ ++ const struct btrfs_folio_state *bfs = folio_get_private(folio); \ + \ + ASSERT(blocks_per_folio <= BITS_PER_LONG); \ +- *dst = bitmap_read(subpage->bitmaps, \ ++ *dst = bitmap_read(bfs->bitmaps, \ + blocks_per_folio * btrfs_bitmap_nr_##name, \ + blocks_per_folio); \ + } +@@ -690,7 +685,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_ + void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + unsigned int start_bit; + unsigned int nbits; + unsigned long flags; +@@ -705,15 +700,15 @@ void 
btrfs_folio_assert_not_dirty(const + + start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); + nbits = len >> fs_info->sectorsize_bits; +- subpage = folio_get_private(folio); +- ASSERT(subpage); +- spin_lock_irqsave(&subpage->lock, flags); +- if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) { ++ bfs = folio_get_private(folio); ++ ASSERT(bfs); ++ spin_lock_irqsave(&bfs->lock, flags); ++ if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { + SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len); +- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); ++ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); + } +- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + /* +@@ -726,7 +721,7 @@ void btrfs_folio_assert_not_dirty(const + void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + unsigned long flags; + unsigned int start_bit; + unsigned int nbits; +@@ -736,19 +731,19 @@ void btrfs_folio_set_lock(const struct b + if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) + return; + +- subpage = folio_get_private(folio); ++ bfs = folio_get_private(folio); + start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); + nbits = len >> fs_info->sectorsize_bits; +- spin_lock_irqsave(&subpage->lock, flags); ++ spin_lock_irqsave(&bfs->lock, flags); + /* Target range should not yet be locked. 
*/ +- if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) { ++ if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { + SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len); +- ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); ++ ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); + } +- bitmap_set(subpage->bitmaps, start_bit, nbits); +- ret = atomic_add_return(nbits, &subpage->nr_locked); ++ bitmap_set(bfs->bitmaps, start_bit, nbits); ++ ret = atomic_add_return(nbits, &bfs->nr_locked); + ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio)); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } + + /* +@@ -776,7 +771,7 @@ bool btrfs_meta_folio_clear_and_test_dir + void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, + struct folio *folio, u64 start, u32 len) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); + unsigned long uptodate_bitmap; + unsigned long dirty_bitmap; +@@ -788,18 +783,18 @@ void __cold btrfs_subpage_dump_bitmap(co + + ASSERT(folio_test_private(folio) && folio_get_private(folio)); + ASSERT(blocks_per_folio > 1); +- subpage = folio_get_private(folio); ++ bfs = folio_get_private(folio); + +- spin_lock_irqsave(&subpage->lock, flags); ++ spin_lock_irqsave(&bfs->lock, flags); + GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap); + GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap); + GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap); + GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap); + GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap); + GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + +- dump_page(folio_page(folio, 0), "btrfs subpage 
dump"); ++ dump_page(folio_page(folio, 0), "btrfs folio state dump"); + btrfs_warn(fs_info, + "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", + start, len, folio_pos(folio), +@@ -815,14 +810,14 @@ void btrfs_get_subpage_dirty_bitmap(stru + struct folio *folio, + unsigned long *ret_bitmap) + { +- struct btrfs_subpage *subpage; ++ struct btrfs_folio_state *bfs; + unsigned long flags; + + ASSERT(folio_test_private(folio) && folio_get_private(folio)); + ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1); +- subpage = folio_get_private(folio); ++ bfs = folio_get_private(folio); + +- spin_lock_irqsave(&subpage->lock, flags); ++ spin_lock_irqsave(&bfs->lock, flags); + GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap); +- spin_unlock_irqrestore(&subpage->lock, flags); ++ spin_unlock_irqrestore(&bfs->lock, flags); + } +--- a/fs/btrfs/subpage.h ++++ b/fs/btrfs/subpage.h +@@ -32,7 +32,15 @@ struct folio; + enum { + btrfs_bitmap_nr_uptodate = 0, + btrfs_bitmap_nr_dirty, ++ ++ /* ++ * This can be changed to atomic eventually. But this change will rely ++ * on the async delalloc range rework for locked bitmap. As async ++ * delalloc can unlock its range and mark blocks writeback at random ++ * timing. ++ */ + btrfs_bitmap_nr_writeback, ++ + /* + * The ordered and checked flags are for COW fixup, already marked + * deprecated, and will be removed eventually. +@@ -57,7 +65,7 @@ enum { + * Structure to trace status of each sector inside a page, attached to + * page::private for both data and metadata inodes. + */ +-struct btrfs_subpage { ++struct btrfs_folio_state { + /* Common members for both data and metadata pages */ + spinlock_t lock; + union { +@@ -65,7 +73,7 @@ struct btrfs_subpage { + * Structures only used by metadata + * + * @eb_refs should only be operated under private_lock, as it +- * manages whether the subpage can be detached. ++ * manages whether the btrfs_folio_state can be detached. 
+ */ + atomic_t eb_refs; + +@@ -79,7 +87,7 @@ struct btrfs_subpage { + unsigned long bitmaps[]; + }; + +-enum btrfs_subpage_type { ++enum btrfs_folio_type { + BTRFS_SUBPAGE_METADATA, + BTRFS_SUBPAGE_DATA, + }; +@@ -119,15 +127,18 @@ static inline bool btrfs_is_subpage(cons + } + #endif + +-int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, +- struct folio *folio, enum btrfs_subpage_type type); +-void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio, +- enum btrfs_subpage_type type); ++int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, ++ struct folio *folio, enum btrfs_folio_type type); ++void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, ++ enum btrfs_folio_type type); + + /* Allocate additional data where page represents more than one sector */ +-struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, +- size_t fsize, enum btrfs_subpage_type type); +-void btrfs_free_subpage(struct btrfs_subpage *subpage); ++struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info, ++ size_t fsize, enum btrfs_folio_type type); ++static inline void btrfs_free_folio_state(struct btrfs_folio_state *bfs) ++{ ++ kfree(bfs); ++} + + void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); + void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio); diff --git a/queue-6.16/btrfs-reorganize-logic-at-free_extent_buffer-for-better-readability.patch b/queue-6.16/btrfs-reorganize-logic-at-free_extent_buffer-for-better-readability.patch new file mode 100644 index 0000000000..4904e619fc --- /dev/null +++ b/queue-6.16/btrfs-reorganize-logic-at-free_extent_buffer-for-better-readability.patch @@ -0,0 +1,66 @@ +From stable+bounces-171699-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:41 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:15:28 -0400 +Subject: btrfs: reorganize logic at 
free_extent_buffer() for better readability +To: stable@vger.kernel.org +Cc: Filipe Manana , Boris Burkov , David Sterba , Sasha Levin +Message-ID: <20250819011531.242846-1-sashal@kernel.org> + +From: Filipe Manana + +[ Upstream commit 71c086b30d4373a01bd5627f54516a72891a026a ] + +It's hard to read the logic to break out of the while loop since it's a +very long expression consisting of a logical or of two composite +expressions, each one composed by a logical and. Further each one is also +testing for the EXTENT_BUFFER_UNMAPPED bit, making it more verbose than +necessary. + +So change from this: + + if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3) + || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && + refs == 1)) + break; + +To this: + + if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) { + if (refs == 1) + break; + } else if (refs <= 3) { + break; + } + +At least on x86_64 using gcc 9.3.0, this doesn't change the object size. + +Reviewed-by: Boris Burkov +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: ad580dfa388f ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -3486,10 +3486,13 @@ void free_extent_buffer(struct extent_bu + + refs = atomic_read(&eb->refs); + while (1) { +- if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3) +- || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && +- refs == 1)) ++ if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) { ++ if (refs == 1) ++ break; ++ } else if (refs <= 3) { + break; ++ } ++ + if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1)) + return; + } diff --git a/queue-6.16/btrfs-subpage-keep-towrite-tag-until-folio-is-cleaned.patch b/queue-6.16/btrfs-subpage-keep-towrite-tag-until-folio-is-cleaned.patch 
new file mode 100644 index 0000000000..285f6352be --- /dev/null +++ b/queue-6.16/btrfs-subpage-keep-towrite-tag-until-folio-is-cleaned.patch @@ -0,0 +1,129 @@ +From stable+bounces-172266-greg=kroah.com@vger.kernel.org Fri Aug 22 05:35:39 2025 +From: Sasha Levin +Date: Thu, 21 Aug 2025 23:35:17 -0400 +Subject: btrfs: subpage: keep TOWRITE tag until folio is cleaned +To: stable@vger.kernel.org +Cc: Naohiro Aota , Qu Wenruo , Johannes Thumshirn , David Sterba , Sasha Levin +Message-ID: <20250822033527.1065200-3-sashal@kernel.org> + +From: Naohiro Aota + +[ Upstream commit b1511360c8ac882b0c52caa263620538e8d73220 ] + +btrfs_subpage_set_writeback() calls folio_start_writeback() the first time +a folio is written back, and it also clears the PAGECACHE_TAG_TOWRITE tag +even if there are still dirty blocks in the folio. This can break ordering +guarantees, such as those required by btrfs_wait_ordered_extents(). + +That ordering breakage leads to a real failure. For example, running +generic/464 on a zoned setup will hit the following ASSERT. This happens +because the broken ordering fails to flush existing dirty pages before the +file size is truncated. + + assertion failed: !list_empty(&ordered->list) :: 0, in fs/btrfs/zoned.c:1899 + ------------[ cut here ]------------ + kernel BUG at fs/btrfs/zoned.c:1899! 
+ Oops: invalid opcode: 0000 [#1] SMP NOPTI + CPU: 2 UID: 0 PID: 1906169 Comm: kworker/u130:2 Kdump: loaded Not tainted 6.16.0-rc6-BTRFS-ZNS+ #554 PREEMPT(voluntary) + Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021 + Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] + RIP: 0010:btrfs_finish_ordered_zoned.cold+0x50/0x52 [btrfs] + RSP: 0018:ffffc9002efdbd60 EFLAGS: 00010246 + RAX: 000000000000004c RBX: ffff88811923c4e0 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: ffffffff827e38b1 RDI: 00000000ffffffff + RBP: ffff88810005d000 R08: 00000000ffffdfff R09: ffffffff831051c8 + R10: ffffffff83055220 R11: 0000000000000000 R12: ffff8881c2458c00 + R13: ffff88811923c540 R14: ffff88811923c5e8 R15: ffff8881c1bd9680 + FS: 0000000000000000(0000) GS:ffff88a04acd0000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f907c7a918c CR3: 0000000004024000 CR4: 0000000000350ef0 + Call Trace: + + ? srso_return_thunk+0x5/0x5f + btrfs_finish_ordered_io+0x4a/0x60 [btrfs] + btrfs_work_helper+0xf9/0x490 [btrfs] + process_one_work+0x204/0x590 + ? srso_return_thunk+0x5/0x5f + worker_thread+0x1d6/0x3d0 + ? __pfx_worker_thread+0x10/0x10 + kthread+0x118/0x230 + ? __pfx_kthread+0x10/0x10 + ret_from_fork+0x205/0x260 + ? __pfx_kthread+0x10/0x10 + ret_from_fork_asm+0x1a/0x30 + + +Consider process A calling writepages() with WB_SYNC_NONE. In zoned mode or +for compressed writes, it locks several folios for delalloc and starts +writing them out. Let's call the last locked folio folio X. Suppose the +write range only partially covers folio X, leaving some pages dirty. +Process A calls btrfs_subpage_set_writeback() when building a bio. This +function call clears the TOWRITE tag of folio X, whose size = 8K and +the block size = 4K. It is following state. + + 0 4K 8K + |/////|/////| (flag: DIRTY, tag: DIRTY) + <-----> Process A will write this range. + +Now suppose process B concurrently calls writepages() with WB_SYNC_ALL. 
It +calls tag_pages_for_writeback() to tag dirty folios with +PAGECACHE_TAG_TOWRITE. Since folio X is still dirty, it gets tagged. Then, +B collects tagged folios using filemap_get_folios_tag() and must wait for +folio X to be written before returning from writepages(). + + 0 4K 8K + |/////|/////| (flag: DIRTY, tag: DIRTY|TOWRITE) + +However, between tagging and collecting, process A may call +btrfs_subpage_set_writeback() and clear folio X's TOWRITE tag. + 0 4K 8K + | |/////| (flag: DIRTY|WRITEBACK, tag: DIRTY) + +As a result, process B won't see folio X in its batch, and returns without +waiting for it. This breaks the WB_SYNC_ALL ordering requirement. + +Fix this by using btrfs_subpage_set_writeback_keepwrite(), which retains +the TOWRITE tag. We now manually clear the tag only after the folio becomes +clean, via the xas operation. + +Fixes: 3470da3b7d87 ("btrfs: subpage: introduce helpers for writeback status") +CC: stable@vger.kernel.org # 6.12+ +Reviewed-by: Qu Wenruo +Reviewed-by: Johannes Thumshirn +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/subpage.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/subpage.c ++++ b/fs/btrfs/subpage.c +@@ -448,8 +448,25 @@ void btrfs_subpage_set_writeback(const s + + spin_lock_irqsave(&bfs->lock, flags); + bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ++ ++ /* ++ * Don't clear the TOWRITE tag when starting writeback on a still-dirty ++ * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, ++ * assume writeback is complete, and exit too early — violating sync ++ * ordering guarantees. 
++ */ + if (!folio_test_writeback(folio)) +- folio_start_writeback(folio); ++ __folio_start_writeback(folio, true); ++ if (!folio_test_dirty(folio)) { ++ struct address_space *mapping = folio_mapping(folio); ++ XA_STATE(xas, &mapping->i_pages, folio->index); ++ unsigned long flags; ++ ++ xas_lock_irqsave(&xas, flags); ++ xas_load(&xas); ++ xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); ++ xas_unlock_irqrestore(&xas, flags); ++ } + spin_unlock_irqrestore(&bfs->lock, flags); + } + diff --git a/queue-6.16/btrfs-use-refcount_t-type-for-the-extent-buffer-reference-counter.patch b/queue-6.16/btrfs-use-refcount_t-type-for-the-extent-buffer-reference-counter.patch new file mode 100644 index 0000000000..accf93f3c3 --- /dev/null +++ b/queue-6.16/btrfs-use-refcount_t-type-for-the-extent-buffer-reference-counter.patch @@ -0,0 +1,440 @@ +From stable+bounces-171700-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:45 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:15:30 -0400 +Subject: btrfs: use refcount_t type for the extent buffer reference counter +To: stable@vger.kernel.org +Cc: Filipe Manana , Boris Burkov , David Sterba , Sasha Levin +Message-ID: <20250819011531.242846-3-sashal@kernel.org> + +From: Filipe Manana + +[ Upstream commit b769777d927af168b1389388392bfd7dc4e38399 ] + +Instead of using a bare atomic, use the refcount_t type, which despite +being a structure that contains only an atomic, has an API that checks +for underflows and other hazards. This doesn't change the size of the +extent_buffer structure. + +This removes the need to do things like this: + + WARN_ON(atomic_read(&eb->refs) == 0); + if (atomic_dec_and_test(&eb->refs)) { + (...) + } + +And do just: + + if (refcount_dec_and_test(&eb->refs)) { + (...) + } + +Since refcount_dec_and_test() already triggers a warning when we decrement +a ref count that has a value of 0 (or below zero). 
+ +Reviewed-by: Boris Burkov +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: ad580dfa388f ("btrfs: fix subpage deadlock in try_release_subpage_extent_buffer()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.c | 14 ++++++------- + fs/btrfs/extent-tree.c | 2 - + fs/btrfs/extent_io.c | 45 +++++++++++++++++++++---------------------- + fs/btrfs/extent_io.h | 2 - + fs/btrfs/fiemap.c | 2 - + fs/btrfs/print-tree.c | 2 - + fs/btrfs/qgroup.c | 6 ++--- + fs/btrfs/relocation.c | 4 +-- + fs/btrfs/tree-log.c | 4 +-- + fs/btrfs/zoned.c | 2 - + include/trace/events/btrfs.h | 2 - + 11 files changed, 42 insertions(+), 43 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -198,7 +198,7 @@ struct extent_buffer *btrfs_root_node(st + * the inc_not_zero dance and if it doesn't work then + * synchronize_rcu and try again. + */ +- if (atomic_inc_not_zero(&eb->refs)) { ++ if (refcount_inc_not_zero(&eb->refs)) { + rcu_read_unlock(); + break; + } +@@ -556,7 +556,7 @@ int btrfs_force_cow_block(struct btrfs_t + btrfs_abort_transaction(trans, ret); + goto error_unlock_cow; + } +- atomic_inc(&cow->refs); ++ refcount_inc(&cow->refs); + rcu_assign_pointer(root->node, cow); + + ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf, +@@ -1088,7 +1088,7 @@ static noinline int balance_level(struct + /* update the path */ + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { +- atomic_inc(&left->refs); ++ refcount_inc(&left->refs); + /* left was locked after cow */ + path->nodes[level] = left; + path->slots[level + 1] -= 1; +@@ -1692,7 +1692,7 @@ static struct extent_buffer *btrfs_searc + + if (p->search_commit_root) { + b = root->commit_root; +- atomic_inc(&b->refs); ++ refcount_inc(&b->refs); + level = btrfs_header_level(b); + /* + * Ensure that all callers have set skip_locking when +@@ -2893,7 +2893,7 @@ static noinline int insert_new_root(stru + free_extent_buffer(old); + + 
add_root_to_dirty_list(root); +- atomic_inc(&c->refs); ++ refcount_inc(&c->refs); + path->nodes[level] = c; + path->locks[level] = BTRFS_WRITE_LOCK; + path->slots[level] = 0; +@@ -4450,7 +4450,7 @@ static noinline int btrfs_del_leaf(struc + + root_sub_used_bytes(root); + +- atomic_inc(&leaf->refs); ++ refcount_inc(&leaf->refs); + ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); + free_extent_buffer_stale(leaf); + if (ret < 0) +@@ -4535,7 +4535,7 @@ int btrfs_del_items(struct btrfs_trans_h + * for possible call to btrfs_del_ptr below + */ + slot = path->slots[1]; +- atomic_inc(&leaf->refs); ++ refcount_inc(&leaf->refs); + /* + * We want to be able to at least push one item to the + * left neighbour leaf, and that's the first item. +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -6342,7 +6342,7 @@ int btrfs_drop_subtree(struct btrfs_tran + + btrfs_assert_tree_write_locked(parent); + parent_level = btrfs_header_level(parent); +- atomic_inc(&parent->refs); ++ refcount_inc(&parent->refs); + path->nodes[parent_level] = parent; + path->slots[parent_level] = btrfs_header_nritems(parent); + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -77,7 +77,7 @@ void btrfs_extent_buffer_leak_debug_chec + struct extent_buffer, leak_list); + pr_err( + "BTRFS: buffer leak start %llu len %u refs %d bflags %lu owner %llu\n", +- eb->start, eb->len, atomic_read(&eb->refs), eb->bflags, ++ eb->start, eb->len, refcount_read(&eb->refs), eb->bflags, + btrfs_header_owner(eb)); + list_del(&eb->leak_list); + WARN_ON_ONCE(1); +@@ -1961,7 +1961,7 @@ retry: + if (!eb) + return NULL; + +- if (!atomic_inc_not_zero(&eb->refs)) { ++ if (!refcount_inc_not_zero(&eb->refs)) { + xas_reset(xas); + goto retry; + } +@@ -2012,7 +2012,7 @@ static struct extent_buffer *find_extent + + rcu_read_lock(); + eb = xa_load(&fs_info->buffer_tree, index); +- if (eb && !atomic_inc_not_zero(&eb->refs)) ++ if (eb && !refcount_inc_not_zero(&eb->refs)) + eb = NULL; + 
rcu_read_unlock(); + return eb; +@@ -2842,7 +2842,7 @@ static struct extent_buffer *__alloc_ext + btrfs_leak_debug_add_eb(eb); + + spin_lock_init(&eb->refs_lock); +- atomic_set(&eb->refs, 1); ++ refcount_set(&eb->refs, 1); + + ASSERT(eb->len <= BTRFS_MAX_METADATA_BLOCKSIZE); + +@@ -2975,13 +2975,13 @@ static void check_buffer_tree_ref(struct + * once io is initiated, TREE_REF can no longer be cleared, so that is + * the moment at which any such race is best fixed. + */ +- refs = atomic_read(&eb->refs); ++ refs = refcount_read(&eb->refs); + if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) + return; + + spin_lock(&eb->refs_lock); + if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) +- atomic_inc(&eb->refs); ++ refcount_inc(&eb->refs); + spin_unlock(&eb->refs_lock); + } + +@@ -3047,7 +3047,7 @@ again: + return ERR_PTR(ret); + } + if (exists) { +- if (!atomic_inc_not_zero(&exists->refs)) { ++ if (!refcount_inc_not_zero(&exists->refs)) { + /* The extent buffer is being freed, retry. */ + xa_unlock_irq(&fs_info->buffer_tree); + goto again; +@@ -3092,7 +3092,7 @@ static struct extent_buffer *grab_extent + * just overwrite folio private. + */ + exists = folio_get_private(folio); +- if (atomic_inc_not_zero(&exists->refs)) ++ if (refcount_inc_not_zero(&exists->refs)) + return exists; + + WARN_ON(folio_test_dirty(folio)); +@@ -3362,7 +3362,7 @@ again: + goto out; + } + if (existing_eb) { +- if (!atomic_inc_not_zero(&existing_eb->refs)) { ++ if (!refcount_inc_not_zero(&existing_eb->refs)) { + xa_unlock_irq(&fs_info->buffer_tree); + goto again; + } +@@ -3391,7 +3391,7 @@ again: + return eb; + + out: +- WARN_ON(!atomic_dec_and_test(&eb->refs)); ++ WARN_ON(!refcount_dec_and_test(&eb->refs)); + + /* + * Any attached folios need to be detached before we unlock them. 
This +@@ -3437,8 +3437,7 @@ static int release_extent_buffer(struct + { + lockdep_assert_held(&eb->refs_lock); + +- WARN_ON(atomic_read(&eb->refs) == 0); +- if (atomic_dec_and_test(&eb->refs)) { ++ if (refcount_dec_and_test(&eb->refs)) { + struct btrfs_fs_info *fs_info = eb->fs_info; + + spin_unlock(&eb->refs_lock); +@@ -3484,7 +3483,7 @@ void free_extent_buffer(struct extent_bu + if (!eb) + return; + +- refs = atomic_read(&eb->refs); ++ refs = refcount_read(&eb->refs); + while (1) { + if (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags)) { + if (refs == 1) +@@ -3494,16 +3493,16 @@ void free_extent_buffer(struct extent_bu + } + + /* Optimization to avoid locking eb->refs_lock. */ +- if (atomic_try_cmpxchg(&eb->refs, &refs, refs - 1)) ++ if (atomic_try_cmpxchg(&eb->refs.refs, &refs, refs - 1)) + return; + } + + spin_lock(&eb->refs_lock); +- if (atomic_read(&eb->refs) == 2 && ++ if (refcount_read(&eb->refs) == 2 && + test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && + !extent_buffer_under_io(eb) && + test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) +- atomic_dec(&eb->refs); ++ refcount_dec(&eb->refs); + + /* + * I know this is terrible, but it's temporary until we stop tracking +@@ -3520,9 +3519,9 @@ void free_extent_buffer_stale(struct ext + spin_lock(&eb->refs_lock); + set_bit(EXTENT_BUFFER_STALE, &eb->bflags); + +- if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && ++ if (refcount_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && + test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) +- atomic_dec(&eb->refs); ++ refcount_dec(&eb->refs); + release_extent_buffer(eb); + } + +@@ -3580,7 +3579,7 @@ void btrfs_clear_buffer_dirty(struct btr + btree_clear_folio_dirty_tag(folio); + folio_unlock(folio); + } +- WARN_ON(atomic_read(&eb->refs) == 0); ++ WARN_ON(refcount_read(&eb->refs) == 0); + } + + void set_extent_buffer_dirty(struct extent_buffer *eb) +@@ -3591,7 +3590,7 @@ void set_extent_buffer_dirty(struct exte + + was_dirty = 
test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); + +- WARN_ON(atomic_read(&eb->refs) == 0); ++ WARN_ON(refcount_read(&eb->refs) == 0); + WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); + WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)); + +@@ -3717,7 +3716,7 @@ int read_extent_buffer_pages_nowait(stru + + eb->read_mirror = 0; + check_buffer_tree_ref(eb); +- atomic_inc(&eb->refs); ++ refcount_inc(&eb->refs); + + bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES, + REQ_OP_READ | REQ_META, eb->fs_info, +@@ -4312,7 +4311,7 @@ static int try_release_subpage_extent_bu + * won't disappear out from under us. + */ + spin_lock(&eb->refs_lock); +- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { ++ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { + spin_unlock(&eb->refs_lock); + continue; + } +@@ -4378,7 +4377,7 @@ int try_release_extent_buffer(struct fol + * this page. + */ + spin_lock(&eb->refs_lock); +- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { ++ if (refcount_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) { + spin_unlock(&eb->refs_lock); + spin_unlock(&folio->mapping->i_private_lock); + return 0; +--- a/fs/btrfs/extent_io.h ++++ b/fs/btrfs/extent_io.h +@@ -98,7 +98,7 @@ struct extent_buffer { + void *addr; + + spinlock_t refs_lock; +- atomic_t refs; ++ refcount_t refs; + int read_mirror; + /* >= 0 if eb belongs to a log tree, -1 otherwise */ + s8 log_index; +--- a/fs/btrfs/fiemap.c ++++ b/fs/btrfs/fiemap.c +@@ -320,7 +320,7 @@ static int fiemap_next_leaf_item(struct + * the cost of allocating a new one. 
+ */ + ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags)); +- atomic_inc(&clone->refs); ++ refcount_inc(&clone->refs); + + ret = btrfs_next_leaf(inode->root, path); + if (ret != 0) +--- a/fs/btrfs/print-tree.c ++++ b/fs/btrfs/print-tree.c +@@ -223,7 +223,7 @@ static void print_eb_refs_lock(const str + { + #ifdef CONFIG_BTRFS_DEBUG + btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u", +- atomic_read(&eb->refs), eb->lock_owner, current->pid); ++ refcount_read(&eb->refs), eb->lock_owner, current->pid); + #endif + } + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -2348,7 +2348,7 @@ static int qgroup_trace_extent_swap(stru + btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0); + + /* For src_path */ +- atomic_inc(&src_eb->refs); ++ refcount_inc(&src_eb->refs); + src_path->nodes[root_level] = src_eb; + src_path->slots[root_level] = dst_path->slots[root_level]; + src_path->locks[root_level] = 0; +@@ -2581,7 +2581,7 @@ static int qgroup_trace_subtree_swap(str + goto out; + } + /* For dst_path */ +- atomic_inc(&dst_eb->refs); ++ refcount_inc(&dst_eb->refs); + dst_path->nodes[level] = dst_eb; + dst_path->slots[level] = 0; + dst_path->locks[level] = 0; +@@ -2673,7 +2673,7 @@ int btrfs_qgroup_trace_subtree(struct bt + * walk back up the tree (adjusting slot pointers as we go) + * and restart the search process. 
+ */ +- atomic_inc(&root_eb->refs); /* For path */ ++ refcount_inc(&root_eb->refs); /* For path */ + path->nodes[root_level] = root_eb; + path->slots[root_level] = 0; + path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -1535,7 +1535,7 @@ static noinline_for_stack int merge_relo + + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + level = btrfs_root_level(root_item); +- atomic_inc(&reloc_root->node->refs); ++ refcount_inc(&reloc_root->node->refs); + path->nodes[level] = reloc_root->node; + path->slots[level] = 0; + } else { +@@ -4358,7 +4358,7 @@ int btrfs_reloc_cow_block(struct btrfs_t + } + + btrfs_backref_drop_node_buffer(node); +- atomic_inc(&cow->refs); ++ refcount_inc(&cow->refs); + node->eb = cow; + node->new_bytenr = cow->start; + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -2747,7 +2747,7 @@ static int walk_log_tree(struct btrfs_tr + level = btrfs_header_level(log->node); + orig_level = level; + path->nodes[level] = log->node; +- atomic_inc(&log->node->refs); ++ refcount_inc(&log->node->refs); + path->slots[level] = 0; + + while (1) { +@@ -3711,7 +3711,7 @@ static int clone_leaf(struct btrfs_path + * Add extra ref to scratch eb so that it is not freed when callers + * release the path, so we can reuse it later if needed. 
+ */ +- atomic_inc(&ctx->scratch_eb->refs); ++ refcount_inc(&ctx->scratch_eb->refs); + + return 0; + } +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -2491,7 +2491,7 @@ void btrfs_schedule_zone_finish_bg(struc + + /* For the work */ + btrfs_get_block_group(bg); +- atomic_inc(&eb->refs); ++ refcount_inc(&eb->refs); + bg->last_eb = eb; + INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); + queue_work(system_unbound_wq, &bg->zone_finish_work); +--- a/include/trace/events/btrfs.h ++++ b/include/trace/events/btrfs.h +@@ -1095,7 +1095,7 @@ TRACE_EVENT(btrfs_cow_block, + TP_fast_assign_btrfs(root->fs_info, + __entry->root_objectid = btrfs_root_id(root); + __entry->buf_start = buf->start; +- __entry->refs = atomic_read(&buf->refs); ++ __entry->refs = refcount_read(&buf->refs); + __entry->cow_start = cow->start; + __entry->buf_level = btrfs_header_level(buf); + __entry->cow_level = btrfs_header_level(cow); diff --git a/queue-6.16/crypto-acomp-fix-cfi-failure-due-to-type-punning.patch b/queue-6.16/crypto-acomp-fix-cfi-failure-due-to-type-punning.patch new file mode 100644 index 0000000000..390416087d --- /dev/null +++ b/queue-6.16/crypto-acomp-fix-cfi-failure-due-to-type-punning.patch @@ -0,0 +1,79 @@ +From stable+bounces-172225-greg=kroah.com@vger.kernel.org Thu Aug 21 21:21:50 2025 +From: Sasha Levin +Date: Thu, 21 Aug 2025 15:21:31 -0400 +Subject: crypto: acomp - Fix CFI failure due to type punning +To: stable@vger.kernel.org +Cc: Eric Biggers , Giovanni Cabiddu , Herbert Xu , Sasha Levin +Message-ID: <20250821192131.923831-2-sashal@kernel.org> + +From: Eric Biggers + +[ Upstream commit 962ddc5a7a4b04c007bba0f3e7298cda13c62efd ] + +To avoid a crash when control flow integrity is enabled, make the +workspace ("stream") free function use a consistent type, and call it +through a function pointer that has that same type. 
+ +Fixes: 42d9f6c77479 ("crypto: acomp - Move scomp stream allocation code into acomp") +Cc: stable@vger.kernel.org +Signed-off-by: Eric Biggers +Reviewed-by: Giovanni Cabiddu +Signed-off-by: Herbert Xu +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + crypto/deflate.c | 7 ++++++- + crypto/zstd.c | 7 ++++++- + include/crypto/internal/acompress.h | 5 +---- + 3 files changed, 13 insertions(+), 6 deletions(-) + +--- a/crypto/deflate.c ++++ b/crypto/deflate.c +@@ -48,9 +48,14 @@ static void *deflate_alloc_stream(void) + return ctx; + } + ++static void deflate_free_stream(void *ctx) ++{ ++ kvfree(ctx); ++} ++ + static struct crypto_acomp_streams deflate_streams = { + .alloc_ctx = deflate_alloc_stream, +- .cfree_ctx = kvfree, ++ .free_ctx = deflate_free_stream, + }; + + static int deflate_compress_one(struct acomp_req *req, +--- a/crypto/zstd.c ++++ b/crypto/zstd.c +@@ -54,9 +54,14 @@ static void *zstd_alloc_stream(void) + return ctx; + } + ++static void zstd_free_stream(void *ctx) ++{ ++ kvfree(ctx); ++} ++ + static struct crypto_acomp_streams zstd_streams = { + .alloc_ctx = zstd_alloc_stream, +- .cfree_ctx = kvfree, ++ .free_ctx = zstd_free_stream, + }; + + static int zstd_init(struct crypto_acomp *acomp_tfm) +--- a/include/crypto/internal/acompress.h ++++ b/include/crypto/internal/acompress.h +@@ -63,10 +63,7 @@ struct crypto_acomp_stream { + struct crypto_acomp_streams { + /* These must come first because of struct scomp_alg. 
*/ + void *(*alloc_ctx)(void); +- union { +- void (*free_ctx)(void *); +- void (*cfree_ctx)(const void *); +- }; ++ void (*free_ctx)(void *); + + struct crypto_acomp_stream __percpu *streams; + struct work_struct stream_work; diff --git a/queue-6.16/crypto-zstd-convert-to-acomp.patch b/queue-6.16/crypto-zstd-convert-to-acomp.patch new file mode 100644 index 0000000000..53e273d2b7 --- /dev/null +++ b/queue-6.16/crypto-zstd-convert-to-acomp.patch @@ -0,0 +1,456 @@ +From stable+bounces-172224-greg=kroah.com@vger.kernel.org Thu Aug 21 21:21:43 2025 +From: Sasha Levin +Date: Thu, 21 Aug 2025 15:21:30 -0400 +Subject: crypto: zstd - convert to acomp +To: stable@vger.kernel.org +Cc: Suman Kumar Chakraborty , Giovanni Cabiddu , Herbert Xu , Sasha Levin +Message-ID: <20250821192131.923831-1-sashal@kernel.org> + +From: Suman Kumar Chakraborty + +[ Upstream commit f5ad93ffb54119a8dc5e18f070624d4ead586969 ] + +Convert the implementation to a native acomp interface using zstd +streaming APIs, eliminating the need for buffer linearization. 
+ +This includes: + - Removal of the scomp interface in favor of acomp + - Refactoring of stream allocation, initialization, and handling for + both compression and decompression using Zstandard streaming APIs + - Replacement of crypto_register_scomp() with crypto_register_acomp() + for module registration + +Signed-off-by: Suman Kumar Chakraborty +Reviewed-by: Giovanni Cabiddu +Signed-off-by: Herbert Xu +Stable-dep-of: 962ddc5a7a4b ("crypto: acomp - Fix CFI failure due to type punning") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + crypto/zstd.c | 354 ++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 233 insertions(+), 121 deletions(-) + +--- a/crypto/zstd.c ++++ b/crypto/zstd.c +@@ -12,188 +12,300 @@ + #include + #include + #include +-#include ++#include ++#include + + +-#define ZSTD_DEF_LEVEL 3 ++#define ZSTD_DEF_LEVEL 3 ++#define ZSTD_MAX_WINDOWLOG 18 ++#define ZSTD_MAX_SIZE BIT(ZSTD_MAX_WINDOWLOG) + + struct zstd_ctx { + zstd_cctx *cctx; + zstd_dctx *dctx; +- void *cwksp; +- void *dwksp; ++ size_t wksp_size; ++ zstd_parameters params; ++ u8 wksp[0] __aligned(8); + }; + +-static zstd_parameters zstd_params(void) +-{ +- return zstd_get_params(ZSTD_DEF_LEVEL, 0); +-} ++static DEFINE_MUTEX(zstd_stream_lock); + +-static int zstd_comp_init(struct zstd_ctx *ctx) ++static void *zstd_alloc_stream(void) + { +- int ret = 0; +- const zstd_parameters params = zstd_params(); +- const size_t wksp_size = zstd_cctx_workspace_bound(¶ms.cParams); ++ zstd_parameters params; ++ struct zstd_ctx *ctx; ++ size_t wksp_size; + +- ctx->cwksp = vzalloc(wksp_size); +- if (!ctx->cwksp) { +- ret = -ENOMEM; +- goto out; +- } ++ params = zstd_get_params(ZSTD_DEF_LEVEL, ZSTD_MAX_SIZE); + +- ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size); +- if (!ctx->cctx) { +- ret = -EINVAL; +- goto out_free; +- } +-out: +- return ret; +-out_free: +- vfree(ctx->cwksp); +- goto out; ++ wksp_size = max_t(size_t, ++ zstd_cstream_workspace_bound(¶ms.cParams), 
++ zstd_dstream_workspace_bound(ZSTD_MAX_SIZE)); ++ if (!wksp_size) ++ return ERR_PTR(-EINVAL); ++ ++ ctx = kvmalloc(sizeof(*ctx) + wksp_size, GFP_KERNEL); ++ if (!ctx) ++ return ERR_PTR(-ENOMEM); ++ ++ ctx->params = params; ++ ctx->wksp_size = wksp_size; ++ ++ return ctx; + } + +-static int zstd_decomp_init(struct zstd_ctx *ctx) ++static struct crypto_acomp_streams zstd_streams = { ++ .alloc_ctx = zstd_alloc_stream, ++ .cfree_ctx = kvfree, ++}; ++ ++static int zstd_init(struct crypto_acomp *acomp_tfm) + { + int ret = 0; +- const size_t wksp_size = zstd_dctx_workspace_bound(); + +- ctx->dwksp = vzalloc(wksp_size); +- if (!ctx->dwksp) { +- ret = -ENOMEM; +- goto out; +- } ++ mutex_lock(&zstd_stream_lock); ++ ret = crypto_acomp_alloc_streams(&zstd_streams); ++ mutex_unlock(&zstd_stream_lock); + +- ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size); +- if (!ctx->dctx) { +- ret = -EINVAL; +- goto out_free; +- } +-out: + return ret; +-out_free: +- vfree(ctx->dwksp); +- goto out; + } + +-static void zstd_comp_exit(struct zstd_ctx *ctx) ++static void zstd_exit(struct crypto_acomp *acomp_tfm) + { +- vfree(ctx->cwksp); +- ctx->cwksp = NULL; +- ctx->cctx = NULL; ++ crypto_acomp_free_streams(&zstd_streams); + } + +-static void zstd_decomp_exit(struct zstd_ctx *ctx) ++static int zstd_compress_one(struct acomp_req *req, struct zstd_ctx *ctx, ++ const void *src, void *dst, unsigned int *dlen) + { +- vfree(ctx->dwksp); +- ctx->dwksp = NULL; +- ctx->dctx = NULL; +-} ++ unsigned int out_len; + +-static int __zstd_init(void *ctx) +-{ +- int ret; ++ ctx->cctx = zstd_init_cctx(ctx->wksp, ctx->wksp_size); ++ if (!ctx->cctx) ++ return -EINVAL; + +- ret = zstd_comp_init(ctx); +- if (ret) +- return ret; +- ret = zstd_decomp_init(ctx); +- if (ret) +- zstd_comp_exit(ctx); +- return ret; ++ out_len = zstd_compress_cctx(ctx->cctx, dst, req->dlen, src, req->slen, ++ &ctx->params); ++ if (zstd_is_error(out_len)) ++ return -EINVAL; ++ ++ *dlen = out_len; ++ ++ return 0; + } + +-static void 
*zstd_alloc_ctx(void) ++static int zstd_compress(struct acomp_req *req) + { +- int ret; ++ struct crypto_acomp_stream *s; ++ unsigned int pos, scur, dcur; ++ unsigned int total_out = 0; ++ bool data_available = true; ++ zstd_out_buffer outbuf; ++ struct acomp_walk walk; ++ zstd_in_buffer inbuf; + struct zstd_ctx *ctx; ++ size_t pending_bytes; ++ size_t num_bytes; ++ int ret; + +- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); +- if (!ctx) +- return ERR_PTR(-ENOMEM); ++ s = crypto_acomp_lock_stream_bh(&zstd_streams); ++ ctx = s->ctx; + +- ret = __zstd_init(ctx); +- if (ret) { +- kfree(ctx); +- return ERR_PTR(ret); ++ ret = acomp_walk_virt(&walk, req, true); ++ if (ret) ++ goto out; ++ ++ ctx->cctx = zstd_init_cstream(&ctx->params, 0, ctx->wksp, ctx->wksp_size); ++ if (!ctx->cctx) { ++ ret = -EINVAL; ++ goto out; + } + +- return ctx; +-} ++ do { ++ dcur = acomp_walk_next_dst(&walk); ++ if (!dcur) { ++ ret = -ENOSPC; ++ goto out; ++ } ++ ++ outbuf.pos = 0; ++ outbuf.dst = (u8 *)walk.dst.virt.addr; ++ outbuf.size = dcur; ++ ++ do { ++ scur = acomp_walk_next_src(&walk); ++ if (dcur == req->dlen && scur == req->slen) { ++ ret = zstd_compress_one(req, ctx, walk.src.virt.addr, ++ walk.dst.virt.addr, &total_out); ++ acomp_walk_done_src(&walk, scur); ++ acomp_walk_done_dst(&walk, dcur); ++ goto out; ++ } ++ ++ if (scur) { ++ inbuf.pos = 0; ++ inbuf.src = walk.src.virt.addr; ++ inbuf.size = scur; ++ } else { ++ data_available = false; ++ break; ++ } ++ ++ num_bytes = zstd_compress_stream(ctx->cctx, &outbuf, &inbuf); ++ if (ZSTD_isError(num_bytes)) { ++ ret = -EIO; ++ goto out; ++ } ++ ++ pending_bytes = zstd_flush_stream(ctx->cctx, &outbuf); ++ if (ZSTD_isError(pending_bytes)) { ++ ret = -EIO; ++ goto out; ++ } ++ acomp_walk_done_src(&walk, inbuf.pos); ++ } while (dcur != outbuf.pos); ++ ++ total_out += outbuf.pos; ++ acomp_walk_done_dst(&walk, dcur); ++ } while (data_available); ++ ++ pos = outbuf.pos; ++ num_bytes = zstd_end_stream(ctx->cctx, &outbuf); ++ if 
(ZSTD_isError(num_bytes)) ++ ret = -EIO; ++ else ++ total_out += (outbuf.pos - pos); + +-static void __zstd_exit(void *ctx) +-{ +- zstd_comp_exit(ctx); +- zstd_decomp_exit(ctx); +-} ++out: ++ if (ret) ++ req->dlen = 0; ++ else ++ req->dlen = total_out; + +-static void zstd_free_ctx(void *ctx) +-{ +- __zstd_exit(ctx); +- kfree_sensitive(ctx); ++ crypto_acomp_unlock_stream_bh(s); ++ ++ return ret; + } + +-static int __zstd_compress(const u8 *src, unsigned int slen, +- u8 *dst, unsigned int *dlen, void *ctx) ++static int zstd_decompress_one(struct acomp_req *req, struct zstd_ctx *ctx, ++ const void *src, void *dst, unsigned int *dlen) + { + size_t out_len; +- struct zstd_ctx *zctx = ctx; +- const zstd_parameters params = zstd_params(); + +- out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, ¶ms); ++ ctx->dctx = zstd_init_dctx(ctx->wksp, ctx->wksp_size); ++ if (!ctx->dctx) ++ return -EINVAL; ++ ++ out_len = zstd_decompress_dctx(ctx->dctx, dst, req->dlen, src, req->slen); + if (zstd_is_error(out_len)) + return -EINVAL; ++ + *dlen = out_len; ++ + return 0; + } + +-static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src, +- unsigned int slen, u8 *dst, unsigned int *dlen, +- void *ctx) ++static int zstd_decompress(struct acomp_req *req) + { +- return __zstd_compress(src, slen, dst, dlen, ctx); +-} ++ struct crypto_acomp_stream *s; ++ unsigned int total_out = 0; ++ unsigned int scur, dcur; ++ zstd_out_buffer outbuf; ++ struct acomp_walk walk; ++ zstd_in_buffer inbuf; ++ struct zstd_ctx *ctx; ++ size_t pending_bytes; ++ int ret; + +-static int __zstd_decompress(const u8 *src, unsigned int slen, +- u8 *dst, unsigned int *dlen, void *ctx) +-{ +- size_t out_len; +- struct zstd_ctx *zctx = ctx; ++ s = crypto_acomp_lock_stream_bh(&zstd_streams); ++ ctx = s->ctx; + +- out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen); +- if (zstd_is_error(out_len)) +- return -EINVAL; +- *dlen = out_len; +- return 0; +-} ++ ret = acomp_walk_virt(&walk, req, 
true); ++ if (ret) ++ goto out; + +-static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src, +- unsigned int slen, u8 *dst, unsigned int *dlen, +- void *ctx) +-{ +- return __zstd_decompress(src, slen, dst, dlen, ctx); +-} +- +-static struct scomp_alg scomp = { +- .alloc_ctx = zstd_alloc_ctx, +- .free_ctx = zstd_free_ctx, +- .compress = zstd_scompress, +- .decompress = zstd_sdecompress, +- .base = { +- .cra_name = "zstd", +- .cra_driver_name = "zstd-scomp", +- .cra_module = THIS_MODULE, ++ ctx->dctx = zstd_init_dstream(ZSTD_MAX_SIZE, ctx->wksp, ctx->wksp_size); ++ if (!ctx->dctx) { ++ ret = -EINVAL; ++ goto out; + } ++ ++ do { ++ scur = acomp_walk_next_src(&walk); ++ if (scur) { ++ inbuf.pos = 0; ++ inbuf.size = scur; ++ inbuf.src = walk.src.virt.addr; ++ } else { ++ break; ++ } ++ ++ do { ++ dcur = acomp_walk_next_dst(&walk); ++ if (dcur == req->dlen && scur == req->slen) { ++ ret = zstd_decompress_one(req, ctx, walk.src.virt.addr, ++ walk.dst.virt.addr, &total_out); ++ acomp_walk_done_dst(&walk, dcur); ++ acomp_walk_done_src(&walk, scur); ++ goto out; ++ } ++ ++ if (!dcur) { ++ ret = -ENOSPC; ++ goto out; ++ } ++ ++ outbuf.pos = 0; ++ outbuf.dst = (u8 *)walk.dst.virt.addr; ++ outbuf.size = dcur; ++ ++ pending_bytes = zstd_decompress_stream(ctx->dctx, &outbuf, &inbuf); ++ if (ZSTD_isError(pending_bytes)) { ++ ret = -EIO; ++ goto out; ++ } ++ ++ total_out += outbuf.pos; ++ ++ acomp_walk_done_dst(&walk, outbuf.pos); ++ } while (scur != inbuf.pos); ++ ++ if (scur) ++ acomp_walk_done_src(&walk, scur); ++ } while (ret == 0); ++ ++out: ++ if (ret) ++ req->dlen = 0; ++ else ++ req->dlen = total_out; ++ ++ crypto_acomp_unlock_stream_bh(s); ++ ++ return ret; ++} ++ ++static struct acomp_alg zstd_acomp = { ++ .base = { ++ .cra_name = "zstd", ++ .cra_driver_name = "zstd-generic", ++ .cra_flags = CRYPTO_ALG_REQ_VIRT, ++ .cra_module = THIS_MODULE, ++ }, ++ .init = zstd_init, ++ .exit = zstd_exit, ++ .compress = zstd_compress, ++ .decompress = zstd_decompress, + }; + 
+ static int __init zstd_mod_init(void) + { +- return crypto_register_scomp(&scomp); ++ return crypto_register_acomp(&zstd_acomp); + } + + static void __exit zstd_mod_fini(void) + { +- crypto_unregister_scomp(&scomp); ++ crypto_unregister_acomp(&zstd_acomp); + } + + module_init(zstd_mod_init); diff --git a/queue-6.16/mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch b/queue-6.16/mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch new file mode 100644 index 0000000000..bd835402b4 --- /dev/null +++ b/queue-6.16/mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch @@ -0,0 +1,63 @@ +From 7e6c3130690a01076efdf45aa02ba5d5c16849a0 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Sun, 20 Jul 2025 11:58:22 -0700 +Subject: mm/damon/ops-common: ignore migration request to invalid nodes + +From: SeongJae Park + +commit 7e6c3130690a01076efdf45aa02ba5d5c16849a0 upstream. + +damon_migrate_pages() tries migration even if the target node is invalid. +If users mistakenly make such invalid requests via +DAMOS_MIGRATE_{HOT,COLD} action, the below kernel BUG can happen. + + [ 7831.883495] BUG: unable to handle page fault for address: 0000000000001f48 + [ 7831.884160] #PF: supervisor read access in kernel mode + [ 7831.884681] #PF: error_code(0x0000) - not-present page + [ 7831.885203] PGD 0 P4D 0 + [ 7831.885468] Oops: Oops: 0000 [#1] SMP PTI + [ 7831.885852] CPU: 31 UID: 0 PID: 94202 Comm: kdamond.0 Not tainted 6.16.0-rc5-mm-new-damon+ #93 PREEMPT(voluntary) + [ 7831.886913] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-4.el9 04/01/2014 + [ 7831.887777] RIP: 0010:__alloc_frozen_pages_noprof (include/linux/mmzone.h:1724 include/linux/mmzone.h:1750 mm/page_alloc.c:4936 mm/page_alloc.c:5137) + [...] 
+ [ 7831.895953] Call Trace: + [ 7831.896195] + [ 7831.896397] __folio_alloc_noprof (mm/page_alloc.c:5183 mm/page_alloc.c:5192) + [ 7831.896787] migrate_pages_batch (mm/migrate.c:1189 mm/migrate.c:1851) + [ 7831.897228] ? __pfx_alloc_migration_target (mm/migrate.c:2137) + [ 7831.897735] migrate_pages (mm/migrate.c:2078) + [ 7831.898141] ? __pfx_alloc_migration_target (mm/migrate.c:2137) + [ 7831.898664] damon_migrate_folio_list (mm/damon/ops-common.c:321 mm/damon/ops-common.c:354) + [ 7831.899140] damon_migrate_pages (mm/damon/ops-common.c:405) + [...] + +Add a target node validity check in damon_migrate_pages(). The validity +check is stolen from that of do_pages_move(), which is being used for the +move_pages() system call. + +Link: https://lkml.kernel.org/r/20250720185822.1451-1-sj@kernel.org +Fixes: b51820ebea65 ("mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion") [6.11.x] +Signed-off-by: SeongJae Park +Reviewed-by: Joshua Hahn +Cc: Honggyu Kim +Cc: Hyeongtak Ji +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/paddr.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/damon/paddr.c ++++ b/mm/damon/paddr.c +@@ -476,6 +476,10 @@ static unsigned long damon_pa_migrate_pa + if (list_empty(folio_list)) + return nr_migrated; + ++ if (target_nid < 0 || target_nid >= MAX_NUMNODES || ++ !node_state(target_nid, N_MEMORY)) ++ return nr_migrated; ++ + noreclaim_flag = memalloc_noreclaim_save(); + + nid = folio_nid(lru_to_folio(folio_list)); diff --git a/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch b/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch index aa11107dd6..4ddbae4f64 100644 --- a/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch +++ b/queue-6.16/selftests-mptcp-sockopt-fix-c23-extension-warning.patch @@ -27,15 +27,13 @@ Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-8-521fe Signed-off-by: Jakub Kicinski Signed-off-by: Greg 
Kroah-Hartman --- - tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- - tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- + tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- + tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) -diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c -index 3cf1e2a612ce..f3bcaa48df8f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c -@@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, +@@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *nod struct addrinfo *hints, struct addrinfo **res) { @@ -48,11 +46,9 @@ index 3cf1e2a612ce..f3bcaa48df8f 100644 if (err) { const char *errstr; -diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c -index 9934a68df237..e934dd26a59d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c -@@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, +@@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *nod struct addrinfo *hints, struct addrinfo **res) { @@ -65,6 +61,3 @@ index 9934a68df237..e934dd26a59d 100644 if (err) { const char *errstr; --- -2.50.1 - diff --git a/queue-6.16/series b/queue-6.16/series index 63bac638a8..88d22180e6 100644 --- a/queue-6.16/series +++ b/queue-6.16/series @@ -208,3 +208,21 @@ mptcp-disable-add_addr-retransmission-when-timeout-is-0.patch selftests-mptcp-pm-check-flush-doesn-t-reset-limits.patch selftests-mptcp-connect-fix-c23-extension-warning.patch selftests-mptcp-sockopt-fix-c23-extension-warning.patch +mm-damon-ops-common-ignore-migration-request-to-invalid-nodes.patch +btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch 
+btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch +btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch +crypto-zstd-convert-to-acomp.patch +crypto-acomp-fix-cfi-failure-due-to-type-punning.patch +btrfs-reorganize-logic-at-free_extent_buffer-for-better-readability.patch +btrfs-add-comment-for-optimization-in-free_extent_buffer.patch +btrfs-use-refcount_t-type-for-the-extent-buffer-reference-counter.patch +btrfs-fix-subpage-deadlock-in-try_release_subpage_extent_buffer.patch +btrfs-add-comments-on-the-extra-btrfs-specific-subpage-bitmaps.patch +btrfs-rename-btrfs_subpage-structure.patch +btrfs-subpage-keep-towrite-tag-until-folio-is-cleaned.patch +xfs-decouple-xfs_trans_alloc_empty-from-xfs_trans_alloc.patch +xfs-return-the-allocated-transaction-from-xfs_trans_alloc_empty.patch +xfs-improve-the-comments-in-xfs_select_zone_nowait.patch +xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch +xfs-remove-unused-label-in-xfs_dax_notify_dev_failure.patch diff --git a/queue-6.16/xfs-decouple-xfs_trans_alloc_empty-from-xfs_trans_alloc.patch b/queue-6.16/xfs-decouple-xfs_trans_alloc_empty-from-xfs_trans_alloc.patch new file mode 100644 index 0000000000..9f83206c11 --- /dev/null +++ b/queue-6.16/xfs-decouple-xfs_trans_alloc_empty-from-xfs_trans_alloc.patch @@ -0,0 +1,109 @@ +From stable+bounces-171702-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:11 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:19:55 -0400 +Subject: xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc +To: stable@vger.kernel.org +Cc: Christoph Hellwig , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20250819011959.244870-1-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit 83a80e95e797a2a6d14bf7983e5e6eecf8f5facb ] + +xfs_trans_alloc_empty only shares the very basic transaction structure +allocation and initialization with xfs_trans_alloc. 
+ +Split out a new __xfs_trans_alloc helper for that and otherwise decouple +xfs_trans_alloc_empty from xfs_trans_alloc. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Stable-dep-of: d2845519b072 ("xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_trans.c | 52 ++++++++++++++++++++++++++++------------------------ + 1 file changed, 28 insertions(+), 24 deletions(-) + +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -241,6 +241,28 @@ undo_blocks: + return error; + } + ++static struct xfs_trans * ++__xfs_trans_alloc( ++ struct xfs_mount *mp, ++ uint flags) ++{ ++ struct xfs_trans *tp; ++ ++ ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || xfs_has_lazysbcount(mp)); ++ ++ tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); ++ if (!(flags & XFS_TRANS_NO_WRITECOUNT)) ++ sb_start_intwrite(mp->m_super); ++ xfs_trans_set_context(tp); ++ tp->t_flags = flags; ++ tp->t_mountp = mp; ++ INIT_LIST_HEAD(&tp->t_items); ++ INIT_LIST_HEAD(&tp->t_busy); ++ INIT_LIST_HEAD(&tp->t_dfops); ++ tp->t_highest_agno = NULLAGNUMBER; ++ return tp; ++} ++ + int + xfs_trans_alloc( + struct xfs_mount *mp, +@@ -254,33 +276,16 @@ xfs_trans_alloc( + bool want_retry = true; + int error; + ++ ASSERT(resp->tr_logres > 0); ++ + /* + * Allocate the handle before we do our freeze accounting and setting up + * GFP_NOFS allocation context so that we avoid lockdep false positives + * by doing GFP_KERNEL allocations inside sb_start_intwrite(). + */ + retry: +- tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); +- if (!(flags & XFS_TRANS_NO_WRITECOUNT)) +- sb_start_intwrite(mp->m_super); +- xfs_trans_set_context(tp); +- +- /* +- * Zero-reservation ("empty") transactions can't modify anything, so +- * they're allowed to run while we're frozen. 
+- */ +- WARN_ON(resp->tr_logres > 0 && +- mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); +- ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) || +- xfs_has_lazysbcount(mp)); +- +- tp->t_flags = flags; +- tp->t_mountp = mp; +- INIT_LIST_HEAD(&tp->t_items); +- INIT_LIST_HEAD(&tp->t_busy); +- INIT_LIST_HEAD(&tp->t_dfops); +- tp->t_highest_agno = NULLAGNUMBER; +- ++ WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); ++ tp = __xfs_trans_alloc(mp, flags); + error = xfs_trans_reserve(tp, resp, blocks, rtextents); + if (error == -ENOSPC && want_retry) { + xfs_trans_cancel(tp); +@@ -329,9 +334,8 @@ xfs_trans_alloc_empty( + struct xfs_mount *mp, + struct xfs_trans **tpp) + { +- struct xfs_trans_res resv = {0}; +- +- return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp); ++ *tpp = __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT); ++ return 0; + } + + /* diff --git a/queue-6.16/xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch b/queue-6.16/xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch new file mode 100644 index 0000000000..7d24183eec --- /dev/null +++ b/queue-6.16/xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch @@ -0,0 +1,57 @@ +From stable+bounces-171705-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:18 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:19:58 -0400 +Subject: xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags +To: stable@vger.kernel.org +Cc: Christoph Hellwig , cen zhang , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20250819011959.244870-4-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit d2845519b0723c5d5a0266cbf410495f9b8fd65c ] + +Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags +argument to xfs_inobt_walk, which expects the XFS_IWALK* flags. 
+ +Currently passing the wrong flags works for non-debug builds because +the only XFS_IWALK* flag has the same encoding as the corresponding +XFS_IBULK* flag, but in debug builds it can trigger an assert that no +incorrect flag is passed. Instead just extra the relevant flag. + +Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags") +Cc: # v5.19 +Reported-by: cen zhang +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_itable.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_itable.c ++++ b/fs/xfs/xfs_itable.c +@@ -447,17 +447,21 @@ xfs_inumbers( + .breq = breq, + }; + struct xfs_trans *tp; ++ unsigned int iwalk_flags = 0; + int error = 0; + + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) + return 0; + ++ if (breq->flags & XFS_IBULK_SAME_AG) ++ iwalk_flags |= XFS_IWALK_SAME_AG; ++ + /* + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ + tp = xfs_trans_alloc_empty(breq->mp); +- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, ++ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags, + xfs_inumbers_walk, breq->icount, &ic); + xfs_trans_cancel(tp); + diff --git a/queue-6.16/xfs-improve-the-comments-in-xfs_select_zone_nowait.patch b/queue-6.16/xfs-improve-the-comments-in-xfs_select_zone_nowait.patch new file mode 100644 index 0000000000..b614c12405 --- /dev/null +++ b/queue-6.16/xfs-improve-the-comments-in-xfs_select_zone_nowait.patch @@ -0,0 +1,52 @@ +From stable+bounces-171704-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:11 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:19:57 -0400 +Subject: xfs: improve the comments in xfs_select_zone_nowait +To: stable@vger.kernel.org +Cc: Christoph Hellwig , "Darrick J. 
Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20250819011959.244870-3-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit 60e02f956d77af31b85ed4e73abf85d5f12d0a98 ] + +The top of the function comment is outdated, and the parts still correct +duplicate information in comment inside the function. Remove the top of +the function comment and instead improve a comment inside the function. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Stable-dep-of: d2845519b072 ("xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_zone_alloc.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +--- a/fs/xfs/xfs_zone_alloc.c ++++ b/fs/xfs/xfs_zone_alloc.c +@@ -654,13 +654,6 @@ static inline bool xfs_zoned_pack_tight( + !(ip->i_diflags & XFS_DIFLAG_APPEND); + } + +-/* +- * Pick a new zone for writes. +- * +- * If we aren't using up our budget of open zones just open a new one from the +- * freelist. Else try to find one that matches the expected data lifetime. If +- * we don't find one that is good pick any zone that is available. +- */ + static struct xfs_open_zone * + xfs_select_zone_nowait( + struct xfs_mount *mp, +@@ -688,7 +681,8 @@ xfs_select_zone_nowait( + goto out_unlock; + + /* +- * See if we can open a new zone and use that. ++ * See if we can open a new zone and use that so that data for different ++ * files is mixed as little as possible. 
+ */ + oz = xfs_try_open_zone(mp, write_hint); + if (oz) diff --git a/queue-6.16/xfs-remove-unused-label-in-xfs_dax_notify_dev_failure.patch b/queue-6.16/xfs-remove-unused-label-in-xfs_dax_notify_dev_failure.patch new file mode 100644 index 0000000000..af76f9f92a --- /dev/null +++ b/queue-6.16/xfs-remove-unused-label-in-xfs_dax_notify_dev_failure.patch @@ -0,0 +1,32 @@ +From stable+bounces-171706-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:16 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:19:59 -0400 +Subject: xfs: Remove unused label in xfs_dax_notify_dev_failure +To: stable@vger.kernel.org +Cc: Alan Huang , Christoph Hellwig , Carlos Maiolino , Sasha Levin +Message-ID: <20250819011959.244870-5-sashal@kernel.org> + +From: Alan Huang + +[ Upstream commit 8c10b04f9fc1760cb79068073686d8866e59d40f ] + +Fixes: e967dc40d501 ("xfs: return the allocated transaction from xfs_trans_alloc_empty") +Signed-off-by: Alan Huang +Reviewed-by: Christoph Hellwig +Signed-off-by: Carlos Maiolino +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_notify_failure.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/xfs/xfs_notify_failure.c ++++ b/fs/xfs/xfs_notify_failure.c +@@ -350,7 +350,6 @@ xfs_dax_notify_dev_failure( + error = -EFSCORRUPTED; + } + +-out: + /* Thaw the fs if it has been frozen before. 
*/ + if (mf_flags & MF_MEM_PRE_REMOVE) + xfs_dax_notify_failure_thaw(mp, kernel_frozen); diff --git a/queue-6.16/xfs-return-the-allocated-transaction-from-xfs_trans_alloc_empty.patch b/queue-6.16/xfs-return-the-allocated-transaction-from-xfs_trans_alloc_empty.patch new file mode 100644 index 0000000000..7ca0e49b30 --- /dev/null +++ b/queue-6.16/xfs-return-the-allocated-transaction-from-xfs_trans_alloc_empty.patch @@ -0,0 +1,397 @@ +From stable+bounces-171703-greg=kroah.com@vger.kernel.org Tue Aug 19 03:20:09 2025 +From: Sasha Levin +Date: Mon, 18 Aug 2025 21:19:56 -0400 +Subject: xfs: return the allocated transaction from xfs_trans_alloc_empty +To: stable@vger.kernel.org +Cc: Christoph Hellwig , "Darrick J. Wong" , Carlos Maiolino , Sasha Levin +Message-ID: <20250819011959.244870-2-sashal@kernel.org> + +From: Christoph Hellwig + +[ Upstream commit d8e1ea43e5a314bc01ec059ce93396639dcf9112 ] + +xfs_trans_alloc_empty can't return errors, so return the allocated +transaction directly instead of an output double pointer argument. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Carlos Maiolino +Stable-dep-of: d2845519b072 ("xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_refcount.c | 4 +--- + fs/xfs/scrub/common.c | 3 ++- + fs/xfs/scrub/repair.c | 12 ++---------- + fs/xfs/scrub/scrub.c | 5 +---- + fs/xfs/xfs_attr_item.c | 5 +---- + fs/xfs/xfs_discard.c | 12 +++--------- + fs/xfs/xfs_fsmap.c | 4 +--- + fs/xfs/xfs_icache.c | 5 +---- + fs/xfs/xfs_inode.c | 7 ++----- + fs/xfs/xfs_itable.c | 18 +++--------------- + fs/xfs/xfs_iwalk.c | 11 +++-------- + fs/xfs/xfs_notify_failure.c | 5 +---- + fs/xfs/xfs_qm.c | 10 ++-------- + fs/xfs/xfs_rtalloc.c | 13 +++---------- + fs/xfs/xfs_trans.c | 8 +++----- + fs/xfs/xfs_trans.h | 3 +-- + fs/xfs/xfs_zone_gc.c | 5 +---- + 17 files changed, 31 insertions(+), 99 deletions(-) + +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -2099,9 +2099,7 @@ xfs_refcount_recover_cow_leftovers( + * recording the CoW debris we cancel the (empty) transaction + * and everything goes away cleanly. 
+ */ +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; ++ tp = xfs_trans_alloc_empty(mp); + + if (isrt) { + xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT); +--- a/fs/xfs/scrub/common.c ++++ b/fs/xfs/scrub/common.c +@@ -870,7 +870,8 @@ int + xchk_trans_alloc_empty( + struct xfs_scrub *sc) + { +- return xfs_trans_alloc_empty(sc->mp, &sc->tp); ++ sc->tp = xfs_trans_alloc_empty(sc->mp); ++ return 0; + } + + /* +--- a/fs/xfs/scrub/repair.c ++++ b/fs/xfs/scrub/repair.c +@@ -1279,18 +1279,10 @@ xrep_trans_alloc_hook_dummy( + void **cookiep, + struct xfs_trans **tpp) + { +- int error; +- + *cookiep = current->journal_info; + current->journal_info = NULL; +- +- error = xfs_trans_alloc_empty(mp, tpp); +- if (!error) +- return 0; +- +- current->journal_info = *cookiep; +- *cookiep = NULL; +- return error; ++ *tpp = xfs_trans_alloc_empty(mp); ++ return 0; + } + + /* Cancel a dummy transaction used by a live update hook function. */ +--- a/fs/xfs/scrub/scrub.c ++++ b/fs/xfs/scrub/scrub.c +@@ -876,10 +876,7 @@ xchk_scrubv_open_by_handle( + struct xfs_inode *ip; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return NULL; +- ++ tp = xfs_trans_alloc_empty(mp); + error = xfs_iget(mp, tp, head->svh_ino, XCHK_IGET_FLAGS, 0, &ip); + xfs_trans_cancel(tp); + if (error) +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -616,10 +616,7 @@ xfs_attri_iread_extents( + struct xfs_trans *tp; + int error; + +- error = xfs_trans_alloc_empty(ip->i_mount, &tp); +- if (error) +- return error; +- ++ tp = xfs_trans_alloc_empty(ip->i_mount); + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +--- a/fs/xfs/xfs_discard.c ++++ b/fs/xfs/xfs_discard.c +@@ -189,9 +189,7 @@ xfs_trim_gather_extents( + */ + xfs_log_force(mp, XFS_LOG_SYNC); + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; ++ tp = xfs_trans_alloc_empty(mp); + + error = 
xfs_alloc_read_agf(pag, tp, 0, &agbp); + if (error) +@@ -583,9 +581,7 @@ xfs_trim_rtextents( + struct xfs_trans *tp; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; ++ tp = xfs_trans_alloc_empty(mp); + + /* + * Walk the free ranges between low and high. The query_range function +@@ -701,9 +697,7 @@ xfs_trim_rtgroup_extents( + struct xfs_trans *tp; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; ++ tp = xfs_trans_alloc_empty(mp); + + /* + * Walk the free ranges between low and high. The query_range function +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -1270,9 +1270,7 @@ xfs_getfsmap( + * buffer locking abilities to detect cycles in the rmapbt + * without deadlocking. + */ +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- break; ++ tp = xfs_trans_alloc_empty(mp); + + info.dev = handlers[i].dev; + info.last = false; +--- a/fs/xfs/xfs_icache.c ++++ b/fs/xfs/xfs_icache.c +@@ -893,10 +893,7 @@ xfs_metafile_iget( + struct xfs_trans *tp; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; +- ++ tp = xfs_trans_alloc_empty(mp); + error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); + xfs_trans_cancel(tp); + return error; +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2932,12 +2932,9 @@ xfs_inode_reload_unlinked( + struct xfs_inode *ip) + { + struct xfs_trans *tp; +- int error; +- +- error = xfs_trans_alloc_empty(ip->i_mount, &tp); +- if (error) +- return error; ++ int error = 0; + ++ tp = xfs_trans_alloc_empty(ip->i_mount); + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_inode_unlinked_incomplete(ip)) + error = xfs_inode_reload_unlinked_bucket(tp, ip); +--- a/fs/xfs/xfs_itable.c ++++ b/fs/xfs/xfs_itable.c +@@ -239,14 +239,10 @@ xfs_bulkstat_one( + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. 
+ */ +- error = xfs_trans_alloc_empty(breq->mp, &tp); +- if (error) +- goto out; +- ++ tp = xfs_trans_alloc_empty(breq->mp); + error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp, + breq->startino, &bc); + xfs_trans_cancel(tp); +-out: + kfree(bc.buf); + + /* +@@ -331,17 +327,13 @@ xfs_bulkstat( + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ +- error = xfs_trans_alloc_empty(breq->mp, &tp); +- if (error) +- goto out; +- ++ tp = xfs_trans_alloc_empty(breq->mp); + if (breq->flags & XFS_IBULK_SAME_AG) + iwalk_flags |= XFS_IWALK_SAME_AG; + + error = xfs_iwalk(breq->mp, tp, breq->startino, iwalk_flags, + xfs_bulkstat_iwalk, breq->icount, &bc); + xfs_trans_cancel(tp); +-out: + kfree(bc.buf); + + /* +@@ -464,14 +456,10 @@ xfs_inumbers( + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. + */ +- error = xfs_trans_alloc_empty(breq->mp, &tp); +- if (error) +- goto out; +- ++ tp = xfs_trans_alloc_empty(breq->mp); + error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags, + xfs_inumbers_walk, breq->icount, &ic); + xfs_trans_cancel(tp); +-out: + + /* + * We found some inode groups, so clear the error status and return +--- a/fs/xfs/xfs_iwalk.c ++++ b/fs/xfs/xfs_iwalk.c +@@ -377,11 +377,8 @@ xfs_iwalk_run_callbacks( + if (!has_more) + return 0; + +- if (iwag->drop_trans) { +- error = xfs_trans_alloc_empty(mp, &iwag->tp); +- if (error) +- return error; +- } ++ if (iwag->drop_trans) ++ iwag->tp = xfs_trans_alloc_empty(mp); + + /* ...and recreate the cursor just past where we left off. */ + error = xfs_ialloc_read_agi(iwag->pag, iwag->tp, 0, agi_bpp); +@@ -617,9 +614,7 @@ xfs_iwalk_ag_work( + * Grab an empty transaction so that we can use its recursive buffer + * locking abilities to detect cycles in the inobt without deadlocking. 
+ */ +- error = xfs_trans_alloc_empty(mp, &iwag->tp); +- if (error) +- goto out; ++ iwag->tp = xfs_trans_alloc_empty(mp); + iwag->drop_trans = 1; + + error = xfs_iwalk_ag(iwag); +--- a/fs/xfs/xfs_notify_failure.c ++++ b/fs/xfs/xfs_notify_failure.c +@@ -279,10 +279,7 @@ xfs_dax_notify_dev_failure( + kernel_frozen = xfs_dax_notify_failure_freeze(mp) == 0; + } + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- goto out; +- ++ tp = xfs_trans_alloc_empty(mp); + start_gno = xfs_fsb_to_gno(mp, start_bno, type); + end_gno = xfs_fsb_to_gno(mp, end_bno, type); + while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) { +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -660,10 +660,7 @@ xfs_qm_load_metadir_qinos( + struct xfs_trans *tp; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; +- ++ tp = xfs_trans_alloc_empty(mp); + error = xfs_dqinode_load_parent(tp, &qi->qi_dirip); + if (error == -ENOENT) { + /* no quota dir directory, but we'll create one later */ +@@ -1755,10 +1752,7 @@ xfs_qm_qino_load( + struct xfs_inode *dp = NULL; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; +- ++ tp = xfs_trans_alloc_empty(mp); + if (xfs_has_metadir(mp)) { + error = xfs_dqinode_load_parent(tp, &dp); + if (error) +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -729,9 +729,7 @@ xfs_rtginode_ensure( + if (rtg->rtg_inodes[type]) + return 0; + +- error = xfs_trans_alloc_empty(rtg_mount(rtg), &tp); +- if (error) +- return error; ++ tp = xfs_trans_alloc_empty(rtg_mount(rtg)); + error = xfs_rtginode_load(rtg, type, tp); + xfs_trans_cancel(tp); + +@@ -1305,9 +1303,7 @@ xfs_growfs_rt_prep_groups( + if (!mp->m_rtdirip) { + struct xfs_trans *tp; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; ++ tp = xfs_trans_alloc_empty(mp); + error = xfs_rtginode_load_parent(tp); + xfs_trans_cancel(tp); + +@@ -1674,10 +1670,7 @@ xfs_rtmount_inodes( + struct xfs_rtgroup 
*rtg = NULL; + int error; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; +- ++ tp = xfs_trans_alloc_empty(mp); + if (xfs_has_rtgroups(mp) && mp->m_sb.sb_rgcount > 0) { + error = xfs_rtginode_load_parent(tp); + if (error) +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -329,13 +329,11 @@ retry: + * where we can be grabbing buffers at the same time that freeze is trying to + * drain the buffer LRU list. + */ +-int ++struct xfs_trans * + xfs_trans_alloc_empty( +- struct xfs_mount *mp, +- struct xfs_trans **tpp) ++ struct xfs_mount *mp) + { +- *tpp = __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT); +- return 0; ++ return __xfs_trans_alloc(mp, XFS_TRANS_NO_WRITECOUNT); + } + + /* +--- a/fs/xfs/xfs_trans.h ++++ b/fs/xfs/xfs_trans.h +@@ -168,8 +168,7 @@ int xfs_trans_alloc(struct xfs_mount *m + struct xfs_trans **tpp); + int xfs_trans_reserve_more(struct xfs_trans *tp, + unsigned int blocks, unsigned int rtextents); +-int xfs_trans_alloc_empty(struct xfs_mount *mp, +- struct xfs_trans **tpp); ++struct xfs_trans *xfs_trans_alloc_empty(struct xfs_mount *mp); + void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); + + int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target, +--- a/fs/xfs/xfs_zone_gc.c ++++ b/fs/xfs/xfs_zone_gc.c +@@ -328,10 +328,7 @@ xfs_zone_gc_query( + iter->rec_idx = 0; + iter->rec_count = 0; + +- error = xfs_trans_alloc_empty(mp, &tp); +- if (error) +- return error; +- ++ tp = xfs_trans_alloc_empty(mp); + xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); + cur = xfs_rtrmapbt_init_cursor(tp, rtg); + error = xfs_rmap_query_range(cur, &ri_low, &ri_high,