--- /dev/null
+From stable+bounces-169830-greg=kroah.com@vger.kernel.org Sat Aug 16 00:08:14 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 18:07:59 -0400
+Subject: block: Make REQ_OP_ZONE_FINISH a write operation
+To: stable@vger.kernel.org
+Cc: Damien Le Moal <dlemoal@kernel.org>, Bart Van Assche <bvanassche@acm.org>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250815220759.248365-2-sashal@kernel.org>
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+[ Upstream commit 3f66ccbaaef3a0c5bd844eab04e3207b4061c546 ]
+
+REQ_OP_ZONE_FINISH is defined as "12", which makes
+op_is_write(REQ_OP_ZONE_FINISH) return false, despite the fact that a
+zone finish operation modifies a zone (transitioning it to full) and so
+should be considered a write operation (albeit one that does not
+transfer any data to the device).
+
+Fix this by redefining REQ_OP_ZONE_FINISH to be an odd number (13), and
+redefining REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL using sequential
+odd numbers from that new value.
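+
+For reference, op_is_write() keys off the lowest bit of the opcode,
+which is why write-type operations must use odd values. The helper in
+include/linux/blk_types.h is roughly:
+
+	static inline bool op_is_write(blk_opf_t op)
+	{
+		/* odd opcodes are writes, even opcodes are reads */
+		return !!(op & 1);
+	}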
+
+Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20250625093327.548866-2-dlemoal@kernel.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/blk_types.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/linux/blk_types.h
++++ b/include/linux/blk_types.h
+@@ -388,11 +388,11 @@ enum req_op {
+ /* Close a zone */
+ REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11,
+ /* Transition a zone to full */
+- REQ_OP_ZONE_FINISH = (__force blk_opf_t)12,
++ REQ_OP_ZONE_FINISH = (__force blk_opf_t)13,
+ /* reset a zone write pointer */
+- REQ_OP_ZONE_RESET = (__force blk_opf_t)13,
++ REQ_OP_ZONE_RESET = (__force blk_opf_t)15,
+ /* reset all the zone present on the device */
+- REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15,
++ REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17,
+
+ /* Driver private requests */
+ REQ_OP_DRV_IN = (__force blk_opf_t)34,
--- /dev/null
+From stable+bounces-169829-greg=kroah.com@vger.kernel.org Sat Aug 16 00:08:09 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 18:07:58 -0400
+Subject: block: reject invalid operation in submit_bio_noacct
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, Jens Axboe <axboe@kernel.dk>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250815220759.248365-1-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 1c042f8d4bc342b7985b1de3d76836f1a1083b65 ]
+
+submit_bio_noacct allows completely invalid operations, or operations
+that are not supported in the bio path. Extend the existing switch
+statement to reject all invalid types.
+
+Move the code point for REQ_OP_ZONE_APPEND so that it's not right in the
+middle of the zone management operations and the switch statement can
+follow the numerical order of the operations.
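+
+For context, the switch dispatches on bio_op(bio), which masks the
+opcode out of the combined operation and flags field bi_opf, roughly:
+
+	#define REQ_OP_MASK	((__force blk_opf_t)((1 << REQ_OP_BITS) - 1))
+	#define bio_op(bio)	((bio)->bi_opf & REQ_OP_MASK)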
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20231221070538.1112446-1-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Stable-dep-of: 3f66ccbaaef3 ("block: Make REQ_OP_ZONE_FINISH a write operation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-core.c | 26 +++++++++++++++++++++-----
+ include/linux/blk_types.h | 8 ++++----
+ 2 files changed, 25 insertions(+), 9 deletions(-)
+
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -775,6 +775,15 @@ void submit_bio_noacct(struct bio *bio)
+ bio_clear_polled(bio);
+
+ switch (bio_op(bio)) {
++ case REQ_OP_READ:
++ case REQ_OP_WRITE:
++ break;
++ case REQ_OP_FLUSH:
++ /*
++ * REQ_OP_FLUSH can't be submitted through bios, it is only
++ * synthesized in struct request by the flush state machine.
++ */
++ goto not_supported;
+ case REQ_OP_DISCARD:
+ if (!bdev_max_discard_sectors(bdev))
+ goto not_supported;
+@@ -788,6 +797,10 @@ void submit_bio_noacct(struct bio *bio)
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
++ case REQ_OP_WRITE_ZEROES:
++ if (!q->limits.max_write_zeroes_sectors)
++ goto not_supported;
++ break;
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+@@ -799,12 +812,15 @@ void submit_bio_noacct(struct bio *bio)
+ if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+ goto not_supported;
+ break;
+- case REQ_OP_WRITE_ZEROES:
+- if (!q->limits.max_write_zeroes_sectors)
+- goto not_supported;
+- break;
++ case REQ_OP_DRV_IN:
++ case REQ_OP_DRV_OUT:
++ /*
++ * Driver private operations are only used with passthrough
++ * requests.
++ */
++ fallthrough;
+ default:
+- break;
++ goto not_supported;
+ }
+
+ if (blk_throtl_bio(bio))
+--- a/include/linux/blk_types.h
++++ b/include/linux/blk_types.h
+@@ -379,6 +379,8 @@ enum req_op {
+ REQ_OP_DISCARD = (__force blk_opf_t)3,
+ /* securely erase sectors */
+ REQ_OP_SECURE_ERASE = (__force blk_opf_t)5,
++ /* write data at the current zone write pointer */
++ REQ_OP_ZONE_APPEND = (__force blk_opf_t)7,
+ /* write the zero filled sector many times */
+ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9,
+ /* Open a zone */
+@@ -387,12 +389,10 @@ enum req_op {
+ REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11,
+ /* Transition a zone to full */
+ REQ_OP_ZONE_FINISH = (__force blk_opf_t)12,
+- /* write data at the current zone write pointer */
+- REQ_OP_ZONE_APPEND = (__force blk_opf_t)13,
+ /* reset a zone write pointer */
+- REQ_OP_ZONE_RESET = (__force blk_opf_t)15,
++ REQ_OP_ZONE_RESET = (__force blk_opf_t)13,
+ /* reset all the zone present on the device */
+- REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17,
++ REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15,
+
+ /* Driver private requests */
+ REQ_OP_DRV_IN = (__force blk_opf_t)34,
--- /dev/null
+From stable+bounces-171697-greg=kroah.com@vger.kernel.org Tue Aug 19 03:15:28 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:15:16 -0400
+Subject: btrfs: abort transaction on unexpected eb generation at btrfs_copy_root()
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Daniel Vacek <neelx@suse.com>, Qu Wenruo <wqu@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819011516.242515-1-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 33e8f24b52d2796b8cfb28c19a1a7dd6476323a8 ]
+
+If we find an unexpected generation for the extent buffer we are cloning
+at btrfs_copy_root(), we just WARN_ON() without erroring out and aborting
+the transaction, meaning we allow metadata with an unexpected generation
+to be persisted. Instead of warning only, abort the transaction and
+return -EUCLEAN.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Daniel Vacek <neelx@suse.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -347,7 +347,14 @@ int btrfs_copy_root(struct btrfs_trans_h
+
+ write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
+
+- WARN_ON(btrfs_header_generation(buf) > trans->transid);
++ if (unlikely(btrfs_header_generation(buf) > trans->transid)) {
++ btrfs_tree_unlock(cow);
++ free_extent_buffer(cow);
++ ret = -EUCLEAN;
++ btrfs_abort_transaction(trans, ret);
++ return ret;
++ }
++
+ if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+ ret = btrfs_inc_ref(trans, root, cow, 1);
+ else
--- /dev/null
+From stable+bounces-171694-greg=kroah.com@vger.kernel.org Tue Aug 19 02:58:08 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 20:57:51 -0400
+Subject: btrfs: always abort transaction on failure to add block group to free space tree
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819005751.234544-2-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 1f06c942aa709d397cf6bed577a0d10a61509667 ]
+
+Only one of the callers of __add_block_group_free_space() aborts the
+transaction if the call fails, while the others don't, and the abort is
+either never done up the call chain or done much higher in it.
+
+So make sure we abort the transaction at __add_block_group_free_space()
+if it fails, which brings a couple of benefits:
+
+1) If some call chain never aborts the transaction, we avoid having some
+ metadata inconsistency because BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE is
+ cleared when we enter __add_block_group_free_space() and therefore
+ __add_block_group_free_space() is never called again to add the block
+ group items to the free space tree, since the function is only called
+ when that flag is set in a block group;
+
+2) If the call chain already aborts the transaction, then we get a better
+ trace that points to the exact step from __add_block_group_free_space()
+ which failed, which is better for analysis.
+
+So abort the transaction at __add_block_group_free_space() if any of its
+steps fails.
+
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-tree.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -1371,12 +1371,17 @@ static int __add_block_group_free_space(
+ clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
+
+ ret = add_new_free_space_info(trans, block_group, path);
+- if (ret)
++ if (ret) {
++ btrfs_abort_transaction(trans, ret);
+ return ret;
++ }
+
+- return __add_to_free_space_tree(trans, block_group, path,
+- block_group->start,
+- block_group->length);
++ ret = __add_to_free_space_tree(trans, block_group, path,
++ block_group->start, block_group->length);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
++
++ return ret;
+ }
+
+ int add_block_group_free_space(struct btrfs_trans_handle *trans,
+@@ -1401,9 +1406,6 @@ int add_block_group_free_space(struct bt
+ }
+
+ ret = __add_block_group_free_space(trans, block_group, path);
+- if (ret)
+- btrfs_abort_transaction(trans, ret);
+-
+ out:
+ btrfs_free_path(path);
+ mutex_unlock(&block_group->free_space_lock);
--- /dev/null
+From stable+bounces-171727-greg=kroah.com@vger.kernel.org Tue Aug 19 04:28:05 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:27:53 -0400
+Subject: btrfs: constify more pointer parameters
+To: stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819022753.281349-2-sashal@kernel.org>
+
+From: David Sterba <dsterba@suse.com>
+
+[ Upstream commit ca283ea9920ac20ae23ed398b693db3121045019 ]
+
+Continue adding const to parameters. This is for clarity and a minor
+addition to safety. There are some minor effects in the assembly code
+and the resulting .ko, measured on a release config.
+
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/backref.c | 6 +++---
+ fs/btrfs/block-group.c | 34 +++++++++++++++++-----------------
+ fs/btrfs/block-group.h | 11 +++++------
+ fs/btrfs/block-rsv.c | 2 +-
+ fs/btrfs/block-rsv.h | 2 +-
+ fs/btrfs/ctree.c | 14 +++++++-------
+ fs/btrfs/ctree.h | 6 +++---
+ fs/btrfs/discard.c | 4 ++--
+ fs/btrfs/file-item.c | 4 ++--
+ fs/btrfs/file-item.h | 2 +-
+ fs/btrfs/inode-item.c | 10 +++++-----
+ fs/btrfs/inode-item.h | 4 ++--
+ fs/btrfs/space-info.c | 17 ++++++++---------
+ fs/btrfs/space-info.h | 6 +++---
+ fs/btrfs/tree-mod-log.c | 14 +++++++-------
+ fs/btrfs/tree-mod-log.h | 6 +++---
+ fs/btrfs/zoned.c | 2 +-
+ fs/btrfs/zoned.h | 4 ++--
+ include/trace/events/btrfs.h | 6 +++---
+ 19 files changed, 76 insertions(+), 78 deletions(-)
+
+--- a/fs/btrfs/backref.c
++++ b/fs/btrfs/backref.c
+@@ -222,8 +222,8 @@ static void free_pref(struct prelim_ref
+ * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
+ * indicates a 'higher' block.
+ */
+-static int prelim_ref_compare(struct prelim_ref *ref1,
+- struct prelim_ref *ref2)
++static int prelim_ref_compare(const struct prelim_ref *ref1,
++ const struct prelim_ref *ref2)
+ {
+ if (ref1->level < ref2->level)
+ return -1;
+@@ -254,7 +254,7 @@ static int prelim_ref_compare(struct pre
+ }
+
+ static void update_share_count(struct share_check *sc, int oldcount,
+- int newcount, struct prelim_ref *newref)
++ int newcount, const struct prelim_ref *newref)
+ {
+ if ((!sc) || (oldcount == 0 && newcount < 1))
+ return;
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -23,7 +23,7 @@
+ #include "extent-tree.h"
+
+ #ifdef CONFIG_BTRFS_DEBUG
+-int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group)
++int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group)
+ {
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
+
+@@ -53,9 +53,9 @@ static inline bool has_unwritten_metadat
+ *
+ * Should be called with balance_lock held
+ */
+-static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
++static u64 get_restripe_target(const struct btrfs_fs_info *fs_info, u64 flags)
+ {
+- struct btrfs_balance_control *bctl = fs_info->balance_ctl;
++ const struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+ u64 target = 0;
+
+ if (!bctl)
+@@ -1440,9 +1440,9 @@ out:
+ }
+
+ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
+- struct btrfs_block_group *bg)
++ const struct btrfs_block_group *bg)
+ {
+- struct btrfs_fs_info *fs_info = bg->fs_info;
++ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_transaction *prev_trans = NULL;
+ const u64 start = bg->start;
+ const u64 end = start + bg->length - 1;
+@@ -1775,14 +1775,14 @@ static int reclaim_bgs_cmp(void *unused,
+ return bg1->used > bg2->used;
+ }
+
+-static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
++static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)
+ {
+ if (btrfs_is_zoned(fs_info))
+ return btrfs_zoned_should_reclaim(fs_info);
+ return true;
+ }
+
+-static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
++static bool should_reclaim_block_group(const struct btrfs_block_group *bg, u64 bytes_freed)
+ {
+ const struct btrfs_space_info *space_info = bg->space_info;
+ const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
+@@ -2014,8 +2014,8 @@ void btrfs_mark_bg_to_reclaim(struct btr
+ spin_unlock(&fs_info->unused_bgs_lock);
+ }
+
+-static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+- struct btrfs_path *path)
++static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key,
++ const struct btrfs_path *path)
+ {
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+@@ -2067,7 +2067,7 @@ out_free_em:
+
+ static int find_first_block_group(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path,
+- struct btrfs_key *key)
++ const struct btrfs_key *key)
+ {
+ struct btrfs_root *root = btrfs_block_group_root(fs_info);
+ int ret;
+@@ -2659,8 +2659,8 @@ static int insert_block_group_item(struc
+ }
+
+ static int insert_dev_extent(struct btrfs_trans_handle *trans,
+- struct btrfs_device *device, u64 chunk_offset,
+- u64 start, u64 num_bytes)
++ const struct btrfs_device *device, u64 chunk_offset,
++ u64 start, u64 num_bytes)
+ {
+ struct btrfs_fs_info *fs_info = device->fs_info;
+ struct btrfs_root *root = fs_info->dev_root;
+@@ -2810,7 +2810,7 @@ next:
+ * For extent tree v2 we use the block_group_item->chunk_offset to point at our
+ * global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
+ */
+-static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
++static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 offset)
+ {
+ u64 div = SZ_1G;
+ u64 index;
+@@ -3846,8 +3846,8 @@ static void force_metadata_allocation(st
+ }
+ }
+
+-static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *sinfo, int force)
++static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
++ const struct btrfs_space_info *sinfo, int force)
+ {
+ u64 bytes_used = btrfs_space_info_used(sinfo, false);
+ u64 thresh;
+@@ -4222,7 +4222,7 @@ out:
+ return ret;
+ }
+
+-static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
++static u64 get_profile_num_devs(const struct btrfs_fs_info *fs_info, u64 type)
+ {
+ u64 num_dev;
+
+@@ -4629,7 +4629,7 @@ int btrfs_use_block_group_size_class(str
+ return 0;
+ }
+
+-bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg)
++bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg)
+ {
+ if (btrfs_is_zoned(bg->fs_info))
+ return false;
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -250,7 +250,7 @@ struct btrfs_block_group {
+ enum btrfs_block_group_size_class size_class;
+ };
+
+-static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
++static inline u64 btrfs_block_group_end(const struct btrfs_block_group *block_group)
+ {
+ return (block_group->start + block_group->length);
+ }
+@@ -262,8 +262,7 @@ static inline bool btrfs_is_block_group_
+ return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0);
+ }
+
+-static inline bool btrfs_is_block_group_data_only(
+- struct btrfs_block_group *block_group)
++static inline bool btrfs_is_block_group_data_only(const struct btrfs_block_group *block_group)
+ {
+ /*
+ * In mixed mode the fragmentation is expected to be high, lowering the
+@@ -274,7 +273,7 @@ static inline bool btrfs_is_block_group_
+ }
+
+ #ifdef CONFIG_BTRFS_DEBUG
+-int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group);
++int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group);
+ #endif
+
+ struct btrfs_block_group *btrfs_lookup_first_block_group(
+@@ -355,7 +354,7 @@ static inline u64 btrfs_system_alloc_pro
+ return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ }
+
+-static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
++static inline int btrfs_block_group_done(const struct btrfs_block_group *cache)
+ {
+ smp_mb();
+ return cache->cached == BTRFS_CACHE_FINISHED ||
+@@ -372,6 +371,6 @@ enum btrfs_block_group_size_class btrfs_
+ int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
+ enum btrfs_block_group_size_class size_class,
+ bool force_wrong_size_class);
+-bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg);
++bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg);
+
+ #endif /* BTRFS_BLOCK_GROUP_H */
+--- a/fs/btrfs/block-rsv.c
++++ b/fs/btrfs/block-rsv.c
+@@ -547,7 +547,7 @@ try_reserve:
+ return ERR_PTR(ret);
+ }
+
+-int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
++int btrfs_check_trunc_cache_free_space(const struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv)
+ {
+ u64 needed_bytes;
+--- a/fs/btrfs/block-rsv.h
++++ b/fs/btrfs/block-rsv.h
+@@ -82,7 +82,7 @@ void btrfs_release_global_block_rsv(stru
+ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u32 blocksize);
+-int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
++int btrfs_check_trunc_cache_free_space(const struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *rsv);
+ static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_block_rsv *block_rsv,
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -2719,7 +2719,7 @@ int btrfs_get_next_valid_item(struct btr
+ *
+ */
+ static void fixup_low_keys(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path,
++ const struct btrfs_path *path,
+ struct btrfs_disk_key *key, int level)
+ {
+ int i;
+@@ -2749,7 +2749,7 @@ static void fixup_low_keys(struct btrfs_
+ * that the new key won't break the order
+ */
+ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path,
++ const struct btrfs_path *path,
+ const struct btrfs_key *new_key)
+ {
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+@@ -2815,8 +2815,8 @@ void btrfs_set_item_key_safe(struct btrf
+ * is correct, we only need to bother the last key of @left and the first
+ * key of @right.
+ */
+-static bool check_sibling_keys(struct extent_buffer *left,
+- struct extent_buffer *right)
++static bool check_sibling_keys(const struct extent_buffer *left,
++ const struct extent_buffer *right)
+ {
+ struct btrfs_key left_last;
+ struct btrfs_key right_first;
+@@ -3085,7 +3085,7 @@ static noinline int insert_new_root(stru
+ * blocknr is the block the key points to.
+ */
+ static int insert_ptr(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path,
++ const struct btrfs_path *path,
+ struct btrfs_disk_key *key, u64 bytenr,
+ int slot, int level)
+ {
+@@ -4176,7 +4176,7 @@ int btrfs_split_item(struct btrfs_trans_
+ * the front.
+ */
+ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path, u32 new_size, int from_end)
++ const struct btrfs_path *path, u32 new_size, int from_end)
+ {
+ int slot;
+ struct extent_buffer *leaf;
+@@ -4268,7 +4268,7 @@ void btrfs_truncate_item(struct btrfs_tr
+ * make the item pointed to by the path bigger, data_size is the added size.
+ */
+ void btrfs_extend_item(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path, u32 data_size)
++ const struct btrfs_path *path, u32 data_size)
+ {
+ int slot;
+ struct extent_buffer *leaf;
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -521,7 +521,7 @@ int btrfs_previous_item(struct btrfs_roo
+ int btrfs_previous_extent_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid);
+ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path,
++ const struct btrfs_path *path,
+ const struct btrfs_key *new_key);
+ struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
+ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
+@@ -555,9 +555,9 @@ int btrfs_block_can_be_shared(struct btr
+ int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot);
+ void btrfs_extend_item(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path, u32 data_size);
++ const struct btrfs_path *path, u32 data_size);
+ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
+- struct btrfs_path *path, u32 new_size, int from_end);
++ const struct btrfs_path *path, u32 new_size, int from_end);
+ int btrfs_split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+--- a/fs/btrfs/discard.c
++++ b/fs/btrfs/discard.c
+@@ -68,7 +68,7 @@ static int discard_minlen[BTRFS_NR_DISCA
+ };
+
+ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
+- struct btrfs_block_group *block_group)
++ const struct btrfs_block_group *block_group)
+ {
+ return &discard_ctl->discard_list[block_group->discard_index];
+ }
+@@ -80,7 +80,7 @@ static struct list_head *get_discard_lis
+ *
+ * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
+ */
+-static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
++static bool btrfs_run_discard_work(const struct btrfs_discard_ctl *discard_ctl)
+ {
+ struct btrfs_fs_info *fs_info = container_of(discard_ctl,
+ struct btrfs_fs_info,
+--- a/fs/btrfs/file-item.c
++++ b/fs/btrfs/file-item.c
+@@ -153,7 +153,7 @@ static inline u32 max_ordered_sum_bytes(
+ * Calculate the total size needed to allocate for an ordered sum structure
+ * spanning @bytes in the file.
+ */
+-static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
++static int btrfs_ordered_sum_size(const struct btrfs_fs_info *fs_info, unsigned long bytes)
+ {
+ return sizeof(struct btrfs_ordered_sum) + bytes_to_csum_size(fs_info, bytes);
+ }
+@@ -1263,7 +1263,7 @@ out:
+
+ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
+ const struct btrfs_path *path,
+- struct btrfs_file_extent_item *fi,
++ const struct btrfs_file_extent_item *fi,
+ struct extent_map *em)
+ {
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+--- a/fs/btrfs/file-item.h
++++ b/fs/btrfs/file-item.h
+@@ -62,7 +62,7 @@ int btrfs_lookup_csums_bitmap(struct btr
+ unsigned long *csum_bitmap);
+ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
+ const struct btrfs_path *path,
+- struct btrfs_file_extent_item *fi,
++ const struct btrfs_file_extent_item *fi,
+ struct extent_map *em);
+ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
+ u64 len);
+--- a/fs/btrfs/inode-item.c
++++ b/fs/btrfs/inode-item.c
+@@ -15,7 +15,7 @@
+ #include "extent-tree.h"
+ #include "file-item.h"
+
+-struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
++struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf,
+ int slot,
+ const struct fscrypt_str *name)
+ {
+@@ -43,7 +43,7 @@ struct btrfs_inode_ref *btrfs_find_name_
+ }
+
+ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+- struct extent_buffer *leaf, int slot, u64 ref_objectid,
++ const struct extent_buffer *leaf, int slot, u64 ref_objectid,
+ const struct fscrypt_str *name)
+ {
+ struct btrfs_inode_extref *extref;
+@@ -424,9 +424,9 @@ int btrfs_lookup_inode(struct btrfs_tran
+ return ret;
+ }
+
+-static inline void btrfs_trace_truncate(struct btrfs_inode *inode,
+- struct extent_buffer *leaf,
+- struct btrfs_file_extent_item *fi,
++static inline void btrfs_trace_truncate(const struct btrfs_inode *inode,
++ const struct extent_buffer *leaf,
++ const struct btrfs_file_extent_item *fi,
+ u64 offset, int extent_type, int slot)
+ {
+ if (!inode)
+--- a/fs/btrfs/inode-item.h
++++ b/fs/btrfs/inode-item.h
+@@ -100,11 +100,11 @@ struct btrfs_inode_extref *btrfs_lookup_
+ u64 inode_objectid, u64 ref_objectid, int ins_len,
+ int cow);
+
+-struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
++struct btrfs_inode_ref *btrfs_find_name_in_backref(const struct extent_buffer *leaf,
+ int slot,
+ const struct fscrypt_str *name);
+ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
+- struct extent_buffer *leaf, int slot, u64 ref_objectid,
++ const struct extent_buffer *leaf, int slot, u64 ref_objectid,
+ const struct fscrypt_str *name);
+
+ #endif
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -162,7 +162,7 @@
+ * thing with or without extra unallocated space.
+ */
+
+-u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
++u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
+ bool may_use_included)
+ {
+ ASSERT(s_info);
+@@ -342,7 +342,7 @@ struct btrfs_space_info *btrfs_find_spac
+ }
+
+ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *space_info,
++ const struct btrfs_space_info *space_info,
+ enum btrfs_reserve_flush_enum flush)
+ {
+ u64 profile;
+@@ -378,7 +378,7 @@ static u64 calc_available_free_space(str
+ }
+
+ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *space_info, u64 bytes,
++ const struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
+ {
+ u64 avail;
+@@ -483,8 +483,8 @@ static void dump_global_block_rsv(struct
+ DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
+ }
+
+-static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *info)
++static void __btrfs_dump_space_info(const struct btrfs_fs_info *fs_info,
++ const struct btrfs_space_info *info)
+ {
+ const char *flag_str = space_info_flag_to_str(info);
+ lockdep_assert_held(&info->lock);
+@@ -807,9 +807,8 @@ static void flush_space(struct btrfs_fs_
+ return;
+ }
+
+-static inline u64
+-btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *space_info)
++static u64 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
++ const struct btrfs_space_info *space_info)
+ {
+ u64 used;
+ u64 avail;
+@@ -834,7 +833,7 @@ btrfs_calc_reclaim_metadata_size(struct
+ }
+
+ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *space_info)
++ const struct btrfs_space_info *space_info)
+ {
+ const u64 global_rsv_size = btrfs_block_rsv_reserved(&fs_info->global_block_rsv);
+ u64 ordered, delalloc;
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -165,7 +165,7 @@ struct reserve_ticket {
+ wait_queue_head_t wait;
+ };
+
+-static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
++static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)
+ {
+ return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+@@ -206,7 +206,7 @@ void btrfs_update_space_info_chunk_size(
+ u64 chunk_size);
+ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+ u64 flags);
+-u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
++u64 __pure btrfs_space_info_used(const struct btrfs_space_info *s_info,
+ bool may_use_included);
+ void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
+@@ -219,7 +219,7 @@ int btrfs_reserve_metadata_bytes(struct
+ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info);
+ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+- struct btrfs_space_info *space_info, u64 bytes,
++ const struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush);
+
+ static inline void btrfs_space_info_free_bytes_may_use(
+--- a/fs/btrfs/tree-mod-log.c
++++ b/fs/btrfs/tree-mod-log.c
+@@ -171,7 +171,7 @@ static noinline int tree_mod_log_insert(
+ * write unlock fs_info::tree_mod_log_lock.
+ */
+ static inline bool tree_mod_dont_log(struct btrfs_fs_info *fs_info,
+- struct extent_buffer *eb)
++ const struct extent_buffer *eb)
+ {
+ if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
+ return true;
+@@ -189,7 +189,7 @@ static inline bool tree_mod_dont_log(str
+
+ /* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+ static inline bool tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+- struct extent_buffer *eb)
++ const struct extent_buffer *eb)
+ {
+ if (!test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
+ return false;
+@@ -199,7 +199,7 @@ static inline bool tree_mod_need_log(con
+ return true;
+ }
+
+-static struct tree_mod_elem *alloc_tree_mod_elem(struct extent_buffer *eb,
++static struct tree_mod_elem *alloc_tree_mod_elem(const struct extent_buffer *eb,
+ int slot,
+ enum btrfs_mod_log_op op)
+ {
+@@ -222,7 +222,7 @@ static struct tree_mod_elem *alloc_tree_
+ return tm;
+ }
+
+-int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
++int btrfs_tree_mod_log_insert_key(const struct extent_buffer *eb, int slot,
+ enum btrfs_mod_log_op op)
+ {
+ struct tree_mod_elem *tm;
+@@ -259,7 +259,7 @@ out_unlock:
+ return ret;
+ }
+
+-static struct tree_mod_elem *tree_mod_log_alloc_move(struct extent_buffer *eb,
++static struct tree_mod_elem *tree_mod_log_alloc_move(const struct extent_buffer *eb,
+ int dst_slot, int src_slot,
+ int nr_items)
+ {
+@@ -279,7 +279,7 @@ static struct tree_mod_elem *tree_mod_lo
+ return tm;
+ }
+
+-int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
++int btrfs_tree_mod_log_insert_move(const struct extent_buffer *eb,
+ int dst_slot, int src_slot,
+ int nr_items)
+ {
+@@ -536,7 +536,7 @@ static struct tree_mod_elem *tree_mod_lo
+ }
+
+ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
+- struct extent_buffer *src,
++ const struct extent_buffer *src,
+ unsigned long dst_offset,
+ unsigned long src_offset,
+ int nr_items)
+--- a/fs/btrfs/tree-mod-log.h
++++ b/fs/btrfs/tree-mod-log.h
+@@ -31,7 +31,7 @@ void btrfs_put_tree_mod_seq(struct btrfs
+ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
+ struct extent_buffer *new_root,
+ bool log_removal);
+-int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
++int btrfs_tree_mod_log_insert_key(const struct extent_buffer *eb, int slot,
+ enum btrfs_mod_log_op op);
+ int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb);
+ struct extent_buffer *btrfs_tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
+@@ -41,11 +41,11 @@ struct extent_buffer *btrfs_tree_mod_log
+ struct extent_buffer *btrfs_get_old_root(struct btrfs_root *root, u64 time_seq);
+ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
+ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
+- struct extent_buffer *src,
++ const struct extent_buffer *src,
+ unsigned long dst_offset,
+ unsigned long src_offset,
+ int nr_items);
+-int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
++int btrfs_tree_mod_log_insert_move(const struct extent_buffer *eb,
+ int dst_slot, int src_slot,
+ int nr_items);
+ u64 btrfs_tree_mod_log_lowest_seq(struct btrfs_fs_info *fs_info);
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2351,7 +2351,7 @@ void btrfs_free_zone_cache(struct btrfs_
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
+
+-bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
++bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
+ {
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_device *device;
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -77,7 +77,7 @@ void btrfs_schedule_zone_finish_bg(struc
+ struct extent_buffer *eb);
+ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
+ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+-bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
++bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info);
+ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length);
+ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
+@@ -237,7 +237,7 @@ static inline void btrfs_clear_data_relo
+
+ static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+
+-static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
++static inline bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
+ {
+ return false;
+ }
+--- a/include/trace/events/btrfs.h
++++ b/include/trace/events/btrfs.h
+@@ -1857,7 +1857,7 @@ TRACE_EVENT(qgroup_update_counters,
+
+ TRACE_EVENT(qgroup_update_reserve,
+
+- TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
++ TP_PROTO(const struct btrfs_fs_info *fs_info, const struct btrfs_qgroup *qgroup,
+ s64 diff, int type),
+
+ TP_ARGS(fs_info, qgroup, diff, type),
+@@ -1883,7 +1883,7 @@ TRACE_EVENT(qgroup_update_reserve,
+
+ TRACE_EVENT(qgroup_meta_reserve,
+
+- TP_PROTO(struct btrfs_root *root, s64 diff, int type),
++ TP_PROTO(const struct btrfs_root *root, s64 diff, int type),
+
+ TP_ARGS(root, diff, type),
+
+@@ -1906,7 +1906,7 @@ TRACE_EVENT(qgroup_meta_reserve,
+
+ TRACE_EVENT(qgroup_meta_convert,
+
+- TP_PROTO(struct btrfs_root *root, s64 diff),
++ TP_PROTO(const struct btrfs_root *root, s64 diff),
+
+ TP_ARGS(root, diff),
+
--- /dev/null
+From stable+bounces-171678-greg=kroah.com@vger.kernel.org Tue Aug 19 01:48:19 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 19:47:23 -0400
+Subject: btrfs: don't ignore inode missing when replaying log tree
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250818234723.154435-1-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 7ebf381a69421a88265d3c49cd0f007ba7336c9d ]
+
+During log replay, at add_inode_ref(), we return -ENOENT if our current
+inode isn't found in the subvolume tree or if a parent directory isn't
+found. The error comes from btrfs_iget_logging() <- btrfs_iget() <-
+btrfs_read_locked_inode().
+
+The single caller of add_inode_ref(), replay_one_buffer(), ignores an
+-ENOENT error because it expects that error to mean only that a parent
+directory wasn't found and that is ok.
+
+Before commit 5f61b961599a ("btrfs: fix inode lookup error handling during
+log replay") we were converting any error when getting a parent directory
+to -ENOENT and any error when getting the current inode to -EIO, so our
+caller would fail log replay in case we can't find the current inode.
+After that commit however in case the current inode is not found we return
+-ENOENT to the caller and therefore it ignores the critical fact that the
+current inode was not found in the subvolume tree.
+
+Fix this by converting -ENOENT to 0 when we don't find a parent directory,
+returning -ENOENT when we don't find the current inode and making the
+caller, replay_one_buffer(), not ignore -ENOENT anymore.
+
+Fixes: 5f61b961599a ("btrfs: fix inode lookup error handling during log replay")
+CC: stable@vger.kernel.org # 6.16
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[ adapted btrfs_inode pointer usage to older inode API ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -1422,6 +1422,8 @@ static noinline int add_inode_ref(struct
+ btrfs_dir = btrfs_iget_logging(parent_objectid, root);
+ if (IS_ERR(btrfs_dir)) {
+ ret = PTR_ERR(btrfs_dir);
++ if (ret == -ENOENT)
++ ret = 0;
+ dir = NULL;
+ goto out;
+ }
+@@ -1455,6 +1457,15 @@ static noinline int add_inode_ref(struct
+ if (IS_ERR(btrfs_dir)) {
+ ret = PTR_ERR(btrfs_dir);
+ dir = NULL;
++ /*
++ * A new parent dir may have not been
++ * logged and not exist in the subvolume
++ * tree, see the comment above before
++ * the loop when getting the first
++ * parent dir.
++ */
++ if (ret == -ENOENT)
++ ret = 0;
+ goto out;
+ }
+ dir = &btrfs_dir->vfs_inode;
+@@ -2623,9 +2634,8 @@ static int replay_one_buffer(struct btrf
+ key.type == BTRFS_INODE_EXTREF_KEY) {
+ ret = add_inode_ref(wc->trans, root, log, path,
+ eb, i, &key);
+- if (ret && ret != -ENOENT)
++ if (ret)
+ break;
+- ret = 0;
+ } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
+ ret = replay_one_extent(wc->trans, root, path,
+ eb, i, &key);
--- /dev/null
+From stable+bounces-171726-greg=kroah.com@vger.kernel.org Tue Aug 19 04:28:01 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:27:52 -0400
+Subject: btrfs: fix ssd_spread overallocation
+To: stable@vger.kernel.org
+Cc: Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Filipe Manana <fdmanana@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819022753.281349-1-sashal@kernel.org>
+
+From: Boris Burkov <boris@bur.io>
+
+[ Upstream commit 807d9023e75fc20bfd6dd2ac0408ce4af53f1648 ]
+
+If the ssd_spread mount option is enabled, then we run the so-called
+clustered allocator for data block groups. In practice, this results in
+creating a btrfs_free_cluster which caches a block_group and borrows its
+free extents for allocation.
+
+Since the introduction of allocation size classes in 6.1, there has been
+a bug in the interaction between that feature and ssd_spread.
+find_free_extent() has a number of nested loops: the loop going over the
+allocation stages, stored in ffe_ctl->loop and managed by
+find_free_extent_update_loop(); the loop over the raid levels; and the
+loop over all the block_groups in a space_info. The size class feature
+relies on the block_group loop to ensure it gets a chance to see a
+block_group of a given size class. However, the clustered allocator
+uses the cached cluster block_group and breaks that loop. Each call to
+do_allocation() will really just go back to the same cached block_group.
+Normally, this is OK, as the allocation either succeeds and we don't
+want to loop any more or it fails, and we clear the cluster and return
+its space to the block_group.
+
+But with size classes, the allocation can succeed, then later fail,
+outside of do_allocation() due to size class mismatch. That latter
+failure is not properly handled due to the highly complex multi loop
+logic. The result is a painful loop where we continue to allocate the
+same num_bytes from the cluster in a tight loop until it fails and
+releases the cluster and lets us try a new block_group. But by then, we
+have skipped great swaths of the available block_groups and are likely
+to fail to allocate, looping the outer loop. In pathological cases like
+the reproducer below, the cached block_group is often the very last one,
+in which case we don't perform this tight bg loop but instead rip
+through the ffe stages to LOOP_CHUNK_ALLOC and allocate a chunk, which
+is now the last one, and we enter the tight inner loop until an
+allocation failure. Then allocation succeeds on the final block_group
+and if the next allocation is a size mismatch, the exact same thing
+happens again.
+
+Triggering this is as easy as mounting with -o ssd_spread and then
+running:
+
+ mount -o ssd_spread $dev $mnt
+ dd if=/dev/zero of=$mnt/big bs=16M count=1 &>/dev/null
+ dd if=/dev/zero of=$mnt/med bs=4M count=1 &>/dev/null
+ sync
+
+if you do the two writes + sync in a loop, you can force btrfs to spin
+an excessive amount on semi-successful clustered allocations, before
+ultimately failing and advancing to the stage where we force a chunk
+allocation. This results in 2G of data allocated per iteration, despite
+only using ~20M of data. By using a small size classed extent, the inner
+loop takes longer and we can spin for longer.
+
+The simplest, shortest term fix to unbreak this is to make the clustered
+allocator size_class aware in the dumbest way, where it fails on size
+class mismatch. This may hinder the operation of the clustered
+allocator, but better hindered than completely broken and terribly
+overallocating.
+
+Further re-design improvements are also in the works.
+
+Fixes: 52bb7a2166af ("btrfs: introduce size class to block group allocator")
+CC: stable@vger.kernel.org # 6.1+
+Reported-by: David Sterba <dsterba@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 33 +++++++++++++++++----------------
+ 1 file changed, 17 insertions(+), 16 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3530,6 +3530,21 @@ btrfs_release_block_group(struct btrfs_b
+ btrfs_put_block_group(cache);
+ }
+
++static bool find_free_extent_check_size_class(const struct find_free_extent_ctl *ffe_ctl,
++ const struct btrfs_block_group *bg)
++{
++ if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
++ return true;
++ if (!btrfs_block_group_should_use_size_class(bg))
++ return true;
++ if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
++ return true;
++ if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
++ bg->size_class == BTRFS_BG_SZ_NONE)
++ return true;
++ return ffe_ctl->size_class == bg->size_class;
++}
++
+ /*
+ * Helper function for find_free_extent().
+ *
+@@ -3551,7 +3566,8 @@ static int find_free_extent_clustered(st
+ if (!cluster_bg)
+ goto refill_cluster;
+ if (cluster_bg != bg && (cluster_bg->ro ||
+- !block_group_bits(cluster_bg, ffe_ctl->flags)))
++ !block_group_bits(cluster_bg, ffe_ctl->flags) ||
++ !find_free_extent_check_size_class(ffe_ctl, cluster_bg)))
+ goto release_cluster;
+
+ offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
+@@ -4107,21 +4123,6 @@ static int find_free_extent_update_loop(
+ return -ENOSPC;
+ }
+
+-static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
+- struct btrfs_block_group *bg)
+-{
+- if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
+- return true;
+- if (!btrfs_block_group_should_use_size_class(bg))
+- return true;
+- if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
+- return true;
+- if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
+- bg->size_class == BTRFS_BG_SZ_NONE)
+- return true;
+- return ffe_ctl->size_class == bg->size_class;
+-}
+-
+ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
+ struct find_free_extent_ctl *ffe_ctl,
+ struct btrfs_space_info *space_info,
--- /dev/null
+From stable+bounces-171693-greg=kroah.com@vger.kernel.org Tue Aug 19 02:58:00 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 20:57:50 -0400
+Subject: btrfs: move transaction aborts to the error site in add_block_group_free_space()
+To: stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>, Filipe Manana <fdmanana@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819005751.234544-1-sashal@kernel.org>
+
+From: David Sterba <dsterba@suse.com>
+
+[ Upstream commit b63c8c1ede4407835cb8c8bed2014d96619389f3 ]
+
+Transaction aborts should be done next to the place the error happens,
+which was not done in add_block_group_free_space().
+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 1f06c942aa70 ("btrfs: always abort transaction on failure to add block group to free space tree")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-tree.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/free-space-tree.c
++++ b/fs/btrfs/free-space-tree.c
+@@ -1396,16 +1396,17 @@ int add_block_group_free_space(struct bt
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
++ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ ret = __add_block_group_free_space(trans, block_group, path);
++ if (ret)
++ btrfs_abort_transaction(trans, ret);
+
+ out:
+ btrfs_free_path(path);
+ mutex_unlock(&block_group->free_space_lock);
+- if (ret)
+- btrfs_abort_transaction(trans, ret);
+ return ret;
+ }
+
--- /dev/null
+From stable+bounces-171738-greg=kroah.com@vger.kernel.org Tue Aug 19 05:04:40 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 23:04:31 -0400
+Subject: btrfs: open code timespec64 in struct btrfs_inode
+To: stable@vger.kernel.org
+Cc: David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819030432.303556-1-sashal@kernel.org>
+
+From: David Sterba <dsterba@suse.com>
+
+[ Upstream commit c6e8f898f56fae2cb5bc4396bec480f23cd8b066 ]
+
+The type of timespec64::tv_nsec is 'unsigned long', while we have only
+u32 for the on-disk and in-memory representations. This wastes a few
+bytes in btrfs_inode. Add separate members for sec and nsec with the
+corresponding type widths. This creates a 4-byte hole in btrfs_inode
+which can be utilized in the future.
+
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 1ef94169db09 ("btrfs: populate otime when logging an inode item")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/btrfs_inode.h | 3 ++-
+ fs/btrfs/delayed-inode.c | 12 ++++--------
+ fs/btrfs/inode.c | 26 ++++++++++++--------------
+ 3 files changed, 18 insertions(+), 23 deletions(-)
+
+--- a/fs/btrfs/btrfs_inode.h
++++ b/fs/btrfs/btrfs_inode.h
+@@ -251,7 +251,8 @@ struct btrfs_inode {
+ struct btrfs_delayed_node *delayed_node;
+
+ /* File creation time. */
+- struct timespec64 i_otime;
++ u64 i_otime_sec;
++ u32 i_otime_nsec;
+
+ /* Hook into fs_info->delayed_iputs */
+ struct list_head delayed_iput;
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1849,10 +1849,8 @@ static void fill_stack_inode_item(struct
+ btrfs_set_stack_timespec_nsec(&inode_item->ctime,
+ inode_get_ctime(inode).tv_nsec);
+
+- btrfs_set_stack_timespec_sec(&inode_item->otime,
+- BTRFS_I(inode)->i_otime.tv_sec);
+- btrfs_set_stack_timespec_nsec(&inode_item->otime,
+- BTRFS_I(inode)->i_otime.tv_nsec);
++ btrfs_set_stack_timespec_sec(&inode_item->otime, BTRFS_I(inode)->i_otime_sec);
++ btrfs_set_stack_timespec_nsec(&inode_item->otime, BTRFS_I(inode)->i_otime_nsec);
+ }
+
+ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
+@@ -1901,10 +1899,8 @@ int btrfs_fill_inode(struct inode *inode
+ inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime),
+ btrfs_stack_timespec_nsec(&inode_item->ctime));
+
+- BTRFS_I(inode)->i_otime.tv_sec =
+- btrfs_stack_timespec_sec(&inode_item->otime);
+- BTRFS_I(inode)->i_otime.tv_nsec =
+- btrfs_stack_timespec_nsec(&inode_item->otime);
++ BTRFS_I(inode)->i_otime_sec = btrfs_stack_timespec_sec(&inode_item->otime);
++ BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime);
+
+ inode->i_generation = BTRFS_I(inode)->generation;
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3785,10 +3785,8 @@ static int btrfs_read_locked_inode(struc
+ inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime),
+ btrfs_timespec_nsec(leaf, &inode_item->ctime));
+
+- BTRFS_I(inode)->i_otime.tv_sec =
+- btrfs_timespec_sec(leaf, &inode_item->otime);
+- BTRFS_I(inode)->i_otime.tv_nsec =
+- btrfs_timespec_nsec(leaf, &inode_item->otime);
++ BTRFS_I(inode)->i_otime_sec = btrfs_timespec_sec(leaf, &inode_item->otime);
++ BTRFS_I(inode)->i_otime_nsec = btrfs_timespec_nsec(leaf, &inode_item->otime);
+
+ inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
+ BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
+@@ -3958,10 +3956,8 @@ static void fill_inode_item(struct btrfs
+ btrfs_set_token_timespec_nsec(&token, &item->ctime,
+ inode_get_ctime(inode).tv_nsec);
+
+- btrfs_set_token_timespec_sec(&token, &item->otime,
+- BTRFS_I(inode)->i_otime.tv_sec);
+- btrfs_set_token_timespec_nsec(&token, &item->otime,
+- BTRFS_I(inode)->i_otime.tv_nsec);
++ btrfs_set_token_timespec_sec(&token, &item->otime, BTRFS_I(inode)->i_otime_sec);
++ btrfs_set_token_timespec_nsec(&token, &item->otime, BTRFS_I(inode)->i_otime_nsec);
+
+ btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
+ btrfs_set_token_inode_generation(&token, item,
+@@ -5644,7 +5640,8 @@ static struct inode *new_simple_dir(stru
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
+ inode->i_mtime = inode_set_ctime_current(inode);
+ inode->i_atime = dir->i_atime;
+- BTRFS_I(inode)->i_otime = inode->i_mtime;
++ BTRFS_I(inode)->i_otime_sec = inode->i_mtime.tv_sec;
++ BTRFS_I(inode)->i_otime_nsec = inode->i_mtime.tv_nsec;
+ inode->i_uid = dir->i_uid;
+ inode->i_gid = dir->i_gid;
+
+@@ -6321,7 +6318,8 @@ int btrfs_create_new_inode(struct btrfs_
+
+ inode->i_mtime = inode_set_ctime_current(inode);
+ inode->i_atime = inode->i_mtime;
+- BTRFS_I(inode)->i_otime = inode->i_mtime;
++ BTRFS_I(inode)->i_otime_sec = inode->i_mtime.tv_sec;
++ BTRFS_I(inode)->i_otime_nsec = inode->i_mtime.tv_nsec;
+
+ /*
+ * We're going to fill the inode item now, so at this point the inode
+@@ -8550,8 +8548,8 @@ struct inode *btrfs_alloc_inode(struct s
+
+ ei->delayed_node = NULL;
+
+- ei->i_otime.tv_sec = 0;
+- ei->i_otime.tv_nsec = 0;
++ ei->i_otime_sec = 0;
++ ei->i_otime_nsec = 0;
+
+ inode = &ei->vfs_inode;
+ extent_map_tree_init(&ei->extent_tree);
+@@ -8703,8 +8701,8 @@ static int btrfs_getattr(struct mnt_idma
+ u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
+
+ stat->result_mask |= STATX_BTIME;
+- stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
+- stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
++ stat->btime.tv_sec = BTRFS_I(inode)->i_otime_sec;
++ stat->btime.tv_nsec = BTRFS_I(inode)->i_otime_nsec;
+ if (bi_flags & BTRFS_INODE_APPEND)
+ stat->attributes |= STATX_ATTR_APPEND;
+ if (bi_flags & BTRFS_INODE_COMPRESS)
--- /dev/null
+From stable+bounces-171739-greg=kroah.com@vger.kernel.org Tue Aug 19 05:04:42 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 23:04:32 -0400
+Subject: btrfs: populate otime when logging an inode item
+To: stable@vger.kernel.org
+Cc: Qu Wenruo <wqu@suse.com>, Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819030432.303556-2-sashal@kernel.org>
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 1ef94169db0958d6de39f9ea6e063ce887342e2d ]
+
+[TEST FAILURE WITH EXPERIMENTAL FEATURES]
+When running test case generic/508, the test case will fail with the new
+btrfs shutdown support:
+
+generic/508 - output mismatch (see /home/adam/xfstests/results//generic/508.out.bad)
+# --- tests/generic/508.out 2022-05-11 11:25:30.806666664 +0930
+# +++ /home/adam/xfstests/results//generic/508.out.bad 2025-07-02 14:53:22.401824212 +0930
+# @@ -1,2 +1,6 @@
+# QA output created by 508
+# Silence is golden
+# +Before:
+# +After : stat.btime = Thu Jan 1 09:30:00 1970
+# +Before:
+# +After : stat.btime = Wed Jul 2 14:53:22 2025
+# ...
+# (Run 'diff -u /home/adam/xfstests/tests/generic/508.out /home/adam/xfstests/results//generic/508.out.bad' to see the entire diff)
+Ran: generic/508
+Failures: generic/508
+Failed 1 of 1 tests
+
+Please note that the test case requires shutdown support, thus the test
+case will be skipped using the current upstream kernel, as it doesn't
+have shutdown ioctl support.
+
+[CAUSE]
+The direct cause is the 0 time stamp in the log tree:
+
+leaf 30507008 items 2 free space 16057 generation 9 owner TREE_LOG
+leaf 30507008 flags 0x1(WRITTEN) backref revision 1
+checksum stored e522548d
+checksum calced e522548d
+fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
+chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
+ item 0 key (257 INODE_ITEM 0) itemoff 16123 itemsize 160
+ generation 9 transid 9 size 0 nbytes 0
+ block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0
+ sequence 1 flags 0x0(none)
+ atime 1751432947.492000000 (2025-07-02 14:39:07)
+ ctime 1751432947.492000000 (2025-07-02 14:39:07)
+ mtime 1751432947.492000000 (2025-07-02 14:39:07)
+ otime 0.0 (1970-01-01 09:30:00) <<<
+
+But the old fs tree has all the correct time stamp:
+
+btrfs-progs v6.12
+fs tree key (FS_TREE ROOT_ITEM 0)
+leaf 30425088 items 2 free space 16061 generation 5 owner FS_TREE
+leaf 30425088 flags 0x1(WRITTEN) backref revision 1
+checksum stored 48f6c57e
+checksum calced 48f6c57e
+fs uuid 57d45451-481e-43e4-aa93-289ad707a3a0
+chunk uuid d52bd3fd-5163-4337-98a7-7986993ad398
+ item 0 key (256 INODE_ITEM 0) itemoff 16123 itemsize 160
+ generation 3 transid 0 size 0 nbytes 16384
+ block group 0 mode 40755 links 1 uid 0 gid 0 rdev 0
+ sequence 0 flags 0x0(none)
+ atime 1751432947.0 (2025-07-02 14:39:07)
+ ctime 1751432947.0 (2025-07-02 14:39:07)
+ mtime 1751432947.0 (2025-07-02 14:39:07)
+ otime 1751432947.0 (2025-07-02 14:39:07) <<<
+
+The root cause is that fill_inode_item() in tree-log.c is only
+populating a/c/m time, not the otime (or btime in statx output).
+
+Part of the reason is that, the vfs inode only has a/c/m time, no native
+btime support yet.
+
+[FIX]
+Thankfully btrfs has its otime stored in btrfs_inode::i_otime_sec and
+btrfs_inode::i_otime_nsec.
+
+So what we really need is just to fill the otime time stamp in
+fill_inode_item() of tree-log.c.
+
+There is another fill_inode_item() in inode.c, which is doing the proper
+otime population.
+
+Fixes: 94edf4ae43a5 ("Btrfs: don't bother committing delayed inode updates when fsyncing")
+CC: stable@vger.kernel.org
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -4265,6 +4265,9 @@ static void fill_inode_item(struct btrfs
+ btrfs_set_token_timespec_nsec(&token, &item->ctime,
+ inode_get_ctime(inode).tv_nsec);
+
++ btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec);
++ btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec);
++
+ /*
+ * We do not need to set the nbytes field, in fact during a fast fsync
+ * its value may not even be correct, since a fast fsync does not wait
--- /dev/null
+From stable+bounces-171681-greg=kroah.com@vger.kernel.org Tue Aug 19 02:07:36 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 20:07:19 -0400
+Subject: btrfs: qgroup: fix race between quota disable and quota rescan ioctl
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, cen zhang <zzzccc427@gmail.com>, Boris Burkov <boris@bur.io>, Qu Wenruo <wqu@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819000719.186990-1-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit e1249667750399a48cafcf5945761d39fa584edf ]
+
+There's a race between a task disabling quotas and another running the
+rescan ioctl that can result in a use-after-free of qgroup records from
+the fs_info->qgroup_tree rbtree.
+
+This happens as follows:
+
+1) Task A enters btrfs_ioctl_quota_rescan() -> btrfs_qgroup_rescan();
+
+2) Task B enters btrfs_quota_disable() and calls
+ btrfs_qgroup_wait_for_completion(), which does nothing because at that
+ point fs_info->qgroup_rescan_running is false (it wasn't set yet by
+ task A);
+
+3) Task B calls btrfs_free_qgroup_config() which starts freeing qgroups
+ from fs_info->qgroup_tree without taking the lock fs_info->qgroup_lock;
+
+4) Task A enters qgroup_rescan_zero_tracking() which starts iterating
+ the fs_info->qgroup_tree tree while holding fs_info->qgroup_lock,
+ but task B is freeing qgroup records from that tree without holding
+ the lock, resulting in a use-after-free.
+
+Fix this by taking fs_info->qgroup_lock at btrfs_free_qgroup_config().
+Also at btrfs_qgroup_rescan() don't start the rescan worker if quotas
+were already disabled.
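+
+Condensed, the iteration in btrfs_free_qgroup_config() ends up with the
+following shape (a sketch of the fix below, not a complete listing; the
+lock is dropped around btrfs_sysfs_del_one_qgroup() since sysfs removal
+can sleep):
+
+  spin_lock(&fs_info->qgroup_lock);
+  while ((n = rb_first(&fs_info->qgroup_tree))) {
+          qgroup = rb_entry(n, struct btrfs_qgroup, node);
+          rb_erase(n, &fs_info->qgroup_tree);
+          __del_qgroup_rb(fs_info, qgroup);
+          spin_unlock(&fs_info->qgroup_lock);
+          btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+          kfree(qgroup);
+          spin_lock(&fs_info->qgroup_lock);
+  }
+  spin_unlock(&fs_info->qgroup_lock);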
+
+Reported-by: cen zhang <zzzccc427@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CAFRLqsV+cMDETFuzqdKSHk_FDm6tneea45krsHqPD6B3FetLpQ@mail.gmail.com/
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[ Check for BTRFS_FS_QUOTA_ENABLED, instead of btrfs_qgroup_full_accounting() ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c | 31 ++++++++++++++++++++++++-------
+ 1 file changed, 24 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -579,22 +579,30 @@ bool btrfs_check_quota_leak(struct btrfs
+
+ /*
+ * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
+- * first two are in single-threaded paths.And for the third one, we have set
+- * quota_root to be null with qgroup_lock held before, so it is safe to clean
+- * up the in-memory structures without qgroup_lock held.
++ * first two are in single-threaded paths.
+ */
+ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
+ {
+ struct rb_node *n;
+ struct btrfs_qgroup *qgroup;
+
++ /*
++ * btrfs_quota_disable() can be called concurrently with
++ * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the
++ * lock.
++ */
++ spin_lock(&fs_info->qgroup_lock);
+ while ((n = rb_first(&fs_info->qgroup_tree))) {
+ qgroup = rb_entry(n, struct btrfs_qgroup, node);
+ rb_erase(n, &fs_info->qgroup_tree);
+ __del_qgroup_rb(fs_info, qgroup);
++ spin_unlock(&fs_info->qgroup_lock);
+ btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+ kfree(qgroup);
++ spin_lock(&fs_info->qgroup_lock);
+ }
++ spin_unlock(&fs_info->qgroup_lock);
++
+ /*
+ * We call btrfs_free_qgroup_config() when unmounting
+ * filesystem and disabling quota, so we set qgroup_ulist
+@@ -3616,12 +3624,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info
+ qgroup_rescan_zero_tracking(fs_info);
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+- fs_info->qgroup_rescan_running = true;
+- btrfs_queue_work(fs_info->qgroup_rescan_workers,
+- &fs_info->qgroup_rescan_work);
++ /*
++ * The rescan worker is only for full accounting qgroups, check if it's
++ * enabled as it is pointless to queue it otherwise. A concurrent quota
++ * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED.
++ */
++ if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
++ fs_info->qgroup_rescan_running = true;
++ btrfs_queue_work(fs_info->qgroup_rescan_workers,
++ &fs_info->qgroup_rescan_work);
++ } else {
++ ret = -ENOTCONN;
++ }
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+- return 0;
++ return ret;
+ }
+
+ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
--- /dev/null
+From stable+bounces-171730-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:32 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:16 -0400
+Subject: btrfs: send: add and use helper to rename current inode when processing refs
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-3-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit ec666c84deba56f714505b53556a97565f72db86 ]
+
+Extract the logic to rename the current inode at process_recorded_refs()
+into a helper function and use it, therefore removing duplicated logic
+and making it easier for an upcoming patch by avoiding yet more duplicated
+logic.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 005b0a0c24e1 ("btrfs: send: use fallocate for hole punching with send stream v2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -4166,6 +4166,19 @@ out:
+ return ret;
+ }
+
++static int rename_current_inode(struct send_ctx *sctx,
++ struct fs_path *current_path,
++ struct fs_path *new_path)
++{
++ int ret;
++
++ ret = send_rename(sctx, current_path, new_path);
++ if (ret < 0)
++ return ret;
++
++ return fs_path_copy(current_path, new_path);
++}
++
+ /*
+ * This does all the move/link/unlink/rmdir magic.
+ */
+@@ -4451,13 +4464,10 @@ static int process_recorded_refs(struct
+ * it depending on the inode mode.
+ */
+ if (is_orphan && can_rename) {
+- ret = send_rename(sctx, valid_path, cur->full_path);
++ ret = rename_current_inode(sctx, valid_path, cur->full_path);
+ if (ret < 0)
+ goto out;
+ is_orphan = false;
+- ret = fs_path_copy(valid_path, cur->full_path);
+- if (ret < 0)
+- goto out;
+ } else if (can_rename) {
+ if (S_ISDIR(sctx->cur_inode_mode)) {
+ /*
+@@ -4465,10 +4475,7 @@ static int process_recorded_refs(struct
+ * dirs, we always have one new and one deleted
+ * ref. The deleted ref is ignored later.
+ */
+- ret = send_rename(sctx, valid_path,
+- cur->full_path);
+- if (!ret)
+- ret = fs_path_copy(valid_path,
++ ret = rename_current_inode(sctx, valid_path,
+ cur->full_path);
+ if (ret < 0)
+ goto out;
--- /dev/null
+From stable+bounces-171732-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:35 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:18 -0400
+Subject: btrfs: send: avoid path allocation for the current inode when issuing commands
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-5-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 374d45af6435534a11b01b88762323abf03dd755 ]
+
+Whenever we issue a command we allocate a path and then compute it. For
+the current inode this is not necessary since we have one preallocated
+and computed in the send context structure, so we can use it instead
+and avoid allocating and freeing a path.
+
+For example if we have 100 extents to send (100 write commands) for a
+file, we are allocating and freeing paths 100 times.
+
+So improve on this by avoiding path allocation and freeing whenever a
+command is for the current inode by using the current inode's path
+stored in the send context structure.
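+
+The per-command pattern then becomes, condensed from the diff below
+(get_path_for_command() hands back the cached sctx->cur_inode_path when
+the inode is the current one, and free_path_for_command() only frees
+paths it actually allocated):
+
+  p = get_path_for_command(sctx, ino, gen);
+  if (IS_ERR(p))
+          return PTR_ERR(p);
+  /* ... build and send the command using path p ... */
+  free_path_for_command(sctx, p);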
+
+A test was run before applying this patch and the previous one in the
+series:
+
+ "btrfs: send: keep the current inode's path cached"
+
+The test script is the following:
+
+ $ cat test.sh
+ #!/bin/bash
+
+ DEV=/dev/nullb0
+ MNT=/mnt/nullb0
+
+ mkfs.btrfs -f $DEV > /dev/null
+ mount $DEV $MNT
+
+ DIR="$MNT/one/two/three/four"
+ FILE="$DIR/foobar"
+
+ mkdir -p $DIR
+
+ # Create some empty files to get a deeper btree and therefore make
+ # path computations slower.
+ for ((i = 1; i <= 30000; i++)); do
+ echo -n > "$DIR/filler_$i"
+ done
+
+ for ((i = 0; i < 10000; i += 2)); do
+ offset=$(( i * 4096 ))
+ xfs_io -f -c "pwrite -S 0xab $offset 4K" $FILE > /dev/null
+ done
+
+ btrfs subvolume snapshot -r $MNT $MNT/snap
+
+ start=$(date +%s%N)
+ btrfs send -f /dev/null $MNT/snap
+ end=$(date +%s%N)
+
+ echo -e "\nsend took $(( (end - start) / 1000000 )) milliseconds"
+
+ umount $MNT
+
+Result before applying the 2 patches: 1121 milliseconds
+Result after applying the 2 patches: 815 milliseconds (-27.3%)
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 005b0a0c24e1 ("btrfs: send: use fallocate for hole punching with send stream v2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 215 +++++++++++++++++++++++++-------------------------------
+ 1 file changed, 97 insertions(+), 118 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -2623,6 +2623,47 @@ out:
+ return ret;
+ }
+
++static struct fs_path *get_cur_inode_path(struct send_ctx *sctx)
++{
++ if (fs_path_len(&sctx->cur_inode_path) == 0) {
++ int ret;
++
++ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
++ &sctx->cur_inode_path);
++ if (ret < 0)
++ return ERR_PTR(ret);
++ }
++
++ return &sctx->cur_inode_path;
++}
++
++static struct fs_path *get_path_for_command(struct send_ctx *sctx, u64 ino, u64 gen)
++{
++ struct fs_path *path;
++ int ret;
++
++ if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen)
++ return get_cur_inode_path(sctx);
++
++ path = fs_path_alloc();
++ if (!path)
++ return ERR_PTR(-ENOMEM);
++
++ ret = get_cur_path(sctx, ino, gen, path);
++ if (ret < 0) {
++ fs_path_free(path);
++ return ERR_PTR(ret);
++ }
++
++ return path;
++}
++
++static void free_path_for_command(const struct send_ctx *sctx, struct fs_path *path)
++{
++ if (path != &sctx->cur_inode_path)
++ fs_path_free(path);
++}
++
+ static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
+ {
+ struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
+@@ -2631,17 +2672,14 @@ static int send_truncate(struct send_ctx
+
+ btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
++ p = get_path_for_command(sctx, ino, gen);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, ino, gen, p);
+- if (ret < 0)
+- goto out;
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
+
+@@ -2649,7 +2687,7 @@ static int send_truncate(struct send_ctx
+
+ tlv_put_failure:
+ out:
+- fs_path_free(p);
++ free_path_for_command(sctx, p);
+ return ret;
+ }
+
+@@ -2661,17 +2699,14 @@ static int send_chmod(struct send_ctx *s
+
+ btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
++ p = get_path_for_command(sctx, ino, gen);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, ino, gen, p);
+- if (ret < 0)
+- goto out;
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
+
+@@ -2679,7 +2714,7 @@ static int send_chmod(struct send_ctx *s
+
+ tlv_put_failure:
+ out:
+- fs_path_free(p);
++ free_path_for_command(sctx, p);
+ return ret;
+ }
+
+@@ -2694,17 +2729,14 @@ static int send_fileattr(struct send_ctx
+
+ btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr);
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
++ p = get_path_for_command(sctx, ino, gen);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR);
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, ino, gen, p);
+- if (ret < 0)
+- goto out;
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr);
+
+@@ -2712,7 +2744,7 @@ static int send_fileattr(struct send_ctx
+
+ tlv_put_failure:
+ out:
+- fs_path_free(p);
++ free_path_for_command(sctx, p);
+ return ret;
+ }
+
+@@ -2725,17 +2757,14 @@ static int send_chown(struct send_ctx *s
+ btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
+ ino, uid, gid);
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
++ p = get_path_for_command(sctx, ino, gen);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, ino, gen, p);
+- if (ret < 0)
+- goto out;
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
+@@ -2744,7 +2773,7 @@ static int send_chown(struct send_ctx *s
+
+ tlv_put_failure:
+ out:
+- fs_path_free(p);
++ free_path_for_command(sctx, p);
+ return ret;
+ }
+
+@@ -2761,9 +2790,9 @@ static int send_utimes(struct send_ctx *
+
+ btrfs_debug(fs_info, "send_utimes %llu", ino);
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
++ p = get_path_for_command(sctx, ino, gen);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+ path = alloc_path_for_send();
+ if (!path) {
+@@ -2788,9 +2817,6 @@ static int send_utimes(struct send_ctx *
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, ino, gen, p);
+- if (ret < 0)
+- goto out;
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
+ TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
+@@ -2802,7 +2828,7 @@ static int send_utimes(struct send_ctx *
+
+ tlv_put_failure:
+ out:
+- fs_path_free(p);
++ free_path_for_command(sctx, p);
+ btrfs_free_path(path);
+ return ret;
+ }
+@@ -4930,13 +4956,9 @@ static int send_set_xattr(struct send_ct
+ struct fs_path *path;
+ int ret;
+
+- path = fs_path_alloc();
+- if (!path)
+- return -ENOMEM;
+-
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, path);
+- if (ret < 0)
+- goto out;
++ path = get_cur_inode_path(sctx);
++ if (IS_ERR(path))
++ return PTR_ERR(path);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
+ if (ret < 0)
+@@ -4950,8 +4972,6 @@ static int send_set_xattr(struct send_ct
+
+ tlv_put_failure:
+ out:
+- fs_path_free(path);
+-
+ return ret;
+ }
+
+@@ -5009,23 +5029,14 @@ static int __process_deleted_xattr(int n
+ const char *name, int name_len,
+ const char *data, int data_len, void *ctx)
+ {
+- int ret;
+ struct send_ctx *sctx = ctx;
+ struct fs_path *p;
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
+-
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+- if (ret < 0)
+- goto out;
+-
+- ret = send_remove_xattr(sctx, p, name, name_len);
++ p = get_cur_inode_path(sctx);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+-out:
+- fs_path_free(p);
+- return ret;
++ return send_remove_xattr(sctx, p, name, name_len);
+ }
+
+ static int process_new_xattr(struct send_ctx *sctx)
+@@ -5259,21 +5270,13 @@ static int process_verity(struct send_ct
+ if (ret < 0)
+ goto iput;
+
+- p = fs_path_alloc();
+- if (!p) {
+- ret = -ENOMEM;
++ p = get_cur_inode_path(sctx);
++ if (IS_ERR(p)) {
++ ret = PTR_ERR(p);
+ goto iput;
+ }
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+- if (ret < 0)
+- goto free_path;
+
+ ret = send_verity(sctx, p, sctx->verity_descriptor);
+- if (ret < 0)
+- goto free_path;
+-
+-free_path:
+- fs_path_free(p);
+ iput:
+ iput(inode);
+ return ret;
+@@ -5388,31 +5391,25 @@ static int send_write(struct send_ctx *s
+ int ret = 0;
+ struct fs_path *p;
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
+-
+ btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
+
+- ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
+- if (ret < 0)
+- goto out;
++ p = get_cur_inode_path(sctx);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
++ ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
+ if (ret < 0)
+- goto out;
++ return ret;
+
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+ ret = put_file_data(sctx, offset, len);
+ if (ret < 0)
+- goto out;
++ return ret;
+
+ ret = send_cmd(sctx);
+
+ tlv_put_failure:
+-out:
+- fs_path_free(p);
+ return ret;
+ }
+
+@@ -5425,6 +5422,7 @@ static int send_clone(struct send_ctx *s
+ {
+ int ret = 0;
+ struct fs_path *p;
++ struct fs_path *cur_inode_path;
+ u64 gen;
+
+ btrfs_debug(sctx->send_root->fs_info,
+@@ -5432,6 +5430,10 @@ static int send_clone(struct send_ctx *s
+ offset, len, clone_root->root->root_key.objectid,
+ clone_root->ino, clone_root->offset);
+
++ cur_inode_path = get_cur_inode_path(sctx);
++ if (IS_ERR(cur_inode_path))
++ return PTR_ERR(cur_inode_path);
++
+ p = fs_path_alloc();
+ if (!p)
+ return -ENOMEM;
+@@ -5440,13 +5442,9 @@ static int send_clone(struct send_ctx *s
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+- if (ret < 0)
+- goto out;
+-
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
+- TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
++ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_inode_path);
+
+ if (clone_root->root == sctx->send_root) {
+ ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
+@@ -5497,17 +5495,13 @@ static int send_update_extent(struct sen
+ int ret = 0;
+ struct fs_path *p;
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
++ p = get_cur_inode_path(sctx);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
+ if (ret < 0)
+- goto out;
+-
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+- if (ret < 0)
+- goto out;
++ return ret;
+
+ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+@@ -5516,8 +5510,6 @@ static int send_update_extent(struct sen
+ ret = send_cmd(sctx);
+
+ tlv_put_failure:
+-out:
+- fs_path_free(p);
+ return ret;
+ }
+
+@@ -5546,12 +5538,10 @@ static int send_hole(struct send_ctx *sc
+ if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
+ return send_update_extent(sctx, offset, end - offset);
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+- if (ret < 0)
+- goto tlv_put_failure;
++ p = get_cur_inode_path(sctx);
++ if (IS_ERR(p))
++ return PTR_ERR(p);
++
+ while (offset < end) {
+ u64 len = min(end - offset, read_size);
+
+@@ -5572,7 +5562,6 @@ static int send_hole(struct send_ctx *sc
+ }
+ sctx->cur_inode_next_write_offset = offset;
+ tlv_put_failure:
+- fs_path_free(p);
+ return ret;
+ }
+
+@@ -5595,9 +5584,9 @@ static int send_encoded_inline_extent(st
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+- fspath = fs_path_alloc();
+- if (!fspath) {
+- ret = -ENOMEM;
++ fspath = get_cur_inode_path(sctx);
++ if (IS_ERR(fspath)) {
++ ret = PTR_ERR(fspath);
+ goto out;
+ }
+
+@@ -5605,10 +5594,6 @@ static int send_encoded_inline_extent(st
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+- if (ret < 0)
+- goto out;
+-
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+ ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
+@@ -5637,7 +5622,6 @@ static int send_encoded_inline_extent(st
+
+ tlv_put_failure:
+ out:
+- fs_path_free(fspath);
+ iput(inode);
+ return ret;
+ }
+@@ -5662,9 +5646,9 @@ static int send_encoded_extent(struct se
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+- fspath = fs_path_alloc();
+- if (!fspath) {
+- ret = -ENOMEM;
++ fspath = get_cur_inode_path(sctx);
++ if (IS_ERR(fspath)) {
++ ret = PTR_ERR(fspath);
+ goto out;
+ }
+
+@@ -5672,10 +5656,6 @@ static int send_encoded_extent(struct se
+ if (ret < 0)
+ goto out;
+
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+- if (ret < 0)
+- goto out;
+-
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
+@@ -5742,7 +5722,6 @@ static int send_encoded_extent(struct se
+
+ tlv_put_failure:
+ out:
+- fs_path_free(fspath);
+ iput(inode);
+ return ret;
+ }
--- /dev/null
+From stable+bounces-171728-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:29 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:14 -0400
+Subject: btrfs: send: factor out common logic when sending xattrs
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-1-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 17f6a74d0b89092e38e3328b66eda1ab29a195d4 ]
+
+We always send xattrs for the current inode only and both callers of
+send_set_xattr() pass a path for the current inode. So move the path
+allocation and computation to send_set_xattr(), reducing duplicated
+code. This also facilitates an upcoming patch.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 005b0a0c24e1 ("btrfs: send: use fallocate for hole punching with send stream v2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 41 +++++++++++++++--------------------------
+ 1 file changed, 15 insertions(+), 26 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -4879,11 +4879,19 @@ out:
+ }
+
+ static int send_set_xattr(struct send_ctx *sctx,
+- struct fs_path *path,
+ const char *name, int name_len,
+ const char *data, int data_len)
+ {
+- int ret = 0;
++ struct fs_path *path;
++ int ret;
++
++ path = fs_path_alloc();
++ if (!path)
++ return -ENOMEM;
++
++ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, path);
++ if (ret < 0)
++ goto out;
+
+ ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
+ if (ret < 0)
+@@ -4897,6 +4905,8 @@ static int send_set_xattr(struct send_ct
+
+ tlv_put_failure:
+ out:
++ fs_path_free(path);
++
+ return ret;
+ }
+
+@@ -4924,19 +4934,13 @@ static int __process_new_xattr(int num,
+ const char *name, int name_len, const char *data,
+ int data_len, void *ctx)
+ {
+- int ret;
+ struct send_ctx *sctx = ctx;
+- struct fs_path *p;
+ struct posix_acl_xattr_header dummy_acl;
+
+ /* Capabilities are emitted by finish_inode_if_needed */
+ if (!strncmp(name, XATTR_NAME_CAPS, name_len))
+ return 0;
+
+- p = fs_path_alloc();
+- if (!p)
+- return -ENOMEM;
+-
+ /*
+ * This hack is needed because empty acls are stored as zero byte
+ * data in xattrs. Problem with that is, that receiving these zero byte
+@@ -4953,15 +4957,7 @@ static int __process_new_xattr(int num,
+ }
+ }
+
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+- if (ret < 0)
+- goto out;
+-
+- ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
+-
+-out:
+- fs_path_free(p);
+- return ret;
++ return send_set_xattr(sctx, name, name_len, data, data_len);
+ }
+
+ static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
+@@ -5831,7 +5827,6 @@ static int send_extent_data(struct send_
+ */
+ static int send_capabilities(struct send_ctx *sctx)
+ {
+- struct fs_path *fspath = NULL;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct extent_buffer *leaf;
+@@ -5857,25 +5852,19 @@ static int send_capabilities(struct send
+ leaf = path->nodes[0];
+ buf_len = btrfs_dir_data_len(leaf, di);
+
+- fspath = fs_path_alloc();
+ buf = kmalloc(buf_len, GFP_KERNEL);
+- if (!fspath || !buf) {
++ if (!buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+- ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+- if (ret < 0)
+- goto out;
+-
+ data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
+ read_extent_buffer(leaf, buf, data_ptr, buf_len);
+
+- ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
++ ret = send_set_xattr(sctx, XATTR_NAME_CAPS,
+ strlen(XATTR_NAME_CAPS), buf, buf_len);
+ out:
+ kfree(buf);
+- fs_path_free(fspath);
+ btrfs_free_path(path);
+ return ret;
+ }
--- /dev/null
+From stable+bounces-171731-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:34 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:17 -0400
+Subject: btrfs: send: keep the current inode's path cached
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-4-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit fc746acb7aa9aeaa2cb5dcba449323319ba5c8eb ]
+
+Whenever we need to send a command for the current inode, like sending
+writes, xattr updates, truncates, utimes, etc, we compute the inode's
+path each time, which implies doing some memory allocations and traversing
+the inode hierarchy to extract the name of the inode and each ancestor
+directory, and that implies doing lookups in the subvolume tree amongst
+other operations.
+
+Most of the time, by far, the current inode's path doesn't change while
+we are processing it (like if we need to issue 100 write commands, the
+path remains the same and it's pointless to compute it 100 times).
+
+To avoid this keep the current inode's path cached in the send context
+and invalidate it or update it whenever it's needed (after unlinks or
+renames).
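+
+Condensed, the caching rules introduced below are (a summary, not a
+complete listing):
+
+  /*
+   * - get_cur_path() returns sctx->cur_inode_path directly when it is
+   *   non-empty and the requested inode is the current one
+   * - renaming or orphanizing the current inode updates the cached path
+   * - unlinking the current path, orphanizing an ancestor, or moving on
+   *   to the next inode in changed_inode() resets the cache
+   */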
+
+A performance test, and its results, is mentioned in the next patch in
+the series (subject: "btrfs: send: avoid path allocation for the current
+inode when issuing commands").
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 005b0a0c24e1 ("btrfs: send: use fallocate for hole punching with send stream v2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 48 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -179,6 +179,7 @@ struct send_ctx {
+ u64 cur_inode_rdev;
+ u64 cur_inode_last_extent;
+ u64 cur_inode_next_write_offset;
++ struct fs_path cur_inode_path;
+ bool cur_inode_new;
+ bool cur_inode_new_gen;
+ bool cur_inode_deleted;
+@@ -436,6 +437,14 @@ static void fs_path_reset(struct fs_path
+ }
+ }
+
++static void init_path(struct fs_path *p)
++{
++ p->reversed = 0;
++ p->buf = p->inline_buf;
++ p->buf_len = FS_PATH_INLINE_SIZE;
++ fs_path_reset(p);
++}
++
+ static struct fs_path *fs_path_alloc(void)
+ {
+ struct fs_path *p;
+@@ -443,10 +452,7 @@ static struct fs_path *fs_path_alloc(voi
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return NULL;
+- p->reversed = 0;
+- p->buf = p->inline_buf;
+- p->buf_len = FS_PATH_INLINE_SIZE;
+- fs_path_reset(p);
++ init_path(p);
+ return p;
+ }
+
+@@ -624,6 +630,14 @@ static void fs_path_unreverse(struct fs_
+ p->reversed = 0;
+ }
+
++static inline bool is_current_inode_path(const struct send_ctx *sctx,
++ const struct fs_path *path)
++{
++ const struct fs_path *cur = &sctx->cur_inode_path;
++
++ return (strncmp(path->start, cur->start, fs_path_len(cur)) == 0);
++}
++
+ static struct btrfs_path *alloc_path_for_send(void)
+ {
+ struct btrfs_path *path;
+@@ -2450,6 +2464,14 @@ static int get_cur_path(struct send_ctx
+ u64 parent_inode = 0;
+ u64 parent_gen = 0;
+ int stop = 0;
++ const bool is_cur_inode = (ino == sctx->cur_ino && gen == sctx->cur_inode_gen);
++
++ if (is_cur_inode && fs_path_len(&sctx->cur_inode_path) > 0) {
++ if (dest != &sctx->cur_inode_path)
++ return fs_path_copy(dest, &sctx->cur_inode_path);
++
++ return 0;
++ }
+
+ name = fs_path_alloc();
+ if (!name) {
+@@ -2501,8 +2523,12 @@ static int get_cur_path(struct send_ctx
+
+ out:
+ fs_path_free(name);
+- if (!ret)
++ if (!ret) {
+ fs_path_unreverse(dest);
++ if (is_cur_inode && dest != &sctx->cur_inode_path)
++ ret = fs_path_copy(&sctx->cur_inode_path, dest);
++ }
++
+ return ret;
+ }
+
+@@ -3113,6 +3139,11 @@ static int orphanize_inode(struct send_c
+ goto out;
+
+ ret = send_rename(sctx, path, orphan);
++ if (ret < 0)
++ goto out;
++
++ if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen)
++ ret = fs_path_copy(&sctx->cur_inode_path, orphan);
+
+ out:
+ fs_path_free(orphan);
+@@ -4176,6 +4207,10 @@ static int rename_current_inode(struct s
+ if (ret < 0)
+ return ret;
+
++ ret = fs_path_copy(&sctx->cur_inode_path, new_path);
++ if (ret < 0)
++ return ret;
++
+ return fs_path_copy(current_path, new_path);
+ }
+
+@@ -4369,6 +4404,7 @@ static int process_recorded_refs(struct
+ if (ret > 0) {
+ orphanized_ancestor = true;
+ fs_path_reset(valid_path);
++ fs_path_reset(&sctx->cur_inode_path);
+ ret = get_cur_path(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen,
+ valid_path);
+@@ -4568,6 +4604,8 @@ static int process_recorded_refs(struct
+ ret = send_unlink(sctx, cur->full_path);
+ if (ret < 0)
+ goto out;
++ if (is_current_inode_path(sctx, cur->full_path))
++ fs_path_reset(&sctx->cur_inode_path);
+ }
+ ret = dup_ref(cur, &check_dirs);
+ if (ret < 0)
+@@ -6900,6 +6938,7 @@ static int changed_inode(struct send_ctx
+ sctx->cur_inode_last_extent = (u64)-1;
+ sctx->cur_inode_next_write_offset = 0;
+ sctx->ignore_cur_inode = false;
++ fs_path_reset(&sctx->cur_inode_path);
+
+ /*
+ * Set send_progress to current inode. This will tell all get_cur_xxx
+@@ -8190,6 +8229,7 @@ long btrfs_ioctl_send(struct inode *inod
+ goto out;
+ }
+
++ init_path(&sctx->cur_inode_path);
+ INIT_LIST_HEAD(&sctx->new_refs);
+ INIT_LIST_HEAD(&sctx->deleted_refs);
+
+@@ -8475,6 +8515,9 @@ out:
+ btrfs_lru_cache_clear(&sctx->dir_created_cache);
+ btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
+
++ if (sctx->cur_inode_path.buf != sctx->cur_inode_path.inline_buf)
++ kfree(sctx->cur_inode_path.buf);
++
+ kfree(sctx);
+ }
+
--- /dev/null
+From stable+bounces-171734-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:39 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:20 -0400
+Subject: btrfs: send: make fs_path_len() inline and constify its argument
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-7-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 920e8ee2bfcaf886fd8c0ad9df097a7dddfeb2d8 ]
+
+The helper function fs_path_len() is trivial and doesn't need to change
+its path argument, so make it inline and constify the argument.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -478,7 +478,7 @@ static void fs_path_free(struct fs_path
+ kfree(p);
+ }
+
+-static int fs_path_len(struct fs_path *p)
++static inline int fs_path_len(const struct fs_path *p)
+ {
+ return p->end - p->start;
+ }
--- /dev/null
+From stable+bounces-171729-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:30 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:15 -0400
+Subject: btrfs: send: only use boolean variables at process_recorded_refs()
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-2-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 9453fe329789073d9a971de01da5902c32c1a01a ]
+
+We have several local variables at process_recorded_refs() that are used
+as booleans, with some of them having a 'bool' type while two of them
+having an 'int' type. Change this to make them all use the 'bool' type
+which is more clear and to make everything more consistent.
+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 005b0a0c24e1 ("btrfs: send: use fallocate for hole punching with send stream v2")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -4180,9 +4180,9 @@ static int process_recorded_refs(struct
+ u64 ow_inode = 0;
+ u64 ow_gen;
+ u64 ow_mode;
+- int did_overwrite = 0;
+- int is_orphan = 0;
+ u64 last_dir_ino_rm = 0;
++ bool did_overwrite = false;
++ bool is_orphan = false;
+ bool can_rename = true;
+ bool orphanized_dir = false;
+ bool orphanized_ancestor = false;
+@@ -4224,14 +4224,14 @@ static int process_recorded_refs(struct
+ if (ret < 0)
+ goto out;
+ if (ret)
+- did_overwrite = 1;
++ did_overwrite = true;
+ }
+ if (sctx->cur_inode_new || did_overwrite) {
+ ret = gen_unique_name(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen, valid_path);
+ if (ret < 0)
+ goto out;
+- is_orphan = 1;
++ is_orphan = true;
+ } else {
+ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
+ valid_path);
+@@ -4454,7 +4454,7 @@ static int process_recorded_refs(struct
+ ret = send_rename(sctx, valid_path, cur->full_path);
+ if (ret < 0)
+ goto out;
+- is_orphan = 0;
++ is_orphan = false;
+ ret = fs_path_copy(valid_path, cur->full_path);
+ if (ret < 0)
+ goto out;
+@@ -4515,7 +4515,7 @@ static int process_recorded_refs(struct
+ sctx->cur_inode_gen, valid_path);
+ if (ret < 0)
+ goto out;
+- is_orphan = 1;
++ is_orphan = true;
+ }
+
+ list_for_each_entry(cur, &sctx->deleted_refs, list) {
--- /dev/null
+From stable+bounces-171733-greg=kroah.com@vger.kernel.org Tue Aug 19 04:40:34 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:40:19 -0400
+Subject: btrfs: send: use fallocate for hole punching with send stream v2
+To: stable@vger.kernel.org
+Cc: Filipe Manana <fdmanana@suse.com>, Boris Burkov <boris@bur.io>, David Sterba <dsterba@suse.com>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819024020.291759-6-sashal@kernel.org>
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 005b0a0c24e1628313e951516b675109a92cacfe ]
+
+Currently holes are sent as writes full of zeroes, which results in
+unnecessarily using disk space at the receiving end and increasing the
+stream size.
+
+In some cases we avoid sending writes of zeroes, like during a full
+send operation where we just skip writes for holes.
+
+But for some cases we fill previous holes with writes of zeroes too, like
+in this scenario:
+
+1) We have a file with a hole in the range [2M, 3M), we snapshot the
+ subvolume and do a full send. The range [2M, 3M) stays as a hole at
+ the receiver since we skip sending write commands full of zeroes;
+
+2) We punch a hole for the range [3M, 4M) in our file, so that now it
+ has a 2M hole in the range [2M, 4M), and snapshot the subvolume.
+ Now if we do an incremental send, we will send write commands full
+ of zeroes for the range [2M, 4M), removing the hole for [2M, 3M) at
+ the receiver.
+
+We could improve cases such as this last one by doing additional
+comparisons of file extent items (or their absence) between the parent
+and send snapshots, but that's a lot of code to add plus additional CPU
+and IO costs.
+
+Since the send stream v2 already has a fallocate command, and
+btrfs-progs has implemented a callback to execute fallocate ever since
+its send stream v2 support was added, update the kernel to use
+fallocate for punching holes for v2+ streams.
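+
+On the receiving side this boils down to a plain fallocate call (a
+sketch; the mode flags match the ones put in the command below):
+
+  /* Punch a hole of 'len' bytes at 'offset' without changing i_size. */
+  fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len);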
+
+Test coverage is provided by btrfs/284 which is a version of btrfs/007
+that exercises send stream v2 instead of v1, using fsstress with random
+operations and fssum to verify file contents.
+
+Link: https://github.com/kdave/btrfs-progs/issues/1001
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c | 33 +++++++++++++++++++++++++++++++++
+ 1 file changed, 33 insertions(+)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -4,6 +4,7 @@
+ */
+
+ #include <linux/bsearch.h>
++#include <linux/falloc.h>
+ #include <linux/fs.h>
+ #include <linux/file.h>
+ #include <linux/sort.h>
+@@ -5513,6 +5514,30 @@ tlv_put_failure:
+ return ret;
+ }
+
++static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len)
++{
++ struct fs_path *path;
++ int ret;
++
++ path = get_cur_inode_path(sctx);
++ if (IS_ERR(path))
++ return PTR_ERR(path);
++
++ ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE);
++ if (ret < 0)
++ return ret;
++
++ TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
++ TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode);
++ TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
++ TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
++
++ ret = send_cmd(sctx);
++
++tlv_put_failure:
++ return ret;
++}
++
+ static int send_hole(struct send_ctx *sctx, u64 end)
+ {
+ struct fs_path *p = NULL;
+@@ -5521,6 +5546,14 @@ static int send_hole(struct send_ctx *sc
+ int ret = 0;
+
+ /*
++ * Starting with send stream v2 we have fallocate and can use it to
++ * punch holes instead of sending writes full of zeroes.
++ */
++ if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE))
++ return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
++ offset, end - offset);
++
++ /*
+ * A hole that starts at EOF or beyond it. Since we do not yet support
+ * fallocate (for extent preallocation and hole punching), sending a
+ * write of zeroes starting at EOF or beyond would later require issuing
--- /dev/null
+From stable+bounces-169827-greg=kroah.com@vger.kernel.org Fri Aug 15 23:14:46 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 17:14:21 -0400
+Subject: fscrypt: Don't use problematic non-inline crypto engines
+To: stable@vger.kernel.org
+Cc: Eric Biggers <ebiggers@kernel.org>, Ard Biesheuvel <ardb@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250815211421.234132-1-sashal@kernel.org>
+
+From: Eric Biggers <ebiggers@kernel.org>
+
+[ Upstream commit b41c1d8d07906786c60893980d52688f31d114a6 ]
+
+Make fscrypt no longer use Crypto API drivers for non-inline crypto
+engines, even when the Crypto API prioritizes them over CPU-based code
+(which unfortunately it often does). These drivers tend to be really
+problematic, especially for fscrypt's workload. This commit has no
+effect on inline crypto engines, which are different and do work well.
+
+Specifically, exclude drivers that have CRYPTO_ALG_KERN_DRIVER_ONLY or
+CRYPTO_ALG_ALLOCATES_MEMORY set. (Later, CRYPTO_ALG_ASYNC should be
+excluded too. That's omitted for now to keep this commit backportable,
+since until recently some CPU-based code had CRYPTO_ALG_ASYNC set.)
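+
+Concretely, every fscrypt allocation of a Crypto API transform now
+passes that mask (a condensed view of the change below):
+
+  #define FSCRYPT_CRYPTOAPI_MASK \
+          (CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY)
+
+  tfm = crypto_alloc_skcipher(mode->cipher_str, 0, FSCRYPT_CRYPTOAPI_MASK);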
+
+There are two major issues with these drivers: bugs and performance.
+
+First, these drivers tend to be buggy. They're fundamentally much more
+error-prone and harder to test than the CPU-based code. They often
+don't get tested before kernel releases, and even if they do, the crypto
+self-tests don't properly test these drivers. Released drivers have
+en/decrypted or hashed data incorrectly. These bugs cause issues for
+fscrypt users who often didn't even want to use these drivers, e.g.:
+
+- https://github.com/google/fscryptctl/issues/32
+- https://github.com/google/fscryptctl/issues/9
+- https://lore.kernel.org/r/PH0PR02MB731916ECDB6C613665863B6CFFAA2@PH0PR02MB7319.namprd02.prod.outlook.com
+
+These drivers have also similarly caused issues for dm-crypt users,
+including data corruption and deadlocks. Since Linux v5.10, dm-crypt
+has disabled most of them by excluding CRYPTO_ALG_ALLOCATES_MEMORY.
+
+Second, these drivers tend to be *much* slower than the CPU-based code.
+This may seem counterintuitive, but benchmarks clearly show it. There's
+a *lot* of overhead associated with going to a hardware driver, off the
+CPU, and back again. To prove this, I gathered as many systems with
+this type of crypto engine as I could, and I measured synchronous
+encryption of 4096-byte messages (which matches fscrypt's workload):
+
+Intel Emerald Rapids server:
+ AES-256-XTS:
+ xts-aes-vaes-avx512 16171 MB/s [CPU-based, Vector AES]
+ qat_aes_xts 289 MB/s [Offload, Intel QuickAssist]
+
+Qualcomm SM8650 HDK:
+ AES-256-XTS:
+ xts-aes-ce 4301 MB/s [CPU-based, ARMv8 Crypto Extensions]
+ xts-aes-qce 73 MB/s [Offload, Qualcomm Crypto Engine]
+
+i.MX 8M Nano LPDDR4 EVK:
+ AES-256-XTS:
+ xts-aes-ce 647 MB/s [CPU-based, ARMv8 Crypto Extensions]
+ xts(ecb-aes-caam) 20 MB/s [Offload, CAAM]
+ AES-128-CBC-ESSIV:
+ essiv(cbc-aes-caam,sha256-lib) 23 MB/s [Offload, CAAM]
+
+STM32MP157F-DK2:
+ AES-256-XTS:
+ xts-aes-neonbs 13.2 MB/s [CPU-based, ARM NEON]
+ xts(stm32-ecb-aes) 3.1 MB/s [Offload, STM32 crypto engine]
+ AES-128-CBC-ESSIV:
+ essiv(cbc-aes-neonbs,sha256-lib)
+ 14.7 MB/s [CPU-based, ARM NEON]
+ essiv(stm32-cbc-aes,sha256-lib)
+ 3.2 MB/s [Offload, STM32 crypto engine]
+ Adiantum:
+ adiantum(xchacha12-arm,aes-arm,nhpoly1305-neon)
+ 52.8 MB/s [CPU-based, ARM scalar + NEON]
+
+So, there was no case in which the crypto engine was even *close* to
+being faster. On the first three, which have AES instructions in the
+CPU, the CPU was 30 to 55 times faster (!). Even on STM32MP157F-DK2
+which has a Cortex-A7 CPU that doesn't have AES instructions, AES was
+over 4 times faster on the CPU. And Adiantum encryption, which is what
+actually should be used on CPUs like that, was over 17 times faster.
+
+Other justifications that have been given for these non-inline crypto
+engines (almost always coming from the hardware vendors, not actual
+users) don't seem very plausible either:
+
+ - The crypto engine throughput could be improved by processing
+ multiple requests concurrently. Currently irrelevant to fscrypt,
+ since it doesn't do that. This would also be complex, and unhelpful
+ in many cases. 2 of the 4 engines I tested even had only one queue.
+
+ - Some of the engines, e.g. STM32, support hardware keys. Also
+ currently irrelevant to fscrypt, since it doesn't support these.
+ Interestingly, the STM32 driver itself doesn't support this either.
+
+ - Free up CPU for other tasks and/or reduce energy usage. Not very
+ plausible considering the "short" message length, driver overhead,
+ and scheduling overhead. There's just very little time for the CPU
+ to do something else like run another task or enter low-power state,
+ before the message finishes and it's time to process the next one.
+
+ - Some of these engines resist power analysis and electromagnetic
+ attacks, while the CPU-based crypto generally does not. In theory,
+ this sounds great. In practice, if this benefit requires the use of
+ an off-CPU offload that massively regresses performance and has a
+ low-quality, buggy driver, the price for this hardening (which is
+ not relevant to most fscrypt users, and tends to be incomplete) is
+ just too high. Inline crypto engines are much more promising here,
+ as are on-CPU solutions like RISC-V High Assurance Cryptography.
+
+Fixes: b30ab0e03407 ("ext4 crypto: add ext4 encryption facilities")
+Cc: stable@vger.kernel.org
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20250704070322.20692-1-ebiggers@kernel.org
+Signed-off-by: Eric Biggers <ebiggers@kernel.org>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/filesystems/fscrypt.rst | 37 +++++++++++++---------------------
+ fs/crypto/fscrypt_private.h | 16 ++++++++++++++
+ fs/crypto/hkdf.c | 2 -
+ fs/crypto/keysetup.c | 3 +-
+ fs/crypto/keysetup_v1.c | 3 +-
+ 5 files changed, 36 insertions(+), 25 deletions(-)
+
+--- a/Documentation/filesystems/fscrypt.rst
++++ b/Documentation/filesystems/fscrypt.rst
+@@ -141,9 +141,8 @@ However, these ioctls have some limitati
+ CONFIG_PAGE_POISONING=y in your kernel config and add page_poison=1
+ to your kernel command line. However, this has a performance cost.
+
+-- Secret keys might still exist in CPU registers, in crypto
+- accelerator hardware (if used by the crypto API to implement any of
+- the algorithms), or in other places not explicitly considered here.
++- Secret keys might still exist in CPU registers or in other places
++ not explicitly considered here.
+
+ Limitations of v1 policies
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+@@ -375,9 +374,12 @@ the work is done by XChaCha12, which is
+ acceleration is unavailable. For more information about Adiantum, see
+ `the Adiantum paper <https://eprint.iacr.org/2018/720.pdf>`_.
+
+-The (AES-128-CBC-ESSIV, AES-128-CTS-CBC) pair exists only to support
+-systems whose only form of AES acceleration is an off-CPU crypto
+-accelerator such as CAAM or CESA that does not support XTS.
++The (AES-128-CBC-ESSIV, AES-128-CTS-CBC) pair was added to try to
++provide a more efficient option for systems that lack AES instructions
++in the CPU but do have a non-inline crypto engine such as CAAM or CESA
++that supports AES-CBC (and not AES-XTS). This is deprecated. It has
++been shown that just doing AES on the CPU is actually faster.
++Moreover, Adiantum is faster still and is recommended on such systems.
+
+ The remaining mode pairs are the "national pride ciphers":
+
+@@ -1231,22 +1233,13 @@ this by validating all top-level encrypt
+ Inline encryption support
+ =========================
+
+-By default, fscrypt uses the kernel crypto API for all cryptographic
+-operations (other than HKDF, which fscrypt partially implements
+-itself). The kernel crypto API supports hardware crypto accelerators,
+-but only ones that work in the traditional way where all inputs and
+-outputs (e.g. plaintexts and ciphertexts) are in memory. fscrypt can
+-take advantage of such hardware, but the traditional acceleration
+-model isn't particularly efficient and fscrypt hasn't been optimized
+-for it.
+-
+-Instead, many newer systems (especially mobile SoCs) have *inline
+-encryption hardware* that can encrypt/decrypt data while it is on its
+-way to/from the storage device. Linux supports inline encryption
+-through a set of extensions to the block layer called *blk-crypto*.
+-blk-crypto allows filesystems to attach encryption contexts to bios
+-(I/O requests) to specify how the data will be encrypted or decrypted
+-in-line. For more information about blk-crypto, see
++Many newer systems (especially mobile SoCs) have *inline encryption
++hardware* that can encrypt/decrypt data while it is on its way to/from
++the storage device. Linux supports inline encryption through a set of
++extensions to the block layer called *blk-crypto*. blk-crypto allows
++filesystems to attach encryption contexts to bios (I/O requests) to
++specify how the data will be encrypted or decrypted in-line. For more
++information about blk-crypto, see
+ :ref:`Documentation/block/inline-encryption.rst <inline_encryption>`.
+
+ On supported filesystems (currently ext4 and f2fs), fscrypt can use
+--- a/fs/crypto/fscrypt_private.h
++++ b/fs/crypto/fscrypt_private.h
+@@ -27,6 +27,22 @@
+ */
+ #define FSCRYPT_MIN_KEY_SIZE 16
+
++/*
++ * This mask is passed as the third argument to the crypto_alloc_*() functions
++ * to prevent fscrypt from using the Crypto API drivers for non-inline crypto
++ * engines. Those drivers have been problematic for fscrypt. fscrypt users
++ * have reported hangs and even incorrect en/decryption with these drivers.
++ * Since going to the driver, off CPU, and back again is really slow, such
++ * drivers can be over 50 times slower than the CPU-based code for fscrypt's
++ * workload. Even on platforms that lack AES instructions on the CPU, using the
++ * offloads has been shown to be slower, even staying with AES. (Of course,
++ * Adiantum is faster still, and is the recommended option on such platforms...)
++ *
++ * Note that fscrypt also supports inline crypto engines. Those don't use the
++ * Crypto API and work much better than the old-style (non-inline) engines.
++ */
++#define FSCRYPT_CRYPTOAPI_MASK \
++ (CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY)
+ #define FSCRYPT_CONTEXT_V1 1
+ #define FSCRYPT_CONTEXT_V2 2
+
+--- a/fs/crypto/hkdf.c
++++ b/fs/crypto/hkdf.c
+@@ -72,7 +72,7 @@ int fscrypt_init_hkdf(struct fscrypt_hkd
+ u8 prk[HKDF_HASHLEN];
+ int err;
+
+- hmac_tfm = crypto_alloc_shash(HKDF_HMAC_ALG, 0, 0);
++ hmac_tfm = crypto_alloc_shash(HKDF_HMAC_ALG, 0, FSCRYPT_CRYPTOAPI_MASK);
+ if (IS_ERR(hmac_tfm)) {
+ fscrypt_err(NULL, "Error allocating " HKDF_HMAC_ALG ": %ld",
+ PTR_ERR(hmac_tfm));
+--- a/fs/crypto/keysetup.c
++++ b/fs/crypto/keysetup.c
+@@ -103,7 +103,8 @@ fscrypt_allocate_skcipher(struct fscrypt
+ struct crypto_skcipher *tfm;
+ int err;
+
+- tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
++ tfm = crypto_alloc_skcipher(mode->cipher_str, 0,
++ FSCRYPT_CRYPTOAPI_MASK);
+ if (IS_ERR(tfm)) {
+ if (PTR_ERR(tfm) == -ENOENT) {
+ fscrypt_warn(inode,
+--- a/fs/crypto/keysetup_v1.c
++++ b/fs/crypto/keysetup_v1.c
+@@ -52,7 +52,8 @@ static int derive_key_aes(const u8 *mast
+ struct skcipher_request *req = NULL;
+ DECLARE_CRYPTO_WAIT(wait);
+ struct scatterlist src_sg, dst_sg;
+- struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
++ struct crypto_skcipher *tfm =
++ crypto_alloc_skcipher("ecb(aes)", 0, FSCRYPT_CRYPTOAPI_MASK);
+
+ if (IS_ERR(tfm)) {
+ res = PTR_ERR(tfm);
--- /dev/null
+From stable+bounces-169815-greg=kroah.com@vger.kernel.org Fri Aug 15 21:31:49 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 15:31:27 -0400
+Subject: leds: flash: leds-qcom-flash: Fix registry access after re-bind
+To: stable@vger.kernel.org
+Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>, Fenglin Wu <fenglin.wu@oss.qualcomm.com>, Lee Jones <lee@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250815193127.192775-2-sashal@kernel.org>
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit fab15f57360b1e6620a1d0d6b0fbee896e6c1f07 ]
+
+In probe(), the driver updates each 'reg_field' with 'reg_base':
+
+ for (i = 0; i < REG_MAX_COUNT; i++)
+ regs[i].reg += reg_base;
+
+The 'reg_field' array ('regs' in the snippet above) is statically
+allocated, thus each re-bind would add another 'reg_base', leading to
+bogus register addresses. Constify the 'reg_field' arrays and
+duplicate them in probe() to solve this.
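+
+For illustration, with a hypothetical reg_base of 0x100 (the value is
+made up for this example) and the first field REG_FIELD(0x08, 0, 7):
+
+  1st probe:                    regs[0].reg = 0x08  + 0x100 = 0x108  /* correct */
+  2nd probe after unbind/bind:  regs[0].reg = 0x108 + 0x100 = 0x208  /* bogus */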
+
+Fixes: 96a2e242a5dc ("leds: flash: Add driver to support flash LED module in QCOM PMICs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Fenglin Wu <fenglin.wu@oss.qualcomm.com>
+Link: https://lore.kernel.org/r/20250529063335.8785-2-krzysztof.kozlowski@linaro.org
+Signed-off-by: Lee Jones <lee@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/leds/flash/leds-qcom-flash.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/drivers/leds/flash/leds-qcom-flash.c
++++ b/drivers/leds/flash/leds-qcom-flash.c
+@@ -117,7 +117,7 @@ enum {
+ REG_MAX_COUNT,
+ };
+
+-static struct reg_field mvflash_3ch_regs[REG_MAX_COUNT] = {
++static const struct reg_field mvflash_3ch_regs[REG_MAX_COUNT] = {
+ REG_FIELD(0x08, 0, 7), /* status1 */
+ REG_FIELD(0x09, 0, 7), /* status2 */
+ REG_FIELD(0x0a, 0, 7), /* status3 */
+@@ -132,7 +132,7 @@ static struct reg_field mvflash_3ch_regs
+ REG_FIELD(0x58, 0, 2), /* therm_thrsh3 */
+ };
+
+-static struct reg_field mvflash_4ch_regs[REG_MAX_COUNT] = {
++static const struct reg_field mvflash_4ch_regs[REG_MAX_COUNT] = {
+ REG_FIELD(0x06, 0, 7), /* status1 */
+ REG_FIELD(0x07, 0, 6), /* status2 */
+ REG_FIELD(0x09, 0, 7), /* status3 */
+@@ -855,11 +855,17 @@ static int qcom_flash_led_probe(struct p
+ if (val == FLASH_SUBTYPE_3CH_PM8150_VAL || val == FLASH_SUBTYPE_3CH_PMI8998_VAL) {
+ flash_data->hw_type = QCOM_MVFLASH_3CH;
+ flash_data->max_channels = 3;
+- regs = mvflash_3ch_regs;
++ regs = devm_kmemdup(dev, mvflash_3ch_regs, sizeof(mvflash_3ch_regs),
++ GFP_KERNEL);
++ if (!regs)
++ return -ENOMEM;
+ } else if (val == FLASH_SUBTYPE_4CH_VAL) {
+ flash_data->hw_type = QCOM_MVFLASH_4CH;
+ flash_data->max_channels = 4;
+- regs = mvflash_4ch_regs;
++ regs = devm_kmemdup(dev, mvflash_4ch_regs, sizeof(mvflash_4ch_regs),
++ GFP_KERNEL);
++ if (!regs)
++ return -ENOMEM;
+
+ rc = regmap_read(regmap, reg_base + FLASH_REVISION_REG, &val);
+ if (rc < 0) {
+@@ -881,6 +887,7 @@ static int qcom_flash_led_probe(struct p
+ dev_err(dev, "Failed to allocate regmap field, rc=%d\n", rc);
+ return rc;
+ }
++ devm_kfree(dev, regs); /* devm_regmap_field_bulk_alloc() makes copies */
+
+ platform_set_drvdata(pdev, flash_data);
+ mutex_init(&flash_data->lock);
--- /dev/null
+From stable+bounces-169814-greg=kroah.com@vger.kernel.org Fri Aug 15 21:32:07 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 15:31:26 -0400
+Subject: leds: flash: leds-qcom-flash: Limit LED current based on thermal condition
+To: stable@vger.kernel.org
+Cc: Fenglin Wu <quic_fenglinw@quicinc.com>, Lee Jones <lee@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250815193127.192775-1-sashal@kernel.org>
+
+From: Fenglin Wu <quic_fenglinw@quicinc.com>
+
+[ Upstream commit a0864cf32044233e56247fa0eed3ac660f15db9e ]
+
+The flash module has status bits to indicate different thermal
+conditions, which are called OTSTx. For each OTSTx status, there is a
+recommended total flash current for all channels to prevent the flash
+module from entering a higher thermal level. For example, the total
+flash current should be limited to 1000mA/500mA respectively when the
+HW reaches the OTST1/OTST2 thermal level.
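+
+The resulting per-status limits used by the driver (see the
+OTSTx_MAX_CURRENT_MA constants added below) are:
+
+  OTST1: 1000 mA
+  OTST2:  500 mA
+  OTST3:  200 mA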
+
+Signed-off-by: Fenglin Wu <quic_fenglinw@quicinc.com>
+Link: https://lore.kernel.org/r/20240705-qcom_flash_thermal_derating-v3-1-8e2e2783e3a6@quicinc.com
+Signed-off-by: Lee Jones <lee@kernel.org>
+Stable-dep-of: fab15f57360b ("leds: flash: leds-qcom-flash: Fix registry access after re-bind")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/leds/flash/leds-qcom-flash.c | 163 ++++++++++++++++++++++++++++++++++-
+ 1 file changed, 162 insertions(+), 1 deletion(-)
+
+--- a/drivers/leds/flash/leds-qcom-flash.c
++++ b/drivers/leds/flash/leds-qcom-flash.c
+@@ -1,6 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0-only
+ /*
+- * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
++ * Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+ #include <linux/bitfield.h>
+@@ -14,6 +14,9 @@
+ #include <media/v4l2-flash-led-class.h>
+
+ /* registers definitions */
++#define FLASH_REVISION_REG 0x00
++#define FLASH_4CH_REVISION_V0P1 0x01
++
+ #define FLASH_TYPE_REG 0x04
+ #define FLASH_TYPE_VAL 0x18
+
+@@ -73,6 +76,16 @@
+
+ #define UA_PER_MA 1000
+
++/* thermal threshold constants */
++#define OTST_3CH_MIN_VAL 3
++#define OTST1_4CH_MIN_VAL 0
++#define OTST1_4CH_V0P1_MIN_VAL 3
++#define OTST2_4CH_MIN_VAL 0
++
++#define OTST1_MAX_CURRENT_MA 1000
++#define OTST2_MAX_CURRENT_MA 500
++#define OTST3_MAX_CURRENT_MA 200
++
+ enum hw_type {
+ QCOM_MVFLASH_3CH,
+ QCOM_MVFLASH_4CH,
+@@ -98,6 +111,9 @@ enum {
+ REG_IRESOLUTION,
+ REG_CHAN_STROBE,
+ REG_CHAN_EN,
++ REG_THERM_THRSH1,
++ REG_THERM_THRSH2,
++ REG_THERM_THRSH3,
+ REG_MAX_COUNT,
+ };
+
+@@ -111,6 +127,9 @@ static struct reg_field mvflash_3ch_regs
+ REG_FIELD(0x47, 0, 5), /* iresolution */
+ REG_FIELD_ID(0x49, 0, 2, 3, 1), /* chan_strobe */
+ REG_FIELD(0x4c, 0, 2), /* chan_en */
++ REG_FIELD(0x56, 0, 2), /* therm_thrsh1 */
++ REG_FIELD(0x57, 0, 2), /* therm_thrsh2 */
++ REG_FIELD(0x58, 0, 2), /* therm_thrsh3 */
+ };
+
+ static struct reg_field mvflash_4ch_regs[REG_MAX_COUNT] = {
+@@ -123,6 +142,8 @@ static struct reg_field mvflash_4ch_regs
+ REG_FIELD(0x49, 0, 3), /* iresolution */
+ REG_FIELD_ID(0x4a, 0, 6, 4, 1), /* chan_strobe */
+ REG_FIELD(0x4e, 0, 3), /* chan_en */
++ REG_FIELD(0x7a, 0, 2), /* therm_thrsh1 */
++ REG_FIELD(0x78, 0, 2), /* therm_thrsh2 */
+ };
+
+ struct qcom_flash_data {
+@@ -130,9 +151,11 @@ struct qcom_flash_data {
+ struct regmap_field *r_fields[REG_MAX_COUNT];
+ struct mutex lock;
+ enum hw_type hw_type;
++ u32 total_ma;
+ u8 leds_count;
+ u8 max_channels;
+ u8 chan_en_bits;
++ u8 revision;
+ };
+
+ struct qcom_flash_led {
+@@ -143,6 +166,7 @@ struct qcom_flash_led {
+ u32 max_timeout_ms;
+ u32 flash_current_ma;
+ u32 flash_timeout_ms;
++ u32 current_in_use_ma;
+ u8 *chan_id;
+ u8 chan_count;
+ bool enabled;
+@@ -172,6 +196,127 @@ static int set_flash_module_en(struct qc
+ return rc;
+ }
+
++static int update_allowed_flash_current(struct qcom_flash_led *led, u32 *current_ma, bool strobe)
++{
++ struct qcom_flash_data *flash_data = led->flash_data;
++ u32 therm_ma, avail_ma, thrsh[3], min_thrsh, sts;
++ int rc = 0;
++
++ mutex_lock(&flash_data->lock);
++ /*
++ * Put previously allocated current into allowed budget in either of these two cases:
++ * 1) LED is disabled;
++ * 2) LED is enabled repeatedly
++ */
++ if (!strobe || led->current_in_use_ma != 0) {
++ if (flash_data->total_ma >= led->current_in_use_ma)
++ flash_data->total_ma -= led->current_in_use_ma;
++ else
++ flash_data->total_ma = 0;
++
++ led->current_in_use_ma = 0;
++ if (!strobe)
++ goto unlock;
++ }
++
++ /*
++ * Cache the default thermal threshold settings, and set them to the lowest levels before
++ * reading over-temp real time status. If over-temp has been triggered at the lowest
++ * threshold, it's very likely that it would be triggered at a higher (default) threshold
++ * when more flash current is requested. Prevent device from triggering over-temp condition
++ * by limiting the flash current for the new request.
++ */
++ rc = regmap_field_read(flash_data->r_fields[REG_THERM_THRSH1], &thrsh[0]);
++ if (rc < 0)
++ goto unlock;
++
++ rc = regmap_field_read(flash_data->r_fields[REG_THERM_THRSH2], &thrsh[1]);
++ if (rc < 0)
++ goto unlock;
++
++ if (flash_data->hw_type == QCOM_MVFLASH_3CH) {
++ rc = regmap_field_read(flash_data->r_fields[REG_THERM_THRSH3], &thrsh[2]);
++ if (rc < 0)
++ goto unlock;
++ }
++
++ min_thrsh = OTST_3CH_MIN_VAL;
++ if (flash_data->hw_type == QCOM_MVFLASH_4CH)
++ min_thrsh = (flash_data->revision == FLASH_4CH_REVISION_V0P1) ?
++ OTST1_4CH_V0P1_MIN_VAL : OTST1_4CH_MIN_VAL;
++
++ rc = regmap_field_write(flash_data->r_fields[REG_THERM_THRSH1], min_thrsh);
++ if (rc < 0)
++ goto unlock;
++
++ if (flash_data->hw_type == QCOM_MVFLASH_4CH)
++ min_thrsh = OTST2_4CH_MIN_VAL;
++
++ /*
++ * The default thermal threshold settings have been updated hence
++ * restore them if any fault happens starting from here.
++ */
++ rc = regmap_field_write(flash_data->r_fields[REG_THERM_THRSH2], min_thrsh);
++ if (rc < 0)
++ goto restore;
++
++ if (flash_data->hw_type == QCOM_MVFLASH_3CH) {
++ rc = regmap_field_write(flash_data->r_fields[REG_THERM_THRSH3], min_thrsh);
++ if (rc < 0)
++ goto restore;
++ }
++
++ /* Read thermal level status to get corresponding derating flash current */
++ rc = regmap_field_read(flash_data->r_fields[REG_STATUS2], &sts);
++ if (rc)
++ goto restore;
++
++ therm_ma = FLASH_TOTAL_CURRENT_MAX_UA / 1000;
++ if (flash_data->hw_type == QCOM_MVFLASH_3CH) {
++ if (sts & FLASH_STS_3CH_OTST3)
++ therm_ma = OTST3_MAX_CURRENT_MA;
++ else if (sts & FLASH_STS_3CH_OTST2)
++ therm_ma = OTST2_MAX_CURRENT_MA;
++ else if (sts & FLASH_STS_3CH_OTST1)
++ therm_ma = OTST1_MAX_CURRENT_MA;
++ } else {
++ if (sts & FLASH_STS_4CH_OTST2)
++ therm_ma = OTST2_MAX_CURRENT_MA;
++ else if (sts & FLASH_STS_4CH_OTST1)
++ therm_ma = OTST1_MAX_CURRENT_MA;
++ }
++
++ /* Calculate the allowed flash current for the request */
++ if (therm_ma <= flash_data->total_ma)
++ avail_ma = 0;
++ else
++ avail_ma = therm_ma - flash_data->total_ma;
++
++ *current_ma = min_t(u32, *current_ma, avail_ma);
++ led->current_in_use_ma = *current_ma;
++ flash_data->total_ma += led->current_in_use_ma;
++
++ dev_dbg(led->flash.led_cdev.dev, "allowed flash current: %dmA, total current: %dmA\n",
++ led->current_in_use_ma, flash_data->total_ma);
++
++restore:
++ /* Restore to default thermal threshold settings */
++ rc = regmap_field_write(flash_data->r_fields[REG_THERM_THRSH1], thrsh[0]);
++ if (rc < 0)
++ goto unlock;
++
++ rc = regmap_field_write(flash_data->r_fields[REG_THERM_THRSH2], thrsh[1]);
++ if (rc < 0)
++ goto unlock;
++
++ if (flash_data->hw_type == QCOM_MVFLASH_3CH)
++ rc = regmap_field_write(flash_data->r_fields[REG_THERM_THRSH3], thrsh[2]);
++
++unlock:
++ mutex_unlock(&flash_data->lock);
++ return rc;
++}
++
+ static int set_flash_current(struct qcom_flash_led *led, u32 current_ma, enum led_mode mode)
+ {
+ struct qcom_flash_data *flash_data = led->flash_data;
+@@ -313,6 +458,10 @@ static int qcom_flash_strobe_set(struct
+ if (rc)
+ return rc;
+
++ rc = update_allowed_flash_current(led, &led->flash_current_ma, state);
++ if (rc < 0)
++ return rc;
++
+ rc = set_flash_current(led, led->flash_current_ma, FLASH_MODE);
+ if (rc)
+ return rc;
+@@ -429,6 +578,10 @@ static int qcom_flash_led_brightness_set
+ if (rc)
+ return rc;
+
++ rc = update_allowed_flash_current(led, &current_ma, enable);
++ if (rc < 0)
++ return rc;
++
+ rc = set_flash_current(led, current_ma, TORCH_MODE);
+ if (rc)
+ return rc;
+@@ -707,6 +860,14 @@ static int qcom_flash_led_probe(struct p
+ flash_data->hw_type = QCOM_MVFLASH_4CH;
+ flash_data->max_channels = 4;
+ regs = mvflash_4ch_regs;
++
++ rc = regmap_read(regmap, reg_base + FLASH_REVISION_REG, &val);
++ if (rc < 0) {
++ dev_err(dev, "Failed to read flash LED module revision, rc=%d\n", rc);
++ return rc;
++ }
++
++ flash_data->revision = val;
+ } else {
+ dev_err(dev, "flash LED subtype %#x is not yet supported\n", val);
+ return -ENODEV;
--- /dev/null
+From stable+bounces-171743-greg=kroah.com@vger.kernel.org Tue Aug 19 05:32:04 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 23:31:53 -0400
+Subject: net/sched: ets: use old 'nbands' while purging unused classes
+To: stable@vger.kernel.org
+Cc: Davide Caratti <dcaratti@redhat.com>, Li Shuang <shuali@redhat.com>, Petr Machata <petrm@nvidia.com>, Ivan Vecera <ivecera@redhat.com>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819033153.315415-2-sashal@kernel.org>
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 87c6efc5ce9c126ae4a781bc04504b83780e3650 ]
+
+Shuang reported the sch_ets test-case [1] crashing in ets_class_qlen_notify()
+after recent changes from Lion [2]. The problem is: in ets_qdisc_change()
+we purge unused DWRR queues while 'q->nbands' already holds the new value,
+but the cleanup must be done with the old one. The problem has been there
+since my first attempts to fix ets_qdisc_change(), and it surfaced again
+after the recent qdisc len accounting fixes. Fix it by purging idle DWRR
+queues before assigning the new value of 'q->nbands', so that all purge
+operations find a consistent configuration (sketched right after this
+list):
+
+ - old 'q->nbands' because it's needed by ets_class_find()
+ - old 'q->nstrict' because it's needed by ets_class_is_strict()
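+
+A condensed sketch of the corrected ordering (names as in the diff
+below): the purge loop must run while both fields still hold their old
+values, and only then may the new band count be published:
+
+  /* Purge with the old 'q->nbands'/'q->nstrict' still in effect. */
+  for (i = nbands; i < oldbands; i++) {
+          if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
+                  list_del_init(&q->classes[i].alist);
+          qdisc_purge_queue(q->classes[i].qdisc);
+  }
+  WRITE_ONCE(q->nbands, nbands);  /* only now expose the new value */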
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 0 P4D 0
+ Oops: Oops: 0000 [#1] SMP NOPTI
+ CPU: 62 UID: 0 PID: 39457 Comm: tc Kdump: loaded Not tainted 6.12.0-116.el10.x86_64 #1 PREEMPT(voluntary)
+ Hardware name: Dell Inc. PowerEdge R640/06DKY5, BIOS 2.12.2 07/09/2021
+ RIP: 0010:__list_del_entry_valid_or_report+0x4/0x80
+ Code: ff 4c 39 c7 0f 84 39 19 8e ff b8 01 00 00 00 c3 cc cc cc cc 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa <48> 8b 17 48 8b 4f 08 48 85 d2 0f 84 56 19 8e ff 48 85 c9 0f 84 ab
+ RSP: 0018:ffffba186009f400 EFLAGS: 00010202
+ RAX: 00000000000000d6 RBX: 0000000000000000 RCX: 0000000000000004
+ RDX: ffff9f0fa29b69c0 RSI: 0000000000000000 RDI: 0000000000000000
+ RBP: ffffffffc12c2400 R08: 0000000000000008 R09: 0000000000000004
+ R10: ffffffffffffffff R11: 0000000000000004 R12: 0000000000000000
+ R13: ffff9f0f8cfe0000 R14: 0000000000100005 R15: 0000000000000000
+ FS: 00007f2154f37480(0000) GS:ffff9f269c1c0000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000000 CR3: 00000001530be001 CR4: 00000000007726f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ ets_class_qlen_notify+0x65/0x90 [sch_ets]
+ qdisc_tree_reduce_backlog+0x74/0x110
+ ets_qdisc_change+0x630/0xa40 [sch_ets]
+ __tc_modify_qdisc.constprop.0+0x216/0x7f0
+ tc_modify_qdisc+0x7c/0x120
+ rtnetlink_rcv_msg+0x145/0x3f0
+ netlink_rcv_skb+0x53/0x100
+ netlink_unicast+0x245/0x390
+ netlink_sendmsg+0x21b/0x470
+ ____sys_sendmsg+0x39d/0x3d0
+ ___sys_sendmsg+0x9a/0xe0
+ __sys_sendmsg+0x7a/0xd0
+ do_syscall_64+0x7d/0x160
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+ RIP: 0033:0x7f2155114084
+ Code: 89 02 b8 ff ff ff ff eb bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 80 3d 25 f0 0c 00 00 74 13 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89
+ RSP: 002b:00007fff1fd7a988 EFLAGS: 00000202 ORIG_RAX: 000000000000002e
+ RAX: ffffffffffffffda RBX: 0000560ec063e5e0 RCX: 00007f2155114084
+ RDX: 0000000000000000 RSI: 00007fff1fd7a9f0 RDI: 0000000000000003
+ RBP: 00007fff1fd7aa60 R08: 0000000000000010 R09: 000000000000003f
+ R10: 0000560ee9b3a010 R11: 0000000000000202 R12: 00007fff1fd7aae0
+ R13: 000000006891ccde R14: 0000560ec063e5e0 R15: 00007fff1fd7aad0
+ </TASK>
+
+ [1] https://lore.kernel.org/netdev/e08c7f4a6882f260011909a868311c6e9b54f3e4.1639153474.git.dcaratti@redhat.com/
+ [2] https://lore.kernel.org/netdev/d912cbd7-193b-4269-9857-525bee8bbb6a@gmail.com/
+
+Cc: stable@vger.kernel.org
+Fixes: 103406b38c60 ("net/sched: Always pass notifications when child class becomes empty")
+Fixes: c062f2a0b04d ("net/sched: sch_ets: don't remove idle classes from the round-robin list")
+Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc")
+Reported-by: Li Shuang <shuali@redhat.com>
+Closes: https://issues.redhat.com/browse/RHEL-108026
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Co-developed-by: Ivan Vecera <ivecera@redhat.com>
+Signed-off-by: Ivan Vecera <ivecera@redhat.com>
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Link: https://patch.msgid.link/7928ff6d17db47a2ae7cc205c44777b1f1950545.1755016081.git.dcaratti@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_ets.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -651,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc
+
+ sch_tree_lock(sch);
+
++ for (i = nbands; i < oldbands; i++) {
++ if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
++ list_del_init(&q->classes[i].alist);
++ qdisc_purge_queue(q->classes[i].qdisc);
++ }
++
+ WRITE_ONCE(q->nbands, nbands);
+ for (i = nstrict; i < q->nstrict; i++) {
+ if (q->classes[i].qdisc->q.qlen) {
+@@ -658,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc
+ q->classes[i].deficit = quanta[i];
+ }
+ }
+- for (i = q->nbands; i < oldbands; i++) {
+- if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
+- list_del_init(&q->classes[i].alist);
+- qdisc_purge_queue(q->classes[i].qdisc);
+- }
+ WRITE_ONCE(q->nstrict, nstrict);
+ memcpy(q->prio2band, priomap, sizeof(priomap));
+
--- /dev/null
+From stable+bounces-171742-greg=kroah.com@vger.kernel.org Tue Aug 19 05:32:04 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 23:31:52 -0400
+Subject: net_sched: sch_ets: implement lockless ets_dump()
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Simon Horman <horms@kernel.org>, "David S. Miller" <davem@davemloft.net>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819033153.315415-1-sashal@kernel.org>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c5f1dde7f731e7bf2e7c169ca42cb4989fc2f8b9 ]
+
+Instead of relying on RTNL, ets_dump() can use READ_ONCE()
+annotations, paired with WRITE_ONCE() ones in ets_change().
+
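+A minimal sketch of the pattern (one representative field; the full
+patch treats nbands, nstrict, quantum and prio2band the same way):
+
+  /* Writer, still serialized by sch_tree_lock(sch): */
+  WRITE_ONCE(q->nbands, nbands);
+
+  /* Lockless reader in the dump path: take one snapshot and reuse it. */
+  u8 nbands = READ_ONCE(q->nbands);
+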
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 87c6efc5ce9c ("net/sched: ets: use old 'nbands' while purging unused classes")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_ets.c | 25 ++++++++++++++-----------
+ 1 file changed, 14 insertions(+), 11 deletions(-)
+
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -651,7 +651,7 @@ static int ets_qdisc_change(struct Qdisc
+
+ sch_tree_lock(sch);
+
+- q->nbands = nbands;
++ WRITE_ONCE(q->nbands, nbands);
+ for (i = nstrict; i < q->nstrict; i++) {
+ if (q->classes[i].qdisc->q.qlen) {
+ list_add_tail(&q->classes[i].alist, &q->active);
+@@ -663,11 +663,11 @@ static int ets_qdisc_change(struct Qdisc
+ list_del_init(&q->classes[i].alist);
+ qdisc_purge_queue(q->classes[i].qdisc);
+ }
+- q->nstrict = nstrict;
++ WRITE_ONCE(q->nstrict, nstrict);
+ memcpy(q->prio2band, priomap, sizeof(priomap));
+
+ for (i = 0; i < q->nbands; i++)
+- q->classes[i].quantum = quanta[i];
++ WRITE_ONCE(q->classes[i].quantum, quanta[i]);
+
+ for (i = oldbands; i < q->nbands; i++) {
+ q->classes[i].qdisc = queues[i];
+@@ -681,7 +681,7 @@ static int ets_qdisc_change(struct Qdisc
+ for (i = q->nbands; i < oldbands; i++) {
+ qdisc_put(q->classes[i].qdisc);
+ q->classes[i].qdisc = NULL;
+- q->classes[i].quantum = 0;
++ WRITE_ONCE(q->classes[i].quantum, 0);
+ q->classes[i].deficit = 0;
+ gnet_stats_basic_sync_init(&q->classes[i].bstats);
+ memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
+@@ -738,6 +738,7 @@ static int ets_qdisc_dump(struct Qdisc *
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *opts;
+ struct nlattr *nest;
++ u8 nbands, nstrict;
+ int band;
+ int prio;
+ int err;
+@@ -750,21 +751,22 @@ static int ets_qdisc_dump(struct Qdisc *
+ if (!opts)
+ goto nla_err;
+
+- if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
++ nbands = READ_ONCE(q->nbands);
++ if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
+ goto nla_err;
+
+- if (q->nstrict &&
+- nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
++ nstrict = READ_ONCE(q->nstrict);
++ if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
+ goto nla_err;
+
+- if (q->nbands > q->nstrict) {
++ if (nbands > nstrict) {
+ nest = nla_nest_start(skb, TCA_ETS_QUANTA);
+ if (!nest)
+ goto nla_err;
+
+- for (band = q->nstrict; band < q->nbands; band++) {
++ for (band = nstrict; band < nbands; band++) {
+ if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
+- q->classes[band].quantum))
++ READ_ONCE(q->classes[band].quantum)))
+ goto nla_err;
+ }
+
+@@ -776,7 +778,8 @@ static int ets_qdisc_dump(struct Qdisc *
+ goto nla_err;
+
+ for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
+- if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
++ if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
++ READ_ONCE(q->prio2band[prio])))
+ goto nla_err;
+ }
+
--- /dev/null
+From stable+bounces-169844-greg=kroah.com@vger.kernel.org Sat Aug 16 05:50:29 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 23:50:20 -0400
+Subject: PCI/ACPI: Fix runtime PM ref imbalance on Hot-Plug Capable ports
+To: stable@vger.kernel.org
+Cc: Lukas Wunner <lukas@wunner.de>, Laurent Bigonville <bigon@bigon.be>, Mario Limonciello <mario.limonciello@amd.com>, Bjorn Helgaas <bhelgaas@google.com>, "Rafael J. Wysocki" <rafael@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250816035021.666925-1-sashal@kernel.org>
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 6cff20ce3b92ffbf2fc5eb9e5a030b3672aa414a ]
+
+pci_bridge_d3_possible() is called from both pcie_portdrv_probe() and
+pcie_portdrv_remove() to determine whether runtime power management shall
+be enabled (on probe) or disabled (on remove) on a PCIe port.
+
+The underlying assumption is that pci_bridge_d3_possible() always returns
+the same value, else a runtime PM reference imbalance would occur. That
+assumption is not given if the PCIe port is inaccessible on remove due to
+hot-unplug: pci_bridge_d3_possible() calls pciehp_is_native(), which
+accesses Config Space to determine whether the port is Hot-Plug Capable.
+An inaccessible port returns "all ones", which is converted to "all
+zeroes" by pcie_capability_read_dword(). Hence the port no longer seems
+Hot-Plug Capable on remove even though it was on probe.
+
+The resulting runtime PM ref imbalance causes warning messages such as:
+
+ pcieport 0000:02:04.0: Runtime PM usage count underflow!
+
+Avoid the Config Space access (and thus the runtime PM ref imbalance) by
+caching the Hot-Plug Capable bit in struct pci_dev.
+
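+A condensed sketch of the scheme (taken from the diff below): latch the
+Slot Capabilities bit once at enumeration time, while Config Space is
+known to be accessible, and test only the cached copy afterwards:
+
+  /* Enumeration: device is accessible, cache the capability bit. */
+  pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32);
+  if (reg32 & PCI_EXP_SLTCAP_HPC)
+          pdev->is_pciehp = 1;
+
+  /* Remove path: no Config Space access, the answer cannot change. */
+  if (!bridge->is_pciehp)
+          return false;
+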
+The struct already contains an "is_hotplug_bridge" flag, which however is
+not only set on Hot-Plug Capable PCIe ports, but also Conventional PCI
+Hot-Plug bridges and ACPI slots. The flag identifies bridges which are
+allocated additional MMIO and bus number resources to allow for hierarchy
+expansion.
+
+The kernel is somewhat sloppily using "is_hotplug_bridge" in a number of
+places to identify Hot-Plug Capable PCIe ports, even though the flag
+encompasses other devices. Subsequent commits replace these occurrences
+with the new flag to clearly delineate Hot-Plug Capable PCIe ports from
+other kinds of hotplug bridges.
+
+Document the existing "is_hotplug_bridge" and the new "is_pciehp" flag
+and document the (non-obvious) requirement that pci_bridge_d3_possible()
+always returns the same value across the entire lifetime of a bridge,
+including its hot-removal.
+
+Fixes: 5352a44a561d ("PCI: pciehp: Make pciehp_is_native() stricter")
+Reported-by: Laurent Bigonville <bigon@bigon.be>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220216
+Reported-by: Mario Limonciello <mario.limonciello@amd.com>
+Closes: https://lore.kernel.org/r/20250609020223.269407-3-superm1@kernel.org/
+Link: https://lore.kernel.org/all/20250620025535.3425049-3-superm1@kernel.org/T/#u
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Acked-by: Rafael J. Wysocki <rafael@kernel.org>
+Cc: stable@vger.kernel.org # v4.18+
+Link: https://patch.msgid.link/fe5dcc3b2e62ee1df7905d746bde161eb1b3291c.1752390101.git.lukas@wunner.de
+[ changed "recent enough PCIe ports" comment to "some PCIe ports" ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pci/pci-acpi.c | 4 +---
+ drivers/pci/pci.c | 8 ++++++--
+ drivers/pci/probe.c | 2 +-
+ include/linux/pci.h | 10 +++++++++-
+ 4 files changed, 17 insertions(+), 7 deletions(-)
+
+--- a/drivers/pci/pci-acpi.c
++++ b/drivers/pci/pci-acpi.c
+@@ -793,13 +793,11 @@ int pci_acpi_program_hp_params(struct pc
+ bool pciehp_is_native(struct pci_dev *bridge)
+ {
+ const struct pci_host_bridge *host;
+- u32 slot_cap;
+
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
+
+- pcie_capability_read_dword(bridge, PCI_EXP_SLTCAP, &slot_cap);
+- if (!(slot_cap & PCI_EXP_SLTCAP_HPC))
++ if (!bridge->is_pciehp)
+ return false;
+
+ if (pcie_ports_native)
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -3065,8 +3065,12 @@ static const struct dmi_system_id bridge
+ * pci_bridge_d3_possible - Is it possible to put the bridge into D3
+ * @bridge: Bridge to check
+ *
+- * This function checks if it is possible to move the bridge to D3.
+- * Currently we only allow D3 for recent enough PCIe ports and Thunderbolt.
++ * Currently we only allow D3 for some PCIe ports and for Thunderbolt.
++ *
++ * Return: Whether it is possible to move the bridge to D3.
++ *
++ * The return value is guaranteed to be constant across the entire lifetime
++ * of the bridge, including its hot-removal.
+ */
+ bool pci_bridge_d3_possible(struct pci_dev *bridge)
+ {
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -1594,7 +1594,7 @@ void set_pcie_hotplug_bridge(struct pci_
+
+ pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &reg32);
+ if (reg32 & PCI_EXP_SLTCAP_HPC)
+- pdev->is_hotplug_bridge = 1;
++ pdev->is_hotplug_bridge = pdev->is_pciehp = 1;
+ }
+
+ static void set_pcie_thunderbolt(struct pci_dev *dev)
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -318,7 +318,14 @@ struct pci_sriov;
+ struct pci_p2pdma;
+ struct rcec_ea;
+
+-/* The pci_dev structure describes PCI devices */
++/* struct pci_dev - describes a PCI device
++ *
++ * @is_hotplug_bridge: Hotplug bridge of any kind (e.g. PCIe Hot-Plug Capable,
++ * Conventional PCI Hot-Plug, ACPI slot).
++ * Such bridges are allocated additional MMIO and bus
++ * number resources to allow for hierarchy expansion.
++ * @is_pciehp: PCIe Hot-Plug Capable bridge.
++ */
+ struct pci_dev {
+ struct list_head bus_list; /* Node in per-bus list */
+ struct pci_bus *bus; /* Bus this device is on */
+@@ -439,6 +446,7 @@ struct pci_dev {
+ unsigned int is_physfn:1;
+ unsigned int is_virtfn:1;
+ unsigned int is_hotplug_bridge:1;
++ unsigned int is_pciehp:1;
+ unsigned int shpc_managed:1; /* SHPC owned by shpchp */
+ unsigned int is_thunderbolt:1; /* Thunderbolt controller */
+ /*
net-add-net_passive_inc-and-net_passive_dec.patch
net-better-track-kernel-sockets-lifetime.patch
smb-client-fix-netns-refcount-leak-after-net_passive-changes.patch
+net_sched-sch_ets-implement-lockless-ets_dump.patch
+net-sched-ets-use-old-nbands-while-purging-unused-classes.patch
+leds-flash-leds-qcom-flash-limit-led-current-based-on-thermal-condition.patch
+leds-flash-leds-qcom-flash-fix-registry-access-after-re-bind.patch
+fscrypt-don-t-use-problematic-non-inline-crypto-engines.patch
+block-reject-invalid-operation-in-submit_bio_noacct.patch
+block-make-req_op_zone_finish-a-write-operation.patch
+pci-acpi-fix-runtime-pm-ref-imbalance-on-hot-plug-capable-ports.patch
+usb-typec-fusb302-cache-pd-rx-state.patch
+btrfs-don-t-ignore-inode-missing-when-replaying-log-tree.patch
+btrfs-qgroup-fix-race-between-quota-disable-and-quota-rescan-ioctl.patch
+btrfs-move-transaction-aborts-to-the-error-site-in-add_block_group_free_space.patch
+btrfs-always-abort-transaction-on-failure-to-add-block-group-to-free-space-tree.patch
+btrfs-abort-transaction-on-unexpected-eb-generation-at-btrfs_copy_root.patch
+xfs-fully-decouple-xfs_ibulk-flags-from-xfs_iwalk-flags.patch
+btrfs-open-code-timespec64-in-struct-btrfs_inode.patch
+btrfs-fix-ssd_spread-overallocation.patch
+btrfs-constify-more-pointer-parameters.patch
+btrfs-populate-otime-when-logging-an-inode-item.patch
+btrfs-send-factor-out-common-logic-when-sending-xattrs.patch
+btrfs-send-only-use-boolean-variables-at-process_recorded_refs.patch
+btrfs-send-add-and-use-helper-to-rename-current-inode-when-processing-refs.patch
+btrfs-send-keep-the-current-inode-s-path-cached.patch
+btrfs-send-avoid-path-allocation-for-the-current-inode-when-issuing-commands.patch
+btrfs-send-use-fallocate-for-hole-punching-with-send-stream-v2.patch
+btrfs-send-make-fs_path_len-inline-and-constify-its-argument.patch
--- /dev/null
+From stable+bounces-171653-greg=kroah.com@vger.kernel.org Mon Aug 18 22:22:35 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 16:22:08 -0400
+Subject: usb: typec: fusb302: cache PD RX state
+To: stable@vger.kernel.org
+Cc: Sebastian Reichel <sebastian.reichel@collabora.com>, stable <stable@kernel.org>, Heikki Krogerus <heikki.krogerus@linux.intel.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250818202208.73004-1-sashal@kernel.org>
+
+From: Sebastian Reichel <sebastian.reichel@collabora.com>
+
+[ Upstream commit 1e61f6ab08786d66a11cfc51e13d6f08a6b06c56 ]
+
+This patch fixes a race condition communication error, which ends up in
+PD hard resets when losing the race. Some systems, like the Radxa ROCK
+5B are powered through USB-C without any backup power source and use a
+FUSB302 chip to do the PD negotiation. This means it is quite important
+to avoid hard resets, since that effectively kills the system's
+power-supply.
+
+I've found the following race condition while debugging unplanned power
+loss during booting the board every now and then:
+
+1. lots of TCPM/FUSB302/PD initialization stuff
+2. TCPM ends up in SNK_WAIT_CAPABILITIES (tcpm_set_pd_rx is enabled here)
+3. the remote PD source does not send anything, so TCPM does a SOFT RESET
+4. TCPM ends up in SNK_WAIT_CAPABILITIES for the second time
+ (tcpm_set_pd_rx is enabled again, even though it is still on)
+
+At this point I've seen broken CRC good messages being sent by the
+FUSB302 with a logic analyzer sniffing the CC lines. It also looks like
+messages are being lost and things generally go haywire, with one of
+the two sides doing a hard reset once a broken CRC good message was sent
+to the bus.
+
+I think the system is running into a race condition where the FIFOs are
+being cleared and/or the automatic good CRC message generation flag is
+being updated while a message is already arriving.
+
+Let's avoid this by caching the PD RX enabled state, as we have already
+processed anything in the FIFOs and are in a good state. As a side
+effect, this also optimizes I2C bus usage :)
+
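+A condensed sketch of the guard this adds (from the diff below): the
+last programmed state is remembered, so a repeated request becomes a
+no-op before any FIFO flush or auto-CRC reprogramming can race with an
+incoming message:
+
+  mutex_lock(&chip->lock);
+  if (chip->pd_rx_on == on)       /* already in the requested state */
+          goto done;              /* skip flush and re-programming */
+  /* ... flush FIFOs, set auto-CRC, then remember the new state: */
+  chip->pd_rx_on = on;
+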
+As far as I can tell the problem theoretically also exists when TCPM
+enters SNK_WAIT_CAPABILITIES the first time, but I believe this is less
+critical for the following reason:
+
+On devices like the ROCK 5B, which are powered through a TCPM backed
+USB-C port, the bootloader must have done some prior PD communication
+(initial communication must happen within 5 seconds after plugging the
+USB-C plug). This means the first time the kernel TCPM state machine
+reaches SNK_WAIT_CAPABILITIES, the remote side is not sending messages
+actively. On other devices a hard reset simply adds some extra delay and
+things should be good afterwards.
+
+Fixes: c034a43e72dda ("staging: typec: Fairchild FUSB302 Type-c chip driver")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20250704-fusb302-race-condition-fix-v1-1-239012c0e27a@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/typec/tcpm/fusb302.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/usb/typec/tcpm/fusb302.c
++++ b/drivers/usb/typec/tcpm/fusb302.c
+@@ -103,6 +103,7 @@ struct fusb302_chip {
+ bool vconn_on;
+ bool vbus_on;
+ bool charge_on;
++ bool pd_rx_on;
+ bool vbus_present;
+ enum typec_cc_polarity cc_polarity;
+ enum typec_cc_status cc1;
+@@ -841,6 +842,11 @@ static int tcpm_set_pd_rx(struct tcpc_de
+ int ret = 0;
+
+ mutex_lock(&chip->lock);
++ if (chip->pd_rx_on == on) {
++ fusb302_log(chip, "pd is already %s", str_on_off(on));
++ goto done;
++ }
++
+ ret = fusb302_pd_rx_flush(chip);
+ if (ret < 0) {
+ fusb302_log(chip, "cannot flush pd rx buffer, ret=%d", ret);
+@@ -863,6 +869,8 @@ static int tcpm_set_pd_rx(struct tcpc_de
+ on ? "on" : "off", ret);
+ goto done;
+ }
++
++ chip->pd_rx_on = on;
+ fusb302_log(chip, "pd := %s", on ? "on" : "off");
+ done:
+ mutex_unlock(&chip->lock);
--- /dev/null
+From stable+bounces-171737-greg=kroah.com@vger.kernel.org Tue Aug 19 04:59:38 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 22:56:55 -0400
+Subject: xfs: fully decouple XFS_IBULK* flags from XFS_IWALK* flags
+To: stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, cen zhang <zzzccc427@gmail.com>, "Darrick J. Wong" <djwong@kernel.org>, Carlos Maiolino <cem@kernel.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250819025655.301046-1-sashal@kernel.org>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit d2845519b0723c5d5a0266cbf410495f9b8fd65c ]
+
+Fix up xfs_inumbers to now pass in the XFS_IBULK* flags into the flags
+argument to xfs_inobt_walk, which expects the XFS_IWALK* flags.
+
+Currently passing the wrong flags works for non-debug builds because
+the only XFS_IWALK* flag has the same encoding as the corresponding
+XFS_IBULK* flag, but in debug builds it can trigger an assert that no
+incorrect flag is passed. Instead just extract the relevant flag.
+
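+A condensed sketch of the extraction (as in the diff below): instead of
+forwarding breq->flags verbatim, build a fresh flags word carrying only
+the bit that xfs_inobt_walk() actually understands:
+
+  unsigned int iwalk_flags = 0;
+
+  if (breq->flags & XFS_IBULK_SAME_AG)
+          iwalk_flags |= XFS_IWALK_SAME_AG;
+
+  error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
+                         xfs_inumbers_walk, breq->icount, &ic);
+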
+Fixes: 5b35d922c52798 ("xfs: Decouple XFS_IBULK flags from XFS_IWALK flags")
+Cc: <stable@vger.kernel.org> # v5.19
+Reported-by: cen zhang <zzzccc427@gmail.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_itable.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_itable.c
++++ b/fs/xfs/xfs_itable.c
+@@ -422,11 +422,15 @@ xfs_inumbers(
+ .breq = breq,
+ };
+ struct xfs_trans *tp;
++ unsigned int iwalk_flags = 0;
+ int error = 0;
+
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
+
++ if (breq->flags & XFS_IBULK_SAME_AG)
++ iwalk_flags |= XFS_IWALK_SAME_AG;
++
+ /*
+ * Grab an empty transaction so that we can use its recursive buffer
+ * locking abilities to detect cycles in the inobt without deadlocking.
+@@ -435,7 +439,7 @@ xfs_inumbers(
+ if (error)
+ goto out;
+
+- error = xfs_inobt_walk(breq->mp, tp, breq->startino, breq->flags,
++ error = xfs_inobt_walk(breq->mp, tp, breq->startino, iwalk_flags,
+ xfs_inumbers_walk, breq->icount, &ic);
+ xfs_trans_cancel(tp);
+ out: