From: Sasha Levin
Date: Sun, 15 Jun 2025 22:47:44 +0000 (-0400)
Subject: Fixes for 6.15
X-Git-Tag: v6.6.94~58
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=958bed42fb5af62768124767d18b6afcc243ce6b;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 6.15

Signed-off-by: Sasha Levin
---

diff --git a/queue-6.15/bio-fix-bio_first_folio-for-sparsemem-without-vmemma.patch b/queue-6.15/bio-fix-bio_first_folio-for-sparsemem-without-vmemma.patch
new file mode 100644
index 0000000000..2e13cf82ba
--- /dev/null
+++ b/queue-6.15/bio-fix-bio_first_folio-for-sparsemem-without-vmemma.patch
@@ -0,0 +1,39 @@
+From ebfba348111722463033598894a92ac6f0cdd202 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 12 Jun 2025 15:41:25 +0100
+Subject: bio: Fix bio_first_folio() for SPARSEMEM without VMEMMAP
+
+From: Matthew Wilcox (Oracle)
+
+[ Upstream commit f826ec7966a63d48e16e0868af4e038bf9a1a3ae ]
+
+It is possible for physically contiguous folios to have discontiguous
+struct pages if SPARSEMEM is enabled and SPARSEMEM_VMEMMAP is not.
+This is correctly handled by folio_page_idx(), so remove this open-coded
+implementation.
+
+Fixes: 640d1930bef4 (block: Add bio_for_each_folio_all())
+Signed-off-by: Matthew Wilcox (Oracle)
+Link: https://lore.kernel.org/r/20250612144126.2849931-1-willy@infradead.org
+Signed-off-by: Jens Axboe
+Signed-off-by: Sasha Levin
+---
+ include/linux/bio.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/bio.h b/include/linux/bio.h
+index b786ec5bcc81d..b474a47ec7eef 100644
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio,
+ 
+ 	fi->folio = page_folio(bvec->bv_page);
+ 	fi->offset = bvec->bv_offset +
+-		PAGE_SIZE * (bvec->bv_page - &fi->folio->page);
++		PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page);
+ 	fi->_seg_count = bvec->bv_len;
+ 	fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count);
+ 	fi->_next = folio_next(fi->folio);
+-- 
+2.39.5
+
diff --git a/queue-6.15/block-don-t-use-submit_bio_noacct_nocheck-in-blk_zon.patch b/queue-6.15/block-don-t-use-submit_bio_noacct_nocheck-in-blk_zon.patch
new file mode 100644
index 0000000000..f3288ac04c
--- /dev/null
+++ b/queue-6.15/block-don-t-use-submit_bio_noacct_nocheck-in-blk_zon.patch
@@ -0,0 +1,61 @@
+From fd690870c62846af88fc1c37bfaeee55e262a3fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 11 Jun 2025 06:44:16 +0200
+Subject: block: don't use submit_bio_noacct_nocheck in blk_zone_wplug_bio_work
+
+From: Christoph Hellwig
+
+[ Upstream commit cf625013d8741c01407bbb4a60c111b61b9fa69d ]
+
+Bios queued up in the zone write plug have already gone through all
+preparation in the submit_bio path, including the freeze protection.
+
+Submitting them through submit_bio_noacct_nocheck duplicates the work
+and can cause deadlocks when freezing a queue with pending bio
+write plugs.
+
+Go straight to ->submit_bio or blk_mq_submit_bio to bypass the
+superfluous extra freeze protection and checks.
+ +Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") +Reported-by: Bart Van Assche +Signed-off-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Reviewed-by: Damien Le Moal +Tested-by: Damien Le Moal +Link: https://lore.kernel.org/r/20250611044416.2351850-1-hch@lst.de +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/blk-zoned.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/blk-zoned.c b/block/blk-zoned.c +index 8f15d1aa6eb89..45c91016cef38 100644 +--- a/block/blk-zoned.c ++++ b/block/blk-zoned.c +@@ -1306,7 +1306,6 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) + spin_unlock_irqrestore(&zwplug->lock, flags); + + bdev = bio->bi_bdev; +- submit_bio_noacct_nocheck(bio); + + /* + * blk-mq devices will reuse the extra reference on the request queue +@@ -1314,8 +1313,12 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) + * path for BIO-based devices will not do that. So drop this extra + * reference here. + */ +- if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) ++ if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { ++ bdev->bd_disk->fops->submit_bio(bio); + blk_queue_exit(bdev->bd_disk->queue); ++ } else { ++ blk_mq_submit_bio(bio); ++ } + + put_zwplug: + /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ +-- +2.39.5 + diff --git a/queue-6.15/block-fix-bvec_set_folio-for-very-large-folios.patch b/queue-6.15/block-fix-bvec_set_folio-for-very-large-folios.patch new file mode 100644 index 0000000000..b2088a9751 --- /dev/null +++ b/queue-6.15/block-fix-bvec_set_folio-for-very-large-folios.patch @@ -0,0 +1,46 @@ +From abcf568604922548b04d4aec04a121898c367682 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jun 2025 15:42:53 +0100 +Subject: block: Fix bvec_set_folio() for very large folios + +From: Matthew Wilcox (Oracle) + +[ Upstream commit 5e223e06ee7c6d8f630041a0645ac90e39a42cc6 ] + +Similarly to 26064d3e2b4d ("block: fix adding folio to bio"), if +we attempt to add a folio that is larger than 4GB, we'll silently +truncate the offset and len. Widen the parameters to size_t, assert +that the length is less than 4GB and set the first page that contains +the interesting data rather than the first page of the folio. 
+ +Fixes: 26db5ee15851 (block: add a bvec_set_folio helper) +Signed-off-by: Matthew Wilcox (Oracle) +Link: https://lore.kernel.org/r/20250612144255.2850278-1-willy@infradead.org +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + include/linux/bvec.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/include/linux/bvec.h b/include/linux/bvec.h +index 204b22a99c4ba..0a80e1f9aa201 100644 +--- a/include/linux/bvec.h ++++ b/include/linux/bvec.h +@@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, + * @offset: offset into the folio + */ + static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, +- unsigned int len, unsigned int offset) ++ size_t len, size_t offset) + { +- bvec_set_page(bv, &folio->page, len, offset); ++ unsigned long nr = offset / PAGE_SIZE; ++ ++ WARN_ON_ONCE(len > UINT_MAX); ++ bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); + } + + /** +-- +2.39.5 + diff --git a/queue-6.15/block-use-q-elevator-with-elevator_lock-held-in-elv_.patch b/queue-6.15/block-use-q-elevator-with-elevator_lock-held-in-elv_.patch new file mode 100644 index 0000000000..ff7b430a91 --- /dev/null +++ b/queue-6.15/block-use-q-elevator-with-elevator_lock-held-in-elv_.patch @@ -0,0 +1,49 @@ +From e180cb3328b585872e7886cabded927fd7817cb9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 May 2025 22:17:42 +0800 +Subject: block: use q->elevator with ->elevator_lock held in + elv_iosched_show() + +From: Ming Lei + +[ Upstream commit 94209d27d14104ed828ca88cd5403a99162fe51a ] + +Use q->elevator with ->elevator_lock held in elv_iosched_show(), since +the local cached elevator reference may become stale after getting +->elevator_lock. + +Reviewed-by: Hannes Reinecke +Reviewed-by: Nilay Shroff +Reviewed-by: Christoph Hellwig +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20250505141805.2751237-5-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/elevator.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/block/elevator.c b/block/elevator.c +index b4d08026b02ce..dc4cadef728e5 100644 +--- a/block/elevator.c ++++ b/block/elevator.c +@@ -744,7 +744,6 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, + ssize_t elv_iosched_show(struct gendisk *disk, char *name) + { + struct request_queue *q = disk->queue; +- struct elevator_queue *eq = q->elevator; + struct elevator_type *cur = NULL, *e; + int len = 0; + +@@ -753,7 +752,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name) + len += sprintf(name+len, "[none] "); + } else { + len += sprintf(name+len, "none "); +- cur = eq->type; ++ cur = q->elevator->type; + } + + spin_lock(&elv_list_lock); +-- +2.39.5 + diff --git a/queue-6.15/btrfs-exit-after-state-insertion-failure-at-btrfs_co.patch b/queue-6.15/btrfs-exit-after-state-insertion-failure-at-btrfs_co.patch new file mode 100644 index 0000000000..4b3fac7a50 --- /dev/null +++ b/queue-6.15/btrfs-exit-after-state-insertion-failure-at-btrfs_co.patch @@ -0,0 +1,43 @@ +From f6d3bebd2424dd94262bea320f0a1f1d76f9a88c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Apr 2025 17:11:14 +0100 +Subject: btrfs: exit after state insertion failure at + btrfs_convert_extent_bit() + +From: Filipe Manana + +[ Upstream commit 3bf179e36da917c5d9bec71c714573ed1649b7c1 ] + +If insert_state() state failed it returns an error pointer and we call +extent_io_tree_panic() which will trigger a BUG() call. 
However if +CONFIG_BUG is disabled, which is an uncommon and exotic scenario, then +we fallthrough and call cache_state() which will dereference the error +pointer, resulting in an invalid memory access. + +So jump to the 'out' label after calling extent_io_tree_panic(), it also +makes the code more clear besides dealing with the exotic scenario where +CONFIG_BUG is disabled. + +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent-io-tree.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c +index 13de6af279e52..92cfde37b1d33 100644 +--- a/fs/btrfs/extent-io-tree.c ++++ b/fs/btrfs/extent-io-tree.c +@@ -1456,6 +1456,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + if (IS_ERR(inserted_state)) { + ret = PTR_ERR(inserted_state); + extent_io_tree_panic(tree, prealloc, "insert", ret); ++ goto out; + } + cache_state(inserted_state, cached_state); + if (inserted_state == prealloc) +-- +2.39.5 + diff --git a/queue-6.15/btrfs-exit-after-state-split-error-at-set_extent_bit.patch b/queue-6.15/btrfs-exit-after-state-split-error-at-set_extent_bit.patch new file mode 100644 index 0000000000..5bbf55d72c --- /dev/null +++ b/queue-6.15/btrfs-exit-after-state-split-error-at-set_extent_bit.patch @@ -0,0 +1,47 @@ +From 962d4b4b7999df4f2be1d7e244b7bb74a6a5263a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Apr 2025 16:00:28 +0100 +Subject: btrfs: exit after state split error at set_extent_bit() + +From: Filipe Manana + +[ Upstream commit 41d69d4d78d8b179bf3bcdfc56d28a12b3a608d2 ] + +If split_state() returned an error we call extent_io_tree_panic() which +will trigger a BUG() call. However if CONFIG_BUG is disabled, which is an +uncommon and exotic scenario, then we fallthrough and hit a use after free +when calling set_state_bits() since the extent state record which the +local variable 'prealloc' points to was freed by split_state(). + +So jump to the label 'out' after calling extent_io_tree_panic() and set +the 'prealloc' pointer to NULL since split_state() has already freed it +when it hit an error. 
+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/extent-io-tree.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c +index 92cfde37b1d33..b5b44ea91f999 100644 +--- a/fs/btrfs/extent-io-tree.c ++++ b/fs/btrfs/extent-io-tree.c +@@ -1252,8 +1252,11 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + if (!prealloc) + goto search_again; + ret = split_state(tree, state, prealloc, end + 1); +- if (ret) ++ if (ret) { + extent_io_tree_panic(tree, state, "split", ret); ++ prealloc = NULL; ++ goto out; ++ } + + set_state_bits(tree, prealloc, bits, changeset); + cache_state(prealloc, cached_state); +-- +2.39.5 + diff --git a/queue-6.15/btrfs-fix-fsync-of-files-with-no-hard-links-not-pers.patch b/queue-6.15/btrfs-fix-fsync-of-files-with-no-hard-links-not-pers.patch new file mode 100644 index 0000000000..a841b098d0 --- /dev/null +++ b/queue-6.15/btrfs-fix-fsync-of-files-with-no-hard-links-not-pers.patch @@ -0,0 +1,148 @@ +From 319be88c8a49e4bbc8b28e98de43a6b1e267e999 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Mar 2025 16:05:50 +0000 +Subject: btrfs: fix fsync of files with no hard links not persisting deletion + +From: Filipe Manana + +[ Upstream commit 5e85262e542d6da8898bb8563a724ad98f6fc936 ] + +If we fsync a file (or directory) that has no more hard links, because +while a process had a file descriptor open on it, the file's last hard +link was removed and then the process did an fsync against the file +descriptor, after a power failure or crash the file still exists after +replaying the log. + +This behaviour is incorrect since once an inode has no more hard links +it's not accessible anymore and we insert an orphan item into its +subvolume's tree so that the deletion of all its items is not missed in +case of a power failure or crash. + +So after log replay the file shouldn't exist anymore, which is also the +behaviour on ext4, xfs, f2fs and other filesystems. + +Fix this by not ignoring inodes with zero hard links at +btrfs_log_inode_parent() and by committing an inode's delayed inode when +we are not doing a fast fsync (either BTRFS_INODE_COPY_EVERYTHING or +BTRFS_INODE_NEEDS_FULL_SYNC is set in the inode's runtime flags). This +last step is necessary because when removing the last hard link we don't +delete the corresponding ref (or extref) item, instead we record the +change in the inode's delayed inode with the BTRFS_DELAYED_NODE_DEL_IREF +flag, so that when the delayed inode is committed we delete the ref/extref +item from the inode's subvolume tree - otherwise the logging code will log +the last hard link and therefore upon log replay the inode is not deleted. + +The base code for a fstests test case that reproduces this bug is the +following: + + . ./common/dmflakey + + _require_scratch + _require_dm_target flakey + _require_mknod + + _scratch_mkfs >>$seqres.full 2>&1 || _fail "mkfs failed" + _require_metadata_journaling $SCRATCH_DEV + _init_flakey + _mount_flakey + + touch $SCRATCH_MNT/foo + + # Commit the current transaction and persist the file. + _scratch_sync + + # A fifo to communicate with a background xfs_io process that will + # fsync the file after we deleted its hard link while it's open by + # xfs_io. + mkfifo $SCRATCH_MNT/fifo + + tail -f $SCRATCH_MNT/fifo | \ + $XFS_IO_PROG $SCRATCH_MNT/foo >>$seqres.full & + XFS_IO_PID=$! 
+ + # Give some time for the xfs_io process to open a file descriptor for + # the file. + sleep 1 + + # Now while the file is open by the xfs_io process, delete its only + # hard link. + rm -f $SCRATCH_MNT/foo + + # Now that it has no more hard links, make the xfs_io process fsync it. + echo "fsync" > $SCRATCH_MNT/fifo + + # Terminate the xfs_io process so that we can unmount. + echo "quit" > $SCRATCH_MNT/fifo + wait $XFS_IO_PID + unset XFS_IO_PID + + # Simulate a power failure and then mount again the filesystem to + # replay the journal/log. + _flakey_drop_and_remount + + # We don't expect the file to exist anymore, since it was fsynced when + # it had no more hard links. + [ -f $SCRATCH_MNT/foo ] && echo "file foo still exists" + + _unmount_flakey + + # success, all done + echo "Silence is golden" + status=0 + exit + +A test case for fstests will be submitted soon. + +Reviewed-by: Boris Burkov +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/tree-log.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c +index 90dc094cfa5e5..f5af11565b876 100644 +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -6583,6 +6583,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, + btrfs_log_get_delayed_items(inode, &delayed_ins_list, + &delayed_del_list); + ++ /* ++ * If we are fsyncing a file with 0 hard links, then commit the delayed ++ * inode because the last inode ref (or extref) item may still be in the ++ * subvolume tree and if we log it the file will still exist after a log ++ * replay. So commit the delayed inode to delete that last ref and we ++ * skip logging it. ++ */ ++ if (inode->vfs_inode.i_nlink == 0) { ++ ret = btrfs_commit_inode_delayed_inode(inode); ++ if (ret) ++ goto out_unlock; ++ } ++ + ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key, + path, dst_path, logged_isize, + inode_only, ctx, +@@ -7051,14 +7064,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, + if (btrfs_root_generation(&root->root_item) == trans->transid) + return BTRFS_LOG_FORCE_COMMIT; + +- /* +- * Skip already logged inodes or inodes corresponding to tmpfiles +- * (since logging them is pointless, a link count of 0 means they +- * will never be accessible). +- */ +- if ((btrfs_inode_in_log(inode, trans->transid) && +- list_empty(&ctx->ordered_extents)) || +- inode->vfs_inode.i_nlink == 0) ++ /* Skip already logged inodes and without new extents. */ ++ if (btrfs_inode_in_log(inode, trans->transid) && ++ list_empty(&ctx->ordered_extents)) + return BTRFS_NO_LOG_SYNC; + + ret = start_log_trans(trans, root, ctx); +-- +2.39.5 + diff --git a/queue-6.15/fs-filesystems-fix-potential-unsigned-integer-underf.patch b/queue-6.15/fs-filesystems-fix-potential-unsigned-integer-underf.patch new file mode 100644 index 0000000000..a46f48076f --- /dev/null +++ b/queue-6.15/fs-filesystems-fix-potential-unsigned-integer-underf.patch @@ -0,0 +1,55 @@ +From fee4a2d557a6ff56b1a1935873e0680eec47f787 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Apr 2025 19:45:27 +0800 +Subject: fs/filesystems: Fix potential unsigned integer underflow in fs_name() + +From: Zijun Hu + +[ Upstream commit 1363c134ade81e425873b410566e957fecebb261 ] + +fs_name() has @index as unsigned int, so there is underflow risk for +operation '@index--'. 
+ +Fix by breaking the for loop when '@index == 0' which is also more proper +than '@index <= 0' for unsigned integer comparison. + +Signed-off-by: Zijun Hu +Link: https://lore.kernel.org/20250410-fix_fs-v1-1-7c14ccc8ebaa@quicinc.com +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/filesystems.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/fs/filesystems.c b/fs/filesystems.c +index 58b9067b2391c..95e5256821a53 100644 +--- a/fs/filesystems.c ++++ b/fs/filesystems.c +@@ -156,15 +156,19 @@ static int fs_index(const char __user * __name) + static int fs_name(unsigned int index, char __user * buf) + { + struct file_system_type * tmp; +- int len, res; ++ int len, res = -EINVAL; + + read_lock(&file_systems_lock); +- for (tmp = file_systems; tmp; tmp = tmp->next, index--) +- if (index <= 0 && try_module_get(tmp->owner)) ++ for (tmp = file_systems; tmp; tmp = tmp->next, index--) { ++ if (index == 0) { ++ if (try_module_get(tmp->owner)) ++ res = 0; + break; ++ } ++ } + read_unlock(&file_systems_lock); +- if (!tmp) +- return -EINVAL; ++ if (res) ++ return res; + + /* OK, we got the reference, so we can safely block */ + len = strlen(tmp->name) + 1; +-- +2.39.5 + diff --git a/queue-6.15/gfs2-pass-through-holder-from-the-vfs-for-freeze-tha.patch b/queue-6.15/gfs2-pass-through-holder-from-the-vfs-for-freeze-tha.patch new file mode 100644 index 0000000000..e552c19bf7 --- /dev/null +++ b/queue-6.15/gfs2-pass-through-holder-from-the-vfs-for-freeze-tha.patch @@ -0,0 +1,97 @@ +From e0e37229330bdec04abc31325bfa2996ccfdc77d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Apr 2025 21:02:28 +0200 +Subject: gfs2: pass through holder from the VFS for freeze/thaw + +From: Christian Brauner + +[ Upstream commit 62a2175ddf7e72941868f164b7c1f92e00f213bd ] + +The filesystem's freeze/thaw functions can be called from contexts where +the holder isn't userspace but the kernel, e.g., during systemd +suspend/hibernate. So pass through the freeze/thaw flags from the VFS +instead of hard-coding them. 
+ +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/gfs2/super.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c +index 54f1efd47a3e5..0bd7827e6371e 100644 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@ -674,7 +674,7 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) + return sdp->sd_log_error; + } + +-static int gfs2_do_thaw(struct gfs2_sbd *sdp) ++static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who) + { + struct super_block *sb = sdp->sd_vfs; + int error; +@@ -682,7 +682,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp) + error = gfs2_freeze_lock_shared(sdp); + if (error) + goto fail; +- error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); ++ error = thaw_super(sb, who); + if (!error) + return 0; + +@@ -710,7 +710,7 @@ void gfs2_freeze_func(struct work_struct *work) + gfs2_freeze_unlock(sdp); + set_bit(SDF_FROZEN, &sdp->sd_flags); + +- error = gfs2_do_thaw(sdp); ++ error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE); + if (error) + goto out; + +@@ -728,6 +728,7 @@ void gfs2_freeze_func(struct work_struct *work) + /** + * gfs2_freeze_super - prevent further writes to the filesystem + * @sb: the VFS structure for the filesystem ++ * @who: freeze flags + * + */ + +@@ -744,7 +745,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) + } + + for (;;) { +- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); ++ error = freeze_super(sb, who); + if (error) { + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", + error); +@@ -758,7 +759,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) + break; + } + +- error = gfs2_do_thaw(sdp); ++ error = gfs2_do_thaw(sdp, who); + if (error) + goto out; + +@@ -796,6 +797,7 @@ static int gfs2_freeze_fs(struct super_block *sb) + /** + * gfs2_thaw_super - reallow writes to the filesystem + * @sb: the VFS structure for the filesystem ++ * @who: freeze flags + * + */ + +@@ -814,7 +816,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) + atomic_inc(&sb->s_active); + gfs2_freeze_unlock(sdp); + +- error = gfs2_do_thaw(sdp); ++ error = gfs2_do_thaw(sdp, who); + + if (!error) { + clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); +-- +2.39.5 + diff --git a/queue-6.15/io_uring-consistently-use-rcu-semantics-with-sqpoll-.patch b/queue-6.15/io_uring-consistently-use-rcu-semantics-with-sqpoll-.patch new file mode 100644 index 0000000000..a60c91185d --- /dev/null +++ b/queue-6.15/io_uring-consistently-use-rcu-semantics-with-sqpoll-.patch @@ -0,0 +1,183 @@ +From 4a341f091b2fddccccf86db2b6c6e165c030cdf9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 13:53:43 -0700 +Subject: io_uring: consistently use rcu semantics with sqpoll thread + +From: Keith Busch + +[ Upstream commit c538f400fae22725580842deb2bef546701b64bd ] + +The sqpoll thread is dereferenced with rcu read protection in one place, +so it needs to be annotated as an __rcu type, and should consistently +use rcu helpers for access and assignment to make sparse happy. + +Since most of the accesses occur under the sqd->lock, we can use +rcu_dereference_protected() without declaring an rcu read section. +Provide a simple helper to get the thread from a locked context. 
+ +Fixes: ac0b8b327a5677d ("io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()") +Signed-off-by: Keith Busch +Link: https://lore.kernel.org/r/20250611205343.1821117-1-kbusch@meta.com +[axboe: fold in fix for register.c] +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 4 ++-- + io_uring/register.c | 7 +++++-- + io_uring/sqpoll.c | 34 ++++++++++++++++++++++++---------- + io_uring/sqpoll.h | 8 +++++++- + 4 files changed, 38 insertions(+), 15 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 9266d4f2016ad..e5466f6568269 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2913,7 +2913,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) + struct task_struct *tsk; + + io_sq_thread_park(sqd); +- tsk = sqd->thread; ++ tsk = sqpoll_task_locked(sqd); + if (tsk && tsk->io_uring && tsk->io_uring->io_wq) + io_wq_cancel_cb(tsk->io_uring->io_wq, + io_cancel_ctx_cb, ctx, true); +@@ -3150,7 +3150,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) + s64 inflight; + DEFINE_WAIT(wait); + +- WARN_ON_ONCE(sqd && sqd->thread != current); ++ WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); + + if (!current->io_uring) + return; +diff --git a/io_uring/register.c b/io_uring/register.c +index cc23a4c205cd4..a59589249fce7 100644 +--- a/io_uring/register.c ++++ b/io_uring/register.c +@@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, + if (ctx->flags & IORING_SETUP_SQPOLL) { + sqd = ctx->sq_data; + if (sqd) { ++ struct task_struct *tsk; ++ + /* + * Observe the correct sqd->lock -> ctx->uring_lock + * ordering. Fine to drop uring_lock here, we hold +@@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, + mutex_unlock(&ctx->uring_lock); + mutex_lock(&sqd->lock); + mutex_lock(&ctx->uring_lock); +- if (sqd->thread) +- tctx = sqd->thread->io_uring; ++ tsk = sqpoll_task_locked(sqd); ++ if (tsk) ++ tctx = tsk->io_uring; + } + } else { + tctx = current->io_uring; +diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c +index 0625a421626f4..268d2fbe6160c 100644 +--- a/io_uring/sqpoll.c ++++ b/io_uring/sqpoll.c +@@ -30,7 +30,7 @@ enum { + void io_sq_thread_unpark(struct io_sq_data *sqd) + __releases(&sqd->lock) + { +- WARN_ON_ONCE(sqd->thread == current); ++ WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); + + /* + * Do the dance but not conditional clear_bit() because it'd race with +@@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) + void io_sq_thread_park(struct io_sq_data *sqd) + __acquires(&sqd->lock) + { +- WARN_ON_ONCE(data_race(sqd->thread) == current); ++ struct task_struct *tsk; + + atomic_inc(&sqd->park_pending); + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + mutex_lock(&sqd->lock); +- if (sqd->thread) +- wake_up_process(sqd->thread); ++ ++ tsk = sqpoll_task_locked(sqd); ++ if (tsk) { ++ WARN_ON_ONCE(tsk == current); ++ wake_up_process(tsk); ++ } + } + + void io_sq_thread_stop(struct io_sq_data *sqd) + { +- WARN_ON_ONCE(sqd->thread == current); ++ struct task_struct *tsk; ++ + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); + + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + mutex_lock(&sqd->lock); +- if (sqd->thread) +- wake_up_process(sqd->thread); ++ tsk = sqpoll_task_locked(sqd); ++ if (tsk) { ++ WARN_ON_ONCE(tsk == current); ++ wake_up_process(tsk); ++ } + mutex_unlock(&sqd->lock); + wait_for_completion(&sqd->exited); + } +@@ -486,7 +494,10 @@ __cold 
int io_sq_offload_create(struct io_ring_ctx *ctx, + goto err_sqpoll; + } + +- sqd->thread = tsk; ++ mutex_lock(&sqd->lock); ++ rcu_assign_pointer(sqd->thread, tsk); ++ mutex_unlock(&sqd->lock); ++ + task_to_put = get_task_struct(tsk); + ret = io_uring_alloc_task_context(tsk, ctx); + wake_up_new_task(tsk); +@@ -514,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, + int ret = -EINVAL; + + if (sqd) { ++ struct task_struct *tsk; ++ + io_sq_thread_park(sqd); + /* Don't set affinity for a dying thread */ +- if (sqd->thread) +- ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); ++ tsk = sqpoll_task_locked(sqd); ++ if (tsk) ++ ret = io_wq_cpu_affinity(tsk->io_uring, mask); + io_sq_thread_unpark(sqd); + } + +diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h +index 4171666b1cf4c..b83dcdec9765f 100644 +--- a/io_uring/sqpoll.h ++++ b/io_uring/sqpoll.h +@@ -8,7 +8,7 @@ struct io_sq_data { + /* ctx's that are using this sqd */ + struct list_head ctx_list; + +- struct task_struct *thread; ++ struct task_struct __rcu *thread; + struct wait_queue_head wait; + + unsigned sq_thread_idle; +@@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); + void io_put_sq_data(struct io_sq_data *sqd); + void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); + int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); ++ ++static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) ++{ ++ return rcu_dereference_protected(sqd->thread, ++ lockdep_is_held(&sqd->lock)); ++} +-- +2.39.5 + diff --git a/queue-6.15/io_uring-fix-spurious-drain-flushing.patch b/queue-6.15/io_uring-fix-spurious-drain-flushing.patch new file mode 100644 index 0000000000..9b2a68a8e7 --- /dev/null +++ b/queue-6.15/io_uring-fix-spurious-drain-flushing.patch @@ -0,0 +1,67 @@ +From 1833532341f4a4ebb43130aad4385336074463dd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 May 2025 12:12:48 +0100 +Subject: io_uring: fix spurious drain flushing + +From: Pavel Begunkov + +[ Upstream commit fde04c7e2775feb0746301e0ef86a04d3598c3fe ] + +io_queue_deferred() is not tolerant to spurious calls not completing +some requests. You can have an inflight drain-marked request and another +request that came after and got queued into the drain list. Now, if +io_queue_deferred() is called before the first request completes, it'll +check the 2nd req with req_need_defer(), find that there is no drain +flag set, and queue it for execution. + +To make io_queue_deferred() work, it should at least check sequences for +the first request, and then we need also need to check if there is +another drain request creating another bubble. 
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/972bde11b7d4ef25b3f5e3fd34f80e4d2aa345b8.1746788718.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index edda31a15c6e6..9266d4f2016ad 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -537,18 +537,30 @@ void io_req_queue_iowq(struct io_kiocb *req) + io_req_task_work_add(req); + } + ++static bool io_drain_defer_seq(struct io_kiocb *req, u32 seq) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; ++} ++ + static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx) + { ++ bool drain_seen = false, first = true; ++ + spin_lock(&ctx->completion_lock); + while (!list_empty(&ctx->defer_list)) { + struct io_defer_entry *de = list_first_entry(&ctx->defer_list, + struct io_defer_entry, list); + +- if (req_need_defer(de->req, de->seq)) ++ drain_seen |= de->req->flags & REQ_F_IO_DRAIN; ++ if ((drain_seen || first) && io_drain_defer_seq(de->req, de->seq)) + break; ++ + list_del_init(&de->list); + io_req_task_queue(de->req); + kfree(de); ++ first = false; + } + spin_unlock(&ctx->completion_lock); + } +-- +2.39.5 + diff --git a/queue-6.15/io_uring-fix-use-after-free-of-sq-thread-in-__io_uri.patch b/queue-6.15/io_uring-fix-use-after-free-of-sq-thread-in-__io_uri.patch new file mode 100644 index 0000000000..3cff88f552 --- /dev/null +++ b/queue-6.15/io_uring-fix-use-after-free-of-sq-thread-in-__io_uri.patch @@ -0,0 +1,208 @@ +From f103a13980783653fab3712b7268cbb729ea74ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 Jun 2025 10:18:01 -0700 +Subject: io_uring: fix use-after-free of sq->thread in + __io_uring_show_fdinfo() + +From: Penglei Jiang + +[ Upstream commit ac0b8b327a5677dc6fecdf353d808161525b1ff0 ] + +syzbot reports: + +BUG: KASAN: slab-use-after-free in getrusage+0x1109/0x1a60 +Read of size 8 at addr ffff88810de2d2c8 by task a.out/304 + +CPU: 0 UID: 0 PID: 304 Comm: a.out Not tainted 6.16.0-rc1 #1 PREEMPT(voluntary) +Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +Call Trace: + + dump_stack_lvl+0x53/0x70 + print_report+0xd0/0x670 + ? __pfx__raw_spin_lock_irqsave+0x10/0x10 + ? getrusage+0x1109/0x1a60 + kasan_report+0xce/0x100 + ? getrusage+0x1109/0x1a60 + getrusage+0x1109/0x1a60 + ? __pfx_getrusage+0x10/0x10 + __io_uring_show_fdinfo+0x9fe/0x1790 + ? ksys_read+0xf7/0x1c0 + ? do_syscall_64+0xa4/0x260 + ? vsnprintf+0x591/0x1100 + ? __pfx___io_uring_show_fdinfo+0x10/0x10 + ? __pfx_vsnprintf+0x10/0x10 + ? mutex_trylock+0xcf/0x130 + ? __pfx_mutex_trylock+0x10/0x10 + ? __pfx_show_fd_locks+0x10/0x10 + ? io_uring_show_fdinfo+0x57/0x80 + io_uring_show_fdinfo+0x57/0x80 + seq_show+0x38c/0x690 + seq_read_iter+0x3f7/0x1180 + ? inode_set_ctime_current+0x160/0x4b0 + seq_read+0x271/0x3e0 + ? __pfx_seq_read+0x10/0x10 + ? __pfx__raw_spin_lock+0x10/0x10 + ? __mark_inode_dirty+0x402/0x810 + ? selinux_file_permission+0x368/0x500 + ? file_update_time+0x10f/0x160 + vfs_read+0x177/0xa40 + ? __pfx___handle_mm_fault+0x10/0x10 + ? __pfx_vfs_read+0x10/0x10 + ? mutex_lock+0x81/0xe0 + ? __pfx_mutex_lock+0x10/0x10 + ? fdget_pos+0x24d/0x4b0 + ksys_read+0xf7/0x1c0 + ? __pfx_ksys_read+0x10/0x10 + ? 
do_user_addr_fault+0x43b/0x9c0
+ do_syscall_64+0xa4/0x260
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7f0f74170fc9
+Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 8
+RSP: 002b:00007fffece049e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000000
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0f74170fc9
+RDX: 0000000000001000 RSI: 00007fffece049f0 RDI: 0000000000000004
+RBP: 00007fffece05ad0 R08: 0000000000000000 R09: 00007fffece04d90
+R10: 0000000000000000 R11: 0000000000000206 R12: 00005651720a1100
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ 
+
+Allocated by task 298:
+ kasan_save_stack+0x33/0x60
+ kasan_save_track+0x14/0x30
+ __kasan_slab_alloc+0x6e/0x70
+ kmem_cache_alloc_node_noprof+0xe8/0x330
+ copy_process+0x376/0x5e00
+ create_io_thread+0xab/0xf0
+ io_sq_offload_create+0x9ed/0xf20
+ io_uring_setup+0x12b0/0x1cc0
+ do_syscall_64+0xa4/0x260
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Freed by task 22:
+ kasan_save_stack+0x33/0x60
+ kasan_save_track+0x14/0x30
+ kasan_save_free_info+0x3b/0x60
+ __kasan_slab_free+0x37/0x50
+ kmem_cache_free+0xc4/0x360
+ rcu_core+0x5ff/0x19f0
+ handle_softirqs+0x18c/0x530
+ run_ksoftirqd+0x20/0x30
+ smpboot_thread_fn+0x287/0x6c0
+ kthread+0x30d/0x630
+ ret_from_fork+0xef/0x1a0
+ ret_from_fork_asm+0x1a/0x30
+
+Last potentially related work creation:
+ kasan_save_stack+0x33/0x60
+ kasan_record_aux_stack+0x8c/0xa0
+ __call_rcu_common.constprop.0+0x68/0x940
+ __schedule+0xff2/0x2930
+ __cond_resched+0x4c/0x80
+ mutex_lock+0x5c/0xe0
+ io_uring_del_tctx_node+0xe1/0x2b0
+ io_uring_clean_tctx+0xb7/0x160
+ io_uring_cancel_generic+0x34e/0x760
+ do_exit+0x240/0x2350
+ do_group_exit+0xab/0x220
+ __x64_sys_exit_group+0x39/0x40
+ x64_sys_call+0x1243/0x1840
+ do_syscall_64+0xa4/0x260
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+The buggy address belongs to the object at ffff88810de2cb00
+ which belongs to the cache task_struct of size 3712
+The buggy address is located 1992 bytes inside of
+ freed 3712-byte region [ffff88810de2cb00, ffff88810de2d980)
+
+which is caused by the task_struct pointed to by sq->thread being
+released while it is being used in the function
+__io_uring_show_fdinfo(). Holding ctx->uring_lock does not prevent the
+release or exit of sq->thread.
+Fix this by assigning and looking up ->thread under RCU, and grabbing a
+reference to the task_struct. This ensures that it cannot get released
+while fdinfo is using it.
+ +Reported-by: syzbot+531502bbbe51d2f769f4@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/all/682b06a5.a70a0220.3849cf.00b3.GAE@google.com +Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") +Signed-off-by: Penglei Jiang +Link: https://lore.kernel.org/r/20250610171801.70960-1-superman.xpt@gmail.com +[axboe: massage commit message] +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/fdinfo.c | 12 ++++++++++-- + io_uring/sqpoll.c | 9 ++++----- + 2 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c +index e0d6a59a89fa1..f948917f7f707 100644 +--- a/io_uring/fdinfo.c ++++ b/io_uring/fdinfo.c +@@ -172,18 +172,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) + + if (ctx->flags & IORING_SETUP_SQPOLL) { + struct io_sq_data *sq = ctx->sq_data; ++ struct task_struct *tsk; + ++ rcu_read_lock(); ++ tsk = rcu_dereference(sq->thread); + /* + * sq->thread might be NULL if we raced with the sqpoll + * thread termination. + */ +- if (sq->thread) { ++ if (tsk) { ++ get_task_struct(tsk); ++ rcu_read_unlock(); ++ getrusage(tsk, RUSAGE_SELF, &sq_usage); ++ put_task_struct(tsk); + sq_pid = sq->task_pid; + sq_cpu = sq->sq_cpu; +- getrusage(sq->thread, RUSAGE_SELF, &sq_usage); + sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 + + sq_usage.ru_stime.tv_usec); + sq_work_time = sq->work_time; ++ } else { ++ rcu_read_unlock(); + } + } + +diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c +index 03c699493b5ab..0625a421626f4 100644 +--- a/io_uring/sqpoll.c ++++ b/io_uring/sqpoll.c +@@ -270,7 +270,8 @@ static int io_sq_thread(void *data) + /* offload context creation failed, just exit */ + if (!current->io_uring) { + mutex_lock(&sqd->lock); +- sqd->thread = NULL; ++ rcu_assign_pointer(sqd->thread, NULL); ++ put_task_struct(current); + mutex_unlock(&sqd->lock); + goto err_out; + } +@@ -379,7 +380,8 @@ static int io_sq_thread(void *data) + io_sq_tw(&retry_list, UINT_MAX); + + io_uring_cancel_generic(true, sqd); +- sqd->thread = NULL; ++ rcu_assign_pointer(sqd->thread, NULL); ++ put_task_struct(current); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); + io_run_task_work(); +@@ -495,9 +497,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, + ret = -EINVAL; + goto err; + } +- +- if (task_to_put) +- put_task_struct(task_to_put); + return 0; + err_sqpoll: + complete(&ctx->sq_data->exited); +-- +2.39.5 + diff --git a/queue-6.15/nvmet-fcloop-access-fcpreq-only-when-holding-reqlock.patch b/queue-6.15/nvmet-fcloop-access-fcpreq-only-when-holding-reqlock.patch new file mode 100644 index 0000000000..02677ee947 --- /dev/null +++ b/queue-6.15/nvmet-fcloop-access-fcpreq-only-when-holding-reqlock.patch @@ -0,0 +1,95 @@ +From e4ec2575f45f061c8965290edc07e0c3a6379666 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 May 2025 14:23:03 +0200 +Subject: nvmet-fcloop: access fcpreq only when holding reqlock + +From: Daniel Wagner + +[ Upstream commit 47a827cd7929d0550c3496d70b417fcb5649b27b ] + +The abort handling logic expects that the state and the fcpreq are only +accessed when holding the reqlock lock. + +While at it, only handle the aborts in the abort handler. 
+ +Signed-off-by: Daniel Wagner +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/target/fcloop.c | 31 ++++++++++++++++--------------- + 1 file changed, 16 insertions(+), 15 deletions(-) + +diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c +index 641201e62c1ba..20becea1ad968 100644 +--- a/drivers/nvme/target/fcloop.c ++++ b/drivers/nvme/target/fcloop.c +@@ -618,12 +618,13 @@ fcloop_fcp_recv_work(struct work_struct *work) + { + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, fcp_rcv_work); +- struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; ++ struct nvmefc_fcp_req *fcpreq; + unsigned long flags; + int ret = 0; + bool aborted = false; + + spin_lock_irqsave(&tfcp_req->reqlock, flags); ++ fcpreq = tfcp_req->fcpreq; + switch (tfcp_req->inistate) { + case INI_IO_START: + tfcp_req->inistate = INI_IO_ACTIVE; +@@ -638,16 +639,19 @@ fcloop_fcp_recv_work(struct work_struct *work) + } + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); + +- if (unlikely(aborted)) +- ret = -ECANCELED; +- else { +- if (likely(!check_for_drop(tfcp_req))) +- ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, +- &tfcp_req->tgt_fcp_req, +- fcpreq->cmdaddr, fcpreq->cmdlen); +- else +- pr_info("%s: dropped command ********\n", __func__); ++ if (unlikely(aborted)) { ++ /* the abort handler will call fcloop_call_host_done */ ++ return; ++ } ++ ++ if (unlikely(check_for_drop(tfcp_req))) { ++ pr_info("%s: dropped command ********\n", __func__); ++ return; + } ++ ++ ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, ++ &tfcp_req->tgt_fcp_req, ++ fcpreq->cmdaddr, fcpreq->cmdlen); + if (ret) + fcloop_call_host_done(fcpreq, tfcp_req, ret); + } +@@ -662,9 +666,10 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) + unsigned long flags; + + spin_lock_irqsave(&tfcp_req->reqlock, flags); +- fcpreq = tfcp_req->fcpreq; + switch (tfcp_req->inistate) { + case INI_IO_ABORTED: ++ fcpreq = tfcp_req->fcpreq; ++ tfcp_req->fcpreq = NULL; + break; + case INI_IO_COMPLETED: + completed = true; +@@ -686,10 +691,6 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) + nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req); + +- spin_lock_irqsave(&tfcp_req->reqlock, flags); +- tfcp_req->fcpreq = NULL; +- spin_unlock_irqrestore(&tfcp_req->reqlock, flags); +- + fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); + /* call_host_done releases reference for abort downcall */ + } +-- +2.39.5 + diff --git a/queue-6.15/perf-ensure-bpf_perf_link-path-is-properly-serialize.patch b/queue-6.15/perf-ensure-bpf_perf_link-path-is-properly-serialize.patch new file mode 100644 index 0000000000..f0b668721f --- /dev/null +++ b/queue-6.15/perf-ensure-bpf_perf_link-path-is-properly-serialize.patch @@ -0,0 +1,98 @@ +From 99ba849522c7c0c22648a8c04e71e79e3a5611af Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Jan 2025 10:54:50 +0100 +Subject: perf: Ensure bpf_perf_link path is properly serialized + +From: Peter Zijlstra + +[ Upstream commit 7ed9138a72829d2035ecbd8dbd35b1bc3c137c40 ] + +Ravi reported that the bpf_perf_link_attach() usage of +perf_event_set_bpf_prog() is not serialized by ctx->mutex, unlike the +PERF_EVENT_IOC_SET_BPF case. 
+ +Reported-by: Ravi Bangoria +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Ravi Bangoria +Link: https://lkml.kernel.org/r/20250307193305.486326750@infradead.org +Signed-off-by: Sasha Levin +--- + kernel/events/core.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +diff --git a/kernel/events/core.c b/kernel/events/core.c +index 881d768e45564..e97bc9220fd1a 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -6239,6 +6239,9 @@ static int perf_event_set_output(struct perf_event *event, + static int perf_event_set_filter(struct perf_event *event, void __user *arg); + static int perf_copy_attr(struct perf_event_attr __user *uattr, + struct perf_event_attr *attr); ++static int __perf_event_set_bpf_prog(struct perf_event *event, ++ struct bpf_prog *prog, ++ u64 bpf_cookie); + + static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) + { +@@ -6301,7 +6304,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon + if (IS_ERR(prog)) + return PTR_ERR(prog); + +- err = perf_event_set_bpf_prog(event, prog, 0); ++ err = __perf_event_set_bpf_prog(event, prog, 0); + if (err) { + bpf_prog_put(prog); + return err; +@@ -11069,8 +11072,9 @@ static inline bool perf_event_is_tracing(struct perf_event *event) + return false; + } + +-int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, +- u64 bpf_cookie) ++static int __perf_event_set_bpf_prog(struct perf_event *event, ++ struct bpf_prog *prog, ++ u64 bpf_cookie) + { + bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp; + +@@ -11108,6 +11112,20 @@ int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, + return perf_event_attach_bpf_prog(event, prog, bpf_cookie); + } + ++int perf_event_set_bpf_prog(struct perf_event *event, ++ struct bpf_prog *prog, ++ u64 bpf_cookie) ++{ ++ struct perf_event_context *ctx; ++ int ret; ++ ++ ctx = perf_event_ctx_lock(event); ++ ret = __perf_event_set_bpf_prog(event, prog, bpf_cookie); ++ perf_event_ctx_unlock(event, ctx); ++ ++ return ret; ++} ++ + void perf_event_free_bpf_prog(struct perf_event *event) + { + if (!event->prog) +@@ -11130,7 +11148,15 @@ static void perf_event_free_filter(struct perf_event *event) + { + } + +-int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, ++static int __perf_event_set_bpf_prog(struct perf_event *event, ++ struct bpf_prog *prog, ++ u64 bpf_cookie) ++{ ++ return -ENOENT; ++} ++ ++int perf_event_set_bpf_prog(struct perf_event *event, ++ struct bpf_prog *prog, + u64 bpf_cookie) + { + return -ENOENT; +-- +2.39.5 + diff --git a/queue-6.15/series b/queue-6.15/series index be9206b5e5..d39ec19267 100644 --- a/queue-6.15/series +++ b/queue-6.15/series @@ -722,3 +722,18 @@ net_sched-ets-fix-a-race-in-ets_qdisc_change.patch net-drv-netdevsim-don-t-napi_complete-from-netpoll.patch net-ethtool-don-t-check-if-rss-context-exists-in-cas.patch drm-xe-lrc-use-a-temporary-buffer-for-wa-bb.patch +btrfs-exit-after-state-insertion-failure-at-btrfs_co.patch +fs-filesystems-fix-potential-unsigned-integer-underf.patch +btrfs-fix-fsync-of-files-with-no-hard-links-not-pers.patch +gfs2-pass-through-holder-from-the-vfs-for-freeze-tha.patch +btrfs-exit-after-state-split-error-at-set_extent_bit.patch +nvmet-fcloop-access-fcpreq-only-when-holding-reqlock.patch +io_uring-fix-spurious-drain-flushing.patch +perf-ensure-bpf_perf_link-path-is-properly-serialize.patch +block-use-q-elevator-with-elevator_lock-held-in-elv_.patch 
+io_uring-fix-use-after-free-of-sq-thread-in-__io_uri.patch +block-don-t-use-submit_bio_noacct_nocheck-in-blk_zon.patch +io_uring-consistently-use-rcu-semantics-with-sqpoll-.patch +smb-client-fix-perf-regression-with-deferred-closes.patch +bio-fix-bio_first_folio-for-sparsemem-without-vmemma.patch +block-fix-bvec_set_folio-for-very-large-folios.patch diff --git a/queue-6.15/smb-client-fix-perf-regression-with-deferred-closes.patch b/queue-6.15/smb-client-fix-perf-regression-with-deferred-closes.patch new file mode 100644 index 0000000000..99f93a4a33 --- /dev/null +++ b/queue-6.15/smb-client-fix-perf-regression-with-deferred-closes.patch @@ -0,0 +1,123 @@ +From 6902e05c4892fe7c859a417ed5023bc17a7f7b4f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jun 2025 12:45:04 -0300 +Subject: smb: client: fix perf regression with deferred closes + +From: Paulo Alcantara + +[ Upstream commit b64af6bcd3b0f3fc633d6a70adb0991737abfef4 ] + +Customer reported that one of their applications started failing to +open files with STATUS_INSUFFICIENT_RESOURCES due to NetApp server +hitting the maximum number of opens to same file that it would allow +for a single client connection. + +It turned out the client was failing to reuse open handles with +deferred closes because matching ->f_flags directly without masking +off O_CREAT|O_EXCL|O_TRUNC bits first broke the comparision and then +client ended up with thousands of deferred closes to same file. Those +bits are already satisfied on the original open, so no need to check +them against existing open handles. + +Reproducer: + + #include + #include + #include + #include + #include + #include + + #define NR_THREADS 4 + #define NR_ITERATIONS 2500 + #define TEST_FILE "/mnt/1/test/dir/foo" + + static char buf[64]; + + static void *worker(void *arg) + { + int i, j; + int fd; + + for (i = 0; i < NR_ITERATIONS; i++) { + fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_APPEND, 0666); + for (j = 0; j < 16; j++) + write(fd, buf, sizeof(buf)); + close(fd); + } + } + + int main(int argc, char *argv[]) + { + pthread_t t[NR_THREADS]; + int fd; + int i; + + fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_TRUNC, 0666); + close(fd); + memset(buf, 'a', sizeof(buf)); + for (i = 0; i < NR_THREADS; i++) + pthread_create(&t[i], NULL, worker, NULL); + for (i = 0; i < NR_THREADS; i++) + pthread_join(t[i], NULL); + return 0; + } + +Before patch: + +$ mount.cifs //srv/share /mnt/1 -o ... +$ mkdir -p /mnt/1/test/dir +$ gcc repro.c && ./a.out +... +number of opens: 1391 + +After patch: + +$ mount.cifs //srv/share /mnt/1 -o ... +$ mkdir -p /mnt/1/test/dir +$ gcc repro.c && ./a.out +... 
+number of opens: 1 + +Cc: linux-cifs@vger.kernel.org +Cc: David Howells +Cc: Jay Shin +Cc: Pierguido Lambri +Fixes: b8ea3b1ff544 ("smb: enable reuse of deferred file handles for write operations") +Acked-by: Shyam Prasad N +Signed-off-by: Paulo Alcantara (Red Hat) +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/smb/client/file.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c +index d2df10b8e6fd8..9835672267d27 100644 +--- a/fs/smb/client/file.c ++++ b/fs/smb/client/file.c +@@ -999,15 +999,18 @@ int cifs_open(struct inode *inode, struct file *file) + rc = cifs_get_readable_path(tcon, full_path, &cfile); + } + if (rc == 0) { +- if (file->f_flags == cfile->f_flags) { ++ unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); ++ unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); ++ ++ if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && ++ (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { + file->private_data = cfile; + spin_lock(&CIFS_I(inode)->deferred_lock); + cifs_del_deferred_close(cfile); + spin_unlock(&CIFS_I(inode)->deferred_lock); + goto use_cache; +- } else { +- _cifsFileInfo_put(cfile, true, false); + } ++ _cifsFileInfo_put(cfile, true, false); + } else { + /* hard link on the defeered close file */ + rc = cifs_get_hardlink_path(tcon, inode, file); +-- +2.39.5 +