From: Greg Kroah-Hartman Date: Wed, 8 Apr 2026 13:23:08 +0000 (+0200) Subject: 6.1-stable patches X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c2432516cc93a0cbe60b693c7e280d5d15ada71f;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: block-fix-resource-leak-in-blk_register_queue-error-path.patch cpufreq-governor-fix-double-free-in-cpufreq_dbs_governor_init-error-path.patch cpufreq-governor-free-dbs_data-directly-when-gov-init-fails.patch ext4-factor-out-ext4_flex_groups_free.patch ext4-factor-out-ext4_percpu_param_init-and-ext4_percpu_param_destroy.patch ext4-fix-the-might_sleep-warnings-in-kvfree.patch ext4-fix-use-after-free-in-update_super_work-when-racing-with-umount.patch ext4-handle-wraparound-when-searching-for-blocks-for-indirect-mapped-blocks.patch ext4-publish-jinode-after-initialization.patch ext4-use-ext4_group_desc_free-in-ext4_put_super-to-save-some-duplicated-code.patch hwmon-pmbus-core-add-lock-and-unlock-functions.patch hwmon-pmbus-isl68137-add-mutex-protection-for-avs-enable-sysfs-attributes.patch ksmbd-fix-memory-leaks-and-null-deref-in-smb2_lock.patch ksmbd-fix-potencial-oob-in-get_file_all_info-for-compound-requests.patch kvm-x86-mmu-drop-zap-existing-present-spte-even-when-creating-an-mmio-spte.patch mm-huge_memory-fix-folio-isn-t-locked-in-softleaf_to_folio.patch mptcp-fix-lock-class-name-family-in-pm_nl_create_listen_socket.patch net-correctly-handle-tunneled-traffic-on-ipv6_csum-gso-fallback.patch net-macb-move-devm_-free-request-_irq-out-of-spin-lock-area.patch scsi-target-tcm_loop-drain-commands-in-target_reset-handler.patch tracing-fix-potential-deadlock-in-cpu-hotplug-with-osnoise.patch wifi-virt_wifi-remove-set_netdev_dev-to-avoid-use-after-free.patch x86-cpu-enable-fsgsbase-early-in-cpu_init_exception_handling.patch --- diff --git a/queue-6.1/block-fix-resource-leak-in-blk_register_queue-error-path.patch b/queue-6.1/block-fix-resource-leak-in-blk_register_queue-error-path.patch new file 
mode 100644 index 0000000000..c633f8d299 --- /dev/null +++ b/queue-6.1/block-fix-resource-leak-in-blk_register_queue-error-path.patch @@ -0,0 +1,43 @@ +From stable+bounces-232891-greg=kroah.com@vger.kernel.org Thu Apr 2 04:31:44 2026 +From: Robert Garcia +Date: Thu, 2 Apr 2026 10:30:34 +0800 +Subject: block: fix resource leak in blk_register_queue() error path +To: stable@vger.kernel.org, Zheng Qixing +Cc: Jens Axboe , Robert Garcia , Christoph Hellwig , Yu Kuai , linux-block@vger.kernel.org, linux-kernel@vger.kernel.org +Message-ID: <20260402023034.3027538-1-rob_garcia@163.com> + +From: Zheng Qixing + +[ Upstream commit 40f2eb9b531475dd01b683fdaf61ca3cfd03a51e ] + +When registering a queue fails after blk_mq_sysfs_register() is +successful but the function later encounters an error, we need +to clean up the blk_mq_sysfs resources. + +Add the missing blk_mq_sysfs_unregister() call in the error path +to properly clean up these resources and prevent a memory leak. + +Fixes: 320ae51feed5 ("blk-mq: new multi-queue block IO queueing mechanism") +Signed-off-by: Zheng Qixing +Reviewed-by: Christoph Hellwig +Reviewed-by: Yu Kuai +Link: https://lore.kernel.org/r/20250412092554.475218-1-zhengqixing@huaweicloud.com +Signed-off-by: Jens Axboe +[ Minor context change fixed. 
] +Signed-off-by: Robert Garcia +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-sysfs.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -867,6 +867,8 @@ put_dev: + elv_unregister_queue(q); + disk_unregister_independent_access_ranges(disk); + mutex_unlock(&q->sysfs_lock); ++ if (queue_is_mq(q)) ++ blk_mq_sysfs_unregister(disk); + mutex_unlock(&q->sysfs_dir_lock); + kobject_del(&q->kobj); + diff --git a/queue-6.1/cpufreq-governor-fix-double-free-in-cpufreq_dbs_governor_init-error-path.patch b/queue-6.1/cpufreq-governor-fix-double-free-in-cpufreq_dbs_governor_init-error-path.patch new file mode 100644 index 0000000000..242d294252 --- /dev/null +++ b/queue-6.1/cpufreq-governor-fix-double-free-in-cpufreq_dbs_governor_init-error-path.patch @@ -0,0 +1,56 @@ +From stable+bounces-233888-greg=kroah.com@vger.kernel.org Wed Apr 8 14:47:57 2026 +From: Sasha Levin +Date: Wed, 8 Apr 2026 08:47:47 -0400 +Subject: cpufreq: governor: fix double free in cpufreq_dbs_governor_init() error path +To: stable@vger.kernel.org +Cc: Guangshuo Li , Zhongqiu Han , Viresh Kumar , "Rafael J. Wysocki" , Sasha Levin +Message-ID: <20260408124747.1019894-2-sashal@kernel.org> + +From: Guangshuo Li + +[ Upstream commit 6dcf9d0064ce2f3e3dfe5755f98b93abe6a98e1e ] + +When kobject_init_and_add() fails, cpufreq_dbs_governor_init() calls +kobject_put(&dbs_data->attr_set.kobj). + +The kobject release callback cpufreq_dbs_data_release() calls +gov->exit(dbs_data) and kfree(dbs_data), but the current error path +then calls gov->exit(dbs_data) and kfree(dbs_data) again, causing a +double free. + +Keep the direct kfree(dbs_data) for the gov->init() failure path, but +after kobject_init_and_add() has been called, let kobject_put() handle +the cleanup through cpufreq_dbs_data_release(). 
+ +Fixes: 4ebe36c94aed ("cpufreq: Fix kobject memleak") +Signed-off-by: Guangshuo Li +Reviewed-by: Zhongqiu Han +Acked-by: Viresh Kumar +Cc: All applicable +Link: https://patch.msgid.link/20260401024535.1395801-1-lgs201920130244@gmail.com +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/cpufreq_governor.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/cpufreq/cpufreq_governor.c ++++ b/drivers/cpufreq/cpufreq_governor.c +@@ -468,13 +468,13 @@ int cpufreq_dbs_governor_init(struct cpu + /* Failure, so roll back. */ + pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); + +- kobject_put(&dbs_data->attr_set.kobj); +- + policy->governor_data = NULL; + + if (!have_governor_per_policy()) + gov->gdbs_data = NULL; +- gov->exit(dbs_data); ++ ++ kobject_put(&dbs_data->attr_set.kobj); ++ goto free_policy_dbs_info; + + free_dbs_data: + kfree(dbs_data); diff --git a/queue-6.1/cpufreq-governor-free-dbs_data-directly-when-gov-init-fails.patch b/queue-6.1/cpufreq-governor-free-dbs_data-directly-when-gov-init-fails.patch new file mode 100644 index 0000000000..3ff589419b --- /dev/null +++ b/queue-6.1/cpufreq-governor-free-dbs_data-directly-when-gov-init-fails.patch @@ -0,0 +1,46 @@ +From stable+bounces-233887-greg=kroah.com@vger.kernel.org Wed Apr 8 14:47:53 2026 +From: Sasha Levin +Date: Wed, 8 Apr 2026 08:47:46 -0400 +Subject: cpufreq: governor: Free dbs_data directly when gov->init() fails +To: stable@vger.kernel.org +Cc: Liao Chang , Viresh Kumar , "Rafael J. Wysocki" , Sasha Levin +Message-ID: <20260408124747.1019894-1-sashal@kernel.org> + +From: Liao Chang + +[ Upstream commit 916f13884042f615cfbfc0b42cc68dadee826f2a ] + +Because the kobject embedded in dbs_data does not have a release() +method yet, kfree() must be used to free dbs_data directly when the +governor fails to allocate the tuners field of dbs_data. 
+ +Signed-off-by: Liao Chang +Acked-by: Viresh Kumar +Signed-off-by: Rafael J. Wysocki +Stable-dep-of: 6dcf9d0064ce ("cpufreq: governor: fix double free in cpufreq_dbs_governor_init() error path") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/cpufreq_governor.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/cpufreq/cpufreq_governor.c ++++ b/drivers/cpufreq/cpufreq_governor.c +@@ -440,7 +440,7 @@ int cpufreq_dbs_governor_init(struct cpu + + ret = gov->init(dbs_data); + if (ret) +- goto free_policy_dbs_info; ++ goto free_dbs_data; + + /* + * The sampling interval should not be less than the transition latency +@@ -475,6 +475,8 @@ int cpufreq_dbs_governor_init(struct cpu + if (!have_governor_per_policy()) + gov->gdbs_data = NULL; + gov->exit(dbs_data); ++ ++free_dbs_data: + kfree(dbs_data); + + free_policy_dbs_info: diff --git a/queue-6.1/ext4-factor-out-ext4_flex_groups_free.patch b/queue-6.1/ext4-factor-out-ext4_flex_groups_free.patch new file mode 100644 index 0000000000..68532ab57c --- /dev/null +++ b/queue-6.1/ext4-factor-out-ext4_flex_groups_free.patch @@ -0,0 +1,94 @@ +From stable+bounces-233064-greg=kroah.com@vger.kernel.org Thu Apr 2 18:40:30 2026 +From: Sasha Levin +Date: Thu, 2 Apr 2026 12:31:14 -0400 +Subject: ext4: factor out ext4_flex_groups_free() +To: stable@vger.kernel.org +Cc: Jason Yan , Theodore Ts'o , Sasha Levin +Message-ID: <20260402163115.1385749-3-sashal@kernel.org> + +From: Jason Yan + +[ Upstream commit dcbf87589d90e3bd5a5a4cf832517f22f3c55efb ] + +Factor out ext4_flex_groups_free() and it can be used both in +__ext4_fill_super() and ext4_put_super(). 
+ +Signed-off-by: Jason Yan +Link: https://lore.kernel.org/r/20230323140517.1070239-5-yanaijie@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 496bb99b7e66 ("ext4: fix the might_sleep() warnings in kvfree()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 35 +++++++++++++++++------------------ + 1 file changed, 17 insertions(+), 18 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1249,11 +1249,25 @@ static void ext4_group_desc_free(struct + rcu_read_unlock(); + } + ++static void ext4_flex_groups_free(struct ext4_sb_info *sbi) ++{ ++ struct flex_groups **flex_groups; ++ int i; ++ ++ rcu_read_lock(); ++ flex_groups = rcu_dereference(sbi->s_flex_groups); ++ if (flex_groups) { ++ for (i = 0; i < sbi->s_flex_groups_allocated; i++) ++ kvfree(flex_groups[i]); ++ kvfree(flex_groups); ++ } ++ rcu_read_unlock(); ++} ++ + static void ext4_put_super(struct super_block *sb) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; +- struct flex_groups **flex_groups; + int aborted = 0; + int i, err; + +@@ -1303,14 +1317,7 @@ static void ext4_put_super(struct super_ + ext4_commit_super(sb); + + ext4_group_desc_free(sbi); +- rcu_read_lock(); +- flex_groups = rcu_dereference(sbi->s_flex_groups); +- if (flex_groups) { +- for (i = 0; i < sbi->s_flex_groups_allocated; i++) +- kvfree(flex_groups[i]); +- kvfree(flex_groups); +- } +- rcu_read_unlock(); ++ ext4_flex_groups_free(sbi); + ext4_percpu_param_destroy(sbi); + #ifdef CONFIG_QUOTA + for (i = 0; i < EXT4_MAXQUOTAS; i++) +@@ -5121,7 +5128,6 @@ static int __ext4_fill_super(struct fs_c + { + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); +- struct flex_groups **flex_groups; + ext4_fsblk_t logical_sb_block; + struct inode *root; + int ret = -ENOMEM; +@@ -5613,14 +5619,7 @@ failed_mount7: + ext4_unregister_li_request(sb); + failed_mount6: + ext4_mb_release(sb); +- rcu_read_lock(); +- flex_groups = 
rcu_dereference(sbi->s_flex_groups); +- if (flex_groups) { +- for (i = 0; i < sbi->s_flex_groups_allocated; i++) +- kvfree(flex_groups[i]); +- kvfree(flex_groups); +- } +- rcu_read_unlock(); ++ ext4_flex_groups_free(sbi); + ext4_percpu_param_destroy(sbi); + failed_mount5: + ext4_ext_release(sb); diff --git a/queue-6.1/ext4-factor-out-ext4_percpu_param_init-and-ext4_percpu_param_destroy.patch b/queue-6.1/ext4-factor-out-ext4_percpu_param_init-and-ext4_percpu_param_destroy.patch new file mode 100644 index 0000000000..cd51d0c67a --- /dev/null +++ b/queue-6.1/ext4-factor-out-ext4_percpu_param_init-and-ext4_percpu_param_destroy.patch @@ -0,0 +1,149 @@ +From stable+bounces-233062-greg=kroah.com@vger.kernel.org Thu Apr 2 18:40:37 2026 +From: Sasha Levin +Date: Thu, 2 Apr 2026 12:31:12 -0400 +Subject: ext4: factor out ext4_percpu_param_init() and ext4_percpu_param_destroy() +To: stable@vger.kernel.org +Cc: Jason Yan , Theodore Ts'o , Sasha Levin +Message-ID: <20260402163115.1385749-1-sashal@kernel.org> + +From: Jason Yan + +[ Upstream commit 1f79467c8a6be64940a699de1bd43338a6dd9fdd ] + +Factor out ext4_percpu_param_init() and ext4_percpu_param_destroy(). And +also use ext4_percpu_param_destroy() in ext4_put_super() to avoid +duplicated code. No functional change. 
+ +Signed-off-by: Jason Yan +Link: https://lore.kernel.org/r/20230323140517.1070239-3-yanaijie@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 496bb99b7e66 ("ext4: fix the might_sleep() warnings in kvfree()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 85 ++++++++++++++++++++++++++++++-------------------------- + 1 file changed, 46 insertions(+), 39 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1193,6 +1193,49 @@ static inline void ext4_quota_off_umount + } + #endif + ++static int ext4_percpu_param_init(struct ext4_sb_info *sbi) ++{ ++ ext4_fsblk_t block; ++ int err; ++ ++ block = ext4_count_free_clusters(sbi->s_sb); ++ ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); ++ err = percpu_counter_init(&sbi->s_freeclusters_counter, block, ++ GFP_KERNEL); ++ if (!err) { ++ unsigned long freei = ext4_count_free_inodes(sbi->s_sb); ++ sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); ++ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, ++ GFP_KERNEL); ++ } ++ if (!err) ++ err = percpu_counter_init(&sbi->s_dirs_counter, ++ ext4_count_dirs(sbi->s_sb), GFP_KERNEL); ++ if (!err) ++ err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, ++ GFP_KERNEL); ++ if (!err) ++ err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0, ++ GFP_KERNEL); ++ if (!err) ++ err = percpu_init_rwsem(&sbi->s_writepages_rwsem); ++ ++ if (err) ++ ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory"); ++ ++ return err; ++} ++ ++static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi) ++{ ++ percpu_counter_destroy(&sbi->s_freeclusters_counter); ++ percpu_counter_destroy(&sbi->s_freeinodes_counter); ++ percpu_counter_destroy(&sbi->s_dirs_counter); ++ percpu_counter_destroy(&sbi->s_dirtyclusters_counter); ++ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); ++ percpu_free_rwsem(&sbi->s_writepages_rwsem); ++} ++ + static void ext4_put_super(struct super_block *sb) + { + struct 
ext4_sb_info *sbi = EXT4_SB(sb); +@@ -1259,12 +1302,7 @@ static void ext4_put_super(struct super_ + kvfree(flex_groups); + } + rcu_read_unlock(); +- percpu_counter_destroy(&sbi->s_freeclusters_counter); +- percpu_counter_destroy(&sbi->s_freeinodes_counter); +- percpu_counter_destroy(&sbi->s_dirs_counter); +- percpu_counter_destroy(&sbi->s_dirtyclusters_counter); +- percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); +- percpu_free_rwsem(&sbi->s_writepages_rwsem); ++ ext4_percpu_param_destroy(sbi); + #ifdef CONFIG_QUOTA + for (i = 0; i < EXT4_MAXQUOTAS; i++) + kfree(get_qf_name(sb, sbi, i)); +@@ -5088,7 +5126,6 @@ static int __ext4_fill_super(struct fs_c + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct flex_groups **flex_groups; +- ext4_fsblk_t block; + ext4_fsblk_t logical_sb_block; + struct inode *root; + int ret = -ENOMEM; +@@ -5496,33 +5533,8 @@ static int __ext4_fill_super(struct fs_c + sbi->s_journal->j_commit_callback = + ext4_journal_commit_callback; + +- block = ext4_count_free_clusters(sb); +- ext4_free_blocks_count_set(sbi->s_es, +- EXT4_C2B(sbi, block)); +- err = percpu_counter_init(&sbi->s_freeclusters_counter, block, +- GFP_KERNEL); +- if (!err) { +- unsigned long freei = ext4_count_free_inodes(sb); +- sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); +- err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, +- GFP_KERNEL); +- } +- if (!err) +- err = percpu_counter_init(&sbi->s_dirs_counter, +- ext4_count_dirs(sb), GFP_KERNEL); +- if (!err) +- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, +- GFP_KERNEL); +- if (!err) +- err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0, +- GFP_KERNEL); +- if (!err) +- err = percpu_init_rwsem(&sbi->s_writepages_rwsem); +- +- if (err) { +- ext4_msg(sb, KERN_ERR, "insufficient memory"); ++ if (ext4_percpu_param_init(sbi)) + goto failed_mount6; +- } + + if (ext4_has_feature_flex_bg(sb)) + if (!ext4_fill_flex_info(sb)) { +@@ -5613,12 +5625,7 @@ 
failed_mount6: + kvfree(flex_groups); + } + rcu_read_unlock(); +- percpu_counter_destroy(&sbi->s_freeclusters_counter); +- percpu_counter_destroy(&sbi->s_freeinodes_counter); +- percpu_counter_destroy(&sbi->s_dirs_counter); +- percpu_counter_destroy(&sbi->s_dirtyclusters_counter); +- percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit); +- percpu_free_rwsem(&sbi->s_writepages_rwsem); ++ ext4_percpu_param_destroy(sbi); + failed_mount5: + ext4_ext_release(sb); + ext4_release_system_zone(sb); diff --git a/queue-6.1/ext4-fix-the-might_sleep-warnings-in-kvfree.patch b/queue-6.1/ext4-fix-the-might_sleep-warnings-in-kvfree.patch new file mode 100644 index 0000000000..60b7c2bc3e --- /dev/null +++ b/queue-6.1/ext4-fix-the-might_sleep-warnings-in-kvfree.patch @@ -0,0 +1,172 @@ +From stable+bounces-233065-greg=kroah.com@vger.kernel.org Thu Apr 2 18:39:32 2026 +From: Sasha Levin +Date: Thu, 2 Apr 2026 12:31:15 -0400 +Subject: ext4: fix the might_sleep() warnings in kvfree() +To: stable@vger.kernel.org +Cc: Zqiang , Baokun Li , Theodore Ts'o , stable@kernel.org, Sasha Levin +Message-ID: <20260402163115.1385749-4-sashal@kernel.org> + +From: Zqiang + +[ Upstream commit 496bb99b7e66f48b178126626f47e9ba79e2d0fa ] + +Use the kvfree() in the RCU read critical section can trigger +the following warnings: + +EXT4-fs (vdb): unmounting filesystem cd983e5b-3c83-4f5a-a136-17b00eb9d018. + +WARNING: suspicious RCU usage + +./include/linux/rcupdate.h:409 Illegal context switch in RCU read-side critical section! + +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 1 + +Call Trace: + + dump_stack_lvl+0xbb/0xd0 + dump_stack+0x14/0x20 + lockdep_rcu_suspicious+0x15a/0x1b0 + __might_resched+0x375/0x4d0 + ? put_object.part.0+0x2c/0x50 + __might_sleep+0x108/0x160 + vfree+0x58/0x910 + ? ext4_group_desc_free+0x27/0x270 + kvfree+0x23/0x40 + ext4_group_desc_free+0x111/0x270 + ext4_put_super+0x3c8/0xd40 + generic_shutdown_super+0x14c/0x4a0 + ? 
__pfx_shrinker_free+0x10/0x10 + kill_block_super+0x40/0x90 + ext4_kill_sb+0x6d/0xb0 + deactivate_locked_super+0xb4/0x180 + deactivate_super+0x7e/0xa0 + cleanup_mnt+0x296/0x3e0 + __cleanup_mnt+0x16/0x20 + task_work_run+0x157/0x250 + ? __pfx_task_work_run+0x10/0x10 + ? exit_to_user_mode_loop+0x6a/0x550 + exit_to_user_mode_loop+0x102/0x550 + do_syscall_64+0x44a/0x500 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + + +BUG: sleeping function called from invalid context at mm/vmalloc.c:3441 +in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556, name: umount +preempt_count: 1, expected: 0 +CPU: 3 UID: 0 PID: 556 Comm: umount +Call Trace: + + dump_stack_lvl+0xbb/0xd0 + dump_stack+0x14/0x20 + __might_resched+0x275/0x4d0 + ? put_object.part.0+0x2c/0x50 + __might_sleep+0x108/0x160 + vfree+0x58/0x910 + ? ext4_group_desc_free+0x27/0x270 + kvfree+0x23/0x40 + ext4_group_desc_free+0x111/0x270 + ext4_put_super+0x3c8/0xd40 + generic_shutdown_super+0x14c/0x4a0 + ? __pfx_shrinker_free+0x10/0x10 + kill_block_super+0x40/0x90 + ext4_kill_sb+0x6d/0xb0 + deactivate_locked_super+0xb4/0x180 + deactivate_super+0x7e/0xa0 + cleanup_mnt+0x296/0x3e0 + __cleanup_mnt+0x16/0x20 + task_work_run+0x157/0x250 + ? __pfx_task_work_run+0x10/0x10 + ? exit_to_user_mode_loop+0x6a/0x550 + exit_to_user_mode_loop+0x102/0x550 + do_syscall_64+0x44a/0x500 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +The above scenarios occur in initialization failures and teardown +paths, there are no parallel operations on the resources released +by kvfree(), this commit therefore remove rcu_read_lock/unlock() and +use rcu_access_pointer() instead of rcu_dereference() operations. 
+ +Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access") +Fixes: df3da4ea5a0f ("ext4: fix potential race between s_group_info online resizing and access") +Signed-off-by: Zqiang +Reviewed-by: Baokun Li +Link: https://patch.msgid.link/20260319094545.19291-1-qiang.zhang@linux.dev +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 10 +++------- + fs/ext4/super.c | 8 ++------ + 2 files changed, 5 insertions(+), 13 deletions(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3298,9 +3298,7 @@ err_freebuddy: + rcu_read_unlock(); + iput(sbi->s_buddy_cache); + err_freesgi: +- rcu_read_lock(); +- kvfree(rcu_dereference(sbi->s_group_info)); +- rcu_read_unlock(); ++ kvfree(rcu_access_pointer(sbi->s_group_info)); + return -ENOMEM; + } + +@@ -3597,7 +3595,8 @@ int ext4_mb_release(struct super_block * + flush_work(&sbi->s_discard_work); + WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); + +- if (sbi->s_group_info) { ++ group_info = rcu_access_pointer(sbi->s_group_info); ++ if (group_info) { + for (i = 0; i < ngroups; i++) { + cond_resched(); + grinfo = ext4_get_group_info(sb, i); +@@ -3615,12 +3614,9 @@ int ext4_mb_release(struct super_block * + num_meta_group_infos = (ngroups + + EXT4_DESC_PER_BLOCK(sb) - 1) >> + EXT4_DESC_PER_BLOCK_BITS(sb); +- rcu_read_lock(); +- group_info = rcu_dereference(sbi->s_group_info); + for (i = 0; i < num_meta_group_infos; i++) + kfree(group_info[i]); + kvfree(group_info); +- rcu_read_unlock(); + } + kfree(sbi->s_mb_avg_fragment_size); + kfree(sbi->s_mb_avg_fragment_size_locks); +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1241,12 +1241,10 @@ static void ext4_group_desc_free(struct + struct buffer_head **group_desc; + int i; + +- rcu_read_lock(); +- group_desc = rcu_dereference(sbi->s_group_desc); ++ group_desc = rcu_access_pointer(sbi->s_group_desc); + for (i = 0; i < sbi->s_gdb_count; i++) + 
brelse(group_desc[i]); + kvfree(group_desc); +- rcu_read_unlock(); + } + + static void ext4_flex_groups_free(struct ext4_sb_info *sbi) +@@ -1254,14 +1252,12 @@ static void ext4_flex_groups_free(struct + struct flex_groups **flex_groups; + int i; + +- rcu_read_lock(); +- flex_groups = rcu_dereference(sbi->s_flex_groups); ++ flex_groups = rcu_access_pointer(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } +- rcu_read_unlock(); + } + + static void ext4_put_super(struct super_block *sb) diff --git a/queue-6.1/ext4-fix-use-after-free-in-update_super_work-when-racing-with-umount.patch b/queue-6.1/ext4-fix-use-after-free-in-update_super_work-when-racing-with-umount.patch new file mode 100644 index 0000000000..1348a541e4 --- /dev/null +++ b/queue-6.1/ext4-fix-use-after-free-in-update_super_work-when-racing-with-umount.patch @@ -0,0 +1,118 @@ +From stable+bounces-233066-greg=kroah.com@vger.kernel.org Thu Apr 2 18:39:34 2026 +From: Sasha Levin +Date: Thu, 2 Apr 2026 12:31:37 -0400 +Subject: ext4: fix use-after-free in update_super_work when racing with umount +To: stable@vger.kernel.org +Cc: Jiayuan Chen , Jiayuan Chen , Jan Kara , "Ritesh Harjani (IBM)" , Theodore Ts'o , stable@kernel.org, Sasha Levin +Message-ID: <20260402163137.1387692-1-sashal@kernel.org> + +From: Jiayuan Chen + +[ Upstream commit d15e4b0a418537aafa56b2cb80d44add83e83697 ] + +Commit b98535d09179 ("ext4: fix bug_on in start_this_handle during umount +filesystem") moved ext4_unregister_sysfs() before flushing s_sb_upd_work +to prevent new error work from being queued via /proc/fs/ext4/xx/mb_groups +reads during unmount. 
However, this introduced a use-after-free because +update_super_work calls ext4_notify_error_sysfs() -> sysfs_notify() which +accesses the kobject's kernfs_node after it has been freed by kobject_del() +in ext4_unregister_sysfs(): + + update_super_work ext4_put_super + ----------------- -------------- + ext4_unregister_sysfs(sb) + kobject_del(&sbi->s_kobj) + __kobject_del() + sysfs_remove_dir() + kobj->sd = NULL + sysfs_put(sd) + kernfs_put() // RCU free + ext4_notify_error_sysfs(sbi) + sysfs_notify(&sbi->s_kobj) + kn = kobj->sd // stale pointer + kernfs_get(kn) // UAF on freed kernfs_node + ext4_journal_destroy() + flush_work(&sbi->s_sb_upd_work) + +Instead of reordering the teardown sequence, fix this by making +ext4_notify_error_sysfs() detect that sysfs has already been torn down +by checking s_kobj.state_in_sysfs, and skipping the sysfs_notify() call +in that case. A dedicated mutex (s_error_notify_mutex) serializes +ext4_notify_error_sysfs() against kobject_del() in ext4_unregister_sysfs() +to prevent TOCTOU races where the kobject could be deleted between the +state_in_sysfs check and the sysfs_notify() call. 
+ +Fixes: b98535d09179 ("ext4: fix bug_on in start_this_handle during umount filesystem") +Cc: Jiayuan Chen +Suggested-by: Jan Kara +Signed-off-by: Jiayuan Chen +Reviewed-by: Ritesh Harjani (IBM) +Reviewed-by: Jan Kara +Link: https://patch.msgid.link/20260319120336.157873-1-jiayuan.chen@linux.dev +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +[ adapted mutex_init placement ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ext4.h | 1 + + fs/ext4/super.c | 1 + + fs/ext4/sysfs.c | 10 +++++++++- + 3 files changed, 11 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1557,6 +1557,7 @@ struct ext4_sb_info { + struct proc_dir_entry *s_proc; + struct kobject s_kobj; + struct completion s_kobj_unregister; ++ struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */ + struct super_block *s_sb; + struct buffer_head *s_mmp_bh; + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5263,6 +5263,7 @@ static int __ext4_fill_super(struct fs_c + + timer_setup(&sbi->s_err_report, print_daily_error_info, 0); + spin_lock_init(&sbi->s_error_lock); ++ mutex_init(&sbi->s_error_notify_mutex); + INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); + + /* Register extent status tree shrinker */ +--- a/fs/ext4/sysfs.c ++++ b/fs/ext4/sysfs.c +@@ -515,7 +515,10 @@ static struct kobj_type ext4_feat_ktype + + void ext4_notify_error_sysfs(struct ext4_sb_info *sbi) + { +- sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); ++ mutex_lock(&sbi->s_error_notify_mutex); ++ if (sbi->s_kobj.state_in_sysfs) ++ sysfs_notify(&sbi->s_kobj, NULL, "errors_count"); ++ mutex_unlock(&sbi->s_error_notify_mutex); + } + + static struct kobject *ext4_root; +@@ -528,8 +531,10 @@ int ext4_register_sysfs(struct super_blo + int err; + + init_completion(&sbi->s_kobj_unregister); ++ mutex_lock(&sbi->s_error_notify_mutex); + err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root, + "%s", sb->s_id); ++ 
mutex_unlock(&sbi->s_error_notify_mutex); + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); +@@ -562,7 +567,10 @@ void ext4_unregister_sysfs(struct super_ + + if (sbi->s_proc) + remove_proc_subtree(sb->s_id, ext4_proc_root); ++ ++ mutex_lock(&sbi->s_error_notify_mutex); + kobject_del(&sbi->s_kobj); ++ mutex_unlock(&sbi->s_error_notify_mutex); + } + + int __init ext4_init_sysfs(void) diff --git a/queue-6.1/ext4-handle-wraparound-when-searching-for-blocks-for-indirect-mapped-blocks.patch b/queue-6.1/ext4-handle-wraparound-when-searching-for-blocks-for-indirect-mapped-blocks.patch new file mode 100644 index 0000000000..43daedd6bb --- /dev/null +++ b/queue-6.1/ext4-handle-wraparound-when-searching-for-blocks-for-indirect-mapped-blocks.patch @@ -0,0 +1,65 @@ +From stable+bounces-233184-greg=kroah.com@vger.kernel.org Fri Apr 3 15:37:53 2026 +From: Sasha Levin +Date: Fri, 3 Apr 2026 09:35:36 -0400 +Subject: ext4: handle wraparound when searching for blocks for indirect mapped blocks +To: stable@vger.kernel.org +Cc: Theodore Ts'o , Jan Kara , Baokun Li , stable@kernel.org, Sasha Levin +Message-ID: <20260403133536.2143135-1-sashal@kernel.org> + +From: Theodore Ts'o + +[ Upstream commit bb81702370fad22c06ca12b6e1648754dbc37e0f ] + +Commit 4865c768b563 ("ext4: always allocate blocks only from groups +inode can use") restricts what blocks will be allocated for indirect +block based files to block numbers that fit within 32-bit block +numbers. + +However, when using a review bot running on the latest Gemini LLM to +check this commit when backporting into an LTS based kernel, it raised +this concern: + + If ac->ac_g_ex.fe_group is >= ngroups (for instance, if the goal + group was populated via stream allocation from s_mb_last_groups), + then start will be >= ngroups. + + Does this allow allocating blocks beyond the 32-bit limit for + indirect block mapped files? 
The commit message mentions that + ext4_mb_scan_groups_linear() takes care to not select unsupported + groups. However, its loop uses group = *start, and the very first + iteration will call ext4_mb_scan_group() with this unsupported + group because next_linear_group() is only called at the end of the + iteration. + +After reviewing the code paths involved and considering the LLM +review, I determined that this can happen when there is a file system +where some files/directories are extent-mapped and others are +indirect-block mapped. To address this, add a safety clamp in +ext4_mb_scan_groups(). + +Fixes: 4865c768b563 ("ext4: always allocate blocks only from groups inode can use") +Cc: Jan Kara +Reviewed-by: Baokun Li +Reviewed-by: Jan Kara +Signed-off-by: Theodore Ts'o +Link: https://patch.msgid.link/20260326045834.1175822-1-tytso@mit.edu +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +[ adapted fix from ext4_mb_scan_groups() to inline equivalent in ext4_mb_regular_allocator() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2733,6 +2733,8 @@ repeat: + * from the goal value specified + */ + group = ac->ac_g_ex.fe_group; ++ if (group >= ngroups) ++ group = 0; + ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups; + prefetch_grp = group; + diff --git a/queue-6.1/ext4-publish-jinode-after-initialization.patch b/queue-6.1/ext4-publish-jinode-after-initialization.patch new file mode 100644 index 0000000000..f5da42d924 --- /dev/null +++ b/queue-6.1/ext4-publish-jinode-after-initialization.patch @@ -0,0 +1,152 @@ +From stable+bounces-233085-greg=kroah.com@vger.kernel.org Thu Apr 2 19:46:58 2026 +From: Sasha Levin +Date: Thu, 2 Apr 2026 13:41:23 -0400 +Subject: ext4: publish jinode after initialization +To: stable@vger.kernel.org +Cc: Li Chen , Jan Kara , Theodore Ts'o , stable@kernel.org, Sasha Levin +Message-ID: 
<20260402174123.1572392-1-sashal@kernel.org> + +From: Li Chen + +[ Upstream commit 1aec30021edd410b986c156f195f3d23959a9d11 ] + +ext4_inode_attach_jinode() publishes ei->jinode to concurrent users. +It used to set ei->jinode before jbd2_journal_init_jbd_inode(), +allowing a reader to observe a non-NULL jinode with i_vfs_inode +still unset. + +The fast commit flush path can then pass this jinode to +jbd2_wait_inode_data(), which dereferences i_vfs_inode->i_mapping and +may crash. + +Below is the crash I observe: +``` +BUG: unable to handle page fault for address: 000000010beb47f4 +PGD 110e51067 P4D 110e51067 PUD 0 +Oops: Oops: 0000 [#1] SMP NOPTI +CPU: 1 UID: 0 PID: 4850 Comm: fc_fsync_bench_ Not tainted 6.18.0-00764-g795a690c06a5 #1 PREEMPT(voluntary) +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.17.0-2-2 04/01/2014 +RIP: 0010:xas_find_marked+0x3d/0x2e0 +Code: e0 03 48 83 f8 02 0f 84 f0 01 00 00 48 8b 47 08 48 89 c3 48 39 c6 0f 82 fd 01 00 00 48 85 c9 74 3d 48 83 f9 03 77 63 4c 8b 0f <49> 8b 71 08 48 c7 47 18 00 00 00 00 48 89 f1 83 e1 03 48 83 f9 02 +RSP: 0018:ffffbbee806e7bf0 EFLAGS: 00010246 +RAX: 000000000010beb4 RBX: 000000000010beb4 RCX: 0000000000000003 +RDX: 0000000000000001 RSI: 0000002000300000 RDI: ffffbbee806e7c10 +RBP: 0000000000000001 R08: 0000002000300000 R09: 000000010beb47ec +R10: ffff9ea494590090 R11: 0000000000000000 R12: 0000002000300000 +R13: ffffbbee806e7c90 R14: ffff9ea494513788 R15: ffffbbee806e7c88 +FS: 00007fc2f9e3e6c0(0000) GS:ffff9ea6b1444000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000000010beb47f4 CR3: 0000000119ac5000 CR4: 0000000000750ef0 +PKRU: 55555554 +Call Trace: + +filemap_get_folios_tag+0x87/0x2a0 +__filemap_fdatawait_range+0x5f/0xd0 +? srso_alias_return_thunk+0x5/0xfbef5 +? __schedule+0x3e7/0x10c0 +? srso_alias_return_thunk+0x5/0xfbef5 +? srso_alias_return_thunk+0x5/0xfbef5 +? srso_alias_return_thunk+0x5/0xfbef5 +? preempt_count_sub+0x5f/0x80 +? 
srso_alias_return_thunk+0x5/0xfbef5 +? cap_safe_nice+0x37/0x70 +? srso_alias_return_thunk+0x5/0xfbef5 +? preempt_count_sub+0x5f/0x80 +? srso_alias_return_thunk+0x5/0xfbef5 +filemap_fdatawait_range_keep_errors+0x12/0x40 +ext4_fc_commit+0x697/0x8b0 +? ext4_file_write_iter+0x64b/0x950 +? srso_alias_return_thunk+0x5/0xfbef5 +? preempt_count_sub+0x5f/0x80 +? srso_alias_return_thunk+0x5/0xfbef5 +? vfs_write+0x356/0x480 +? srso_alias_return_thunk+0x5/0xfbef5 +? preempt_count_sub+0x5f/0x80 +ext4_sync_file+0xf7/0x370 +do_fsync+0x3b/0x80 +? syscall_trace_enter+0x108/0x1d0 +__x64_sys_fdatasync+0x16/0x20 +do_syscall_64+0x62/0x2c0 +entry_SYSCALL_64_after_hwframe+0x76/0x7e +... +``` + +Fix this by initializing the jbd2_inode first. +Use smp_wmb() and WRITE_ONCE() to publish ei->jinode after +initialization. Readers use READ_ONCE() to fetch the pointer. + +Fixes: a361293f5fede ("jbd2: Fix oops in jbd2_journal_file_inode()") +Cc: stable@vger.kernel.org +Signed-off-by: Li Chen +Reviewed-by: Jan Kara +Link: https://patch.msgid.link/20260225082617.147957-1-me@linux.beauty +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +[ adapted READ_ONCE(jinode) wrapping to split ext4_fc_submit_inode_data_all() and ext4_fc_wait_inode_data_all() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/fast_commit.c | 4 ++-- + fs/ext4/inode.c | 15 +++++++++++---- + 2 files changed, 13 insertions(+), 6 deletions(-) + +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -1019,7 +1019,7 @@ static int ext4_fc_submit_inode_data_all + finish_wait(&ei->i_fc_wait, &wait); + } + spin_unlock(&sbi->s_fc_lock); +- ret = jbd2_submit_inode_data(ei->jinode); ++ ret = jbd2_submit_inode_data(READ_ONCE(ei->jinode)); + if (ret) + return ret; + spin_lock(&sbi->s_fc_lock); +@@ -1044,7 +1044,7 @@ static int ext4_fc_wait_inode_data_all(j + continue; + spin_unlock(&sbi->s_fc_lock); + +- ret = jbd2_wait_inode_data(journal, pos->jinode); ++ ret = jbd2_wait_inode_data(journal, 
READ_ONCE(pos->jinode)); + if (ret) + return ret; + spin_lock(&sbi->s_fc_lock); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -122,6 +122,8 @@ void ext4_inode_csum_set(struct inode *i + static inline int ext4_begin_ordered_truncate(struct inode *inode, + loff_t new_size) + { ++ struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode); ++ + trace_ext4_begin_ordered_truncate(inode, new_size); + /* + * If jinode is zero, then we never opened the file for +@@ -129,10 +131,10 @@ static inline int ext4_begin_ordered_tru + * jbd2_journal_begin_ordered_truncate() since there's no + * outstanding writes we need to flush. + */ +- if (!EXT4_I(inode)->jinode) ++ if (!jinode) + return 0; + return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), +- EXT4_I(inode)->jinode, ++ jinode, + new_size); + } + +@@ -4184,8 +4186,13 @@ int ext4_inode_attach_jinode(struct inod + spin_unlock(&inode->i_lock); + return -ENOMEM; + } +- ei->jinode = jinode; +- jbd2_journal_init_jbd_inode(ei->jinode, inode); ++ jbd2_journal_init_jbd_inode(jinode, inode); ++ /* ++ * Publish ->jinode only after it is fully initialized so that ++ * readers never observe a partially initialized jbd2_inode. 
++ */ ++ smp_wmb(); ++ WRITE_ONCE(ei->jinode, jinode); + jinode = NULL; + } + spin_unlock(&inode->i_lock); diff --git a/queue-6.1/ext4-use-ext4_group_desc_free-in-ext4_put_super-to-save-some-duplicated-code.patch b/queue-6.1/ext4-use-ext4_group_desc_free-in-ext4_put_super-to-save-some-duplicated-code.patch new file mode 100644 index 0000000000..71aad1cddb --- /dev/null +++ b/queue-6.1/ext4-use-ext4_group_desc_free-in-ext4_put_super-to-save-some-duplicated-code.patch @@ -0,0 +1,86 @@ +From stable+bounces-233063-greg=kroah.com@vger.kernel.org Thu Apr 2 18:40:38 2026 +From: Sasha Levin +Date: Thu, 2 Apr 2026 12:31:13 -0400 +Subject: ext4: use ext4_group_desc_free() in ext4_put_super() to save some duplicated code +To: stable@vger.kernel.org +Cc: Jason Yan , Theodore Ts'o , Sasha Levin +Message-ID: <20260402163115.1385749-2-sashal@kernel.org> + +From: Jason Yan + +[ Upstream commit 6ef684988816fdfa29ceff260c97d725a489a942 ] + +The only difference here is that ->s_group_desc and ->s_flex_groups share +the same rcu read lock here but it is not necessary. In other places they +do not share the lock at all. 
+ +Signed-off-by: Jason Yan +Link: https://lore.kernel.org/r/20230323140517.1070239-4-yanaijie@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 496bb99b7e66 ("ext4: fix the might_sleep() warnings in kvfree()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 32 ++++++++++++++------------------ + 1 file changed, 14 insertions(+), 18 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1236,11 +1236,23 @@ static void ext4_percpu_param_destroy(st + percpu_free_rwsem(&sbi->s_writepages_rwsem); + } + ++static void ext4_group_desc_free(struct ext4_sb_info *sbi) ++{ ++ struct buffer_head **group_desc; ++ int i; ++ ++ rcu_read_lock(); ++ group_desc = rcu_dereference(sbi->s_group_desc); ++ for (i = 0; i < sbi->s_gdb_count; i++) ++ brelse(group_desc[i]); ++ kvfree(group_desc); ++ rcu_read_unlock(); ++} ++ + static void ext4_put_super(struct super_block *sb) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; +- struct buffer_head **group_desc; + struct flex_groups **flex_groups; + int aborted = 0; + int i, err; +@@ -1290,11 +1302,8 @@ static void ext4_put_super(struct super_ + if (!sb_rdonly(sb)) + ext4_commit_super(sb); + ++ ext4_group_desc_free(sbi); + rcu_read_lock(); +- group_desc = rcu_dereference(sbi->s_group_desc); +- for (i = 0; i < sbi->s_gdb_count; i++) +- brelse(group_desc[i]); +- kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) +@@ -4770,19 +4779,6 @@ static int ext4_geometry_check(struct su + return 0; + } + +-static void ext4_group_desc_free(struct ext4_sb_info *sbi) +-{ +- struct buffer_head **group_desc; +- int i; +- +- rcu_read_lock(); +- group_desc = rcu_dereference(sbi->s_group_desc); +- for (i = 0; i < sbi->s_gdb_count; i++) +- brelse(group_desc[i]); +- kvfree(group_desc); +- rcu_read_unlock(); +-} +- + static int ext4_group_desc_init(struct super_block *sb, + 
struct ext4_super_block *es, + ext4_fsblk_t logical_sb_block, diff --git a/queue-6.1/hwmon-pmbus-core-add-lock-and-unlock-functions.patch b/queue-6.1/hwmon-pmbus-core-add-lock-and-unlock-functions.patch new file mode 100644 index 0000000000..0bbaa9762b --- /dev/null +++ b/queue-6.1/hwmon-pmbus-core-add-lock-and-unlock-functions.patch @@ -0,0 +1,106 @@ +From stable+bounces-231296-greg=kroah.com@vger.kernel.org Tue Mar 31 01:46:33 2026 +From: Sasha Levin +Date: Mon, 30 Mar 2026 19:46:27 -0400 +Subject: hwmon: (pmbus/core) Add lock and unlock functions +To: stable@vger.kernel.org +Cc: Eddie James , Guenter Roeck , Sasha Levin +Message-ID: <20260330234628.1398011-1-sashal@kernel.org> + +From: Eddie James + +[ Upstream commit a7ac37183ac2a0cc46d857997b2dd24997ca2754 ] + +Debugfs operations may set the page number, which must be done +atomically with the subsequent i2c operation. Lock the update_lock +in the debugfs functions and provide a function for pmbus drivers +to lock and unlock the update_lock. 
+ +Signed-off-by: Eddie James +Link: https://lore.kernel.org/r/20230412161526.252294-2-eajames@linux.ibm.com +Signed-off-by: Guenter Roeck +Stable-dep-of: 3075a3951f77 ("hwmon: (pmbus/isl68137) Add mutex protection for AVS enable sysfs attributes") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hwmon/pmbus/pmbus.h | 2 ++ + drivers/hwmon/pmbus/pmbus_core.c | 30 ++++++++++++++++++++++++++++++ + 2 files changed, 32 insertions(+) + +--- a/drivers/hwmon/pmbus/pmbus.h ++++ b/drivers/hwmon/pmbus/pmbus.h +@@ -510,6 +510,8 @@ int pmbus_get_fan_rate_device(struct i2c + enum pmbus_fan_mode mode); + int pmbus_get_fan_rate_cached(struct i2c_client *client, int page, int id, + enum pmbus_fan_mode mode); ++int pmbus_lock_interruptible(struct i2c_client *client); ++void pmbus_unlock(struct i2c_client *client); + int pmbus_update_fan(struct i2c_client *client, int page, int id, + u8 config, u8 mask, u16 command); + struct dentry *pmbus_get_debugfs_dir(struct i2c_client *client); +--- a/drivers/hwmon/pmbus/pmbus_core.c ++++ b/drivers/hwmon/pmbus/pmbus_core.c +@@ -3049,8 +3049,13 @@ static int pmbus_debugfs_get(void *data, + { + int rc; + struct pmbus_debugfs_entry *entry = data; ++ struct pmbus_data *pdata = i2c_get_clientdata(entry->client); + ++ rc = mutex_lock_interruptible(&pdata->update_lock); ++ if (rc) ++ return rc; + rc = _pmbus_read_byte_data(entry->client, entry->page, entry->reg); ++ mutex_unlock(&pdata->update_lock); + if (rc < 0) + return rc; + +@@ -3067,7 +3072,11 @@ static int pmbus_debugfs_get_status(void + struct pmbus_debugfs_entry *entry = data; + struct pmbus_data *pdata = i2c_get_clientdata(entry->client); + ++ rc = mutex_lock_interruptible(&pdata->update_lock); ++ if (rc) ++ return rc; + rc = pdata->read_status(entry->client, entry->page); ++ mutex_unlock(&pdata->update_lock); + if (rc < 0) + return rc; + +@@ -3083,10 +3092,15 @@ static ssize_t pmbus_debugfs_mfr_read(st + { + int rc; + struct pmbus_debugfs_entry *entry = 
file->private_data; ++ struct pmbus_data *pdata = i2c_get_clientdata(entry->client); + char data[I2C_SMBUS_BLOCK_MAX + 2] = { 0 }; + ++ rc = mutex_lock_interruptible(&pdata->update_lock); ++ if (rc) ++ return rc; + rc = pmbus_read_block_data(entry->client, entry->page, entry->reg, + data); ++ mutex_unlock(&pdata->update_lock); + if (rc < 0) + return rc; + +@@ -3420,6 +3434,22 @@ struct dentry *pmbus_get_debugfs_dir(str + } + EXPORT_SYMBOL_NS_GPL(pmbus_get_debugfs_dir, PMBUS); + ++int pmbus_lock_interruptible(struct i2c_client *client) ++{ ++ struct pmbus_data *data = i2c_get_clientdata(client); ++ ++ return mutex_lock_interruptible(&data->update_lock); ++} ++EXPORT_SYMBOL_NS_GPL(pmbus_lock_interruptible, PMBUS); ++ ++void pmbus_unlock(struct i2c_client *client) ++{ ++ struct pmbus_data *data = i2c_get_clientdata(client); ++ ++ mutex_unlock(&data->update_lock); ++} ++EXPORT_SYMBOL_NS_GPL(pmbus_unlock, PMBUS); ++ + static int __init pmbus_core_init(void) + { + pmbus_debugfs_dir = debugfs_create_dir("pmbus", NULL); diff --git a/queue-6.1/hwmon-pmbus-isl68137-add-mutex-protection-for-avs-enable-sysfs-attributes.patch b/queue-6.1/hwmon-pmbus-isl68137-add-mutex-protection-for-avs-enable-sysfs-attributes.patch new file mode 100644 index 0000000000..67260de445 --- /dev/null +++ b/queue-6.1/hwmon-pmbus-isl68137-add-mutex-protection-for-avs-enable-sysfs-attributes.patch @@ -0,0 +1,92 @@ +From stable+bounces-231297-greg=kroah.com@vger.kernel.org Tue Mar 31 01:46:40 2026 +From: Sasha Levin +Date: Mon, 30 Mar 2026 19:46:28 -0400 +Subject: hwmon: (pmbus/isl68137) Add mutex protection for AVS enable sysfs attributes +To: stable@vger.kernel.org +Cc: Sanman Pradhan , Guenter Roeck , Sasha Levin +Message-ID: <20260330234628.1398011-2-sashal@kernel.org> + +From: Sanman Pradhan + +[ Upstream commit 3075a3951f7708da5a8ab47b0b7d068a32f69e58 ] + +The custom avs0_enable and avs1_enable sysfs attributes access PMBus +registers through the exported API helpers (pmbus_read_byte_data, 
+pmbus_read_word_data, pmbus_write_word_data, pmbus_update_byte_data) +without holding the PMBus update_lock mutex. These exported helpers do +not acquire the mutex internally, unlike the core's internal callers +which hold the lock before invoking them. + +The store callback is especially vulnerable: it performs a multi-step +read-modify-write sequence (read VOUT_COMMAND, write VOUT_COMMAND, then +update OPERATION) where concurrent access from another thread could +interleave and corrupt the register state. + +Add pmbus_lock_interruptible()/pmbus_unlock() around both the show and +store callbacks to serialize PMBus register access with the rest of the +driver. + +Fixes: 038a9c3d1e424 ("hwmon: (pmbus/isl68137) Add driver for Intersil ISL68137 PWM Controller") +Cc: stable@vger.kernel.org +Signed-off-by: Sanman Pradhan +Link: https://lore.kernel.org/r/20260319173055.125271-3-sanman.pradhan@hpe.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hwmon/pmbus/isl68137.c | 21 ++++++++++++++++++--- + 1 file changed, 18 insertions(+), 3 deletions(-) + +--- a/drivers/hwmon/pmbus/isl68137.c ++++ b/drivers/hwmon/pmbus/isl68137.c +@@ -78,7 +78,15 @@ static ssize_t isl68137_avs_enable_show_ + int page, + char *buf) + { +- int val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); ++ int val; ++ ++ val = pmbus_lock_interruptible(client); ++ if (val) ++ return val; ++ ++ val = pmbus_read_byte_data(client, page, PMBUS_OPERATION); ++ ++ pmbus_unlock(client); + + if (val < 0) + return val; +@@ -100,6 +108,10 @@ static ssize_t isl68137_avs_enable_store + + op_val = result ? ISL68137_VOUT_AVS : 0; + ++ rc = pmbus_lock_interruptible(client); ++ if (rc) ++ return rc; ++ + /* + * Writes to VOUT setpoint over AVSBus will persist after the VRM is + * switched to PMBus control. 
Switching back to AVSBus control +@@ -111,17 +123,20 @@ static ssize_t isl68137_avs_enable_store + rc = pmbus_read_word_data(client, page, 0xff, + PMBUS_VOUT_COMMAND); + if (rc < 0) +- return rc; ++ goto unlock; + + rc = pmbus_write_word_data(client, page, PMBUS_VOUT_COMMAND, + rc); + if (rc < 0) +- return rc; ++ goto unlock; + } + + rc = pmbus_update_byte_data(client, page, PMBUS_OPERATION, + ISL68137_VOUT_AVS, op_val); + ++unlock: ++ pmbus_unlock(client); ++ + return (rc < 0) ? rc : count; + } + diff --git a/queue-6.1/ksmbd-fix-memory-leaks-and-null-deref-in-smb2_lock.patch b/queue-6.1/ksmbd-fix-memory-leaks-and-null-deref-in-smb2_lock.patch new file mode 100644 index 0000000000..951258fa47 --- /dev/null +++ b/queue-6.1/ksmbd-fix-memory-leaks-and-null-deref-in-smb2_lock.patch @@ -0,0 +1,119 @@ +From stable+bounces-231277-greg=kroah.com@vger.kernel.org Mon Mar 30 23:07:47 2026 +From: Sasha Levin +Date: Mon, 30 Mar 2026 17:07:40 -0400 +Subject: ksmbd: fix memory leaks and NULL deref in smb2_lock() +To: stable@vger.kernel.org +Cc: Werner Kasselman , ChenXiaoSong , Namjae Jeon , Steve French , Sasha Levin +Message-ID: <20260330210740.1213246-1-sashal@kernel.org> + +From: Werner Kasselman + +[ Upstream commit 309b44ed684496ed3f9c5715d10b899338623512 ] + +smb2_lock() has three error handling issues after list_del() detaches +smb_lock from lock_list at no_check_cl: + +1) If vfs_lock_file() returns an unexpected error in the non-UNLOCK + path, goto out leaks smb_lock and its flock because the out: + handler only iterates lock_list and rollback_list, neither of + which contains the detached smb_lock. + +2) If vfs_lock_file() returns -ENOENT in the UNLOCK path, goto out + leaks smb_lock and flock for the same reason. The error code + returned to the dispatcher is also stale. + +3) In the rollback path, smb_flock_init() can return NULL on + allocation failure. The result is dereferenced unconditionally, + causing a kernel NULL pointer dereference. 
Add a NULL check to + prevent the crash and clean up the bookkeeping; the VFS lock + itself cannot be rolled back without the allocation and will be + released at file or connection teardown. + +Fix cases 1 and 2 by hoisting the locks_free_lock()/kfree() to before +the if(!rc) check in the UNLOCK branch so all exit paths share one +free site, and by freeing smb_lock and flock before goto out in the +non-UNLOCK branch. Propagate the correct error code in both cases. +Fix case 3 by wrapping the VFS unlock in an if(rlock) guard and adding +a NULL check for locks_free_lock(rlock) in the shared cleanup. + +Found via call-graph analysis using sqry. + +Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") +Cc: stable@vger.kernel.org +Suggested-by: ChenXiaoSong +Signed-off-by: Werner Kasselman +Reviewed-by: ChenXiaoSong +Acked-by: Namjae Jeon +Signed-off-by: Steve French +[ adapted rlock->c.flc_type to rlock->fl_type ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/smb2pdu.c | 29 +++++++++++++++++++---------- + 1 file changed, 19 insertions(+), 10 deletions(-) + +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -7145,14 +7145,15 @@ retry: + rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL); + skip: + if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) { ++ locks_free_lock(flock); ++ kfree(smb_lock); + if (!rc) { + ksmbd_debug(SMB, "File unlocked\n"); + } else if (rc == -ENOENT) { + rsp->hdr.Status = STATUS_NOT_LOCKED; ++ err = rc; + goto out; + } +- locks_free_lock(flock); +- kfree(smb_lock); + } else { + if (rc == FILE_LOCK_DEFERRED) { + void **argv; +@@ -7221,6 +7222,9 @@ skip: + spin_unlock(&work->conn->llist_lock); + ksmbd_debug(SMB, "successful in taking lock\n"); + } else { ++ locks_free_lock(flock); ++ kfree(smb_lock); ++ err = rc; + goto out; + } + } +@@ -7251,13 +7255,17 @@ out: + struct file_lock *rlock = NULL; + + rlock = smb_flock_init(filp); +- rlock->fl_type = F_UNLCK; +- rlock->fl_start = 
smb_lock->start; +- rlock->fl_end = smb_lock->end; +- +- rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); +- if (rc) +- pr_err("rollback unlock fail : %d\n", rc); ++ if (rlock) { ++ rlock->fl_type = F_UNLCK; ++ rlock->fl_start = smb_lock->start; ++ rlock->fl_end = smb_lock->end; ++ ++ rc = vfs_lock_file(filp, F_SETLK, rlock, NULL); ++ if (rc) ++ pr_err("rollback unlock fail : %d\n", rc); ++ } else { ++ pr_err("rollback unlock alloc failed\n"); ++ } + + list_del(&smb_lock->llist); + spin_lock(&work->conn->llist_lock); +@@ -7267,7 +7275,8 @@ out: + spin_unlock(&work->conn->llist_lock); + + locks_free_lock(smb_lock->fl); +- locks_free_lock(rlock); ++ if (rlock) ++ locks_free_lock(rlock); + kfree(smb_lock); + } + out2: diff --git a/queue-6.1/ksmbd-fix-potencial-oob-in-get_file_all_info-for-compound-requests.patch b/queue-6.1/ksmbd-fix-potencial-oob-in-get_file_all_info-for-compound-requests.patch new file mode 100644 index 0000000000..b9c7d1eb1c --- /dev/null +++ b/queue-6.1/ksmbd-fix-potencial-oob-in-get_file_all_info-for-compound-requests.patch @@ -0,0 +1,76 @@ +From stable+bounces-231264-greg=kroah.com@vger.kernel.org Mon Mar 30 20:38:25 2026 +From: Sasha Levin +Date: Mon, 30 Mar 2026 14:38:19 -0400 +Subject: ksmbd: fix potencial OOB in get_file_all_info() for compound requests +To: stable@vger.kernel.org +Cc: Namjae Jeon , Asim Viladi Oglu Manizada , Steve French , Sasha Levin +Message-ID: <20260330183819.950934-1-sashal@kernel.org> + +From: Namjae Jeon + +[ Upstream commit beef2634f81f1c086208191f7228bce1d366493d ] + +When a compound request consists of QUERY_DIRECTORY + QUERY_INFO +(FILE_ALL_INFORMATION) and the first command consumes nearly the entire +max_trans_size, get_file_all_info() would blindly call smbConvertToUTF16() +with PATH_MAX, causing out-of-bounds write beyond the response buffer. 
+In get_file_all_info(), there was a missing validation check for +the client-provided OutputBufferLength before copying the filename into +FileName field of the smb2_file_all_info structure. +If the filename length exceeds the available buffer space, it could lead to +potential buffer overflows or memory corruption during smbConvertToUTF16 +conversion. Fix this by calculating the actual free buffer size using +smb2_calc_max_out_buf_len(), returning -EINVAL if the buffer is +insufficient, and updating smbConvertToUTF16 to use the actual filename +length (clamped by PATH_MAX) to ensure a safe copy operation. + +Cc: stable@vger.kernel.org +Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") +Reported-by: Asim Viladi Oglu Manizada +Signed-off-by: Namjae Jeon +Signed-off-by: Steve French +[ adapted variable declarations ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/smb/server/smb2pdu.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -4606,6 +4606,8 @@ static int get_file_all_info(struct ksmb + int conv_len; + char *filename; + u64 time; ++ int buf_free_len, filename_len; ++ struct smb2_query_info_req *req = ksmbd_req_buf_next(work); + + if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) { + ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n", +@@ -4617,6 +4619,16 @@ static int get_file_all_info(struct ksmb + if (IS_ERR(filename)) + return PTR_ERR(filename); + ++ filename_len = strlen(filename); ++ buf_free_len = smb2_calc_max_out_buf_len(work, ++ offsetof(struct smb2_query_info_rsp, Buffer) + ++ offsetof(struct smb2_file_all_info, FileName), ++ le32_to_cpu(req->OutputBufferLength)); ++ if (buf_free_len < (filename_len + 1) * 2) { ++ kfree(filename); ++ return -EINVAL; ++ } ++ + inode = file_inode(fp->filp); + generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat); + +@@ -4648,7 +4660,8 @@ static int get_file_all_info(struct ksmb + 
file_info->Mode = fp->coption; + file_info->AlignmentRequirement = 0; + conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, +- PATH_MAX, conn->local_nls, 0); ++ min(filename_len, PATH_MAX), ++ conn->local_nls, 0); + conv_len *= 2; + file_info->FileNameLength = cpu_to_le32(conv_len); + rsp->OutputBufferLength = diff --git a/queue-6.1/kvm-x86-mmu-drop-zap-existing-present-spte-even-when-creating-an-mmio-spte.patch b/queue-6.1/kvm-x86-mmu-drop-zap-existing-present-spte-even-when-creating-an-mmio-spte.patch new file mode 100644 index 0000000000..6b10dfd9d8 --- /dev/null +++ b/queue-6.1/kvm-x86-mmu-drop-zap-existing-present-spte-even-when-creating-an-mmio-spte.patch @@ -0,0 +1,89 @@ +From stable+bounces-232605-greg=kroah.com@vger.kernel.org Wed Apr 1 01:17:50 2026 +From: Sasha Levin +Date: Tue, 31 Mar 2026 19:17:21 -0400 +Subject: KVM: x86/mmu: Drop/zap existing present SPTE even when creating an MMIO SPTE +To: stable@vger.kernel.org +Cc: Sean Christopherson , Alexander Bulekov , Fred Griffoul , Sasha Levin +Message-ID: <20260331231721.3421247-1-sashal@kernel.org> + +From: Sean Christopherson + +[ Upstream commit aad885e774966e97b675dfe928da164214a71605 ] + +When installing an emulated MMIO SPTE, do so *after* dropping/zapping the +existing SPTE (if it's shadow-present). While commit a54aa15c6bda3 was +right about it being impossible to convert a shadow-present SPTE to an +MMIO SPTE due to a _guest_ write, it failed to account for writes to guest +memory that are outside the scope of KVM. + +E.g. if host userspace modifies a shadowed gPTE to switch from a memslot +to emulated MMIO and then the guest hits a relevant page fault, KVM will +install the MMIO SPTE without first zapping the shadow-present SPTE. 
+ + ------------[ cut here ]------------ + is_shadow_present_pte(*sptep) + WARNING: arch/x86/kvm/mmu/mmu.c:484 at mark_mmio_spte+0xb2/0xc0 [kvm], CPU#0: vmx_ept_stale_r/4292 + Modules linked in: kvm_intel kvm irqbypass + CPU: 0 UID: 1000 PID: 4292 Comm: vmx_ept_stale_r Not tainted 7.0.0-rc2-eafebd2d2ab0-sink-vm #319 PREEMPT + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:mark_mmio_spte+0xb2/0xc0 [kvm] + Call Trace: + + mmu_set_spte+0x237/0x440 [kvm] + ept_page_fault+0x535/0x7f0 [kvm] + kvm_mmu_do_page_fault+0xee/0x1f0 [kvm] + kvm_mmu_page_fault+0x8d/0x620 [kvm] + vmx_handle_exit+0x18c/0x5a0 [kvm_intel] + kvm_arch_vcpu_ioctl_run+0xc55/0x1c20 [kvm] + kvm_vcpu_ioctl+0x2d5/0x980 [kvm] + __x64_sys_ioctl+0x8a/0xd0 + do_syscall_64+0xb5/0x730 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + RIP: 0033:0x47fa3f + + ---[ end trace 0000000000000000 ]--- + +Reported-by: Alexander Bulekov +Debugged-by: Alexander Bulekov +Suggested-by: Fred Griffoul +Fixes: a54aa15c6bda3 ("KVM: x86/mmu: Handle MMIO SPTEs directly in mmu_set_spte()") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +[ replaced kvm_flush_remote_tlbs_gfn() with kvm_flush_remote_tlbs_with_address() and preserved pgprintk call ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -2814,12 +2814,6 @@ static int mmu_set_spte(struct kvm_vcpu + pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, + *sptep, write_fault, gfn); + +- if (unlikely(is_noslot_pfn(pfn))) { +- vcpu->stat.pf_mmio_spte_created++; +- mark_mmio_spte(vcpu, sptep, gfn, pte_access); +- return RET_PF_EMULATE; +- } +- + if (is_shadow_present_pte(*sptep)) { + /* + * If we overwrite a PTE page pointer with a 2MB PMD, unlink +@@ -2841,6 +2835,15 @@ static int mmu_set_spte(struct kvm_vcpu + was_rmapped = 1; + } + ++ if 
(unlikely(is_noslot_pfn(pfn))) { ++ vcpu->stat.pf_mmio_spte_created++; ++ mark_mmio_spte(vcpu, sptep, gfn, pte_access); ++ if (flush) ++ kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, ++ KVM_PAGES_PER_HPAGE(level)); ++ return RET_PF_EMULATE; ++ } ++ + wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch, + true, host_writable, &spte); + diff --git a/queue-6.1/mm-huge_memory-fix-folio-isn-t-locked-in-softleaf_to_folio.patch b/queue-6.1/mm-huge_memory-fix-folio-isn-t-locked-in-softleaf_to_folio.patch new file mode 100644 index 0000000000..fdb3bcff70 --- /dev/null +++ b/queue-6.1/mm-huge_memory-fix-folio-isn-t-locked-in-softleaf_to_folio.patch @@ -0,0 +1,101 @@ +From stable+bounces-231414-greg=kroah.com@vger.kernel.org Tue Mar 31 14:21:13 2026 +From: Sasha Levin +Date: Tue, 31 Mar 2026 08:16:44 -0400 +Subject: mm/huge_memory: fix folio isn't locked in softleaf_to_folio() +To: stable@vger.kernel.org +Cc: Jinjiang Tu , "David Hildenbrand (Arm)" , "Lorenzo Stoakes (Oracle)" , Barry Song , Kefeng Wang , Liam Howlett , Michal Hocko , Mike Rapoport , Nanyong Sun , Ryan Roberts , Suren Baghdasaryan , Vlastimil Babka , Andrew Morton , Sasha Levin +Message-ID: <20260331121644.2195769-1-sashal@kernel.org> + +From: Jinjiang Tu + +[ Upstream commit 4c5e7f0fcd592801c9cc18f29f80fbee84eb8669 ] + +On arm64 server, we found folio that get from migration entry isn't locked +in softleaf_to_folio(). This issue triggers when mTHP splitting and +zap_nonpresent_ptes() races, and the root cause is lack of memory barrier +in softleaf_to_folio(). 
The race is as follows: + + CPU0 CPU1 + +deferred_split_scan() zap_nonpresent_ptes() + lock folio + split_folio() + unmap_folio() + change ptes to migration entries + __split_folio_to_order() softleaf_to_folio() + set flags(including PG_locked) for tail pages folio = pfn_folio(softleaf_to_pfn(entry)) + smp_wmb() VM_WARN_ON_ONCE(!folio_test_locked(folio)) + prep_compound_page() for tail pages + +In __split_folio_to_order(), smp_wmb() guarantees page flags of tail pages +are visible before the tail page becomes non-compound. smp_wmb() should +be paired with smp_rmb() in softleaf_to_folio(), which is missing. As a +result, if zap_nonpresent_ptes() accesses migration entry that stores tail +pfn, softleaf_to_folio() may see the updated compound_head of tail page +before page->flags. + +This issue will trigger VM_WARN_ON_ONCE() in pfn_swap_entry_folio() +because of the race between folio split and zap_nonpresent_ptes() +leading to a folio incorrectly undergoing modification without a folio +lock being held. + +This is a BUG_ON() before commit 93976a20345b ("mm: eliminate further +swapops predicates"), which was merged in v6.19-rc1. + +To fix it, add missing smp_rmb() if the softleaf entry is migration entry +in softleaf_to_folio() and softleaf_to_page(). 
+ +[tujinjiang@huawei.com: update function name and comments] + Link: https://lkml.kernel.org/r/20260321075214.3305564-1-tujinjiang@huawei.com +Link: https://lkml.kernel.org/r/20260319012541.4158561-1-tujinjiang@huawei.com +Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()") +Signed-off-by: Jinjiang Tu +Acked-by: David Hildenbrand (Arm) +Reviewed-by: Lorenzo Stoakes (Oracle) +Cc: Barry Song +Cc: Kefeng Wang +Cc: Liam Howlett +Cc: Michal Hocko +Cc: Mike Rapoport +Cc: Nanyong Sun +Cc: Ryan Roberts +Cc: Suren Baghdasaryan +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +[ adapted fix from leafops.h softleaf_to_page()/softleaf_to_folio() ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/swapops.h | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/include/linux/swapops.h ++++ b/include/linux/swapops.h +@@ -541,11 +541,21 @@ static inline struct page *pfn_swap_entr + { + struct page *p = pfn_to_page(swp_offset_pfn(entry)); + +- /* +- * Any use of migration entries may only occur while the +- * corresponding page is locked +- */ +- BUG_ON(is_migration_entry(entry) && !PageLocked(p)); ++ if (is_migration_entry(entry)) { ++ /* ++ * Ensure we do not race with split, which might alter tail ++ * pages into new folios and thus result in observing an ++ * unlocked folio. ++ * This matches the write barrier in __split_folio_to_order(). 
++ */ ++ smp_rmb(); ++ ++ /* ++ * Any use of migration entries may only occur while the ++ * corresponding page is locked ++ */ ++ BUG_ON(!PageLocked(p)); ++ } + + return p; + } diff --git a/queue-6.1/mptcp-fix-lock-class-name-family-in-pm_nl_create_listen_socket.patch b/queue-6.1/mptcp-fix-lock-class-name-family-in-pm_nl_create_listen_socket.patch new file mode 100644 index 0000000000..19fd0241e6 --- /dev/null +++ b/queue-6.1/mptcp-fix-lock-class-name-family-in-pm_nl_create_listen_socket.patch @@ -0,0 +1,41 @@ +From matttbe@kernel.org Thu Apr 2 19:39:16 2026 +From: "Matthieu Baerts (NGI0)" +Date: Thu, 2 Apr 2026 19:39:08 +0200 +Subject: MPTCP: fix lock class name family in pm_nl_create_listen_socket +To: stable@vger.kernel.org, gregkh@linuxfoundation.org +Cc: MPTCP Upstream , Li Xiasong , "Matthieu Baerts (NGI0)" , Jakub Kicinski +Message-ID: <20260402173907.3408529-2-matttbe@kernel.org> + +From: Li Xiasong + +commit 7ab4a7c5d969642782b8a5b608da0dd02aa9f229 upstream. + +In mptcp_pm_nl_create_listen_socket(), use entry->addr.family +instead of sk->sk_family for lock class setup. The 'sk' parameter +is a netlink socket, not the MPTCP subflow socket being created. + +Fixes: cee4034a3db1 ("mptcp: fix lockdep false positive in mptcp_pm_nl_create_listen_socket()") +Signed-off-by: Li Xiasong +Reviewed-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260319112159.3118874-1-lixiasong1@huawei.com +Signed-off-by: Jakub Kicinski +[ Conflict in pm_kernel.c, because commit 8617e85e04bd ("mptcp: pm: + split in-kernel PM specific code") is not in this version, and moves + code from pm_netlink.c to pm_kernel.c. 
] +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -1184,7 +1184,7 @@ static struct lock_class_key mptcp_keys[ + static int mptcp_pm_nl_create_listen_socket(struct sock *sk, + struct mptcp_pm_addr_entry *entry) + { +- bool is_ipv6 = sk->sk_family == AF_INET6; ++ bool is_ipv6 = entry->addr.family == AF_INET6; + int addrlen = sizeof(struct sockaddr_in); + struct sockaddr_storage addr; + struct socket *ssock; diff --git a/queue-6.1/net-correctly-handle-tunneled-traffic-on-ipv6_csum-gso-fallback.patch b/queue-6.1/net-correctly-handle-tunneled-traffic-on-ipv6_csum-gso-fallback.patch new file mode 100644 index 0000000000..ae956eac06 --- /dev/null +++ b/queue-6.1/net-correctly-handle-tunneled-traffic-on-ipv6_csum-gso-fallback.patch @@ -0,0 +1,77 @@ +From stable+bounces-232604-greg=kroah.com@vger.kernel.org Wed Apr 1 01:17:40 2026 +From: Sasha Levin +Date: Tue, 31 Mar 2026 19:17:02 -0400 +Subject: net: correctly handle tunneled traffic on IPV6_CSUM GSO fallback +To: stable@vger.kernel.org +Cc: Willem de Bruijn , Tangxin Xie , Paolo Abeni , Sasha Levin +Message-ID: <20260331231702.3419414-1-sashal@kernel.org> + +From: Willem de Bruijn + +[ Upstream commit c4336a07eb6b2526dc2b62928b5104b41a7f81f5 ] + +NETIF_F_IPV6_CSUM only advertises support for checksum offload of +packets without IPv6 extension headers. Packets with extension +headers must fall back onto software checksumming. Since TSO +depends on checksum offload, those must revert to GSO. + +The below commit introduces that fallback. It always checks +network header length. For tunneled packets, the inner header length +must be checked instead. Extend the check accordingly. + +A special case is tunneled packets without inner IP protocol. Such as +RFC 6951 SCTP in UDP. 
Those are not standard IPv6 followed by +transport header either, so also must revert to the software GSO path. + +Cc: stable@vger.kernel.org +Fixes: 864e3396976e ("net: gso: Forbid IPv6 TSO with extensions on devices with only IPV6_CSUM") +Reported-by: Tangxin Xie +Closes: https://lore.kernel.org/netdev/0414e7e2-9a1c-4d7c-a99d-b9039cf68f40@yeah.net/ +Suggested-by: Paolo Abeni +Signed-off-by: Willem de Bruijn +Link: https://patch.msgid.link/20260320190148.2409107-1-willemdebruijn.kernel@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 22 +++++++++++++++++----- + 1 file changed, 17 insertions(+), 5 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3586,6 +3586,22 @@ static netdev_features_t dflt_features_c + return vlan_features_check(skb, features); + } + ++static bool skb_gso_has_extension_hdr(const struct sk_buff *skb) ++{ ++ if (!skb->encapsulation) ++ return ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || ++ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && ++ vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && ++ skb_transport_header_was_set(skb) && ++ skb_network_header_len(skb) != sizeof(struct ipv6hdr)); ++ else ++ return (!skb_inner_network_header_was_set(skb) || ++ ((skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || ++ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && ++ inner_ip_hdr(skb)->version == 6)) && ++ skb_inner_network_header_len(skb) != sizeof(struct ipv6hdr))); ++} ++ + static netdev_features_t gso_features_check(const struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +@@ -3627,11 +3643,7 @@ static netdev_features_t gso_features_ch + * so neither does TSO that depends on it. 
+ */ + if (features & NETIF_F_IPV6_CSUM && +- (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6 || +- (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && +- vlan_get_protocol(skb) == htons(ETH_P_IPV6))) && +- skb_transport_header_was_set(skb) && +- skb_network_header_len(skb) != sizeof(struct ipv6hdr) && ++ skb_gso_has_extension_hdr(skb) && + !ipv6_has_hopopt_jumbo(skb)) + features &= ~(NETIF_F_IPV6_CSUM | NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4); + diff --git a/queue-6.1/net-macb-move-devm_-free-request-_irq-out-of-spin-lock-area.patch b/queue-6.1/net-macb-move-devm_-free-request-_irq-out-of-spin-lock-area.patch new file mode 100644 index 0000000000..75078dc08a --- /dev/null +++ b/queue-6.1/net-macb-move-devm_-free-request-_irq-out-of-spin-lock-area.patch @@ -0,0 +1,147 @@ +From stable+bounces-232603-greg=kroah.com@vger.kernel.org Wed Apr 1 01:17:05 2026 +From: Sasha Levin +Date: Tue, 31 Mar 2026 19:17:00 -0400 +Subject: net: macb: Move devm_{free,request}_irq() out of spin lock area +To: stable@vger.kernel.org +Cc: "Kevin Hao" , "Théo Lebrun" , "Jakub Kicinski" , "Sasha Levin" +Message-ID: <20260331231700.3419361-1-sashal@kernel.org> + +From: Kevin Hao + +[ Upstream commit 317e49358ebbf6390fa439ef3c142f9239dd25fb ] + +The devm_free_irq() and devm_request_irq() functions should not be +executed in an atomic context. + +During device suspend, all userspace processes and most kernel threads +are frozen. Additionally, we flush all tx/rx status, disable all macb +interrupts, and halt rx operations. Therefore, it is safe to split the +region protected by bp->lock into two independent sections, allowing +devm_free_irq() and devm_request_irq() to run in a non-atomic context. 
+This modification resolves the following lockdep warning: + BUG: sleeping function called from invalid context at kernel/locking/mutex.c:591 + in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 501, name: rtcwake + preempt_count: 1, expected: 0 + RCU nest depth: 1, expected: 0 + 7 locks held by rtcwake/501: + #0: ffff0008038c3408 (sb_writers#5){.+.+}-{0:0}, at: vfs_write+0xf8/0x368 + #1: ffff0008049a5e88 (&of->mutex#2){+.+.}-{4:4}, at: kernfs_fop_write_iter+0xbc/0x1c8 + #2: ffff00080098d588 (kn->active#70){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xcc/0x1c8 + #3: ffff800081c84888 (system_transition_mutex){+.+.}-{4:4}, at: pm_suspend+0x1ec/0x290 + #4: ffff0008009ba0f8 (&dev->mutex){....}-{4:4}, at: device_suspend+0x118/0x4f0 + #5: ffff800081d00458 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x48 + #6: ffff0008031fb9e0 (&bp->lock){-.-.}-{3:3}, at: macb_suspend+0x144/0x558 + irq event stamp: 8682 + hardirqs last enabled at (8681): [] _raw_spin_unlock_irqrestore+0x44/0x88 + hardirqs last disabled at (8682): [] _raw_spin_lock_irqsave+0x38/0x98 + softirqs last enabled at (7322): [] handle_softirqs+0x52c/0x588 + softirqs last disabled at (7317): [] __do_softirq+0x20/0x2c + CPU: 1 UID: 0 PID: 501 Comm: rtcwake Not tainted 7.0.0-rc3-next-20260310-yocto-standard+ #125 PREEMPT + Hardware name: ZynqMP ZCU102 Rev1.1 (DT) + Call trace: + show_stack+0x24/0x38 (C) + __dump_stack+0x28/0x38 + dump_stack_lvl+0x64/0x88 + dump_stack+0x18/0x24 + __might_resched+0x200/0x218 + __might_sleep+0x38/0x98 + __mutex_lock_common+0x7c/0x1378 + mutex_lock_nested+0x38/0x50 + free_irq+0x68/0x2b0 + devm_irq_release+0x24/0x38 + devres_release+0x40/0x80 + devm_free_irq+0x48/0x88 + macb_suspend+0x298/0x558 + device_suspend+0x218/0x4f0 + dpm_suspend+0x244/0x3a0 + dpm_suspend_start+0x50/0x78 + suspend_devices_and_enter+0xec/0x560 + pm_suspend+0x194/0x290 + state_store+0x110/0x158 + kobj_attr_store+0x1c/0x30 + sysfs_kf_write+0xa8/0xd0 + kernfs_fop_write_iter+0x11c/0x1c8 + vfs_write+0x248/0x368 
+ ksys_write+0x7c/0xf8 + __arm64_sys_write+0x28/0x40 + invoke_syscall+0x4c/0xe8 + el0_svc_common+0x98/0xf0 + do_el0_svc+0x28/0x40 + el0_svc+0x54/0x1e0 + el0t_64_sync_handler+0x84/0x130 + el0t_64_sync+0x198/0x1a0 + +Fixes: 558e35ccfe95 ("net: macb: WoL support for GEM type of Ethernet controller") +Cc: stable@vger.kernel.org +Reviewed-by: Théo Lebrun +Signed-off-by: Kevin Hao +Link: https://patch.msgid.link/20260318-macb-irq-v2-1-f1179768ab24@gmail.com +Signed-off-by: Jakub Kicinski +[ replaced `tmp` variable with direct `MACB_BIT(MAG)` ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb_main.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -5270,6 +5270,8 @@ static int __maybe_unused macb_suspend(s + macb_writel(bp, TSR, -1); + macb_writel(bp, RSR, -1); + ++ spin_unlock_irqrestore(&bp->lock, flags); ++ + /* Change interrupt handler and + * Enable WoL IRQ on queue 0 + */ +@@ -5281,11 +5283,12 @@ static int __maybe_unused macb_suspend(s + dev_err(dev, + "Unable to request IRQ %d (error %d)\n", + bp->queues[0].irq, err); +- spin_unlock_irqrestore(&bp->lock, flags); + return err; + } ++ spin_lock_irqsave(&bp->lock, flags); + queue_writel(bp->queues, IER, GEM_BIT(WOL)); + gem_writel(bp, WOL, MACB_BIT(MAG)); ++ spin_unlock_irqrestore(&bp->lock, flags); + } else { + err = devm_request_irq(dev, bp->queues[0].irq, macb_wol_interrupt, + IRQF_SHARED, netdev->name, bp->queues); +@@ -5293,13 +5296,13 @@ static int __maybe_unused macb_suspend(s + dev_err(dev, + "Unable to request IRQ %d (error %d)\n", + bp->queues[0].irq, err); +- spin_unlock_irqrestore(&bp->lock, flags); + return err; + } ++ spin_lock_irqsave(&bp->lock, flags); + queue_writel(bp->queues, IER, MACB_BIT(WOL)); + macb_writel(bp, WOL, MACB_BIT(MAG)); ++ spin_unlock_irqrestore(&bp->lock, flags); + } +- spin_unlock_irqrestore(&bp->lock, 
flags); + + enable_irq_wake(bp->queues[0].irq); + } +@@ -5366,6 +5369,8 @@ static int __maybe_unused macb_resume(st + queue_readl(bp->queues, ISR); + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(bp->queues, ISR, -1); ++ spin_unlock_irqrestore(&bp->lock, flags); ++ + /* Replace interrupt handler on queue 0 */ + devm_free_irq(dev, bp->queues[0].irq, bp->queues); + err = devm_request_irq(dev, bp->queues[0].irq, macb_interrupt, +@@ -5374,10 +5379,8 @@ static int __maybe_unused macb_resume(st + dev_err(dev, + "Unable to request IRQ %d (error %d)\n", + bp->queues[0].irq, err); +- spin_unlock_irqrestore(&bp->lock, flags); + return err; + } +- spin_unlock_irqrestore(&bp->lock, flags); + + disable_irq_wake(bp->queues[0].irq); + diff --git a/queue-6.1/scsi-target-tcm_loop-drain-commands-in-target_reset-handler.patch b/queue-6.1/scsi-target-tcm_loop-drain-commands-in-target_reset-handler.patch new file mode 100644 index 0000000000..d719e23080 --- /dev/null +++ b/queue-6.1/scsi-target-tcm_loop-drain-commands-in-target_reset-handler.patch @@ -0,0 +1,147 @@ +From stable+bounces-231449-greg=kroah.com@vger.kernel.org Tue Mar 31 17:33:38 2026 +From: Sasha Levin +Date: Tue, 31 Mar 2026 11:29:17 -0400 +Subject: scsi: target: tcm_loop: Drain commands in target_reset handler +To: stable@vger.kernel.org +Cc: Josef Bacik , "Martin K. Petersen" , Sasha Levin +Message-ID: <20260331152917.2628599-1-sashal@kernel.org> + +From: Josef Bacik + +[ Upstream commit 1333eee56cdf3f0cf67c6ab4114c2c9e0a952026 ] + +tcm_loop_target_reset() violates the SCSI EH contract: it returns SUCCESS +without draining any in-flight commands. The SCSI EH documentation +(scsi_eh.rst) requires that when a reset handler returns SUCCESS the driver +has made lower layers "forget about timed out scmds" and is ready for new +commands. Every other SCSI LLD (virtio_scsi, mpt3sas, ipr, scsi_debug, +mpi3mr) enforces this by draining or completing outstanding commands before +returning SUCCESS. 
+ +Because tcm_loop_target_reset() doesn't drain, the SCSI EH reuses in-flight +scsi_cmnd structures for recovery commands (e.g. TUR) while the target core +still has async completion work queued for the old se_cmd. The memset in +queuecommand zeroes se_lun and lun_ref_active, causing +transport_lun_remove_cmd() to skip its percpu_ref_put(). The leaked LUN +reference prevents transport_clear_lun_ref() from completing, hanging +configfs LUN unlink forever in D-state: + + INFO: task rm:264 blocked for more than 122 seconds. + rm D 0 264 258 0x00004000 + Call Trace: + __schedule+0x3d0/0x8e0 + schedule+0x36/0xf0 + transport_clear_lun_ref+0x78/0x90 [target_core_mod] + core_tpg_remove_lun+0x28/0xb0 [target_core_mod] + target_fabric_port_unlink+0x50/0x60 [target_core_mod] + configfs_unlink+0x156/0x1f0 [configfs] + vfs_unlink+0x109/0x290 + do_unlinkat+0x1d5/0x2d0 + +Fix this by making tcm_loop_target_reset() actually drain commands: + + 1. Issue TMR_LUN_RESET via tcm_loop_issue_tmr() to drain all commands that + the target core knows about (those not yet CMD_T_COMPLETE). + + 2. Use blk_mq_tagset_busy_iter() to iterate all started requests and + flush_work() on each se_cmd — this drains any deferred completion work + for commands that already had CMD_T_COMPLETE set before the TMR (which + the TMR skips via __target_check_io_state()). This is the same pattern + used by mpi3mr, scsi_debug, and libsas to drain outstanding commands + during reset. + +Fixes: e0eb5d38b732 ("scsi: target: tcm_loop: Use block cmd allocator for se_cmds") +Cc: stable@vger.kernel.org +Assisted-by: Claude:claude-opus-4-6 +Signed-off-by: Josef Bacik +Link: https://patch.msgid.link/27011aa34c8f6b1b94d2e3cf5655b6d037f53428.1773706803.git.josef@toxicpanda.com +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/target/loopback/tcm_loop.c | 52 ++++++++++++++++++++++++++++++++----- + 1 file changed, 46 insertions(+), 6 deletions(-) + +--- a/drivers/target/loopback/tcm_loop.c ++++ b/drivers/target/loopback/tcm_loop.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -274,15 +275,27 @@ static int tcm_loop_device_reset(struct + return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; + } + ++static bool tcm_loop_flush_work_iter(struct request *rq, void *data) ++{ ++ struct scsi_cmnd *sc = blk_mq_rq_to_pdu(rq); ++ struct tcm_loop_cmd *tl_cmd = scsi_cmd_priv(sc); ++ struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd; ++ ++ flush_work(&se_cmd->work); ++ return true; ++} ++ + static int tcm_loop_target_reset(struct scsi_cmnd *sc) + { + struct tcm_loop_hba *tl_hba; + struct tcm_loop_tpg *tl_tpg; ++ struct Scsi_Host *sh = sc->device->host; ++ int ret; + + /* + * Locate the tcm_loop_hba_t pointer + */ +- tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); ++ tl_hba = *(struct tcm_loop_hba **)shost_priv(sh); + if (!tl_hba) { + pr_err("Unable to perform device reset without active I_T Nexus\n"); + return FAILED; +@@ -291,11 +304,38 @@ static int tcm_loop_target_reset(struct + * Locate the tl_tpg pointer from TargetID in sc->device->id + */ + tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; +- if (tl_tpg) { +- tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; +- return SUCCESS; +- } +- return FAILED; ++ if (!tl_tpg) ++ return FAILED; ++ ++ /* ++ * Issue a LUN_RESET to drain all commands that the target core ++ * knows about. This handles commands not yet marked CMD_T_COMPLETE. ++ */ ++ ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET); ++ if (ret != TMR_FUNCTION_COMPLETE) ++ return FAILED; ++ ++ /* ++ * Flush any deferred target core completion work that may still be ++ * queued. 
Commands that already had CMD_T_COMPLETE set before the TMR ++ * are skipped by the TMR drain, but their async completion work ++ * (transport_lun_remove_cmd → percpu_ref_put, release_cmd → scsi_done) ++ * may still be pending in target_completion_wq. ++ * ++ * The SCSI EH will reuse in-flight scsi_cmnd structures for recovery ++ * commands (e.g. TUR) immediately after this handler returns SUCCESS — ++ * if deferred work is still pending, the memset in queuecommand would ++ * zero the se_cmd while the work accesses it, leaking the LUN ++ * percpu_ref and hanging configfs unlink forever. ++ * ++ * Use blk_mq_tagset_busy_iter() to find all started requests and ++ * flush_work() on each — the same pattern used by mpi3mr, scsi_debug, ++ * and other SCSI drivers to drain outstanding commands during reset. ++ */ ++ blk_mq_tagset_busy_iter(&sh->tag_set, tcm_loop_flush_work_iter, NULL); ++ ++ tl_tpg->tl_transport_status = TCM_TRANSPORT_ONLINE; ++ return SUCCESS; + } + + static struct scsi_host_template tcm_loop_driver_template = { diff --git a/queue-6.1/series b/queue-6.1/series index 5a70daf5ae..7d958b960b 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -269,3 +269,26 @@ usb-gadget-uvc-fix-null-pointer-dereference-during-unbind-race.patch usb-gadget-f_subset-fix-unbalanced-refcnt-in-geth_free.patch usb-gadget-f_rndis-protect-rndis-options-with-mutex.patch usb-gadget-f_uac1_legacy-validate-control-request-size.patch +wifi-virt_wifi-remove-set_netdev_dev-to-avoid-use-after-free.patch +ext4-fix-use-after-free-in-update_super_work-when-racing-with-umount.patch +block-fix-resource-leak-in-blk_register_queue-error-path.patch +kvm-x86-mmu-drop-zap-existing-present-spte-even-when-creating-an-mmio-spte.patch +net-correctly-handle-tunneled-traffic-on-ipv6_csum-gso-fallback.patch +net-macb-move-devm_-free-request-_irq-out-of-spin-lock-area.patch +scsi-target-tcm_loop-drain-commands-in-target_reset-handler.patch 
+mm-huge_memory-fix-folio-isn-t-locked-in-softleaf_to_folio.patch +x86-cpu-enable-fsgsbase-early-in-cpu_init_exception_handling.patch +ksmbd-fix-memory-leaks-and-null-deref-in-smb2_lock.patch +ksmbd-fix-potencial-oob-in-get_file_all_info-for-compound-requests.patch +tracing-fix-potential-deadlock-in-cpu-hotplug-with-osnoise.patch +hwmon-pmbus-core-add-lock-and-unlock-functions.patch +hwmon-pmbus-isl68137-add-mutex-protection-for-avs-enable-sysfs-attributes.patch +ext4-factor-out-ext4_percpu_param_init-and-ext4_percpu_param_destroy.patch +ext4-use-ext4_group_desc_free-in-ext4_put_super-to-save-some-duplicated-code.patch +ext4-factor-out-ext4_flex_groups_free.patch +ext4-fix-the-might_sleep-warnings-in-kvfree.patch +ext4-publish-jinode-after-initialization.patch +mptcp-fix-lock-class-name-family-in-pm_nl_create_listen_socket.patch +ext4-handle-wraparound-when-searching-for-blocks-for-indirect-mapped-blocks.patch +cpufreq-governor-free-dbs_data-directly-when-gov-init-fails.patch +cpufreq-governor-fix-double-free-in-cpufreq_dbs_governor_init-error-path.patch diff --git a/queue-6.1/tracing-fix-potential-deadlock-in-cpu-hotplug-with-osnoise.patch b/queue-6.1/tracing-fix-potential-deadlock-in-cpu-hotplug-with-osnoise.patch new file mode 100644 index 0000000000..9b10527e4d --- /dev/null +++ b/queue-6.1/tracing-fix-potential-deadlock-in-cpu-hotplug-with-osnoise.patch @@ -0,0 +1,93 @@ +From stable+bounces-231262-greg=kroah.com@vger.kernel.org Mon Mar 30 20:37:47 2026 +From: Sasha Levin +Date: Mon, 30 Mar 2026 14:37:39 -0400 +Subject: tracing: Fix potential deadlock in cpu hotplug with osnoise +To: stable@vger.kernel.org +Cc: Luo Haiyang , mathieu.desnoyers@efficios.com, zhang.run@zte.com.cn, yang.tao172@zte.com.cn, ran.xiaokai@zte.com.cn, "Masami Hiramatsu (Google)" , "Steven Rostedt (Google)" , Sasha Levin +Message-ID: <20260330183739.934195-1-sashal@kernel.org> + +From: Luo Haiyang + +[ Upstream commit 1f9885732248d22f788e4992c739a98c88ab8a55 ] + +The following sequence 
may lead to a deadlock in cpu hotplug: + + task1 task2 task3 + ----- ----- ----- + + mutex_lock(&interface_lock) + + [CPU GOING OFFLINE] + + cpus_write_lock(); + osnoise_cpu_die(); + kthread_stop(task3); + wait_for_completion(); + + osnoise_sleep(); + mutex_lock(&interface_lock); + + cpus_read_lock(); + + [DEAD LOCK] + +Fix by swapping the order of cpus_read_lock() and mutex_lock(&interface_lock). + +Cc: stable@vger.kernel.org +Cc: +Cc: +Cc: +Cc: +Fixes: bce29ac9ce0bb ("trace: Add osnoise tracer") +Link: https://patch.msgid.link/20260326141953414bVSj33dAYktqp9Oiyizq8@zte.com.cn +Reviewed-by: Masami Hiramatsu (Google) +Signed-off-by: Luo Haiyang +Signed-off-by: Steven Rostedt (Google) +[ adapted guard() macros to lock/unlock calls ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_osnoise.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/kernel/trace/trace_osnoise.c ++++ b/kernel/trace/trace_osnoise.c +@@ -1809,8 +1809,8 @@ static void osnoise_hotplug_workfn(struc + if (!osnoise_has_registered_instances()) + goto out_unlock_trace; + +- mutex_lock(&interface_lock); + cpus_read_lock(); ++ mutex_lock(&interface_lock); + + if (!cpu_online(cpu)) + goto out_unlock; +@@ -1820,8 +1820,8 @@ static void osnoise_hotplug_workfn(struc + start_kthread(cpu); + + out_unlock: +- cpus_read_unlock(); + mutex_unlock(&interface_lock); ++ cpus_read_unlock(); + out_unlock_trace: + mutex_unlock(&trace_types_lock); + } +@@ -1950,16 +1950,16 @@ osnoise_cpus_write(struct file *filp, co + if (running) + stop_per_cpu_kthreads(); + +- mutex_lock(&interface_lock); + /* + * osnoise_cpumask is read by CPU hotplug operations.
+ */ + cpus_read_lock(); ++ mutex_lock(&interface_lock); + + cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); + +- cpus_read_unlock(); + mutex_unlock(&interface_lock); ++ cpus_read_unlock(); + + if (running) + start_per_cpu_kthreads(); diff --git a/queue-6.1/wifi-virt_wifi-remove-set_netdev_dev-to-avoid-use-after-free.patch b/queue-6.1/wifi-virt_wifi-remove-set_netdev_dev-to-avoid-use-after-free.patch new file mode 100644 index 0000000000..ae83fc33cb --- /dev/null +++ b/queue-6.1/wifi-virt_wifi-remove-set_netdev_dev-to-avoid-use-after-free.patch @@ -0,0 +1,101 @@ +From 789b06f9f39cdc7e895bdab2c034e39c41c8f8d6 Mon Sep 17 00:00:00 2001 +From: Alexander Popov +Date: Wed, 25 Mar 2026 01:46:02 +0300 +Subject: wifi: virt_wifi: remove SET_NETDEV_DEV to avoid use-after-free + +From: Alexander Popov + +commit 789b06f9f39cdc7e895bdab2c034e39c41c8f8d6 upstream. + +Currently we execute `SET_NETDEV_DEV(dev, &priv->lowerdev->dev)` for +the virt_wifi net devices. However, unregistering a virt_wifi device in +netdev_run_todo() can happen together with the device referenced by +SET_NETDEV_DEV(). + +It can result in use-after-free during the ethtool operations performed +on a virt_wifi device that is currently being unregistered. Such a net +device can have the `dev.parent` field pointing to the freed memory, +but ethnl_ops_begin() calls `pm_runtime_get_sync(dev->dev.parent)`. + +Let's remove SET_NETDEV_DEV for virt_wifi to avoid bugs like this: + + ================================================================== + BUG: KASAN: slab-use-after-free in __pm_runtime_resume+0xe2/0xf0 + Read of size 2 at addr ffff88810cfc46f8 by task pm/606 + + Call Trace: + + dump_stack_lvl+0x4d/0x70 + print_report+0x170/0x4f3 + ? __pfx__raw_spin_lock_irqsave+0x10/0x10 + kasan_report+0xda/0x110 + ? __pm_runtime_resume+0xe2/0xf0 + ? __pm_runtime_resume+0xe2/0xf0 + __pm_runtime_resume+0xe2/0xf0 + ethnl_ops_begin+0x49/0x270 + ethnl_set_features+0x23c/0xab0 + ? __pfx_ethnl_set_features+0x10/0x10 + ? 
kvm_sched_clock_read+0x11/0x20 + ? local_clock_noinstr+0xf/0xf0 + ? local_clock+0x10/0x30 + ? kasan_save_track+0x25/0x60 + ? __kasan_kmalloc+0x7f/0x90 + ? genl_family_rcv_msg_attrs_parse.isra.0+0x150/0x2c0 + genl_family_rcv_msg_doit+0x1e7/0x2c0 + ? __pfx_genl_family_rcv_msg_doit+0x10/0x10 + ? __pfx_cred_has_capability.isra.0+0x10/0x10 + ? stack_trace_save+0x8e/0xc0 + genl_rcv_msg+0x411/0x660 + ? __pfx_genl_rcv_msg+0x10/0x10 + ? __pfx_ethnl_set_features+0x10/0x10 + netlink_rcv_skb+0x121/0x380 + ? __pfx_genl_rcv_msg+0x10/0x10 + ? __pfx_netlink_rcv_skb+0x10/0x10 + ? __pfx_down_read+0x10/0x10 + genl_rcv+0x23/0x30 + netlink_unicast+0x60f/0x830 + ? __pfx_netlink_unicast+0x10/0x10 + ? __pfx___alloc_skb+0x10/0x10 + netlink_sendmsg+0x6ea/0xbc0 + ? __pfx_netlink_sendmsg+0x10/0x10 + ? __futex_queue+0x10b/0x1f0 + ____sys_sendmsg+0x7a2/0x950 + ? copy_msghdr_from_user+0x26b/0x430 + ? __pfx_____sys_sendmsg+0x10/0x10 + ? __pfx_copy_msghdr_from_user+0x10/0x10 + ___sys_sendmsg+0xf8/0x180 + ? __pfx____sys_sendmsg+0x10/0x10 + ? __pfx_futex_wait+0x10/0x10 + ? fdget+0x2e4/0x4a0 + __sys_sendmsg+0x11f/0x1c0 + ? __pfx___sys_sendmsg+0x10/0x10 + do_syscall_64+0xe2/0x570 + ? 
exc_page_fault+0x66/0xb0 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + + +This fix may be combined with another one in the ethtool subsystem: +https://lore.kernel.org/all/20260322075917.254874-1-alex.popov@linux.com/T/#u + +Fixes: d43c65b05b848e0b ("ethtool: runtime-resume netdev parent in ethnl_ops_begin") +Cc: stable@vger.kernel.org +Signed-off-by: Alexander Popov +Acked-by: Greg Kroah-Hartman +Reviewed-by: Breno Leitao +Link: https://patch.msgid.link/20260324224607.374327-1-alex.popov@linux.com +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/virt_wifi.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/net/wireless/virt_wifi.c ++++ b/drivers/net/wireless/virt_wifi.c +@@ -553,7 +553,6 @@ static int virt_wifi_newlink(struct net + eth_hw_addr_inherit(dev, priv->lowerdev); + netif_stacked_transfer_operstate(priv->lowerdev, dev); + +- SET_NETDEV_DEV(dev, &priv->lowerdev->dev); + dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL); + + if (!dev->ieee80211_ptr) { diff --git a/queue-6.1/x86-cpu-enable-fsgsbase-early-in-cpu_init_exception_handling.patch b/queue-6.1/x86-cpu-enable-fsgsbase-early-in-cpu_init_exception_handling.patch new file mode 100644 index 0000000000..358bec9ee9 --- /dev/null +++ b/queue-6.1/x86-cpu-enable-fsgsbase-early-in-cpu_init_exception_handling.patch @@ -0,0 +1,142 @@ +From stable+bounces-231407-greg=kroah.com@vger.kernel.org Tue Mar 31 13:56:52 2026 +From: Sasha Levin +Date: Tue, 31 Mar 2026 07:49:50 -0400 +Subject: x86/cpu: Enable FSGSBASE early in cpu_init_exception_handling() +To: stable@vger.kernel.org +Cc: Nikunj A Dadhania , Borislav Petkov , Sohil Mehta , stable@kernel.org, Sasha Levin +Message-ID: <20260331114950.2119438-1-sashal@kernel.org> + +From: Nikunj A Dadhania + +[ Upstream commit 05243d490bb7852a8acca7b5b5658019c7797a52 ] + +Move FSGSBASE enablement from identify_cpu() to cpu_init_exception_handling() +to ensure it is enabled before any exceptions can occur on 
both boot and +secondary CPUs. + +== Background == + +Exception entry code (paranoid_entry()) uses ALTERNATIVE patching based on +X86_FEATURE_FSGSBASE to decide whether to use RDGSBASE/WRGSBASE instructions +or the slower RDMSR/SWAPGS sequence for saving/restoring GSBASE. + +On boot CPU, ALTERNATIVE patching happens after enabling FSGSBASE in CR4. +When the feature is available, the code is permanently patched to use +RDGSBASE/WRGSBASE, which require CR4.FSGSBASE=1 to execute without triggering #UD. + +== Boot Sequence == + +Boot CPU (with CR pinning enabled): + trap_init() + cpu_init() <- Uses unpatched code (RDMSR/SWAPGS) + x2apic_setup() + ... + arch_cpu_finalize_init() + identify_boot_cpu() + identify_cpu() + cr4_set_bits(X86_CR4_FSGSBASE) # Enables the feature + # This becomes part of cr4_pinned_bits + ... + alternative_instructions() <- Patches code to use RDGSBASE/WRGSBASE + +Secondary CPUs (with CR pinning enabled): + start_secondary() + cr4_init() <- Code already patched, CR4.FSGSBASE=1 + set implicitly via cr4_pinned_bits + + cpu_init() <- exceptions work because FSGSBASE is + already enabled + +Secondary CPU (with CR pinning disabled): + start_secondary() + cr4_init() <- Code already patched, CR4.FSGSBASE=0 + cpu_init() + x2apic_setup() + rdmsrq(MSR_IA32_APICBASE) <- Triggers #VC in SNP guests + exc_vmm_communication() + paranoid_entry() <- Uses RDGSBASE with CR4.FSGSBASE=0 + (patched code) + ... + ap_starting() + identify_secondary_cpu() + identify_cpu() + cr4_set_bits(X86_CR4_FSGSBASE) <- Enables the feature, which is + too late + +== CR Pinning == + +Currently, for secondary CPUs, CR4.FSGSBASE is set implicitly through +CR-pinning: the boot CPU sets it during identify_cpu(), it becomes part of +cr4_pinned_bits, and cr4_init() applies those pinned bits to secondary CPUs. +This works but creates an undocumented dependency between cr4_init() and the +pinning mechanism. + +== Problem == + +Secondary CPUs boot after alternatives have been applied globally.
They +execute already-patched paranoid_entry() code that uses RDGSBASE/WRGSBASE +instructions, which require CR4.FSGSBASE=1. Upcoming changes to CR pinning +behavior will break the implicit dependency, causing secondary CPUs to +generate #UD. + +This issue manifests itself on AMD SEV-SNP guests, where the rdmsrq() in +x2apic_setup() triggers a #VC exception early during cpu_init(). The #VC +handler (exc_vmm_communication()) executes the patched paranoid_entry() path. +Without CR4.FSGSBASE enabled, RDGSBASE instructions trigger #UD. + +== Fix == + +Enable FSGSBASE explicitly in cpu_init_exception_handling() before loading +exception handlers. This makes the dependency explicit and ensures both +boot and secondary CPUs have FSGSBASE enabled before paranoid_entry() +executes. + +Fixes: c82965f9e530 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit") +Reported-by: Borislav Petkov +Suggested-by: Sohil Mehta +Signed-off-by: Nikunj A Dadhania +Signed-off-by: Borislav Petkov (AMD) +Reviewed-by: Sohil Mehta +Cc: +Link: https://patch.msgid.link/20260318075654.1792916-2-nikunj@amd.com +[ adapted to cpu_init_exception_handling(void) lacking FRED and LASS support ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/common.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1992,12 +1992,6 @@ static void identify_cpu(struct cpuinfo_ + setup_smap(c); + setup_umip(c); + +- /* Enable FSGSBASE instructions if available. */ +- if (cpu_has(c, X86_FEATURE_FSGSBASE)) { +- cr4_set_bits(X86_CR4_FSGSBASE); +- elf_hwcap2 |= HWCAP2_FSGSBASE; +- } +- + /* + * The vendor-specific functions might have changed features. + * Now we do "generic changes." +@@ -2384,6 +2378,18 @@ void cpu_init_exception_handling(void) + /* GHCB needs to be setup to handle #VC. 
*/ + setup_ghcb(); + ++ /* ++ * On CPUs with FSGSBASE support, paranoid_entry() uses ++ * ALTERNATIVE-patched RDGSBASE/WRGSBASE instructions. Secondary CPUs ++ * boot after alternatives are patched globally, so early exceptions ++ * execute patched code that depends on FSGSBASE. Enable the feature ++ * before any exceptions occur. ++ */ ++ if (cpu_feature_enabled(X86_FEATURE_FSGSBASE)) { ++ cr4_set_bits(X86_CR4_FSGSBASE); ++ elf_hwcap2 |= HWCAP2_FSGSBASE; ++ } ++ + /* Finally load the IDT */ + load_current_idt(); + }