From: Greg Kroah-Hartman Date: Mon, 29 Dec 2025 13:17:04 +0000 (+0100) Subject: 6.18-stable patches X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e36139470bec79ce945b7f0984da3f154bf3c8c2;p=thirdparty%2Fkernel%2Fstable-queue.git 6.18-stable patches added patches: block-remove-queue-freezing-from-several-sysfs-store-callbacks.patch crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch dma-mapping-fix-dma_bit_mask-macro-being-broken.patch ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch ext4-check-if-mount_opts-is-nul-terminated-in-ext4_ioctl_set_tune_sb.patch ext4-clear-i_state_flags-when-alloc-inode.patch ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch ext4-fix-string-copying-in-parse_apply_sb_mount_options.patch ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch jbd2-use-a-weaker-annotation-in-journal-handling.patch media-v4l2-mem2mem-fix-outdated-documentation.patch mm-huge_memory-add-pmd-folio-to-ds_queue-in-do_huge_zero_wp_pmd.patch mm-ksm-fix-exec-fork-inheritance-support-for-prctl.patch mm-slab-introduce-kvfree_rcu_barrier_on_cache-for-cache-destruction.patch mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch mptcp-pm-ignore-unknown-endpoint-flags.patch mptcp-schedule-rtx-timer-only-after-pushing-data.patch printk-allow-printk_trigger_flush-to-flush-all-types.patch printk-avoid-irq_work-for-printk_deferred-on-suspend.patch selftests-mptcp-pm-ensure-unknown-flags-are-ignored.patch tpm2-sessions-fix-out-of-range-indexing-in-name_size.patch tpm2-sessions-fix-tpm2_read_public-range-checks.patch --- diff --git a/queue-6.18/block-remove-queue-freezing-from-several-sysfs-store-callbacks.patch b/queue-6.18/block-remove-queue-freezing-from-several-sysfs-store-callbacks.patch new file mode 100644 index 0000000000..4a4212d03c --- /dev/null +++ b/queue-6.18/block-remove-queue-freezing-from-several-sysfs-store-callbacks.patch @@ -0,0 +1,216 @@ +From 935a20d1bebf6236076785fac3ff81e3931834e9 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Fri, 14 Nov 2025 13:04:07 -0800 +Subject: block: Remove queue freezing from several sysfs store callbacks + +From: Bart Van Assche + +commit 935a20d1bebf6236076785fac3ff81e3931834e9 upstream. + +Freezing the request queue from inside sysfs store callbacks may cause a +deadlock in combination with the dm-multipath driver and the +queue_if_no_path option. Additionally, freezing the request queue slows +down system boot on systems where sysfs attributes are set synchronously. + +Fix this by removing the blk_mq_freeze_queue() / blk_mq_unfreeze_queue() +calls from the store callbacks that do not strictly need these callbacks. +Add the __data_racy annotation to request_queue.rq_timeout to suppress +KCSAN data race reports about the rq_timeout reads. + +This patch may cause a small delay in applying the new settings. + +For all the attributes affected by this patch, I/O will complete +correctly whether the old or the new value of the attribute is used. 
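+
+For background, here is a minimal userspace sketch (illustrative only,
+not part of the patch) of the WRITE_ONCE()/READ_ONCE() pattern the
+read_ahead_kb path now relies on; the macros below are simplified
+stand-ins for the kernel's:
+
+  #include <stdio.h>
+
+  /* simplified stand-ins for the kernel annotations */
+  #define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
+  #define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))
+
+  static unsigned long ra_pages;
+
+  int main(void)
+  {
+          unsigned long ra_kb = 128;      /* value written via sysfs */
+          unsigned int page_shift = 12;   /* assuming 4 KiB pages */
+
+          /* one plain store; readers racing with it observe either the
+           * old or the new value, both of which are safe to use
+           */
+          WRITE_ONCE(ra_pages, ra_kb >> (page_shift - 10));
+          printf("ra_pages = %lu\n", READ_ONCE(ra_pages));
+          return 0;
+  }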
+ +This patch affects the following sysfs attributes: +* io_poll_delay +* io_timeout +* nomerges +* read_ahead_kb +* rq_affinity + +Here is an example of a deadlock triggered by running test srp/002 +if this patch is not applied: + +task:multipathd +Call Trace: + + __schedule+0x8c1/0x1bf0 + schedule+0xdd/0x270 + schedule_preempt_disabled+0x1c/0x30 + __mutex_lock+0xb89/0x1650 + mutex_lock_nested+0x1f/0x30 + dm_table_set_restrictions+0x823/0xdf0 + __bind+0x166/0x590 + dm_swap_table+0x2a7/0x490 + do_resume+0x1b1/0x610 + dev_suspend+0x55/0x1a0 + ctl_ioctl+0x3a5/0x7e0 + dm_ctl_ioctl+0x12/0x20 + __x64_sys_ioctl+0x127/0x1a0 + x64_sys_call+0xe2b/0x17d0 + do_syscall_64+0x96/0x3a0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +task:(udev-worker) +Call Trace: + + __schedule+0x8c1/0x1bf0 + schedule+0xdd/0x270 + blk_mq_freeze_queue_wait+0xf2/0x140 + blk_mq_freeze_queue_nomemsave+0x23/0x30 + queue_ra_store+0x14e/0x290 + queue_attr_store+0x23e/0x2c0 + sysfs_kf_write+0xde/0x140 + kernfs_fop_write_iter+0x3b2/0x630 + vfs_write+0x4fd/0x1390 + ksys_write+0xfd/0x230 + __x64_sys_write+0x76/0xc0 + x64_sys_call+0x276/0x17d0 + do_syscall_64+0x96/0x3a0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + + +Cc: Christoph Hellwig +Cc: Ming Lei +Cc: Nilay Shroff +Cc: Martin Wilck +Cc: Benjamin Marzinski +Cc: stable@vger.kernel.org +Fixes: af2814149883 ("block: freeze the queue in queue_attr_store") +Signed-off-by: Bart Van Assche +Reviewed-by: Nilay Shroff +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-sysfs.c | 26 ++++++++------------------ + include/linux/blkdev.h | 2 +- + 2 files changed, 9 insertions(+), 19 deletions(-) + +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -143,21 +143,22 @@ queue_ra_store(struct gendisk *disk, con + { + unsigned long ra_kb; + ssize_t ret; +- unsigned int memflags; + struct request_queue *q = disk->queue; + + ret = queue_var_store(&ra_kb, page, count); + if (ret < 0) + return ret; + /* +- * ->ra_pages is protected by ->limits_lock because it is usually +- * calculated from the queue limits by queue_limits_commit_update. ++ * The ->ra_pages change below is protected by ->limits_lock because it ++ * is usually calculated from the queue limits by ++ * queue_limits_commit_update(). ++ * ++ * bdi->ra_pages reads are not serialized against bdi->ra_pages writes. ++ * Use WRITE_ONCE() to write bdi->ra_pages once. 
+ */ + mutex_lock(&q->limits_lock); +- memflags = blk_mq_freeze_queue(q); +- disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10); ++ WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10)); + mutex_unlock(&q->limits_lock); +- blk_mq_unfreeze_queue(q, memflags); + + return ret; + } +@@ -375,21 +376,18 @@ static ssize_t queue_nomerges_store(stru + size_t count) + { + unsigned long nm; +- unsigned int memflags; + struct request_queue *q = disk->queue; + ssize_t ret = queue_var_store(&nm, page, count); + + if (ret < 0) + return ret; + +- memflags = blk_mq_freeze_queue(q); + blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q); + blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); + if (nm == 2) + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); + else if (nm) + blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); +- blk_mq_unfreeze_queue(q, memflags); + + return ret; + } +@@ -409,7 +407,6 @@ queue_rq_affinity_store(struct gendisk * + #ifdef CONFIG_SMP + struct request_queue *q = disk->queue; + unsigned long val; +- unsigned int memflags; + + ret = queue_var_store(&val, page, count); + if (ret < 0) +@@ -421,7 +418,6 @@ queue_rq_affinity_store(struct gendisk * + * are accessed individually using atomic test_bit operation. So we + * don't grab any lock while updating these flags. + */ +- memflags = blk_mq_freeze_queue(q); + if (val == 2) { + blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); +@@ -432,7 +428,6 @@ queue_rq_affinity_store(struct gendisk * + blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); + blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + } +- blk_mq_unfreeze_queue(q, memflags); + #endif + return ret; + } +@@ -446,11 +441,9 @@ static ssize_t queue_poll_delay_store(st + static ssize_t queue_poll_store(struct gendisk *disk, const char *page, + size_t count) + { +- unsigned int memflags; + ssize_t ret = count; + struct request_queue *q = disk->queue; + +- memflags = blk_mq_freeze_queue(q); + if (!(q->limits.features & BLK_FEAT_POLL)) { + ret = -EINVAL; + goto out; +@@ -459,7 +452,6 @@ static ssize_t queue_poll_store(struct g + pr_info_ratelimited("writes to the poll attribute are ignored.\n"); + pr_info_ratelimited("please use driver specific parameters instead.\n"); + out: +- blk_mq_unfreeze_queue(q, memflags); + return ret; + } + +@@ -472,7 +464,7 @@ static ssize_t queue_io_timeout_show(str + static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page, + size_t count) + { +- unsigned int val, memflags; ++ unsigned int val; + int err; + struct request_queue *q = disk->queue; + +@@ -480,9 +472,7 @@ static ssize_t queue_io_timeout_store(st + if (err || val == 0) + return -EINVAL; + +- memflags = blk_mq_freeze_queue(q); + blk_queue_rq_timeout(q, msecs_to_jiffies(val)); +- blk_mq_unfreeze_queue(q, memflags); + + return count; + } +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -485,7 +485,7 @@ struct request_queue { + */ + unsigned long queue_flags; + +- unsigned int rq_timeout; ++ unsigned int __data_racy rq_timeout; + + unsigned int queue_depth; + diff --git a/queue-6.18/crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch b/queue-6.18/crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch new file mode 100644 index 0000000000..230c8717c4 --- /dev/null +++ b/queue-6.18/crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch @@ -0,0 +1,141 @@ +From adc15829fb73e402903b7030729263b6ee4a7232 Mon Sep 17 00:00:00 2001 +From: Sourabh Jain +Date: Thu, 16 Oct 2025 19:58:31 +0530 +Subject: 
crash: let architecture decide crash memory export to iomem_resource + +From: Sourabh Jain + +commit adc15829fb73e402903b7030729263b6ee4a7232 upstream. + +With the generic crashkernel reservation, the kernel emits the following +warning on powerpc: + +WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/mem.c:341 add_system_ram_resources+0xfc/0x180 +Modules linked in: +CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-auto-12607-g5472d60c129f #1 VOLUNTARY +Hardware name: IBM,9080-HEX Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries +NIP: c00000000201de3c LR: c00000000201de34 CTR: 0000000000000000 +REGS: c000000127cef8a0 TRAP: 0700 Not tainted (6.17.0-auto-12607-g5472d60c129f) +MSR: 8000000002029033 CR: 84000840 XER: 20040010 +CFAR: c00000000017eed0 IRQMASK: 0 +GPR00: c00000000201de34 c000000127cefb40 c0000000016a8100 0000000000000001 +GPR04: c00000012005aa00 0000000020000000 c000000002b705c8 0000000000000000 +GPR08: 000000007fffffff fffffffffffffff0 c000000002db8100 000000011fffffff +GPR12: c00000000201dd40 c000000002ff0000 c0000000000112bc 0000000000000000 +GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +GPR20: 0000000000000000 0000000000000000 0000000000000000 c0000000015a3808 +GPR24: c00000000200468c c000000001699888 0000000000000106 c0000000020d1950 +GPR28: c0000000014683f8 0000000081000200 c0000000015c1868 c000000002b9f710 +NIP [c00000000201de3c] add_system_ram_resources+0xfc/0x180 +LR [c00000000201de34] add_system_ram_resources+0xf4/0x180 +Call Trace: +add_system_ram_resources+0xf4/0x180 (unreliable) +do_one_initcall+0x60/0x36c +do_initcalls+0x120/0x220 +kernel_init_freeable+0x23c/0x390 +kernel_init+0x34/0x26c +ret_from_kernel_user_thread+0x14/0x1c + +This warning occurs due to a conflict between crashkernel and System RAM +iomem resources. + +The generic crashkernel reservation adds the crashkernel memory range to +/proc/iomem during early initialization. Later, all memblock ranges are +added to /proc/iomem as System RAM. If the crashkernel region overlaps +with any memblock range, it causes a conflict while adding those memblock +regions as iomem resources, triggering the above warning. The conflicting +memblock regions are then omitted from /proc/iomem. + +For example, if the following crashkernel region is added to /proc/iomem: +20000000-11fffffff : Crash kernel + +then the following memblock regions System RAM regions fail to be inserted: +00000000-7fffffff : System RAM +80000000-257fffffff : System RAM + +Fix this by not adding the crashkernel memory to /proc/iomem on powerpc. +Introduce an architecture hook to let each architecture decide whether to +export the crashkernel region to /proc/iomem. + +For more info checkout commit c40dd2f766440 ("powerpc: Add System RAM +to /proc/iomem") and commit bce074bdbc36 ("powerpc: insert System RAM +resource to prevent crashkernel conflict") + +Note: Before switching to the generic crashkernel reservation, powerpc +never exported the crashkernel region to /proc/iomem. + +Link: https://lkml.kernel.org/r/20251016142831.144515-1-sourabhjain@linux.ibm.com +Fixes: e3185ee438c2 ("powerpc/crash: use generic crashkernel reservation"). 
+Signed-off-by: Sourabh Jain +Reported-by: Venkat Rao Bagalkote +Closes: https://lore.kernel.org/all/90937fe0-2e76-4c82-b27e-7b8a7fe3ac69@linux.ibm.com/ +Tested-by: Venkat Rao Bagalkote +Cc: Baoquan he +Cc: Hari Bathini +Cc: Madhavan Srinivasan +Cc: Mahesh Salgaonkar +Cc: Michael Ellerman +Cc: Ritesh Harjani (IBM) +Cc: Vivek Goyal +Cc: Dave Young +Cc: Mike Rapoport +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/crash_reserve.h | 8 ++++++++ + include/linux/crash_reserve.h | 6 ++++++ + kernel/crash_reserve.c | 3 +++ + 3 files changed, 17 insertions(+) + +diff --git a/arch/powerpc/include/asm/crash_reserve.h b/arch/powerpc/include/asm/crash_reserve.h +index 6467ce29b1fa..d1b570ddbf98 100644 +--- a/arch/powerpc/include/asm/crash_reserve.h ++++ b/arch/powerpc/include/asm/crash_reserve.h +@@ -5,4 +5,12 @@ + /* crash kernel regions are Page size agliged */ + #define CRASH_ALIGN PAGE_SIZE + ++#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION ++static inline bool arch_add_crash_res_to_iomem(void) ++{ ++ return false; ++} ++#define arch_add_crash_res_to_iomem arch_add_crash_res_to_iomem ++#endif ++ + #endif /* _ASM_POWERPC_CRASH_RESERVE_H */ +diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h +index 7b44b41d0a20..f0dc03d94ca2 100644 +--- a/include/linux/crash_reserve.h ++++ b/include/linux/crash_reserve.h +@@ -32,6 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + void __init reserve_crashkernel_cma(unsigned long long cma_size); + + #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION ++#ifndef arch_add_crash_res_to_iomem ++static inline bool arch_add_crash_res_to_iomem(void) ++{ ++ return true; ++} ++#endif + #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE + #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) + #endif +diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c +index 87bf4d41eabb..62e60e0223cf 100644 +--- a/kernel/crash_reserve.c ++++ b/kernel/crash_reserve.c +@@ -524,6 +524,9 @@ void __init reserve_crashkernel_cma(unsigned long long cma_size) + #ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + static __init int insert_crashkernel_resources(void) + { ++ if (!arch_add_crash_res_to_iomem()) ++ return 0; ++ + if (crashk_res.start < crashk_res.end) + insert_resource(&iomem_resource, &crashk_res); + +-- +2.52.0 + diff --git a/queue-6.18/dma-mapping-fix-dma_bit_mask-macro-being-broken.patch b/queue-6.18/dma-mapping-fix-dma_bit_mask-macro-being-broken.patch new file mode 100644 index 0000000000..9276cb9581 --- /dev/null +++ b/queue-6.18/dma-mapping-fix-dma_bit_mask-macro-being-broken.patch @@ -0,0 +1,64 @@ +From 31b931bebd11a0f00967114f62c8c38952f483e5 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Sun, 7 Dec 2025 19:47:56 +0100 +Subject: dma-mapping: Fix DMA_BIT_MASK() macro being broken + +From: Hans de Goede + +commit 31b931bebd11a0f00967114f62c8c38952f483e5 upstream. + +After commit a50f7456f853 ("dma-mapping: Allow use of DMA_BIT_MASK(64) in +global scope"), the DMA_BIT_MASK() macro is broken when passed non trivial +statements for the value of 'n'. This is caused by the new version missing +parenthesis around 'n' when evaluating 'n'. + +One example of this breakage is the IPU6 driver now crashing due to +it getting DMA-addresses with address bit 32 set even though it has +tried to set a 32 bit DMA mask. 
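+
+A minimal userspace sketch (illustrative only, not part of the patch;
+the _BROKEN/_FIXED names are made up here) of the missing-parentheses
+problem, before the concrete IPU6 case below:
+
+  #include <stdio.h>
+
+  /* simplified 64-bit GENMASK_ULL stand-in */
+  #define GENMASK_ULL(h, l) \
+          (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))
+
+  #define DMA_BIT_MASK_BROKEN(n) GENMASK_ULL(n - 1, 0)
+  #define DMA_BIT_MASK_FIXED(n)  GENMASK_ULL((n) - 1, 0)
+
+  int main(void)
+  {
+          int secure = 1;  /* want a 31-bit mask in secure mode */
+
+          /* "- 1" binds only to the last operand of the ternary */
+          printf("broken: %#llx\n", DMA_BIT_MASK_BROKEN(secure ? 31 : 32));
+          printf("fixed:  %#llx\n", DMA_BIT_MASK_FIXED(secure ? 31 : 32));
+          return 0;
+  }
+
+This prints 0xffffffff (32 bits) for the broken variant but the
+intended 0x7fffffff (31 bits) for the fixed one.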
+
+The IPU6 CSI2 engine has a DMA mask of either 31 or 32 bits depending
+on whether it is in secure mode, and it sets this mask like this:
+
+ mmu_info->aperture_end =
+ (dma_addr_t)DMA_BIT_MASK(isp->secure_mode ?
+ IPU6_MMU_ADDR_BITS :
+ IPU6_MMU_ADDR_BITS_NON_SECURE);
+
+So the 'n' argument here is "isp->secure_mode ? IPU6_MMU_ADDR_BITS :
+IPU6_MMU_ADDR_BITS_NON_SECURE" which gets expanded into:
+
+isp->secure_mode ? IPU6_MMU_ADDR_BITS : IPU6_MMU_ADDR_BITS_NON_SECURE - 1
+
+With the -1 only being applied in the non-secure case, causing
+the secure mode mask to be one bit too large.
+
+Fixes: a50f7456f853 ("dma-mapping: Allow use of DMA_BIT_MASK(64) in global scope")
+Cc: Sakari Ailus 
+Cc: James Clark 
+Cc: Nathan Chancellor 
+Cc: stable@vger.kernel.org
+Signed-off-by: Hans de Goede 
+Reviewed-by: Nathan Chancellor 
+Signed-off-by: Marek Szyprowski 
+Link: https://lore.kernel.org/r/20251207184756.97904-1-johannes.goede@oss.qualcomm.com
+Signed-off-by: Greg Kroah-Hartman 
+---
+ include/linux/dma-mapping.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
+index 2ceda49c609f..aa36a0d1d9df 100644
+--- a/include/linux/dma-mapping.h
++++ b/include/linux/dma-mapping.h
+@@ -90,7 +90,7 @@
+ */
+ #define DMA_MAPPING_ERROR (~(dma_addr_t)0)
+
+-#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0)
++#define DMA_BIT_MASK(n) GENMASK_ULL((n) - 1, 0)
+
+ struct dma_iova_state {
+ dma_addr_t addr;
+--
+2.52.0
+
diff --git a/queue-6.18/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch b/queue-6.18/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch
new file mode 100644
index 0000000000..8276336ef6
--- /dev/null
+++ b/queue-6.18/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch
@@ -0,0 +1,59 @@
+From 7c11c56eb32eae96893eebafdbe3decadefe88ad Mon Sep 17 00:00:00 2001
+From: Baokun Li 
+Date: Thu, 20 Nov 2025 21:42:33 +0800
+Subject: ext4: align max orphan file size with e2fsprogs limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Baokun Li 
+
+commit 7c11c56eb32eae96893eebafdbe3decadefe88ad upstream.
+
+Kernel commit 0a6ce20c1564 ("ext4: verify orphan file size is not too big")
+limits the maximum supported orphan file size to 8 << 20.
+
+However, in e2fsprogs, the orphan file size is set to 32–512 filesystem
+blocks when creating a filesystem.
+
+With 64k block size, formatting an ext4 fs >32G gives an orphan file bigger
+than the kernel allows, so mount prints an error and fails:
+
+ EXT4-fs (vdb): orphan file too big: 8650752
+ EXT4-fs (vdb): mount failed
+
+To prevent this issue and allow previously created 64KB filesystems to
+mount, we update the maximum allowed orphan file size in the kernel to
+512 filesystem blocks.
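+
+For illustration (not part of the patch), the new byte limit scales
+with the filesystem block size, which a quick userspace check shows:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          /* 1 KiB, 4 KiB and 64 KiB blocks -> i_blkbits of 10, 12, 16 */
+          int blkbits[] = { 10, 12, 16 };
+
+          for (int i = 0; i < 3; i++)
+                  printf("block size %6d -> max orphan file %8lld bytes\n",
+                         1 << blkbits[i], 512LL << blkbits[i]);
+          return 0;
+  }
+
+With 64KB blocks the limit becomes 32 MiB, comfortably above the
+8650752-byte orphan file rejected in the log above.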
+
+Fixes: 0a6ce20c1564 ("ext4: verify orphan file size is not too big")
+Signed-off-by: Baokun Li 
+Reviewed-by: Jan Kara 
+Message-ID: <20251120134233.2994147-1-libaokun@huaweicloud.com>
+Signed-off-by: Theodore Ts'o 
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman 
+---
+ fs/ext4/orphan.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/orphan.c
++++ b/fs/ext4/orphan.c
+@@ -8,6 +8,8 @@
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
+ 
++#define EXT4_MAX_ORPHAN_FILE_BLOCKS	512
++
+ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
+ {
+ int i, j, start;
+@@ -588,7 +590,7 @@ int ext4_init_orphan_info(struct super_b
+ * consuming absurd amounts of memory when pinning blocks of orphan
+ * file in memory.
+ */
+- if (inode->i_size > 8 << 20) {
++ if (inode->i_size > (EXT4_MAX_ORPHAN_FILE_BLOCKS << inode->i_blkbits)) {
+ ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
diff --git a/queue-6.18/ext4-check-if-mount_opts-is-nul-terminated-in-ext4_ioctl_set_tune_sb.patch b/queue-6.18/ext4-check-if-mount_opts-is-nul-terminated-in-ext4_ioctl_set_tune_sb.patch
new file mode 100644
index 0000000000..f0a1369a22
--- /dev/null
+++ b/queue-6.18/ext4-check-if-mount_opts-is-nul-terminated-in-ext4_ioctl_set_tune_sb.patch
@@ -0,0 +1,40 @@
+From 3db63d2c2d1d1e78615dd742568c5a2d55291ad1 Mon Sep 17 00:00:00 2001
+From: Fedor Pchelkin 
+Date: Sat, 1 Nov 2025 19:04:29 +0300
+Subject: ext4: check if mount_opts is NUL-terminated in ext4_ioctl_set_tune_sb()
+
+From: Fedor Pchelkin 
+
+commit 3db63d2c2d1d1e78615dd742568c5a2d55291ad1 upstream.
+
+params.mount_opts may arrive as a non-NUL-terminated string, although
+userspace is expected to pass a NUL-terminated one. Add an extra check to
+ensure this holds true. Note that further code uses strscpy_pad(), so this
+check only serves to properly inform the user that incorrect data was provided.
+
+Found by Linux Verification Center (linuxtesting.org).
+
+Signed-off-by: Fedor Pchelkin 
+Reviewed-by: Baokun Li 
+Reviewed-by: Jan Kara 
+Message-ID: <20251101160430.222297-2-pchelkin@ispras.ru>
+Signed-off-by: Theodore Ts'o 
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman 
+---
+ fs/ext4/ioctl.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -1394,6 +1394,10 @@ static int ext4_ioctl_set_tune_sb(struct
+ if (copy_from_user(&params, in, sizeof(params)))
+ return -EFAULT;
+
++ if (strnlen(params.mount_opts, sizeof(params.mount_opts)) ==
++ sizeof(params.mount_opts))
++ return -E2BIG;
++
+ if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0)
+ return -EOPNOTSUPP;
+
diff --git a/queue-6.18/ext4-clear-i_state_flags-when-alloc-inode.patch b/queue-6.18/ext4-clear-i_state_flags-when-alloc-inode.patch
new file mode 100644
index 0000000000..dc27819659
--- /dev/null
+++ b/queue-6.18/ext4-clear-i_state_flags-when-alloc-inode.patch
@@ -0,0 +1,59 @@
+From 4091c8206cfd2e3bb529ef260887296b90d9b6a2 Mon Sep 17 00:00:00 2001
+From: Haibo Chen 
+Date: Tue, 4 Nov 2025 16:12:24 +0800
+Subject: ext4: clear i_state_flags when alloc inode
+
+From: Haibo Chen 
+
+commit 4091c8206cfd2e3bb529ef260887296b90d9b6a2 upstream.
+
+i_state_flags is only used on 32-bit archs and must be cleared when an
+inode is allocated.
+This issue was found when unmounting ext4: an inode was sometimes
+accidentally tracked as an orphan, causing an ext4 message dump.
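+
+A deterministic userspace sketch of the failure mode (illustrative
+only, not part of the patch): a one-slot "slab" that reuses memory, so
+state flags survive from a previous inode unless cleared at allocation
+time:
+
+  #include <stdio.h>
+
+  struct fake_inode { unsigned long i_state_flags; };
+
+  static struct fake_inode slot;          /* one-slot object cache */
+
+  static struct fake_inode *alloc_inode(int clear_on_alloc)
+  {
+          if (clear_on_alloc)
+                  slot.i_state_flags = 0; /* the fix: clear at alloc */
+          return &slot;
+  }
+
+  int main(void)
+  {
+          alloc_inode(1)->i_state_flags = 0x8; /* say, "orphan tracked" */
+
+          /* inode freed, slot handed out again */
+          printf("without clear: %#lx\n", alloc_inode(0)->i_state_flags);
+          printf("with clear:    %#lx\n", alloc_inode(1)->i_state_flags);
+          return 0;
+  }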
+ +Fixes: acf943e9768e ("ext4: fix checks for orphan inodes") +Signed-off-by: Haibo Chen +Reviewed-by: Baokun Li +Reviewed-by: Zhang Yi +Reviewed-by: Jan Kara +Message-ID: <20251104-ext4-v1-1-73691a0800f9@nxp.com> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ialloc.c | 1 - + fs/ext4/inode.c | 1 - + fs/ext4/super.c | 1 + + 3 files changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -1293,7 +1293,6 @@ got: + ei->i_csum_seed = ext4_chksum(csum, (__u8 *)&gen, sizeof(gen)); + } + +- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ + ext4_set_inode_state(inode, EXT4_STATE_NEW); + + ei->i_extra_isize = sbi->s_want_extra_isize; +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5288,7 +5288,6 @@ struct inode *__ext4_iget(struct super_b + ei->i_projid = make_kprojid(&init_user_ns, i_projid); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + +- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ + ei->i_inline_off = 0; + ei->i_dir_start_lookup = 0; + ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1396,6 +1396,7 @@ static struct inode *ext4_alloc_inode(st + + inode_set_iversion(&ei->vfs_inode, 1); + ei->i_flags = 0; ++ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ + spin_lock_init(&ei->i_raw_lock); + ei->i_prealloc_node = RB_ROOT; + atomic_set(&ei->i_prealloc_active, 0); diff --git a/queue-6.18/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch b/queue-6.18/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch new file mode 100644 index 0000000000..7b65224dfe --- /dev/null +++ b/queue-6.18/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch @@ -0,0 +1,63 @@ +From 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 Mon Sep 17 00:00:00 2001 +From: Yongjian Sun +Date: Thu, 6 Nov 2025 14:06:13 +0800 +Subject: ext4: fix incorrect group number assertion in mb_check_buddy + +From: Yongjian Sun + +commit 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 upstream. + +When the MB_CHECK_ASSERT macro is enabled, an assertion failure can +occur in __mb_check_buddy when checking preallocated blocks (pa) in +a block group: + +Assertion failure in mb_free_blocks() : "groupnr == e4b->bd_group" + +This happens when a pa at the very end of a block group (e.g., +pa_pstart=32765, pa_len=3 in a group of 32768 blocks) becomes +exhausted - its pa_pstart is advanced by pa_len to 32768, which +lies in the next block group. If this exhausted pa (with pa_len == 0) +is still in the bb_prealloc_list during the buddy check, the assertion +incorrectly flags it as belonging to the wrong group. A possible +sequence is as follows: + +ext4_mb_new_blocks + ext4_mb_release_context + pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len) + pa->pa_len -= ac->ac_b_ex.fe_len + + __mb_check_buddy + for each pa in group + ext4_get_group_no_and_offset + MB_CHECK_ASSERT(groupnr == e4b->bd_group) + +To fix this, we modify the check to skip block group validation for +exhausted preallocations (where pa_len == 0). Such entries are in a +transitional state and will be removed from the list soon, so they +should not trigger an assertion. This change prevents the false +positive while maintaining the integrity of the checks for active +allocations. 
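+
+The arithmetic of the race, as a tiny userspace sketch (illustrative
+only, not part of the patch), assuming 32768 blocks per group:
+
+  #include <stdio.h>
+
+  #define BLOCKS_PER_GROUP 32768UL
+
+  int main(void)
+  {
+          unsigned long pa_pstart = 32765, pa_len = 3;
+
+          printf("group before: %lu\n", pa_pstart / BLOCKS_PER_GROUP);
+          pa_pstart += pa_len;            /* pa exhausted ... */
+          pa_len = 0;                     /* ... pa_len drops to 0 */
+          printf("group after:  %lu\n", pa_pstart / BLOCKS_PER_GROUP);
+          return 0;
+  }
+
+The advanced pa_pstart now maps to group 1 while the buddy being
+checked belongs to group 0, which is exactly what the assertion trips
+over.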
+ +Fixes: c9de560ded61f ("ext4: Add multi block allocator for ext4") +Signed-off-by: Yongjian Sun +Reviewed-by: Baokun Li +Reviewed-by: Jan Kara +Message-ID: <20251106060614.631382-2-sunyongjian@huaweicloud.com> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -783,6 +783,8 @@ static void __mb_check_buddy(struct ext4 + ext4_group_t groupnr; + struct ext4_prealloc_space *pa; + pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); ++ if (!pa->pa_len) ++ continue; + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k); + MB_CHECK_ASSERT(groupnr == e4b->bd_group); + for (i = 0; i < pa->pa_len; i++) diff --git a/queue-6.18/ext4-fix-string-copying-in-parse_apply_sb_mount_options.patch b/queue-6.18/ext4-fix-string-copying-in-parse_apply_sb_mount_options.patch new file mode 100644 index 0000000000..9572fd3e0a --- /dev/null +++ b/queue-6.18/ext4-fix-string-copying-in-parse_apply_sb_mount_options.patch @@ -0,0 +1,81 @@ +From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001 +From: Fedor Pchelkin +Date: Sat, 1 Nov 2025 19:04:28 +0300 +Subject: ext4: fix string copying in parse_apply_sb_mount_options() + +From: Fedor Pchelkin + +commit ee5a977b4e771cc181f39d504426dbd31ed701cc upstream. + +strscpy_pad() can't be used to copy a non-NUL-term string into a NUL-term +string of possibly bigger size. Commit 0efc5990bca5 ("string.h: Introduce +memtostr() and memtostr_pad()") provides additional information in that +regard. So if this happens, the following warning is observed: + +strnlen: detected buffer overflow: 65 byte read of buffer size 64 +WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032 +Modules linked in: +CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032 +Call Trace: + + __fortify_panic+0x1f/0x30 lib/string_helpers.c:1039 + strnlen include/linux/fortify-string.h:235 [inline] + sized_strscpy include/linux/fortify-string.h:309 [inline] + parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline] + __ext4_fill_super fs/ext4/super.c:5261 [inline] + ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706 + get_tree_bdev_flags+0x387/0x620 fs/super.c:1636 + vfs_get_tree+0x93/0x380 fs/super.c:1814 + do_new_mount fs/namespace.c:3553 [inline] + path_mount+0x6ae/0x1f70 fs/namespace.c:3880 + do_mount fs/namespace.c:3893 [inline] + __do_sys_mount fs/namespace.c:4103 [inline] + __se_sys_mount fs/namespace.c:4080 [inline] + __x64_sys_mount+0x280/0x300 fs/namespace.c:4080 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Since userspace is expected to provide s_mount_opts field to be at most 63 +characters long with the ending byte being NUL-term, use a 64-byte buffer +which matches the size of s_mount_opts, so that strscpy_pad() does its job +properly. Return with error if the user still managed to provide a +non-NUL-term string here. + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
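+
+A userspace sketch (illustrative only, not part of the patch) of the
+strscpy_pad() contract assumed here, with a simplified stand-in for
+the kernel helper: with equal-sized source and destination buffers the
+read stays bounded and a missing NUL is reported as -E2BIG:
+
+  #include <stdio.h>
+  #include <string.h>
+
+  #define E2BIG 7
+
+  static long strscpy_pad_sketch(char *dst, const char *src, size_t dsize)
+  {
+          size_t len = strnlen(src, dsize); /* bounded, unlike strlen */
+
+          if (len == dsize) {               /* no NUL within dsize bytes */
+                  memcpy(dst, src, dsize - 1);
+                  dst[dsize - 1] = '\0';
+                  return -E2BIG;
+          }
+          memcpy(dst, src, len + 1);
+          memset(dst + len + 1, 0, dsize - len - 1); /* the "_pad" part */
+          return (long)len;
+  }
+
+  int main(void)
+  {
+          char s_mount_opts[64], dst[64];
+
+          memset(s_mount_opts, 'x', sizeof(s_mount_opts)); /* no NUL */
+          printf("ret = %ld\n",
+                 strscpy_pad_sketch(dst, s_mount_opts, sizeof(dst)));
+          return 0;
+  }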
+ +Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()") +Cc: stable@vger.kernel.org +Signed-off-by: Fedor Pchelkin +Reviewed-by: Baokun Li +Reviewed-by: Jan Kara +Message-ID: <20251101160430.222297-1-pchelkin@ispras.ru> +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/super.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2475,7 +2475,7 @@ static int parse_apply_sb_mount_options( + struct ext4_fs_context *m_ctx) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); +- char s_mount_opts[65]; ++ char s_mount_opts[64]; + struct ext4_fs_context *s_ctx = NULL; + struct fs_context *fc = NULL; + int ret = -ENOMEM; +@@ -2483,7 +2483,8 @@ static int parse_apply_sb_mount_options( + if (!sbi->s_es->s_mount_opts[0]) + return 0; + +- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts); ++ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0) ++ return -E2BIG; + + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + if (!fc) diff --git a/queue-6.18/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch b/queue-6.18/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch new file mode 100644 index 0000000000..6d3df4ad37 --- /dev/null +++ b/queue-6.18/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch @@ -0,0 +1,43 @@ +From b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 Mon Sep 17 00:00:00 2001 +From: Karina Yankevich +Date: Wed, 22 Oct 2025 12:32:53 +0300 +Subject: ext4: xattr: fix null pointer deref in ext4_raw_inode() + +From: Karina Yankevich + +commit b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 upstream. + +If ext4_get_inode_loc() fails (e.g. if it returns -EFSCORRUPTED), +iloc.bh will remain set to NULL. Since ext4_xattr_inode_dec_ref_all() +lacks error checking, this will lead to a null pointer dereference +in ext4_raw_inode(), called right after ext4_get_inode_loc(). + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Fixes: c8e008b60492 ("ext4: ignore xattrs past end") +Cc: stable@kernel.org +Signed-off-by: Karina Yankevich +Reviewed-by: Sergey Shtylyov +Reviewed-by: Baokun Li +Message-ID: <20251022093253.3546296-1-k.yankevich@omp.ru> +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/xattr.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1174,7 +1174,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *h + if (block_csum) + end = (void *)bh->b_data + bh->b_size; + else { +- ext4_get_inode_loc(parent, &iloc); ++ err = ext4_get_inode_loc(parent, &iloc); ++ if (err) { ++ EXT4_ERROR_INODE(parent, "parent inode loc (error %d)", err); ++ return; ++ } + end = (void *)ext4_raw_inode(&iloc) + EXT4_SB(parent->i_sb)->s_inode_size; + } + diff --git a/queue-6.18/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch b/queue-6.18/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch new file mode 100644 index 0000000000..d5d3410361 --- /dev/null +++ b/queue-6.18/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch @@ -0,0 +1,85 @@ +From 524c3853831cf4f7e1db579e487c757c3065165c Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Wed, 22 Oct 2025 20:11:37 +0900 +Subject: jbd2: use a per-journal lock_class_key for jbd2_trans_commit_key + +From: Tetsuo Handa + +commit 524c3853831cf4f7e1db579e487c757c3065165c upstream. 
+ +syzbot is reporting possibility of deadlock due to sharing lock_class_key +for jbd2_handle across ext4 and ocfs2. But this is a false positive, for +one disk partition can't have two filesystems at the same time. + +Reported-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=6e493c165d26d6fcbf72 +Signed-off-by: Tetsuo Handa +Tested-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com +Reviewed-by: Jan Kara +Message-ID: <987110fc-5470-457a-a218-d286a09dd82f@I-love.SAKURA.ne.jp> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/journal.c | 6 ++++-- + include/linux/jbd2.h | 6 ++++++ + 2 files changed, 10 insertions(+), 2 deletions(-) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1521,7 +1521,6 @@ static journal_t *journal_init_common(st + struct block_device *fs_dev, + unsigned long long start, int len, int blocksize) + { +- static struct lock_class_key jbd2_trans_commit_key; + journal_t *journal; + int err; + int n; +@@ -1530,6 +1529,7 @@ static journal_t *journal_init_common(st + if (!journal) + return ERR_PTR(-ENOMEM); + ++ lockdep_register_key(&journal->jbd2_trans_commit_key); + journal->j_blocksize = blocksize; + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; +@@ -1560,7 +1560,7 @@ static journal_t *journal_init_common(st + journal->j_max_batch_time = 15000; /* 15ms */ + atomic_set(&journal->j_reserved_credits, 0); + lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", +- &jbd2_trans_commit_key, 0); ++ &journal->jbd2_trans_commit_key, 0); + + /* The journal is marked for error until we succeed with recovery! */ + journal->j_flags = JBD2_ABORT; +@@ -1611,6 +1611,7 @@ err_cleanup: + kfree(journal->j_wbuf); + jbd2_journal_destroy_revoke(journal); + journal_fail_superblock(journal); ++ lockdep_unregister_key(&journal->jbd2_trans_commit_key); + kfree(journal); + return ERR_PTR(err); + } +@@ -2187,6 +2188,7 @@ int jbd2_journal_destroy(journal_t *jour + jbd2_journal_destroy_revoke(journal); + kfree(journal->j_fc_wbuf); + kfree(journal->j_wbuf); ++ lockdep_unregister_key(&journal->jbd2_trans_commit_key); + kfree(journal); + + return err; +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1253,6 +1253,12 @@ struct journal_s + */ + struct lockdep_map j_trans_commit_map; + #endif ++ /** ++ * @jbd2_trans_commit_key: ++ * ++ * "struct lock_class_key" for @j_trans_commit_map ++ */ ++ struct lock_class_key jbd2_trans_commit_key; + + /** + * @j_fc_cleanup_callback: diff --git a/queue-6.18/jbd2-use-a-weaker-annotation-in-journal-handling.patch b/queue-6.18/jbd2-use-a-weaker-annotation-in-journal-handling.patch new file mode 100644 index 0000000000..4fb46b9550 --- /dev/null +++ b/queue-6.18/jbd2-use-a-weaker-annotation-in-journal-handling.patch @@ -0,0 +1,49 @@ +From 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 Mon Sep 17 00:00:00 2001 +From: Byungchul Park +Date: Fri, 24 Oct 2025 16:39:40 +0900 +Subject: jbd2: use a weaker annotation in journal handling + +From: Byungchul Park + +commit 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 upstream. + +jbd2 journal handling code doesn't want jbd2_might_wait_for_commit() +to be placed between start_this_handle() and stop_this_handle(). So it +marks the region with rwsem_acquire_read() and rwsem_release(). + +However, the annotation is too strong for that purpose. We don't have +to use more than try lock annotation for that. + +rwsem_acquire_read() implies: + + 1. might be a waiter on contention of the lock. + 2. 
enter the critical section of the lock.
+
+All we need here is 2, not 1, so the trylock version of the annotation
+is sufficient for that purpose. Now that dept partially relies on
+lockdep annotations, dept interprets rwsem_acquire_read() as a
+potential wait and might report a deadlock based on that wait.
+
+Replace it with the trylock version of the annotation.
+
+Signed-off-by: Byungchul Park 
+Reviewed-by: Jan Kara 
+Cc: stable@kernel.org
+Message-ID: <20251024073940.1063-1-byungchul@sk.com>
+Signed-off-by: Theodore Ts'o 
+Signed-off-by: Greg Kroah-Hartman 
+---
+ fs/jbd2/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -441,7 +441,7 @@ repeat:
+ read_unlock(&journal->j_state_lock);
+ current->journal_info = handle;
+
+- rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
++ rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_);
+ jbd2_journal_free_transaction(new_transaction);
+ /*
+ * Ensure that no allocations done while the transaction is open are
diff --git a/queue-6.18/media-v4l2-mem2mem-fix-outdated-documentation.patch b/queue-6.18/media-v4l2-mem2mem-fix-outdated-documentation.patch
new file mode 100644
index 0000000000..b6773c07d2
--- /dev/null
+++ b/queue-6.18/media-v4l2-mem2mem-fix-outdated-documentation.patch
@@ -0,0 +1,37 @@
+From 082b86919b7a94de01d849021b4da820a6cb89dc Mon Sep 17 00:00:00 2001
+From: Laurent Pinchart 
+Date: Wed, 8 Oct 2025 12:55:18 +0300
+Subject: media: v4l2-mem2mem: Fix outdated documentation
+
+From: Laurent Pinchart 
+
+commit 082b86919b7a94de01d849021b4da820a6cb89dc upstream.
+
+Commit cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in
+v4l2_m2m_job_finish") deferred calls to .device_run() to a work queue to
+avoid recursive calls when a job is finished right away from
+.device_run(). It failed to update the v4l2_m2m_job_finish()
+documentation that still states the function must not be called from
+.device_run(). Fix it.
+
+Fixes: cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in v4l2_m2m_job_finish")
+Cc: stable@vger.kernel.org
+Signed-off-by: Laurent Pinchart 
+Signed-off-by: Hans Verkuil 
+Signed-off-by: Greg Kroah-Hartman 
+---
+ include/media/v4l2-mem2mem.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -192,8 +192,7 @@ void v4l2_m2m_try_schedule(struct v4l2_m
+ * other instances to take control of the device.
+ *
+ * This function has to be called only after &v4l2_m2m_ops->device_run
+- * callback has been called on the driver. To prevent recursion, it should
+- * not be called directly from the &v4l2_m2m_ops->device_run callback though.
++ * callback has been called on the driver.
+ */
+ void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+ struct v4l2_m2m_ctx *m2m_ctx);
diff --git a/queue-6.18/mm-huge_memory-add-pmd-folio-to-ds_queue-in-do_huge_zero_wp_pmd.patch b/queue-6.18/mm-huge_memory-add-pmd-folio-to-ds_queue-in-do_huge_zero_wp_pmd.patch
new file mode 100644
index 0000000000..ff40e8bd7e
--- /dev/null
+++ b/queue-6.18/mm-huge_memory-add-pmd-folio-to-ds_queue-in-do_huge_zero_wp_pmd.patch
@@ -0,0 +1,55 @@
+From 2a1351cd4176ee1809b0900d386919d03b7652f8 Mon Sep 17 00:00:00 2001
+From: Wei Yang 
+Date: Wed, 8 Oct 2025 09:54:52 +0000
+Subject: mm/huge_memory: add pmd folio to ds_queue in do_huge_zero_wp_pmd()
+
+From: Wei Yang 
+
+commit 2a1351cd4176ee1809b0900d386919d03b7652f8 upstream.
+
+We add the pmd folio to the ds_queue on the first page fault in
+__do_huge_pmd_anonymous_page(), so that we can split it in case of memory
+pressure. The same should happen for a pmd folio during a wp page fault.
+
+Commit 1ced09e0331f ("mm: allocate THP on hugezeropage wp-fault") missed
+adding it to the ds_queue, which means the system may not reclaim enough
+memory under memory pressure even when the pmd folio is underused.
+
+Move deferred_split_folio() into map_anon_folio_pmd() to make the pmd
+folio installation consistent.
+
+Link: https://lkml.kernel.org/r/20251008095453.18772-2-richard.weiyang@gmail.com
+Fixes: 1ced09e0331f ("mm: allocate THP on hugezeropage wp-fault")
+Signed-off-by: Wei Yang 
+Acked-by: David Hildenbrand 
+Reviewed-by: Lance Yang 
+Reviewed-by: Dev Jain 
+Acked-by: Usama Arif 
+Reviewed-by: Zi Yan 
+Reviewed-by: Baolin Wang 
+Cc: Matthew Wilcox 
+Cc: 
+Signed-off-by: Andrew Morton 
+Signed-off-by: Greg Kroah-Hartman 
+---
+ mm/huge_memory.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1233,6 +1233,7 @@ static void map_anon_folio_pmd(struct fo
+ count_vm_event(THP_FAULT_ALLOC);
+ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
+ count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
++ deferred_split_folio(folio, false);
+ }
+
+ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
+@@ -1273,7 +1274,6 @@ static vm_fault_t __do_huge_pmd_anonymou
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
+ mm_inc_nr_ptes(vma->vm_mm);
+- deferred_split_folio(folio, false);
+ spin_unlock(vmf->ptl);
+ }
+
diff --git a/queue-6.18/mm-ksm-fix-exec-fork-inheritance-support-for-prctl.patch b/queue-6.18/mm-ksm-fix-exec-fork-inheritance-support-for-prctl.patch
new file mode 100644
index 0000000000..ced6222dcc
--- /dev/null
+++ b/queue-6.18/mm-ksm-fix-exec-fork-inheritance-support-for-prctl.patch
@@ -0,0 +1,143 @@
+From 590c03ca6a3fbb114396673314e2aa483839608b Mon Sep 17 00:00:00 2001
+From: xu xin 
+Date: Tue, 7 Oct 2025 18:28:21 +0800
+Subject: mm/ksm: fix exec/fork inheritance support for prctl
+
+From: xu xin 
+
+commit 590c03ca6a3fbb114396673314e2aa483839608b upstream.
+
+Patch series "ksm: fix exec/fork inheritance", v2.
+
+This series fixes exec/fork inheritance. See the detailed description of
+the issue below.
+
+
+This patch (of 2):
+
+Background
+==========
+
+commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
+introduced MMF_VM_MERGE_ANY for mm->flags, and allowed users to set it by
+prctl() so that the process's VMAs are forcibly scanned by ksmd.
+
+Subsequently, commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
+supported inheriting the MMF_VM_MERGE_ANY flag when a task calls execve().
+
+Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
+fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
+by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
+
+Problem
+=======
+
+In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
+during exec/fork can fail. For example, when the scanning frequency of
+ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
+still fail to pass it to the newly exec'd process. This happens because
+ksm_execve() is executed too early in the do_execve flow (prematurely
+adding the new mm_struct to the ksm_mm_slot list).
+
+As a result, before do_execve completes, ksmd may have already performed a
+scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
+clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
+executes, the inheritance of the MMF_VM_MERGE_ANY flag is missed.
+
+Root reason
+===========
+
+commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clears
+the MMF_VM_MERGE_ANY flag when ksmd finds no VM_MERGEABLE VMAs.
+
+Solution
+========
+
+Firstly, don't clear MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE
+VMAs, because their mm_struct may have just been added to the ksm_mm_slot
+list while its process has not yet officially started running or has not
+yet performed mmap/brk to allocate anonymous VMAs.
+
+Secondly, recheck MMF_VM_MERGEABLE if a process has MMF_VM_MERGE_ANY set,
+and create a mm_slot and join it into ksm_scan_list again.
+
+Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
+Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
+Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
+Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
+Signed-off-by: xu xin 
+Cc: Stefan Roesch 
+Cc: David Hildenbrand 
+Cc: Jinjiang Tu 
+Cc: Wang Yaxin 
+Cc: Yang Yang 
+Cc: 
+Signed-off-by: Andrew Morton 
+Signed-off-by: Greg Kroah-Hartman 
+---
+ include/linux/ksm.h | 4 ++--
+ mm/ksm.c | 20 +++++++++++++++++---
+ 2 files changed, 19 insertions(+), 5 deletions(-)
+
+--- a/include/linux/ksm.h
++++ b/include/linux/ksm.h
+@@ -17,7 +17,7 @@
+ #ifdef CONFIG_KSM
+ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, int advice, vm_flags_t *vm_flags);
+-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
++vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
+ vm_flags_t vm_flags);
+ int ksm_enable_merge_any(struct mm_struct *mm);
+ int ksm_disable_merge_any(struct mm_struct *mm);
+@@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_str
+
+ #else /* !CONFIG_KSM */
+
+-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
++static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
+ const struct file *file, vm_flags_t vm_flags)
+ {
+ return vm_flags;
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -2712,8 +2712,14 @@ no_vmas:
+ spin_unlock(&ksm_mmlist_lock);
+
+ mm_slot_free(mm_slot_cache, mm_slot);
++ /*
++ * Only clear MMF_VM_MERGEABLE. We must not clear
++ * MMF_VM_MERGE_ANY, because for those MMF_VM_MERGE_ANY process,
++ * perhaps their mm_struct has just been added to ksm_mm_slot
++ * list, and its process has not yet officially started running
++ * or has not yet performed mmap/brk to allocate anonymous VMAS.
++ */
+ mm_flags_clear(MMF_VM_MERGEABLE, mm);
+- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
+ mmap_read_unlock(mm);
+ mmdrop(mm);
+ } else {
+@@ -2831,12 +2837,20 @@ static int __ksm_del_vma(struct vm_area_
+ *
+ * Returns: @vm_flags possibly updated to mark mergeable.
+ */
+-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
++vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
+ vm_flags_t vm_flags)
+ {
+ if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
+- __ksm_should_add_vma(file, vm_flags))
++ __ksm_should_add_vma(file, vm_flags)) {
+ vm_flags |= VM_MERGEABLE;
++ /*
++ * Generally, the flags here always include MMF_VM_MERGEABLE.
++ * However, in rare cases, this flag may be cleared by ksmd who
++ * scans a cycle without finding any mergeable vma.
++ */
++ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
++ __ksm_enter(mm);
++ }
+
+ return vm_flags;
+ }
diff --git a/queue-6.18/mm-slab-introduce-kvfree_rcu_barrier_on_cache-for-cache-destruction.patch b/queue-6.18/mm-slab-introduce-kvfree_rcu_barrier_on_cache-for-cache-destruction.patch
new file mode 100644
index 0000000000..d76fd21678
--- /dev/null
+++ b/queue-6.18/mm-slab-introduce-kvfree_rcu_barrier_on_cache-for-cache-destruction.patch
@@ -0,0 +1,253 @@
+From 0f35040de59371ad542b915d7b91176c9910dadc Mon Sep 17 00:00:00 2001
+From: Harry Yoo 
+Date: Mon, 8 Dec 2025 00:41:47 +0900
+Subject: mm/slab: introduce kvfree_rcu_barrier_on_cache() for cache destruction
+
+From: Harry Yoo 
+
+commit 0f35040de59371ad542b915d7b91176c9910dadc upstream.
+
+Currently, kvfree_rcu_barrier() flushes RCU sheaves across all slab
+caches when a cache is destroyed. This is unnecessary; only the RCU
+sheaves belonging to the cache being destroyed need to be flushed.
+
+As suggested by Vlastimil Babka, introduce a weaker form of
+kvfree_rcu_barrier() that operates on a specific slab cache.
+
+Factor out flush_rcu_sheaves_on_cache() from flush_all_rcu_sheaves() and
+call it from flush_all_rcu_sheaves() and kvfree_rcu_barrier_on_cache().
+
+Call kvfree_rcu_barrier_on_cache() instead of kvfree_rcu_barrier() on
+cache destruction.
+
+The performance benefit was evaluated on a 12-core, 24-thread AMD Ryzen
+5900X machine (1 socket) by loading the slub_kunit module.
+
+Before:
+ Total calls: 19
+ Average latency (us): 18127
+ Total time (us): 344414
+
+After:
+ Total calls: 19
+ Average latency (us): 10066
+ Total time (us): 191264
+
+Two performance regressions have been reported:
+ - stress module loader test's runtime increases by 50-60% (Daniel)
+ - internal graphics test's runtime on Tegra234 increases by 35% (Jon)
+
+They are fixed by this change.
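+
+A hedged usage sketch (kernel-style pseudocode, not from this patch):
+kmem_cache_destroy() now performs the per-cache flush itself, but a
+module that must wait for its in-flight kfree_rcu() frees before
+tearing down other state could call the narrower barrier directly:
+
+  static struct kmem_cache *foo_cache;  /* hypothetical cache */
+
+  static void foo_exit(void)
+  {
+          /* waits only for foo_cache, not every cache in the system */
+          kvfree_rcu_barrier_on_cache(foo_cache);
+          /* ... tear down structures that referenced foo objects ... */
+          kmem_cache_destroy(foo_cache);
+  }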
+ +Suggested-by: Vlastimil Babka +Fixes: ec66e0d59952 ("slab: add sheaf support for batching kfree_rcu() operations") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/linux-mm/1bda09da-93be-4737-aef0-d47f8c5c9301@suse.cz +Reported-and-tested-by: Daniel Gomez +Closes: https://lore.kernel.org/linux-mm/0406562e-2066-4cf8-9902-b2b0616dd742@kernel.org +Reported-and-tested-by: Jon Hunter +Closes: https://lore.kernel.org/linux-mm/e988eff6-1287-425e-a06c-805af5bbf262@nvidia.com +Signed-off-by: Harry Yoo +Link: https://patch.msgid.link/20251207154148.117723-1-harry.yoo@oracle.com +Signed-off-by: Vlastimil Babka +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/slab.h | 7 ++++++ + mm/slab.h | 1 + mm/slab_common.c | 52 +++++++++++++++++++++++++++++++++------------- + mm/slub.c | 57 +++++++++++++++++++++++++++------------------------ + 4 files changed, 76 insertions(+), 41 deletions(-) + +--- a/include/linux/slab.h ++++ b/include/linux/slab.h +@@ -1150,10 +1150,17 @@ static inline void kvfree_rcu_barrier(vo + rcu_barrier(); + } + ++static inline void kvfree_rcu_barrier_on_cache(struct kmem_cache *s) ++{ ++ rcu_barrier(); ++} ++ + static inline void kfree_rcu_scheduler_running(void) { } + #else + void kvfree_rcu_barrier(void); + ++void kvfree_rcu_barrier_on_cache(struct kmem_cache *s); ++ + void kfree_rcu_scheduler_running(void); + #endif + +--- a/mm/slab.h ++++ b/mm/slab.h +@@ -442,6 +442,7 @@ static inline bool is_kmalloc_normal(str + + bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj); + void flush_all_rcu_sheaves(void); ++void flush_rcu_sheaves_on_cache(struct kmem_cache *s); + + #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA32 | SLAB_PANIC | \ +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -492,7 +492,7 @@ void kmem_cache_destroy(struct kmem_cach + return; + + /* in-flight kfree_rcu()'s may include objects from our cache */ +- kvfree_rcu_barrier(); ++ kvfree_rcu_barrier_on_cache(s); + + if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) && + (s->flags & SLAB_TYPESAFE_BY_RCU)) { +@@ -2039,25 +2039,13 @@ unlock_return: + } + EXPORT_SYMBOL_GPL(kvfree_call_rcu); + +-/** +- * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete. +- * +- * Note that a single argument of kvfree_rcu() call has a slow path that +- * triggers synchronize_rcu() following by freeing a pointer. It is done +- * before the return from the function. Therefore for any single-argument +- * call that will result in a kfree() to a cache that is to be destroyed +- * during module exit, it is developer's responsibility to ensure that all +- * such calls have returned before the call to kmem_cache_destroy(). +- */ +-void kvfree_rcu_barrier(void) ++static inline void __kvfree_rcu_barrier(void) + { + struct kfree_rcu_cpu_work *krwp; + struct kfree_rcu_cpu *krcp; + bool queued; + int i, cpu; + +- flush_all_rcu_sheaves(); +- + /* + * Firstly we detach objects and queue them over an RCU-batch + * for all CPUs. Finally queued works are flushed for each CPU. +@@ -2119,8 +2107,43 @@ void kvfree_rcu_barrier(void) + } + } + } ++ ++/** ++ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete. ++ * ++ * Note that a single argument of kvfree_rcu() call has a slow path that ++ * triggers synchronize_rcu() following by freeing a pointer. It is done ++ * before the return from the function. 
Therefore for any single-argument ++ * call that will result in a kfree() to a cache that is to be destroyed ++ * during module exit, it is developer's responsibility to ensure that all ++ * such calls have returned before the call to kmem_cache_destroy(). ++ */ ++void kvfree_rcu_barrier(void) ++{ ++ flush_all_rcu_sheaves(); ++ __kvfree_rcu_barrier(); ++} + EXPORT_SYMBOL_GPL(kvfree_rcu_barrier); + ++/** ++ * kvfree_rcu_barrier_on_cache - Wait for in-flight kvfree_rcu() calls on a ++ * specific slab cache. ++ * @s: slab cache to wait for ++ * ++ * See the description of kvfree_rcu_barrier() for details. ++ */ ++void kvfree_rcu_barrier_on_cache(struct kmem_cache *s) ++{ ++ if (s->cpu_sheaves) ++ flush_rcu_sheaves_on_cache(s); ++ /* ++ * TODO: Introduce a version of __kvfree_rcu_barrier() that works ++ * on a specific slab cache. ++ */ ++ __kvfree_rcu_barrier(); ++} ++EXPORT_SYMBOL_GPL(kvfree_rcu_barrier_on_cache); ++ + static unsigned long + kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) + { +@@ -2216,4 +2239,3 @@ void __init kvfree_rcu_init(void) + } + + #endif /* CONFIG_KVFREE_RCU_BATCHED */ +- +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -4118,42 +4118,47 @@ static void flush_rcu_sheaf(struct work_ + + + /* needed for kvfree_rcu_barrier() */ +-void flush_all_rcu_sheaves(void) ++void flush_rcu_sheaves_on_cache(struct kmem_cache *s) + { + struct slub_flush_work *sfw; +- struct kmem_cache *s; + unsigned int cpu; + ++ mutex_lock(&flush_lock); ++ ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ ++ /* ++ * we don't check if rcu_free sheaf exists - racing ++ * __kfree_rcu_sheaf() might have just removed it. ++ * by executing flush_rcu_sheaf() on the cpu we make ++ * sure the __kfree_rcu_sheaf() finished its call_rcu() ++ */ ++ ++ INIT_WORK(&sfw->work, flush_rcu_sheaf); ++ sfw->s = s; ++ queue_work_on(cpu, flushwq, &sfw->work); ++ } ++ ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ flush_work(&sfw->work); ++ } ++ ++ mutex_unlock(&flush_lock); ++} ++ ++void flush_all_rcu_sheaves(void) ++{ ++ struct kmem_cache *s; ++ + cpus_read_lock(); + mutex_lock(&slab_mutex); + + list_for_each_entry(s, &slab_caches, list) { + if (!s->cpu_sheaves) + continue; +- +- mutex_lock(&flush_lock); +- +- for_each_online_cpu(cpu) { +- sfw = &per_cpu(slub_flush, cpu); +- +- /* +- * we don't check if rcu_free sheaf exists - racing +- * __kfree_rcu_sheaf() might have just removed it. +- * by executing flush_rcu_sheaf() on the cpu we make +- * sure the __kfree_rcu_sheaf() finished its call_rcu() +- */ +- +- INIT_WORK(&sfw->work, flush_rcu_sheaf); +- sfw->s = s; +- queue_work_on(cpu, flushwq, &sfw->work); +- } +- +- for_each_online_cpu(cpu) { +- sfw = &per_cpu(slub_flush, cpu); +- flush_work(&sfw->work); +- } +- +- mutex_unlock(&flush_lock); ++ flush_rcu_sheaves_on_cache(s); + } + + mutex_unlock(&slab_mutex); diff --git a/queue-6.18/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch b/queue-6.18/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch new file mode 100644 index 0000000000..3e761926fd --- /dev/null +++ b/queue-6.18/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch @@ -0,0 +1,110 @@ +From ffb8c27b0539dd90262d1021488e7817fae57c42 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Fri, 5 Dec 2025 19:55:17 +0100 +Subject: mptcp: avoid deadlock on fallback while reinjecting + +From: Paolo Abeni + +commit ffb8c27b0539dd90262d1021488e7817fae57c42 upstream. 
+ +Jakub reported an MPTCP deadlock at fallback time: + + WARNING: possible recursive locking detected + 6.18.0-rc7-virtme #1 Not tainted + -------------------------------------------- + mptcp_connect/20858 is trying to acquire lock: + ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_try_fallback+0xd8/0x280 + + but task is already holding lock: + ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0 + + other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&msk->fallback_lock); + lock(&msk->fallback_lock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + + 3 locks held by mptcp_connect/20858: + #0: ff1100001da18290 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x114/0x1bc0 + #1: ff1100001db40fd0 (k-sk_lock-AF_INET#2){+.+.}-{0:0}, at: __mptcp_retrans+0x2cb/0xaa0 + #2: ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0 + + stack backtrace: + CPU: 0 UID: 0 PID: 20858 Comm: mptcp_connect Not tainted 6.18.0-rc7-virtme #1 PREEMPT(full) + Hardware name: Bochs, BIOS Bochs 01/01/2011 + Call Trace: + + dump_stack_lvl+0x6f/0xa0 + print_deadlock_bug.cold+0xc0/0xcd + validate_chain+0x2ff/0x5f0 + __lock_acquire+0x34c/0x740 + lock_acquire.part.0+0xbc/0x260 + _raw_spin_lock_bh+0x38/0x50 + __mptcp_try_fallback+0xd8/0x280 + mptcp_sendmsg_frag+0x16c2/0x3050 + __mptcp_retrans+0x421/0xaa0 + mptcp_release_cb+0x5aa/0xa70 + release_sock+0xab/0x1d0 + mptcp_sendmsg+0xd5b/0x1bc0 + sock_write_iter+0x281/0x4d0 + new_sync_write+0x3c5/0x6f0 + vfs_write+0x65e/0xbb0 + ksys_write+0x17e/0x200 + do_syscall_64+0xbb/0xfd0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + RIP: 0033:0x7fa5627cbc5e + Code: 4d 89 d8 e8 14 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa + RSP: 002b:00007fff1fe14700 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 + RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fa5627cbc5e + RDX: 0000000000001f9c RSI: 00007fff1fe16984 RDI: 0000000000000005 + RBP: 00007fff1fe14710 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff1fe16920 + R13: 0000000000002000 R14: 0000000000001f9c R15: 0000000000001f9c + +The packet scheduler could attempt a reinjection after receiving an +MP_FAIL and before the infinite map has been transmitted, causing a +deadlock since MPTCP needs to do the reinjection atomically from WRT +fallback. + +Address the issue explicitly avoiding the reinjection in the critical +scenario. Note that this is the only fallback critical section that +could potentially send packets and hit the double-lock. 
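+
+The shape of the fix, as a runnable userspace sketch (illustrative
+only, not part of the patch): decide under the lock, up front, that
+the path which would re-take the lock must not run at all:
+
+  #include <pthread.h>
+  #include <stdio.h>
+
+  static pthread_mutex_t fallback_lock = PTHREAD_MUTEX_INITIALIZER;
+  static int allow_subflows;  /* 0: infinite fallback in progress */
+
+  static void retransmit(void)
+  {
+          pthread_mutex_lock(&fallback_lock);
+          if (!allow_subflows) {
+                  /* pushing data could recurse into the fallback path,
+                   * which takes fallback_lock again: bail out instead
+                   */
+                  pthread_mutex_unlock(&fallback_lock);
+                  printf("retransmission skipped\n");
+                  return;
+          }
+          /* ... push data ... */
+          pthread_mutex_unlock(&fallback_lock);
+  }
+
+  int main(void) { retransmit(); return 0; }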
+
+Reported-by: Jakub Kicinski
+Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/mptcp-dbg/results/412720/1-mptcp-join-sh/stderr
+Fixes: f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni
+Reviewed-by: Matthieu Baerts (NGI0)
+Signed-off-by: Matthieu Baerts (NGI0)
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-4-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/mptcp/protocol.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2693,10 +2693,13 @@ static void __mptcp_retrans(struct sock
+ 
+ 		/*
+ 		 * make the whole retrans decision, xmit, disallow
+-		 * fallback atomic
++		 * fallback atomic, note that we can't retrans even
++		 * when an infinite fallback is in progress, i.e. new
++		 * subflows are disallowed.
+ 		 */
+ 		spin_lock_bh(&msk->fallback_lock);
+-		if (__mptcp_check_fallback(msk)) {
++		if (__mptcp_check_fallback(msk) ||
++		    !msk->allow_subflows) {
+ 			spin_unlock_bh(&msk->fallback_lock);
+ 			release_sock(ssk);
+ 			goto clear_scheduled;
diff --git a/queue-6.18/mptcp-pm-ignore-unknown-endpoint-flags.patch b/queue-6.18/mptcp-pm-ignore-unknown-endpoint-flags.patch
new file mode 100644
index 0000000000..feb91f6c61
--- /dev/null
+++ b/queue-6.18/mptcp-pm-ignore-unknown-endpoint-flags.patch
@@ -0,0 +1,63 @@
+From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)"
+Date: Fri, 5 Dec 2025 19:55:14 +0100
+Subject: mptcp: pm: ignore unknown endpoint flags
+
+From: Matthieu Baerts (NGI0)
+
+commit 0ace3297a7301911e52d8195cb1006414897c859 upstream.
+
+Before this patch, the kernel was saving any flags set by the userspace,
+even unknown ones. This doesn't cause critical issues because the kernel
+only looks at specific ones. But on the other hand, endpoint dumps could
+suggest to the userspace that recent flags are supported on older kernel
+versions.
+
+Instead, ignore all unknown flags when parsing them. By doing that, the
+userspace can continue to set unsupported flags, but it now has a way to
+verify what is supported by the kernel.
+
+Note that it seems better to keep accepting unsupported flags, so the
+behaviour doesn't change; this also eases things on the userspace side:
+"optional" endpoint types supported only by newer kernel versions can be
+requested without having to deal with each kernel version separately.
+
+A note for the backports: there will be conflicts in mptcp.h on older
+versions that do not have the mentioned flags; the new line should still
+be added last, and the '5' needs to be adapted to have the same value as
+the last entry.
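+
+For illustration only (not part of the upstream commit): a standalone
+sketch of the masking idea; BITUL() and the mask value mirror the uapi
+definitions but are redefined here so the example builds on its own:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define BITUL(x)        (1UL << (x))
+  /* bits 5..0 are the known flags, like GENMASK(5, 0) in the patch */
+  #define ADDR_FLAGS_MASK (BITUL(6) - 1)
+
+  int main(void)
+  {
+          uint32_t from_user = BITUL(7) | BITUL(3); /* unknown | fullmesh */
+          uint32_t stored = from_user & ADDR_FLAGS_MASK;
+
+          /* prints 0x8: the unknown 7th bit is silently dropped */
+          printf("stored flags: 0x%x\n", stored);
+          return 0;
+  }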
+
+Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau
+Signed-off-by: Matthieu Baerts (NGI0)
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/uapi/linux/mptcp.h | 1 +
+ net/mptcp/pm_netlink.c | 3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/uapi/linux/mptcp.h
++++ b/include/uapi/linux/mptcp.h
+@@ -40,6 +40,7 @@
+ #define MPTCP_PM_ADDR_FLAG_FULLMESH	_BITUL(3)
+ #define MPTCP_PM_ADDR_FLAG_IMPLICIT	_BITUL(4)
+ #define MPTCP_PM_ADDR_FLAG_LAMINAR	_BITUL(5)
++#define MPTCP_PM_ADDR_FLAGS_MASK	GENMASK(5, 0)
+ 
+ struct mptcp_info {
+ 	__u8	mptcpi_subflows;
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *
+ 	}
+ 
+ 	if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
+-		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
++		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
++			       MPTCP_PM_ADDR_FLAGS_MASK;
+ 
+ 	if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ 		entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
diff --git a/queue-6.18/mptcp-schedule-rtx-timer-only-after-pushing-data.patch b/queue-6.18/mptcp-schedule-rtx-timer-only-after-pushing-data.patch
new file mode 100644
index 0000000000..289be2bc5c
--- /dev/null
+++ b/queue-6.18/mptcp-schedule-rtx-timer-only-after-pushing-data.patch
@@ -0,0 +1,71 @@
+From 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb Mon Sep 17 00:00:00 2001
+From: Paolo Abeni
+Date: Fri, 5 Dec 2025 19:55:16 +0100
+Subject: mptcp: schedule rtx timer only after pushing data
+
+From: Paolo Abeni
+
+commit 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb upstream.
+
+The MPTCP protocol usually schedules the retransmission timer only
+when there is some chance for such retransmissions to happen.
+
+There is a notable exception: __mptcp_push_pending() currently
+schedules such a timer unconditionally, potentially leading to
+unnecessary rtx timer expiration.
+
+The issue has been present since the blamed commit below, but became
+easily reproducible after commit 27b0e701d387 ("mptcp: drop bogus
+optimization in __mptcp_check_push()").
+
+Fixes: 33d41c9cd74c ("mptcp: more accurate timeout")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni
+Reviewed-by: Matthieu Baerts (NGI0)
+Signed-off-by: Matthieu Baerts (NGI0)
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-3-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/mptcp/protocol.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1597,7 +1597,7 @@ void __mptcp_push_pending(struct sock *s
+ 	struct mptcp_sendmsg_info info = {
+ 		.flags = flags,
+ 	};
+-	bool do_check_data_fin = false;
++	bool copied = false;
+ 	int push_count = 1;
+ 
+ 	while (mptcp_send_head(sk) && (push_count > 0)) {
+@@ -1639,7 +1639,7 @@ void __mptcp_push_pending(struct sock *s
+ 				push_count--;
+ 				continue;
+ 			}
+-			do_check_data_fin = true;
++			copied = true;
+ 		}
+ 	}
+ }
+@@ -1648,11 +1648,14 @@ void __mptcp_push_pending(struct sock *s
+ 	if (ssk)
+ 		mptcp_push_release(ssk, &info);
+ 
+-	/* ensure the rtx timer is running */
+-	if (!mptcp_rtx_timer_pending(sk))
+-		mptcp_reset_rtx_timer(sk);
+-	if (do_check_data_fin)
++	/* Avoid scheduling the rtx timer if no data has been pushed; the timer
++	 * will be updated on positive acks by __mptcp_cleanup_una().
++ */ ++ if (copied) { ++ if (!mptcp_rtx_timer_pending(sk)) ++ mptcp_reset_rtx_timer(sk); + mptcp_check_send_data_fin(sk); ++ } + } + + static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) diff --git a/queue-6.18/printk-allow-printk_trigger_flush-to-flush-all-types.patch b/queue-6.18/printk-allow-printk_trigger_flush-to-flush-all-types.patch new file mode 100644 index 0000000000..1a1a32e4e2 --- /dev/null +++ b/queue-6.18/printk-allow-printk_trigger_flush-to-flush-all-types.patch @@ -0,0 +1,81 @@ +From d01ff281bd9b1bfeac9ab98ec8a9ee41da900d5e Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Thu, 13 Nov 2025 17:09:47 +0106 +Subject: printk: Allow printk_trigger_flush() to flush all types + +From: John Ogness + +commit d01ff281bd9b1bfeac9ab98ec8a9ee41da900d5e upstream. + +Currently printk_trigger_flush() only triggers legacy offloaded +flushing, even if that may not be the appropriate method to flush +for currently registered consoles. (The function predates the +NBCON consoles.) + +Since commit 6690d6b52726 ("printk: Add helper for flush type +logic") there is printk_get_console_flush_type(), which also +considers NBCON consoles and reports all the methods of flushing +appropriate based on the system state and consoles available. + +Update printk_trigger_flush() to use +printk_get_console_flush_type() to appropriately flush registered +consoles. + +Suggested-by: Petr Mladek +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Link: https://lore.kernel.org/stable/20251113160351.113031-2-john.ogness%40linutronix.de +Tested-by: Sherry Sun +Link: https://patch.msgid.link/20251113160351.113031-2-john.ogness@linutronix.de +Signed-off-by: Petr Mladek +Signed-off-by: Greg Kroah-Hartman +--- + kernel/printk/nbcon.c | 2 +- + kernel/printk/printk.c | 23 ++++++++++++++++++++++- + 2 files changed, 23 insertions(+), 2 deletions(-) + +--- a/kernel/printk/nbcon.c ++++ b/kernel/printk/nbcon.c +@@ -1856,7 +1856,7 @@ void nbcon_device_release(struct console + if (console_trylock()) + console_unlock(); + } else if (ft.legacy_offload) { +- printk_trigger_flush(); ++ defer_console_output(); + } + } + console_srcu_read_unlock(cookie); +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -4595,9 +4595,30 @@ void defer_console_output(void) + __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); + } + ++/** ++ * printk_trigger_flush - Attempt to flush printk buffer to consoles. ++ * ++ * If possible, flush the printk buffer to all consoles in the caller's ++ * context. If offloading is available, trigger deferred printing. ++ * ++ * This is best effort. Depending on the system state, console states, ++ * and caller context, no actual flushing may result from this call. 
++ */ + void printk_trigger_flush(void) + { +- defer_console_output(); ++ struct console_flush_type ft; ++ ++ printk_get_console_flush_type(&ft); ++ if (ft.nbcon_atomic) ++ nbcon_atomic_flush_pending(); ++ if (ft.nbcon_offload) ++ nbcon_kthreads_wake(); ++ if (ft.legacy_direct) { ++ if (console_trylock()) ++ console_unlock(); ++ } ++ if (ft.legacy_offload) ++ defer_console_output(); + } + + int vprintk_deferred(const char *fmt, va_list args) diff --git a/queue-6.18/printk-avoid-irq_work-for-printk_deferred-on-suspend.patch b/queue-6.18/printk-avoid-irq_work-for-printk_deferred-on-suspend.patch new file mode 100644 index 0000000000..ca33a5f62f --- /dev/null +++ b/queue-6.18/printk-avoid-irq_work-for-printk_deferred-on-suspend.patch @@ -0,0 +1,50 @@ +From 66e7c1e0ee08cfb6db64f8f3f6e5a3cc930145c8 Mon Sep 17 00:00:00 2001 +From: John Ogness +Date: Fri, 21 Nov 2025 11:26:00 +0106 +Subject: printk: Avoid irq_work for printk_deferred() on suspend + +From: John Ogness + +commit 66e7c1e0ee08cfb6db64f8f3f6e5a3cc930145c8 upstream. + +With commit ("printk: Avoid scheduling irq_work on suspend") the +implementation of printk_get_console_flush_type() was modified to +avoid offloading when irq_work should be blocked during suspend. +Since printk uses the returned flush type to determine what +flushing methods are used, this was thought to be sufficient for +avoiding irq_work usage during the suspend phase. + +However, vprintk_emit() implements a hack to support +printk_deferred(). In this hack, the returned flush type is +adjusted to make sure no legacy direct printing occurs when +printk_deferred() was used. + +Because of this hack, the legacy offloading flushing method can +still be used, causing irq_work to be queued when it should not +be. + +Adjust the vprintk_emit() hack to also consider +@console_irqwork_blocked so that legacy offloading will not be +chosen when irq_work should be blocked. + +Link: https://lore.kernel.org/lkml/87fra90xv4.fsf@jogness.linutronix.de +Signed-off-by: John Ogness +Fixes: 26873e3e7f0c ("printk: Avoid scheduling irq_work on suspend") +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Signed-off-by: Greg Kroah-Hartman +--- + kernel/printk/printk.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2393,7 +2393,7 @@ asmlinkage int vprintk_emit(int facility + /* If called from the scheduler, we can not call up(). */ + if (level == LOGLEVEL_SCHED) { + level = LOGLEVEL_DEFAULT; +- ft.legacy_offload |= ft.legacy_direct; ++ ft.legacy_offload |= ft.legacy_direct && !console_irqwork_blocked; + ft.legacy_direct = false; + } + diff --git a/queue-6.18/selftests-mptcp-pm-ensure-unknown-flags-are-ignored.patch b/queue-6.18/selftests-mptcp-pm-ensure-unknown-flags-are-ignored.patch new file mode 100644 index 0000000000..e6848ceff4 --- /dev/null +++ b/queue-6.18/selftests-mptcp-pm-ensure-unknown-flags-are-ignored.patch @@ -0,0 +1,77 @@ +From 29f4801e9c8dfd12bdcb33b61a6ac479c7162bd7 Mon Sep 17 00:00:00 2001 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 5 Dec 2025 19:55:15 +0100 +Subject: selftests: mptcp: pm: ensure unknown flags are ignored + +From: Matthieu Baerts (NGI0) + +commit 29f4801e9c8dfd12bdcb33b61a6ac479c7162bd7 upstream. + +This validates the previous commit: the userspace can set unknown flags +-- the 7th bit is currently unused -- without errors, but only the +supported ones are printed in the endpoints dumps. 
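+
+For illustration only (not part of the upstream commit or selftest): a
+standalone model in C of the check the new test performs. The dump side
+prints every flag it knows by name and any leftover bits verbatim, so a
+bit the kernel refused to store at add time can never resurface in a
+dump; flag names and values are simplified stand-ins:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  #define FLAG_SIGNAL  (1u << 0)
+  #define FLAG_SUBFLOW (1u << 1)
+
+  static void print_flags(uint32_t flags)
+  {
+          if (flags & FLAG_SIGNAL) {
+                  flags &= ~FLAG_SIGNAL;
+                  printf("signal%s", flags ? "," : "");
+          }
+          if (flags & FLAG_SUBFLOW) {
+                  flags &= ~FLAG_SUBFLOW;
+                  printf("subflow%s", flags ? "," : "");
+          }
+          if (flags) /* bump unknown flags, if any */
+                  printf("0x%x", flags);
+          printf("\n");
+  }
+
+  int main(void)
+  {
+          print_flags(FLAG_SUBFLOW);             /* "subflow" */
+          print_flags(FLAG_SUBFLOW | (1u << 7)); /* "subflow,0x80" */
+          return 0;
+  }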
+ +The 'Fixes' tag here below is the same as the one from the previous +commit: this patch here is not fixing anything wrong in the selftests, +but it validates the previous fix for an issue introduced by this commit +ID. + +Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") +Cc: stable@vger.kernel.org +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-2-9e4781a6c1b8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/pm_netlink.sh | 4 ++++ + tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 11 +++++++++++ + 2 files changed, 15 insertions(+) + +--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh ++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh +@@ -192,6 +192,10 @@ check "show_endpoints" \ + flush_endpoint + check "show_endpoints" "" "flush addrs" + ++add_endpoint 10.0.1.1 flags unknown ++check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags" ++flush_endpoint ++ + set_limits 9 1 2>/dev/null + check "get_limits" "${default_limits}" "rcv addrs above hard limit" + +--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c ++++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +@@ -24,6 +24,8 @@ + #define IPPROTO_MPTCP 262 + #endif + ++#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7) ++ + static void syntax(char *argv[]) + { + fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept []\n", argv[0]); +@@ -836,6 +838,8 @@ int add_addr(int fd, int pm_family, int + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; ++ else if (!strcmp(tok, "unknown")) ++ flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN; + else + error(1, errno, + "unknown flag %s", argv[arg]); +@@ -1047,6 +1051,13 @@ static void print_addr(struct rtattr *at + if (flags) + printf(","); + } ++ ++ if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) { ++ printf("unknown"); ++ flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN; ++ if (flags) ++ printf(","); ++ } + + /* bump unknown flags, if any */ + if (flags) diff --git a/queue-6.18/series b/queue-6.18/series index b3f88ade41..0186ad59e5 100644 --- a/queue-6.18/series +++ b/queue-6.18/series @@ -254,3 +254,26 @@ fs-ntfs3-fix-mount-failure-for-sparse-runs-in-run_unpack.patch ktest.pl-fix-uninitialized-var-in-config-bisect.pl.patch tpm-cap-the-number-of-pcr-banks.patch fs-pm-fix-reverse-check-in-filesystems_freeze_callback.patch +printk-allow-printk_trigger_flush-to-flush-all-types.patch +printk-avoid-irq_work-for-printk_deferred-on-suspend.patch +ext4-fix-string-copying-in-parse_apply_sb_mount_options.patch +ext4-check-if-mount_opts-is-nul-terminated-in-ext4_ioctl_set_tune_sb.patch +ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch +ext4-clear-i_state_flags-when-alloc-inode.patch +ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch +ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch +jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch +jbd2-use-a-weaker-annotation-in-journal-handling.patch +block-remove-queue-freezing-from-several-sysfs-store-callbacks.patch +mm-ksm-fix-exec-fork-inheritance-support-for-prctl.patch +media-v4l2-mem2mem-fix-outdated-documentation.patch +mm-huge_memory-add-pmd-folio-to-ds_queue-in-do_huge_zero_wp_pmd.patch +tpm2-sessions-fix-out-of-range-indexing-in-name_size.patch +tpm2-sessions-fix-tpm2_read_public-range-checks.patch 
+crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch +dma-mapping-fix-dma_bit_mask-macro-being-broken.patch +mm-slab-introduce-kvfree_rcu_barrier_on_cache-for-cache-destruction.patch +mptcp-pm-ignore-unknown-endpoint-flags.patch +selftests-mptcp-pm-ensure-unknown-flags-are-ignored.patch +mptcp-schedule-rtx-timer-only-after-pushing-data.patch +mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch diff --git a/queue-6.18/tpm2-sessions-fix-out-of-range-indexing-in-name_size.patch b/queue-6.18/tpm2-sessions-fix-out-of-range-indexing-in-name_size.patch new file mode 100644 index 0000000000..441e0d30c2 --- /dev/null +++ b/queue-6.18/tpm2-sessions-fix-out-of-range-indexing-in-name_size.patch @@ -0,0 +1,435 @@ +From 6e9722e9a7bfe1bbad649937c811076acf86e1fd Mon Sep 17 00:00:00 2001 +From: Jarkko Sakkinen +Date: Sun, 30 Nov 2025 21:07:12 +0200 +Subject: tpm2-sessions: Fix out of range indexing in name_size + +From: Jarkko Sakkinen + +commit 6e9722e9a7bfe1bbad649937c811076acf86e1fd upstream. + +'name_size' does not have any range checks, and it just directly indexes +with TPM_ALG_ID, which could lead into memory corruption at worst. + +Address the issue by only processing known values and returning -EINVAL for +unrecognized values. + +Make also 'tpm_buf_append_name' and 'tpm_buf_fill_hmac_session' fallible so +that errors are detected before causing any spurious TPM traffic. + +End also the authorization session on failure in both of the functions, as +the session state would be then by definition corrupted. + +Cc: stable@vger.kernel.org # v6.10+ +Fixes: 1085b8276bb4 ("tpm: Add the rest of the session HMAC API") +Reviewed-by: Jonathan McDowell +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm2-cmd.c | 23 ++++- + drivers/char/tpm/tpm2-sessions.c | 132 ++++++++++++++++++++---------- + include/linux/tpm.h | 13 +- + security/keys/trusted-keys/trusted_tpm2.c | 29 +++++- + 4 files changed, 142 insertions(+), 55 deletions(-) + +--- a/drivers/char/tpm/tpm2-cmd.c ++++ b/drivers/char/tpm/tpm2-cmd.c +@@ -187,7 +187,11 @@ int tpm2_pcr_extend(struct tpm_chip *chi + } + + if (!disable_pcr_integrity) { +- tpm_buf_append_name(chip, &buf, pcr_idx, NULL); ++ rc = tpm_buf_append_name(chip, &buf, pcr_idx, NULL); ++ if (rc) { ++ tpm_buf_destroy(&buf); ++ return rc; ++ } + tpm_buf_append_hmac_session(chip, &buf, 0, NULL, 0); + } else { + tpm_buf_append_handle(chip, &buf, pcr_idx); +@@ -202,8 +206,14 @@ int tpm2_pcr_extend(struct tpm_chip *chi + chip->allocated_banks[i].digest_size); + } + +- if (!disable_pcr_integrity) +- tpm_buf_fill_hmac_session(chip, &buf); ++ if (!disable_pcr_integrity) { ++ rc = tpm_buf_fill_hmac_session(chip, &buf); ++ if (rc) { ++ tpm_buf_destroy(&buf); ++ return rc; ++ } ++ } ++ + rc = tpm_transmit_cmd(chip, &buf, 0, "attempting extend a PCR value"); + if (!disable_pcr_integrity) + rc = tpm_buf_check_hmac_response(chip, &buf, rc); +@@ -261,7 +271,12 @@ int tpm2_get_random(struct tpm_chip *chi + | TPM2_SA_CONTINUE_SESSION, + NULL, 0); + tpm_buf_append_u16(&buf, num_bytes); +- tpm_buf_fill_hmac_session(chip, &buf); ++ err = tpm_buf_fill_hmac_session(chip, &buf); ++ if (err) { ++ tpm_buf_destroy(&buf); ++ return err; ++ } ++ + err = tpm_transmit_cmd(chip, &buf, + offsetof(struct tpm2_get_random_out, + buffer), +--- a/drivers/char/tpm/tpm2-sessions.c ++++ b/drivers/char/tpm/tpm2-sessions.c +@@ -144,16 +144,23 @@ struct tpm2_auth { + /* + * Name Size based on TPM algorithm (assumes no hash bigger than 255) + */ +-static u8 
name_size(const u8 *name) ++static int name_size(const u8 *name) + { +- static u8 size_map[] = { +- [TPM_ALG_SHA1] = SHA1_DIGEST_SIZE, +- [TPM_ALG_SHA256] = SHA256_DIGEST_SIZE, +- [TPM_ALG_SHA384] = SHA384_DIGEST_SIZE, +- [TPM_ALG_SHA512] = SHA512_DIGEST_SIZE, +- }; +- u16 alg = get_unaligned_be16(name); +- return size_map[alg] + 2; ++ u16 hash_alg = get_unaligned_be16(name); ++ ++ switch (hash_alg) { ++ case TPM_ALG_SHA1: ++ return SHA1_DIGEST_SIZE + 2; ++ case TPM_ALG_SHA256: ++ return SHA256_DIGEST_SIZE + 2; ++ case TPM_ALG_SHA384: ++ return SHA384_DIGEST_SIZE + 2; ++ case TPM_ALG_SHA512: ++ return SHA512_DIGEST_SIZE + 2; ++ default: ++ pr_warn("tpm: unsupported name algorithm: 0x%04x\n", hash_alg); ++ return -EINVAL; ++ } + } + + static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) +@@ -161,6 +168,7 @@ static int tpm2_parse_read_public(char * + struct tpm_header *head = (struct tpm_header *)buf->data; + off_t offset = TPM_HEADER_SIZE; + u32 tot_len = be32_to_cpu(head->length); ++ int ret; + u32 val; + + /* we're starting after the header so adjust the length */ +@@ -173,8 +181,13 @@ static int tpm2_parse_read_public(char * + offset += val; + /* name */ + val = tpm_buf_read_u16(buf, &offset); +- if (val != name_size(&buf->data[offset])) ++ ret = name_size(&buf->data[offset]); ++ if (ret < 0) ++ return ret; ++ ++ if (val != ret) + return -EINVAL; ++ + memcpy(name, &buf->data[offset], val); + /* forget the rest */ + return 0; +@@ -221,46 +234,72 @@ static int tpm2_read_public(struct tpm_c + * As with most tpm_buf operations, success is assumed because failure + * will be caused by an incorrect programming model and indicated by a + * kernel message. ++ * ++ * Ends the authorization session on failure. + */ +-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, +- u32 handle, u8 *name) ++int tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, ++ u32 handle, u8 *name) + { + #ifdef CONFIG_TCG_TPM2_HMAC + enum tpm2_mso_type mso = tpm2_handle_mso(handle); + struct tpm2_auth *auth; + int slot; ++ int ret; + #endif + + if (!tpm2_chip_auth(chip)) { + tpm_buf_append_handle(chip, buf, handle); +- return; ++ return 0; + } + + #ifdef CONFIG_TCG_TPM2_HMAC + slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4; + if (slot >= AUTH_MAX_NAMES) { +- dev_err(&chip->dev, "TPM: too many handles\n"); +- return; ++ dev_err(&chip->dev, "too many handles\n"); ++ ret = -EIO; ++ goto err; + } + auth = chip->auth; +- WARN(auth->session != tpm_buf_length(buf), +- "name added in wrong place\n"); ++ if (auth->session != tpm_buf_length(buf)) { ++ dev_err(&chip->dev, "session state malformed"); ++ ret = -EIO; ++ goto err; ++ } + tpm_buf_append_u32(buf, handle); + auth->session += 4; + + if (mso == TPM2_MSO_PERSISTENT || + mso == TPM2_MSO_VOLATILE || + mso == TPM2_MSO_NVRAM) { +- if (!name) +- tpm2_read_public(chip, handle, auth->name[slot]); ++ if (!name) { ++ ret = tpm2_read_public(chip, handle, auth->name[slot]); ++ if (ret) ++ goto err; ++ } + } else { +- if (name) +- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n"); ++ if (name) { ++ dev_err(&chip->dev, "handle 0x%08x does not use a name\n", ++ handle); ++ ret = -EIO; ++ goto err; ++ } + } + + auth->name_h[slot] = handle; +- if (name) +- memcpy(auth->name[slot], name, name_size(name)); ++ if (name) { ++ ret = name_size(name); ++ if (ret < 0) ++ goto err; ++ ++ memcpy(auth->name[slot], name, ret); ++ } ++#endif ++ return 0; ++ ++#ifdef CONFIG_TCG_TPM2_HMAC ++err: ++ tpm2_end_auth_session(chip); ++ 
return tpm_ret_to_err(ret); + #endif + } + EXPORT_SYMBOL_GPL(tpm_buf_append_name); +@@ -533,11 +572,9 @@ static void tpm_buf_append_salt(struct t + * encryption key and encrypts the first parameter of the command + * buffer with it. + * +- * As with most tpm_buf operations, success is assumed because failure +- * will be caused by an incorrect programming model and indicated by a +- * kernel message. ++ * Ends the authorization session on failure. + */ +-void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) ++int tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) + { + u32 cc, handles, val; + struct tpm2_auth *auth = chip->auth; +@@ -549,9 +586,12 @@ void tpm_buf_fill_hmac_session(struct tp + u8 cphash[SHA256_DIGEST_SIZE]; + struct sha256_ctx sctx; + struct hmac_sha256_ctx hctx; ++ int ret; + +- if (!auth) +- return; ++ if (!auth) { ++ ret = -EIO; ++ goto err; ++ } + + /* save the command code in BE format */ + auth->ordinal = head->ordinal; +@@ -560,9 +600,11 @@ void tpm_buf_fill_hmac_session(struct tp + + i = tpm2_find_cc(chip, cc); + if (i < 0) { +- dev_err(&chip->dev, "Command 0x%x not found in TPM\n", cc); +- return; ++ dev_err(&chip->dev, "command 0x%08x not found\n", cc); ++ ret = -EIO; ++ goto err; + } ++ + attrs = chip->cc_attrs_tbl[i]; + + handles = (attrs >> TPM2_CC_ATTR_CHANDLES) & GENMASK(2, 0); +@@ -576,9 +618,9 @@ void tpm_buf_fill_hmac_session(struct tp + u32 handle = tpm_buf_read_u32(buf, &offset_s); + + if (auth->name_h[i] != handle) { +- dev_err(&chip->dev, "TPM: handle %d wrong for name\n", +- i); +- return; ++ dev_err(&chip->dev, "invalid handle 0x%08x\n", handle); ++ ret = -EIO; ++ goto err; + } + } + /* point offset_s to the start of the sessions */ +@@ -609,12 +651,14 @@ void tpm_buf_fill_hmac_session(struct tp + offset_s += len; + } + if (offset_s != offset_p) { +- dev_err(&chip->dev, "TPM session length is incorrect\n"); +- return; ++ dev_err(&chip->dev, "session length is incorrect\n"); ++ ret = -EIO; ++ goto err; + } + if (!hmac) { +- dev_err(&chip->dev, "TPM could not find HMAC session\n"); +- return; ++ dev_err(&chip->dev, "could not find HMAC session\n"); ++ ret = -EIO; ++ goto err; + } + + /* encrypt before HMAC */ +@@ -646,8 +690,11 @@ void tpm_buf_fill_hmac_session(struct tp + if (mso == TPM2_MSO_PERSISTENT || + mso == TPM2_MSO_VOLATILE || + mso == TPM2_MSO_NVRAM) { +- sha256_update(&sctx, auth->name[i], +- name_size(auth->name[i])); ++ ret = name_size(auth->name[i]); ++ if (ret < 0) ++ goto err; ++ ++ sha256_update(&sctx, auth->name[i], ret); + } else { + __be32 h = cpu_to_be32(auth->name_h[i]); + +@@ -668,6 +715,11 @@ void tpm_buf_fill_hmac_session(struct tp + hmac_sha256_update(&hctx, auth->tpm_nonce, sizeof(auth->tpm_nonce)); + hmac_sha256_update(&hctx, &auth->attrs, 1); + hmac_sha256_final(&hctx, hmac); ++ return 0; ++ ++err: ++ tpm2_end_auth_session(chip); ++ return ret; + } + EXPORT_SYMBOL(tpm_buf_fill_hmac_session); + +--- a/include/linux/tpm.h ++++ b/include/linux/tpm.h +@@ -526,8 +526,8 @@ static inline struct tpm2_auth *tpm2_chi + #endif + } + +-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, +- u32 handle, u8 *name); ++int tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, ++ u32 handle, u8 *name); + void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, + int passphraselen); +@@ -560,7 +560,7 @@ static inline void tpm_buf_append_hmac_s + #ifdef CONFIG_TCG_TPM2_HMAC + + int tpm2_start_auth_session(struct tpm_chip 
*chip); +-void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); ++int tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); + int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, + int rc); + void tpm2_end_auth_session(struct tpm_chip *chip); +@@ -574,10 +574,13 @@ static inline int tpm2_start_auth_sessio + static inline void tpm2_end_auth_session(struct tpm_chip *chip) + { + } +-static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, +- struct tpm_buf *buf) ++ ++static inline int tpm_buf_fill_hmac_session(struct tpm_chip *chip, ++ struct tpm_buf *buf) + { ++ return 0; + } ++ + static inline int tpm_buf_check_hmac_response(struct tpm_chip *chip, + struct tpm_buf *buf, + int rc) +--- a/security/keys/trusted-keys/trusted_tpm2.c ++++ b/security/keys/trusted-keys/trusted_tpm2.c +@@ -283,7 +283,10 @@ int tpm2_seal_trusted(struct tpm_chip *c + goto out_put; + } + +- tpm_buf_append_name(chip, &buf, options->keyhandle, NULL); ++ rc = tpm_buf_append_name(chip, &buf, options->keyhandle, NULL); ++ if (rc) ++ goto out; ++ + tpm_buf_append_hmac_session(chip, &buf, TPM2_SA_DECRYPT, + options->keyauth, TPM_DIGEST_SIZE); + +@@ -331,7 +334,10 @@ int tpm2_seal_trusted(struct tpm_chip *c + goto out; + } + +- tpm_buf_fill_hmac_session(chip, &buf); ++ rc = tpm_buf_fill_hmac_session(chip, &buf); ++ if (rc) ++ goto out; ++ + rc = tpm_transmit_cmd(chip, &buf, 4, "sealing data"); + rc = tpm_buf_check_hmac_response(chip, &buf, rc); + if (rc) +@@ -448,7 +454,10 @@ static int tpm2_load_cmd(struct tpm_chip + return rc; + } + +- tpm_buf_append_name(chip, &buf, options->keyhandle, NULL); ++ rc = tpm_buf_append_name(chip, &buf, options->keyhandle, NULL); ++ if (rc) ++ goto out; ++ + tpm_buf_append_hmac_session(chip, &buf, 0, options->keyauth, + TPM_DIGEST_SIZE); + +@@ -460,7 +469,10 @@ static int tpm2_load_cmd(struct tpm_chip + goto out; + } + +- tpm_buf_fill_hmac_session(chip, &buf); ++ rc = tpm_buf_fill_hmac_session(chip, &buf); ++ if (rc) ++ goto out; ++ + rc = tpm_transmit_cmd(chip, &buf, 4, "loading blob"); + rc = tpm_buf_check_hmac_response(chip, &buf, rc); + if (!rc) +@@ -508,7 +520,9 @@ static int tpm2_unseal_cmd(struct tpm_ch + return rc; + } + +- tpm_buf_append_name(chip, &buf, blob_handle, NULL); ++ rc = tpm_buf_append_name(chip, &buf, options->keyhandle, NULL); ++ if (rc) ++ goto out; + + if (!options->policyhandle) { + tpm_buf_append_hmac_session(chip, &buf, TPM2_SA_ENCRYPT, +@@ -533,7 +547,10 @@ static int tpm2_unseal_cmd(struct tpm_ch + NULL, 0); + } + +- tpm_buf_fill_hmac_session(chip, &buf); ++ rc = tpm_buf_fill_hmac_session(chip, &buf); ++ if (rc) ++ goto out; ++ + rc = tpm_transmit_cmd(chip, &buf, 6, "unsealing"); + rc = tpm_buf_check_hmac_response(chip, &buf, rc); + if (rc > 0) diff --git a/queue-6.18/tpm2-sessions-fix-tpm2_read_public-range-checks.patch b/queue-6.18/tpm2-sessions-fix-tpm2_read_public-range-checks.patch new file mode 100644 index 0000000000..529e571582 --- /dev/null +++ b/queue-6.18/tpm2-sessions-fix-tpm2_read_public-range-checks.patch @@ -0,0 +1,176 @@ +From bda1cbf73c6e241267c286427f2ed52b5735d872 Mon Sep 17 00:00:00 2001 +From: Jarkko Sakkinen +Date: Mon, 1 Dec 2025 15:38:02 +0200 +Subject: tpm2-sessions: Fix tpm2_read_public range checks + +From: Jarkko Sakkinen + +commit bda1cbf73c6e241267c286427f2ed52b5735d872 upstream. + +tpm2_read_public() has some rudimentary range checks but the function does +not ensure that the response buffer has enough bytes for the full TPMT_HA +payload. 
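+
+For illustration only (not part of the upstream commit): a standalone
+sketch of the kind of validation the rewrite below adds. Both the
+2-byte size field and the payload it announces are checked against the
+end of the buffer before anything is copied; names and layout are
+simplified:
+
+  #include <stddef.h>
+  #include <stdint.h>
+  #include <stdio.h>
+  #include <string.h>
+
+  static int read_sized(const uint8_t *buf, size_t len, size_t *off,
+                        uint8_t *out, size_t out_max)
+  {
+          uint16_t n;
+
+          if (*off + 2 > len)     /* room for the u16 size itself? */
+                  return -1;
+          n = (uint16_t)((buf[*off] << 8) | buf[*off + 1]);
+          *off += 2;
+          if (n > out_max || *off + n > len) /* payload in range? */
+                  return -1;
+          memcpy(out, &buf[*off], n);
+          *off += n;
+          return n;
+  }
+
+  int main(void)
+  {
+          const uint8_t resp[] = { 0x00, 0x02, 0xaa, 0xbb };
+          uint8_t name[8];
+          size_t off = 0;
+
+          /* 2; a truncated or oversized field returns -1 instead */
+          printf("%d bytes\n", read_sized(resp, sizeof(resp), &off,
+                                          name, sizeof(name)));
+          return 0;
+  }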
+ +Re-implement the function with necessary checks and validation, and return +name and name size for all handle types back to the caller. + +Cc: stable@vger.kernel.org # v6.10+ +Fixes: d0a25bb961e6 ("tpm: Add HMAC session name/handle append") +Signed-off-by: Jarkko Sakkinen +Reviewed-by: Jonathan McDowell +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm2-cmd.c | 3 + + drivers/char/tpm/tpm2-sessions.c | 94 ++++++++++++++++++++------------------- + 2 files changed, 53 insertions(+), 44 deletions(-) + +--- a/drivers/char/tpm/tpm2-cmd.c ++++ b/drivers/char/tpm/tpm2-cmd.c +@@ -11,8 +11,11 @@ + * used by the kernel internally. + */ + ++#include "linux/dev_printk.h" ++#include "linux/tpm.h" + #include "tpm.h" + #include ++#include + + static bool disable_pcr_integrity; + module_param(disable_pcr_integrity, bool, 0444); +--- a/drivers/char/tpm/tpm2-sessions.c ++++ b/drivers/char/tpm/tpm2-sessions.c +@@ -163,53 +163,61 @@ static int name_size(const u8 *name) + } + } + +-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf) ++static int tpm2_read_public(struct tpm_chip *chip, u32 handle, void *name) + { +- struct tpm_header *head = (struct tpm_header *)buf->data; ++ u32 mso = tpm2_handle_mso(handle); + off_t offset = TPM_HEADER_SIZE; +- u32 tot_len = be32_to_cpu(head->length); +- int ret; +- u32 val; +- +- /* we're starting after the header so adjust the length */ +- tot_len -= TPM_HEADER_SIZE; +- +- /* skip public */ +- val = tpm_buf_read_u16(buf, &offset); +- if (val > tot_len) +- return -EINVAL; +- offset += val; +- /* name */ +- val = tpm_buf_read_u16(buf, &offset); +- ret = name_size(&buf->data[offset]); +- if (ret < 0) +- return ret; +- +- if (val != ret) +- return -EINVAL; +- +- memcpy(name, &buf->data[offset], val); +- /* forget the rest */ +- return 0; +-} +- +-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name) +-{ ++ int rc, name_size_alg; + struct tpm_buf buf; +- int rc; ++ ++ if (mso != TPM2_MSO_PERSISTENT && mso != TPM2_MSO_VOLATILE && ++ mso != TPM2_MSO_NVRAM) { ++ memcpy(name, &handle, sizeof(u32)); ++ return sizeof(u32); ++ } + + rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC); + if (rc) + return rc; + + tpm_buf_append_u32(&buf, handle); +- rc = tpm_transmit_cmd(chip, &buf, 0, "read public"); +- if (rc == TPM2_RC_SUCCESS) +- rc = tpm2_parse_read_public(name, &buf); + +- tpm_buf_destroy(&buf); ++ rc = tpm_transmit_cmd(chip, &buf, 0, "TPM2_ReadPublic"); ++ if (rc) { ++ tpm_buf_destroy(&buf); ++ return tpm_ret_to_err(rc); ++ } ++ ++ /* Skip TPMT_PUBLIC: */ ++ offset += tpm_buf_read_u16(&buf, &offset); + +- return rc; ++ /* ++ * Ensure space for the length field of TPM2B_NAME and hashAlg field of ++ * TPMT_HA (the extra four bytes). 
++ */ ++ if (offset + 4 > tpm_buf_length(&buf)) { ++ tpm_buf_destroy(&buf); ++ return -EIO; ++ } ++ ++ rc = tpm_buf_read_u16(&buf, &offset); ++ name_size_alg = name_size(&buf.data[offset]); ++ ++ if (name_size_alg < 0) ++ return name_size_alg; ++ ++ if (rc != name_size_alg) { ++ tpm_buf_destroy(&buf); ++ return -EIO; ++ } ++ ++ if (offset + rc > tpm_buf_length(&buf)) { ++ tpm_buf_destroy(&buf); ++ return -EIO; ++ } ++ ++ memcpy(name, &buf.data[offset], rc); ++ return name_size_alg; + } + #endif /* CONFIG_TCG_TPM2_HMAC */ + +@@ -243,6 +251,7 @@ int tpm_buf_append_name(struct tpm_chip + #ifdef CONFIG_TCG_TPM2_HMAC + enum tpm2_mso_type mso = tpm2_handle_mso(handle); + struct tpm2_auth *auth; ++ u16 name_size_alg; + int slot; + int ret; + #endif +@@ -273,8 +282,10 @@ int tpm_buf_append_name(struct tpm_chip + mso == TPM2_MSO_NVRAM) { + if (!name) { + ret = tpm2_read_public(chip, handle, auth->name[slot]); +- if (ret) ++ if (ret < 0) + goto err; ++ ++ name_size_alg = ret; + } + } else { + if (name) { +@@ -286,13 +297,8 @@ int tpm_buf_append_name(struct tpm_chip + } + + auth->name_h[slot] = handle; +- if (name) { +- ret = name_size(name); +- if (ret < 0) +- goto err; +- +- memcpy(auth->name[slot], name, ret); +- } ++ if (name) ++ memcpy(auth->name[slot], name, name_size_alg); + #endif + return 0; +