--- /dev/null
+From 935a20d1bebf6236076785fac3ff81e3931834e9 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bvanassche@acm.org>
+Date: Fri, 14 Nov 2025 13:04:07 -0800
+Subject: block: Remove queue freezing from several sysfs store callbacks
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+commit 935a20d1bebf6236076785fac3ff81e3931834e9 upstream.
+
+Freezing the request queue from inside sysfs store callbacks may cause a
+deadlock in combination with the dm-multipath driver and the
+queue_if_no_path option. Additionally, freezing the request queue slows
+down system boot on systems where sysfs attributes are set synchronously.
+
+Fix this by removing the blk_mq_freeze_queue() / blk_mq_unfreeze_queue()
+calls from the store callbacks that do not strictly need them.
+Add the __data_racy annotation to request_queue.rq_timeout to suppress
+KCSAN data race reports about the rq_timeout reads.
+
+This patch may cause a small delay in applying the new settings.
+
+For all the attributes affected by this patch, I/O will complete
+correctly whether the old or the new value of the attribute is used.
+
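+To illustrate with a minimal sketch (not code from this patch): without
+the freeze, a store callback and a concurrent I/O-path reader follow the
+usual marked-access pattern, and the reader is correct with either the
+old or the new value:
+
+    /* writer side, e.g. a sysfs store callback */
+    WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
+
+    /* reader side, e.g. the readahead code */
+    unsigned long ra_pages = READ_ONCE(bdi->ra_pages);
+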
+This patch affects the following sysfs attributes:
+* io_poll_delay
+* io_timeout
+* nomerges
+* read_ahead_kb
+* rq_affinity
+
+Here is an example of a deadlock triggered by running test srp/002
+if this patch is not applied:
+
+task:multipathd
+Call Trace:
+ <TASK>
+ __schedule+0x8c1/0x1bf0
+ schedule+0xdd/0x270
+ schedule_preempt_disabled+0x1c/0x30
+ __mutex_lock+0xb89/0x1650
+ mutex_lock_nested+0x1f/0x30
+ dm_table_set_restrictions+0x823/0xdf0
+ __bind+0x166/0x590
+ dm_swap_table+0x2a7/0x490
+ do_resume+0x1b1/0x610
+ dev_suspend+0x55/0x1a0
+ ctl_ioctl+0x3a5/0x7e0
+ dm_ctl_ioctl+0x12/0x20
+ __x64_sys_ioctl+0x127/0x1a0
+ x64_sys_call+0xe2b/0x17d0
+ do_syscall_64+0x96/0x3a0
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ </TASK>
+task:(udev-worker)
+Call Trace:
+ <TASK>
+ __schedule+0x8c1/0x1bf0
+ schedule+0xdd/0x270
+ blk_mq_freeze_queue_wait+0xf2/0x140
+ blk_mq_freeze_queue_nomemsave+0x23/0x30
+ queue_ra_store+0x14e/0x290
+ queue_attr_store+0x23e/0x2c0
+ sysfs_kf_write+0xde/0x140
+ kernfs_fop_write_iter+0x3b2/0x630
+ vfs_write+0x4fd/0x1390
+ ksys_write+0xfd/0x230
+ __x64_sys_write+0x76/0xc0
+ x64_sys_call+0x276/0x17d0
+ do_syscall_64+0x96/0x3a0
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ </TASK>
+
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: Nilay Shroff <nilay@linux.ibm.com>
+Cc: Martin Wilck <mwilck@suse.com>
+Cc: Benjamin Marzinski <bmarzins@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: af2814149883 ("block: freeze the queue in queue_attr_store")
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-sysfs.c | 26 ++++++++------------------
+ include/linux/blkdev.h | 2 +-
+ 2 files changed, 9 insertions(+), 19 deletions(-)
+
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -143,21 +143,22 @@ queue_ra_store(struct gendisk *disk, con
+ {
+ unsigned long ra_kb;
+ ssize_t ret;
+- unsigned int memflags;
+ struct request_queue *q = disk->queue;
+
+ ret = queue_var_store(&ra_kb, page, count);
+ if (ret < 0)
+ return ret;
+ /*
+- * ->ra_pages is protected by ->limits_lock because it is usually
+- * calculated from the queue limits by queue_limits_commit_update.
++ * The ->ra_pages change below is protected by ->limits_lock because it
++ * is usually calculated from the queue limits by
++ * queue_limits_commit_update().
++ *
++ * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
++ * Use WRITE_ONCE() to write bdi->ra_pages once.
+ */
+ mutex_lock(&q->limits_lock);
+- memflags = blk_mq_freeze_queue(q);
+- disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
++ WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
+ mutex_unlock(&q->limits_lock);
+- blk_mq_unfreeze_queue(q, memflags);
+
+ return ret;
+ }
+@@ -375,21 +376,18 @@ static ssize_t queue_nomerges_store(stru
+ size_t count)
+ {
+ unsigned long nm;
+- unsigned int memflags;
+ struct request_queue *q = disk->queue;
+ ssize_t ret = queue_var_store(&nm, page, count);
+
+ if (ret < 0)
+ return ret;
+
+- memflags = blk_mq_freeze_queue(q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+ if (nm == 2)
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ else if (nm)
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+- blk_mq_unfreeze_queue(q, memflags);
+
+ return ret;
+ }
+@@ -409,7 +407,6 @@ queue_rq_affinity_store(struct gendisk *
+ #ifdef CONFIG_SMP
+ struct request_queue *q = disk->queue;
+ unsigned long val;
+- unsigned int memflags;
+
+ ret = queue_var_store(&val, page, count);
+ if (ret < 0)
+@@ -421,7 +418,6 @@ queue_rq_affinity_store(struct gendisk *
+ * are accessed individually using atomic test_bit operation. So we
+ * don't grab any lock while updating these flags.
+ */
+- memflags = blk_mq_freeze_queue(q);
+ if (val == 2) {
+ blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
+ blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+@@ -432,7 +428,6 @@ queue_rq_affinity_store(struct gendisk *
+ blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+ blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+ }
+- blk_mq_unfreeze_queue(q, memflags);
+ #endif
+ return ret;
+ }
+@@ -446,11 +441,9 @@ static ssize_t queue_poll_delay_store(st
+ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
+ size_t count)
+ {
+- unsigned int memflags;
+ ssize_t ret = count;
+ struct request_queue *q = disk->queue;
+
+- memflags = blk_mq_freeze_queue(q);
+ if (!(q->limits.features & BLK_FEAT_POLL)) {
+ ret = -EINVAL;
+ goto out;
+@@ -459,7 +452,6 @@ static ssize_t queue_poll_store(struct g
+ pr_info_ratelimited("writes to the poll attribute are ignored.\n");
+ pr_info_ratelimited("please use driver specific parameters instead.\n");
+ out:
+- blk_mq_unfreeze_queue(q, memflags);
+ return ret;
+ }
+
+@@ -472,7 +464,7 @@ static ssize_t queue_io_timeout_show(str
+ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
+ size_t count)
+ {
+- unsigned int val, memflags;
++ unsigned int val;
+ int err;
+ struct request_queue *q = disk->queue;
+
+@@ -480,9 +472,7 @@ static ssize_t queue_io_timeout_store(st
+ if (err || val == 0)
+ return -EINVAL;
+
+- memflags = blk_mq_freeze_queue(q);
+ blk_queue_rq_timeout(q, msecs_to_jiffies(val));
+- blk_mq_unfreeze_queue(q, memflags);
+
+ return count;
+ }
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -485,7 +485,7 @@ struct request_queue {
+ */
+ unsigned long queue_flags;
+
+- unsigned int rq_timeout;
++ unsigned int __data_racy rq_timeout;
+
+ unsigned int queue_depth;
+
--- /dev/null
+From adc15829fb73e402903b7030729263b6ee4a7232 Mon Sep 17 00:00:00 2001
+From: Sourabh Jain <sourabhjain@linux.ibm.com>
+Date: Thu, 16 Oct 2025 19:58:31 +0530
+Subject: crash: let architecture decide crash memory export to iomem_resource
+
+From: Sourabh Jain <sourabhjain@linux.ibm.com>
+
+commit adc15829fb73e402903b7030729263b6ee4a7232 upstream.
+
+With the generic crashkernel reservation, the kernel emits the following
+warning on powerpc:
+
+WARNING: CPU: 0 PID: 1 at arch/powerpc/mm/mem.c:341 add_system_ram_resources+0xfc/0x180
+Modules linked in:
+CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-auto-12607-g5472d60c129f #1 VOLUNTARY
+Hardware name: IBM,9080-HEX Power11 (architected) 0x820200 0xf000007 of:IBM,FW1110.01 (NH1110_069) hv:phyp pSeries
+NIP: c00000000201de3c LR: c00000000201de34 CTR: 0000000000000000
+REGS: c000000127cef8a0 TRAP: 0700 Not tainted (6.17.0-auto-12607-g5472d60c129f)
+MSR: 8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE> CR: 84000840 XER: 20040010
+CFAR: c00000000017eed0 IRQMASK: 0
+GPR00: c00000000201de34 c000000127cefb40 c0000000016a8100 0000000000000001
+GPR04: c00000012005aa00 0000000020000000 c000000002b705c8 0000000000000000
+GPR08: 000000007fffffff fffffffffffffff0 c000000002db8100 000000011fffffff
+GPR12: c00000000201dd40 c000000002ff0000 c0000000000112bc 0000000000000000
+GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+GPR20: 0000000000000000 0000000000000000 0000000000000000 c0000000015a3808
+GPR24: c00000000200468c c000000001699888 0000000000000106 c0000000020d1950
+GPR28: c0000000014683f8 0000000081000200 c0000000015c1868 c000000002b9f710
+NIP [c00000000201de3c] add_system_ram_resources+0xfc/0x180
+LR [c00000000201de34] add_system_ram_resources+0xf4/0x180
+Call Trace:
+add_system_ram_resources+0xf4/0x180 (unreliable)
+do_one_initcall+0x60/0x36c
+do_initcalls+0x120/0x220
+kernel_init_freeable+0x23c/0x390
+kernel_init+0x34/0x26c
+ret_from_kernel_user_thread+0x14/0x1c
+
+This warning occurs due to a conflict between crashkernel and System RAM
+iomem resources.
+
+The generic crashkernel reservation adds the crashkernel memory range to
+/proc/iomem during early initialization. Later, all memblock ranges are
+added to /proc/iomem as System RAM. If the crashkernel region overlaps
+with any memblock range, it causes a conflict while adding those memblock
+regions as iomem resources, triggering the above warning. The conflicting
+memblock regions are then omitted from /proc/iomem.
+
+For example, if the following crashkernel region is added to /proc/iomem:
+20000000-11fffffff : Crash kernel
+
+then the following System RAM memblock regions fail to be inserted:
+00000000-7fffffff : System RAM
+80000000-257fffffff : System RAM
+
+Fix this by not adding the crashkernel memory to /proc/iomem on powerpc.
+Introduce an architecture hook to let each architecture decide whether to
+export the crashkernel region to /proc/iomem.
+
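+The override mechanism follows the common "#ifndef" arch-hook pattern;
+here is a generic sketch with a hypothetical hook name:
+
+    /* arch header: opt out of the generic behaviour */
+    static inline bool arch_wants_feature(void)
+    {
+        return false;
+    }
+    #define arch_wants_feature arch_wants_feature
+
+    /* generic header: provide the default unless the arch already
+     * defined (and #define'd) its own hook */
+    #ifndef arch_wants_feature
+    static inline bool arch_wants_feature(void)
+    {
+        return true;
+    }
+    #endif
+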
+For more info, check out commit c40dd2f766440 ("powerpc: Add System RAM
+to /proc/iomem") and commit bce074bdbc36 ("powerpc: insert System RAM
+resource to prevent crashkernel conflict")
+
+Note: Before switching to the generic crashkernel reservation, powerpc
+never exported the crashkernel region to /proc/iomem.
+
+Link: https://lkml.kernel.org/r/20251016142831.144515-1-sourabhjain@linux.ibm.com
+Fixes: e3185ee438c2 ("powerpc/crash: use generic crashkernel reservation")
+Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
+Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
+Closes: https://lore.kernel.org/all/90937fe0-2e76-4c82-b27e-7b8a7fe3ac69@linux.ibm.com/
+Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
+Cc: Baoquan he <bhe@redhat.com>
+Cc: Hari Bathini <hbathini@linux.ibm.com>
+Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
+Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Cc: Vivek Goyal <vgoyal@redhat.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/include/asm/crash_reserve.h | 8 ++++++++
+ include/linux/crash_reserve.h | 6 ++++++
+ kernel/crash_reserve.c | 3 +++
+ 3 files changed, 17 insertions(+)
+
+diff --git a/arch/powerpc/include/asm/crash_reserve.h b/arch/powerpc/include/asm/crash_reserve.h
+index 6467ce29b1fa..d1b570ddbf98 100644
+--- a/arch/powerpc/include/asm/crash_reserve.h
++++ b/arch/powerpc/include/asm/crash_reserve.h
+@@ -5,4 +5,12 @@
+ /* crash kernel regions are Page size agliged */
+ #define CRASH_ALIGN PAGE_SIZE
+
++#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
++static inline bool arch_add_crash_res_to_iomem(void)
++{
++ return false;
++}
++#define arch_add_crash_res_to_iomem arch_add_crash_res_to_iomem
++#endif
++
+ #endif /* _ASM_POWERPC_CRASH_RESERVE_H */
+diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
+index 7b44b41d0a20..f0dc03d94ca2 100644
+--- a/include/linux/crash_reserve.h
++++ b/include/linux/crash_reserve.h
+@@ -32,6 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+ void __init reserve_crashkernel_cma(unsigned long long cma_size);
+
+ #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
++#ifndef arch_add_crash_res_to_iomem
++static inline bool arch_add_crash_res_to_iomem(void)
++{
++ return true;
++}
++#endif
+ #ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
+ #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
+ #endif
+diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
+index 87bf4d41eabb..62e60e0223cf 100644
+--- a/kernel/crash_reserve.c
++++ b/kernel/crash_reserve.c
+@@ -524,6 +524,9 @@ void __init reserve_crashkernel_cma(unsigned long long cma_size)
+ #ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+ static __init int insert_crashkernel_resources(void)
+ {
++ if (!arch_add_crash_res_to_iomem())
++ return 0;
++
+ if (crashk_res.start < crashk_res.end)
+ insert_resource(&iomem_resource, &crashk_res);
+
+--
+2.52.0
+
--- /dev/null
+From 31b931bebd11a0f00967114f62c8c38952f483e5 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <johannes.goede@oss.qualcomm.com>
+Date: Sun, 7 Dec 2025 19:47:56 +0100
+Subject: dma-mapping: Fix DMA_BIT_MASK() macro being broken
+
+From: Hans de Goede <johannes.goede@oss.qualcomm.com>
+
+commit 31b931bebd11a0f00967114f62c8c38952f483e5 upstream.
+
+After commit a50f7456f853 ("dma-mapping: Allow use of DMA_BIT_MASK(64) in
+global scope"), the DMA_BIT_MASK() macro is broken when passed a
+non-trivial expression for the value of 'n'. This is caused by the new
+version missing parentheses around 'n' when evaluating it.
+
+One example of this breakage is the IPU6 driver now crashing due to
+it getting DMA-addresses with address bit 32 set even though it has
+tried to set a 32 bit DMA mask.
+
+The IPU6 CSI2 engine has a DMA mask of either 31 or 32 bits depending
+on whether it is in secure mode or not, and it sets this mask like this:
+
+ mmu_info->aperture_end =
+ (dma_addr_t)DMA_BIT_MASK(isp->secure_mode ?
+ IPU6_MMU_ADDR_BITS :
+ IPU6_MMU_ADDR_BITS_NON_SECURE);
+
+So the 'n' argument here is "isp->secure_mode ? IPU6_MMU_ADDR_BITS :
+IPU6_MMU_ADDR_BITS_NON_SECURE" which gets expanded into:
+
+isp->secure_mode ? IPU6_MMU_ADDR_BITS : IPU6_MMU_ADDR_BITS_NON_SECURE - 1
+
+With the -1 only being applied in the non-secure case, the secure-mode
+mask ends up one bit too large.
+
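+A standalone userspace sketch of the expansion bug (with a simplified
+GENMASK_ULL, not the kernel's definition):
+
+    #include <stdio.h>
+
+    #define GENMASK_ULL(h, l) \
+        (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))
+
+    #define DMA_BIT_MASK_BROKEN(n) GENMASK_ULL(n - 1, 0)
+    #define DMA_BIT_MASK_FIXED(n)  GENMASK_ULL((n) - 1, 0)
+
+    int main(void)
+    {
+        int secure_mode = 1;
+
+        /* broken: h expands to "secure_mode ? 31 : 32 - 1", so
+         * secure mode gets a 32-bit mask, 0xffffffff */
+        printf("%#llx\n", DMA_BIT_MASK_BROKEN(secure_mode ? 31 : 32));
+        /* fixed: h is "(secure_mode ? 31 : 32) - 1", giving the
+         * intended 31-bit mask, 0x7fffffff */
+        printf("%#llx\n", DMA_BIT_MASK_FIXED(secure_mode ? 31 : 32));
+        return 0;
+    }
+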
+Fixes: a50f7456f853 ("dma-mapping: Allow use of DMA_BIT_MASK(64) in global scope")
+Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
+Cc: James Clark <james.clark@linaro.org>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: stable@vger.kernel.org
+Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
+Reviewed-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
+Link: https://lore.kernel.org/r/20251207184756.97904-1-johannes.goede@oss.qualcomm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/dma-mapping.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
+index 2ceda49c609f..aa36a0d1d9df 100644
+--- a/include/linux/dma-mapping.h
++++ b/include/linux/dma-mapping.h
+@@ -90,7 +90,7 @@
+ */
+ #define DMA_MAPPING_ERROR (~(dma_addr_t)0)
+
+-#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0)
++#define DMA_BIT_MASK(n) GENMASK_ULL((n) - 1, 0)
+
+ struct dma_iova_state {
+ dma_addr_t addr;
+--
+2.52.0
+
--- /dev/null
+From 7c11c56eb32eae96893eebafdbe3decadefe88ad Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Thu, 20 Nov 2025 21:42:33 +0800
+Subject: ext4: align max orphan file size with e2fsprogs limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 7c11c56eb32eae96893eebafdbe3decadefe88ad upstream.
+
+Kernel commit 0a6ce20c1564 ("ext4: verify orphan file size is not too big")
+limits the maximum supported orphan file size to 8 << 20 bytes (8 MiB).
+
+However, in e2fsprogs, the orphan file size is set to 32–512 filesystem
+blocks when creating a filesystem.
+
+With 64k block size, formatting an ext4 fs >32G gives an orphan file bigger
+than the kernel allows, so mount prints an error and fails:
+
+ EXT4-fs (vdb): orphan file too big: 8650752
+ EXT4-fs (vdb): mount failed
+
+To prevent this issue and allow previously created 64KB filesystems to
+mount, we update the maximum allowed orphan file size in the kernel to
+512 filesystem blocks.
+
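+The arithmetic behind the failure and the new limit, as a small
+userspace sketch:
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned long long old_cap = 8ULL << 20;        /* 8 MiB */
+        unsigned int blkbits = 16;                      /* 64k blocks */
+        unsigned long long new_cap = 512ULL << blkbits; /* 32 MiB */
+        unsigned long long observed = 8650752;          /* from the log */
+
+        printf("old cap rejects: %d\n", observed > old_cap); /* 1 */
+        printf("new cap rejects: %d\n", observed > new_cap); /* 0 */
+        return 0;
+    }
+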
+Fixes: 0a6ce20c1564 ("ext4: verify orphan file size is not too big")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251120134233.2994147-1-libaokun@huaweicloud.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/orphan.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/orphan.c
++++ b/fs/ext4/orphan.c
+@@ -8,6 +8,8 @@
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
+
++#define EXT4_MAX_ORPHAN_FILE_BLOCKS 512
++
+ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
+ {
+ int i, j, start;
+@@ -588,7 +590,7 @@ int ext4_init_orphan_info(struct super_b
+ * consuming absurd amounts of memory when pinning blocks of orphan
+ * file in memory.
+ */
+- if (inode->i_size > 8 << 20) {
++ if (inode->i_size > (EXT4_MAX_ORPHAN_FILE_BLOCKS << inode->i_blkbits)) {
+ ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
--- /dev/null
+From 3db63d2c2d1d1e78615dd742568c5a2d55291ad1 Mon Sep 17 00:00:00 2001
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Sat, 1 Nov 2025 19:04:29 +0300
+Subject: ext4: check if mount_opts is NUL-terminated in ext4_ioctl_set_tune_sb()
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+commit 3db63d2c2d1d1e78615dd742568c5a2d55291ad1 upstream.
+
+params.mount_opts may arrive as a potentially non-NUL-terminated string.
+Userspace is expected to pass a NUL-terminated string. Add an extra check
+to ensure this holds true. Note that the code further on uses
+strscpy_pad(), so this is just to properly inform the user that incorrect
+data was provided.
+
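+The added check boils down to this pattern (mirroring the hunk below):
+strnlen() returning the full buffer size means no NUL terminator was
+found anywhere inside the buffer.
+
+    if (strnlen(params.mount_opts, sizeof(params.mount_opts)) ==
+        sizeof(params.mount_opts))
+        return -E2BIG;  /* no '\0' within the buffer */
+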
+Found by Linux Verification Center (linuxtesting.org).
+
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251101160430.222297-2-pchelkin@ispras.ru>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ioctl.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -1394,6 +1394,10 @@ static int ext4_ioctl_set_tune_sb(struct
+ if (copy_from_user(¶ms, in, sizeof(params)))
+ return -EFAULT;
+
++ if (strnlen(params.mount_opts, sizeof(params.mount_opts)) ==
++ sizeof(params.mount_opts))
++ return -E2BIG;
++
+ if ((params.set_flags & ~TUNE_OPS_SUPPORTED) != 0)
+ return -EOPNOTSUPP;
+
--- /dev/null
+From 4091c8206cfd2e3bb529ef260887296b90d9b6a2 Mon Sep 17 00:00:00 2001
+From: Haibo Chen <haibo.chen@nxp.com>
+Date: Tue, 4 Nov 2025 16:12:24 +0800
+Subject: ext4: clear i_state_flags when alloc inode
+
+From: Haibo Chen <haibo.chen@nxp.com>
+
+commit 4091c8206cfd2e3bb529ef260887296b90d9b6a2 upstream.
+
+i_state_flags is used on 32-bit archs and needs to be cleared when the
+inode is allocated.
+This issue was found when unmounting ext4: the inode was sometimes
+accidentally tracked as an orphan, causing an ext4 message dump.
+
+Fixes: acf943e9768e ("ext4: fix checks for orphan inodes")
+Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251104-ext4-v1-1-73691a0800f9@nxp.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ialloc.c | 1 -
+ fs/ext4/inode.c | 1 -
+ fs/ext4/super.c | 1 +
+ 3 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -1293,7 +1293,6 @@ got:
+ ei->i_csum_seed = ext4_chksum(csum, (__u8 *)&gen, sizeof(gen));
+ }
+
+- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
+ ext4_set_inode_state(inode, EXT4_STATE_NEW);
+
+ ei->i_extra_isize = sbi->s_want_extra_isize;
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5288,7 +5288,6 @@ struct inode *__ext4_iget(struct super_b
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
+ set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
+
+- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
+ ei->i_inline_off = 0;
+ ei->i_dir_start_lookup = 0;
+ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1396,6 +1396,7 @@ static struct inode *ext4_alloc_inode(st
+
+ inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
++ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
+ spin_lock_init(&ei->i_raw_lock);
+ ei->i_prealloc_node = RB_ROOT;
+ atomic_set(&ei->i_prealloc_active, 0);
--- /dev/null
+From 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 Mon Sep 17 00:00:00 2001
+From: Yongjian Sun <sunyongjian1@huawei.com>
+Date: Thu, 6 Nov 2025 14:06:13 +0800
+Subject: ext4: fix incorrect group number assertion in mb_check_buddy
+
+From: Yongjian Sun <sunyongjian1@huawei.com>
+
+commit 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 upstream.
+
+When the MB_CHECK_ASSERT macro is enabled, an assertion failure can
+occur in __mb_check_buddy when checking preallocated blocks (pa) in
+a block group:
+
+Assertion failure in mb_free_blocks() : "groupnr == e4b->bd_group"
+
+This happens when a pa at the very end of a block group (e.g.,
+pa_pstart=32765, pa_len=3 in a group of 32768 blocks) becomes
+exhausted - its pa_pstart is advanced by pa_len to 32768, which
+lies in the next block group. If this exhausted pa (with pa_len == 0)
+is still in the bb_prealloc_list during the buddy check, the assertion
+incorrectly flags it as belonging to the wrong group. A possible
+sequence is as follows:
+
+ext4_mb_new_blocks
+ ext4_mb_release_context
+ pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len)
+ pa->pa_len -= ac->ac_b_ex.fe_len
+
+ __mb_check_buddy
+ for each pa in group
+ ext4_get_group_no_and_offset
+ MB_CHECK_ASSERT(groupnr == e4b->bd_group)
+
+To fix this, we modify the check to skip block group validation for
+exhausted preallocations (where pa_len == 0). Such entries are in a
+transitional state and will be removed from the list soon, so they
+should not trigger an assertion. This change prevents the false
+positive while maintaining the integrity of the checks for active
+allocations.
+
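+The group arithmetic for the exhausted pa from the example above, as a
+small userspace sketch:
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned int blocks_per_group = 32768;
+        unsigned long long pa_pstart = 32765, bd_group = 0;
+        unsigned int pa_len = 3;
+
+        /* release path: the allocation consumes the whole pa */
+        pa_pstart += pa_len;
+        pa_len = 0;
+
+        /* 32768 / 32768 == 1 != bd_group, so the assertion fires
+         * unless pa_len == 0 entries are skipped */
+        printf("groupnr=%llu bd_group=%llu\n",
+               pa_pstart / blocks_per_group, bd_group);
+        return 0;
+    }
+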
+Fixes: c9de560ded61f ("ext4: Add multi block allocator for ext4")
+Signed-off-by: Yongjian Sun <sunyongjian1@huawei.com>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251106060614.631382-2-sunyongjian@huaweicloud.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -783,6 +783,8 @@ static void __mb_check_buddy(struct ext4
+ ext4_group_t groupnr;
+ struct ext4_prealloc_space *pa;
+ pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
++ if (!pa->pa_len)
++ continue;
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
+ MB_CHECK_ASSERT(groupnr == e4b->bd_group);
+ for (i = 0; i < pa->pa_len; i++)
--- /dev/null
+From ee5a977b4e771cc181f39d504426dbd31ed701cc Mon Sep 17 00:00:00 2001
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Sat, 1 Nov 2025 19:04:28 +0300
+Subject: ext4: fix string copying in parse_apply_sb_mount_options()
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+commit ee5a977b4e771cc181f39d504426dbd31ed701cc upstream.
+
+strscpy_pad() can't be used to copy a non-NUL-terminated string into a
+NUL-terminated string of possibly bigger size. Commit 0efc5990bca5
+("string.h: Introduce
+memtostr() and memtostr_pad()") provides additional information in that
+regard. So if this happens, the following warning is observed:
+
+strnlen: detected buffer overflow: 65 byte read of buffer size 64
+WARNING: CPU: 0 PID: 28655 at lib/string_helpers.c:1032 __fortify_report+0x96/0xc0 lib/string_helpers.c:1032
+Modules linked in:
+CPU: 0 UID: 0 PID: 28655 Comm: syz-executor.3 Not tainted 6.12.54-syzkaller-00144-g5f0270f1ba00 #0
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
+RIP: 0010:__fortify_report+0x96/0xc0 lib/string_helpers.c:1032
+Call Trace:
+ <TASK>
+ __fortify_panic+0x1f/0x30 lib/string_helpers.c:1039
+ strnlen include/linux/fortify-string.h:235 [inline]
+ sized_strscpy include/linux/fortify-string.h:309 [inline]
+ parse_apply_sb_mount_options fs/ext4/super.c:2504 [inline]
+ __ext4_fill_super fs/ext4/super.c:5261 [inline]
+ ext4_fill_super+0x3c35/0xad00 fs/ext4/super.c:5706
+ get_tree_bdev_flags+0x387/0x620 fs/super.c:1636
+ vfs_get_tree+0x93/0x380 fs/super.c:1814
+ do_new_mount fs/namespace.c:3553 [inline]
+ path_mount+0x6ae/0x1f70 fs/namespace.c:3880
+ do_mount fs/namespace.c:3893 [inline]
+ __do_sys_mount fs/namespace.c:4103 [inline]
+ __se_sys_mount fs/namespace.c:4080 [inline]
+ __x64_sys_mount+0x280/0x300 fs/namespace.c:4080
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0x64/0x140 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Since userspace is expected to provide an s_mount_opts field at most 63
+characters long with the final byte being the NUL terminator, use a
+64-byte buffer which matches the size of s_mount_opts, so that
+strscpy_pad() does its job properly. Return an error if the user still
+managed to provide a non-NUL-terminated string here.
+
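+The sizing rule as a sketch (kernel-style, hypothetical buffers): with
+matching sizes, FORTIFY's strnlen() never reads past the source, and a
+missing NUL terminator is reported as -E2BIG instead of a crash:
+
+    char src[64];   /* on-disk field, may lack a trailing NUL */
+    char dst[64];   /* same size as src, not 65 */
+
+    if (strscpy_pad(dst, src) < 0) /* -E2BIG if src has no NUL */
+        return -E2BIG;
+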
+Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
+
+Fixes: 8ecb790ea8c3 ("ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251101160430.222297-1-pchelkin@ispras.ru>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2475,7 +2475,7 @@ static int parse_apply_sb_mount_options(
+ struct ext4_fs_context *m_ctx)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+- char s_mount_opts[65];
++ char s_mount_opts[64];
+ struct ext4_fs_context *s_ctx = NULL;
+ struct fs_context *fc = NULL;
+ int ret = -ENOMEM;
+@@ -2483,7 +2483,8 @@ static int parse_apply_sb_mount_options(
+ if (!sbi->s_es->s_mount_opts[0])
+ return 0;
+
+- strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts);
++ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
++ return -E2BIG;
+
+ fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+ if (!fc)
--- /dev/null
+From b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 Mon Sep 17 00:00:00 2001
+From: Karina Yankevich <k.yankevich@omp.ru>
+Date: Wed, 22 Oct 2025 12:32:53 +0300
+Subject: ext4: xattr: fix null pointer deref in ext4_raw_inode()
+
+From: Karina Yankevich <k.yankevich@omp.ru>
+
+commit b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 upstream.
+
+If ext4_get_inode_loc() fails (e.g. if it returns -EFSCORRUPTED),
+iloc.bh will remain set to NULL. Since ext4_xattr_inode_dec_ref_all()
+lacks error checking, this will lead to a null pointer dereference
+in ext4_raw_inode(), called right after ext4_get_inode_loc().
+
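+A condensed sketch of the failure mode (simplified from the code):
+
+    struct ext4_iloc iloc = { .bh = NULL };
+    int err = ext4_get_inode_loc(parent, &iloc);
+
+    if (err)    /* e.g. -EFSCORRUPTED: iloc.bh is still NULL */
+        return; /* bail out here; otherwise ext4_raw_inode(&iloc)
+                 * dereferences the NULL iloc.bh */
+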
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: c8e008b60492 ("ext4: ignore xattrs past end")
+Cc: stable@kernel.org
+Signed-off-by: Karina Yankevich <k.yankevich@omp.ru>
+Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Message-ID: <20251022093253.3546296-1-k.yankevich@omp.ru>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/xattr.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1174,7 +1174,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *h
+ if (block_csum)
+ end = (void *)bh->b_data + bh->b_size;
+ else {
+- ext4_get_inode_loc(parent, &iloc);
++ err = ext4_get_inode_loc(parent, &iloc);
++ if (err) {
++ EXT4_ERROR_INODE(parent, "parent inode loc (error %d)", err);
++ return;
++ }
+ end = (void *)ext4_raw_inode(&iloc) + EXT4_SB(parent->i_sb)->s_inode_size;
+ }
+
--- /dev/null
+From 524c3853831cf4f7e1db579e487c757c3065165c Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Wed, 22 Oct 2025 20:11:37 +0900
+Subject: jbd2: use a per-journal lock_class_key for jbd2_trans_commit_key
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit 524c3853831cf4f7e1db579e487c757c3065165c upstream.
+
+syzbot is reporting a possible deadlock due to sharing the
+lock_class_key for jbd2_handle across ext4 and ocfs2. But this is a
+false positive, as one disk partition can't hold two filesystems at the
+same time.
+
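+The fix applies the standard per-object lockdep key pattern; a generic
+sketch with hypothetical names:
+
+    struct my_obj {
+        struct lockdep_map map;
+        struct lock_class_key key;  /* one lock class per object */
+    };
+
+    static void my_obj_init(struct my_obj *o)
+    {
+        lockdep_register_key(&o->key);
+        lockdep_init_map(&o->map, "my_obj_handle", &o->key, 0);
+    }
+
+    static void my_obj_destroy(struct my_obj *o)
+    {
+        /* must be unregistered before the object is freed */
+        lockdep_unregister_key(&o->key);
+    }
+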
+Reported-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=6e493c165d26d6fcbf72
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Tested-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <987110fc-5470-457a-a218-d286a09dd82f@I-love.SAKURA.ne.jp>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/journal.c | 6 ++++--
+ include/linux/jbd2.h | 6 ++++++
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -1521,7 +1521,6 @@ static journal_t *journal_init_common(st
+ struct block_device *fs_dev,
+ unsigned long long start, int len, int blocksize)
+ {
+- static struct lock_class_key jbd2_trans_commit_key;
+ journal_t *journal;
+ int err;
+ int n;
+@@ -1530,6 +1529,7 @@ static journal_t *journal_init_common(st
+ if (!journal)
+ return ERR_PTR(-ENOMEM);
+
++ lockdep_register_key(&journal->jbd2_trans_commit_key);
+ journal->j_blocksize = blocksize;
+ journal->j_dev = bdev;
+ journal->j_fs_dev = fs_dev;
+@@ -1560,7 +1560,7 @@ static journal_t *journal_init_common(st
+ journal->j_max_batch_time = 15000; /* 15ms */
+ atomic_set(&journal->j_reserved_credits, 0);
+ lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
+- &jbd2_trans_commit_key, 0);
++ &journal->jbd2_trans_commit_key, 0);
+
+ /* The journal is marked for error until we succeed with recovery! */
+ journal->j_flags = JBD2_ABORT;
+@@ -1611,6 +1611,7 @@ err_cleanup:
+ kfree(journal->j_wbuf);
+ jbd2_journal_destroy_revoke(journal);
+ journal_fail_superblock(journal);
++ lockdep_unregister_key(&journal->jbd2_trans_commit_key);
+ kfree(journal);
+ return ERR_PTR(err);
+ }
+@@ -2187,6 +2188,7 @@ int jbd2_journal_destroy(journal_t *jour
+ jbd2_journal_destroy_revoke(journal);
+ kfree(journal->j_fc_wbuf);
+ kfree(journal->j_wbuf);
++ lockdep_unregister_key(&journal->jbd2_trans_commit_key);
+ kfree(journal);
+
+ return err;
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -1253,6 +1253,12 @@ struct journal_s
+ */
+ struct lockdep_map j_trans_commit_map;
+ #endif
++ /**
++ * @jbd2_trans_commit_key:
++ *
++ * "struct lock_class_key" for @j_trans_commit_map
++ */
++ struct lock_class_key jbd2_trans_commit_key;
+
+ /**
+ * @j_fc_cleanup_callback:
--- /dev/null
+From 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 Mon Sep 17 00:00:00 2001
+From: Byungchul Park <byungchul@sk.com>
+Date: Fri, 24 Oct 2025 16:39:40 +0900
+Subject: jbd2: use a weaker annotation in journal handling
+
+From: Byungchul Park <byungchul@sk.com>
+
+commit 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 upstream.
+
+jbd2 journal handling code doesn't want jbd2_might_wait_for_commit()
+to be placed between start_this_handle() and stop_this_handle(). So it
+marks the region with rwsem_acquire_read() and rwsem_release().
+
+However, the annotation is too strong for that purpose. We don't need
+more than a trylock annotation for that.
+
+rwsem_acquire_read() implies:
+
+ 1. possibly becoming a waiter on contention of the lock.
+ 2. entering the critical section of the lock.
+
+All we need here is 2, not 1. So the trylock version of the annotation
+is sufficient for that purpose. Now that dept partially relies on
+lockdep annotations, dept interprets rwsem_acquire_read() as a
+potential wait and might report a deadlock for that wait.
+
+Replace it with the trylock version of the annotation.
+
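+For reference, the third argument of the annotation is the "trylock"
+flag (a sketch of the existing lockdep signature, not new code):
+
+    /* rwsem_acquire_read(map, subclass, trylock, ip): trylock == 1
+     * records entering the critical section without marking the
+     * caller as a potential waiter */
+    rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_);
+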
+Signed-off-by: Byungchul Park <byungchul@sk.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: stable@kernel.org
+Message-ID: <20251024073940.1063-1-byungchul@sk.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/transaction.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -441,7 +441,7 @@ repeat:
+ read_unlock(&journal->j_state_lock);
+ current->journal_info = handle;
+
+- rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
++ rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_);
+ jbd2_journal_free_transaction(new_transaction);
+ /*
+ * Ensure that no allocations done while the transaction is open are
--- /dev/null
+From 082b86919b7a94de01d849021b4da820a6cb89dc Mon Sep 17 00:00:00 2001
+From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Date: Wed, 8 Oct 2025 12:55:18 +0300
+Subject: media: v4l2-mem2mem: Fix outdated documentation
+
+From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+
+commit 082b86919b7a94de01d849021b4da820a6cb89dc upstream.
+
+Commit cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in
+v4l2_m2m_job_finish") deferred calls to .device_run() to a work queue to
+avoid recursive calls when a job is finished right away from
+.device_run(). It failed to update the v4l2_m2m_job_finish()
+documentation that still states the function must not be called from
+.device_run(). Fix it.
+
+Fixes: cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in v4l2_m2m_job_finish")
+Cc: stable@vger.kernel.org
+Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/media/v4l2-mem2mem.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -192,8 +192,7 @@ void v4l2_m2m_try_schedule(struct v4l2_m
+ * other instances to take control of the device.
+ *
+ * This function has to be called only after &v4l2_m2m_ops->device_run
+- * callback has been called on the driver. To prevent recursion, it should
+- * not be called directly from the &v4l2_m2m_ops->device_run callback though.
++ * callback has been called on the driver.
+ */
+ void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+ struct v4l2_m2m_ctx *m2m_ctx);
--- /dev/null
+From 2a1351cd4176ee1809b0900d386919d03b7652f8 Mon Sep 17 00:00:00 2001
+From: Wei Yang <richard.weiyang@gmail.com>
+Date: Wed, 8 Oct 2025 09:54:52 +0000
+Subject: mm/huge_memory: add pmd folio to ds_queue in do_huge_zero_wp_pmd()
+
+From: Wei Yang <richard.weiyang@gmail.com>
+
+commit 2a1351cd4176ee1809b0900d386919d03b7652f8 upstream.
+
+We add the pmd folio to ds_queue on the first page fault in
+__do_huge_pmd_anonymous_page(), so that we can split it in case of memory
+pressure. The same should apply to a pmd folio during a wp page fault.
+
+Commit 1ced09e0331f ("mm: allocate THP on hugezeropage wp-fault") missed
+adding it to ds_queue, which means the system may not reclaim enough
+memory under memory pressure even when the pmd folio is underused.
+
+Move deferred_split_folio() into map_anon_folio_pmd() to make the pmd
+folio installation consistent.
+
+Link: https://lkml.kernel.org/r/20251008095453.18772-2-richard.weiyang@gmail.com
+Fixes: 1ced09e0331f ("mm: allocate THP on hugezeropage wp-fault")
+Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Lance Yang <lance.yang@linux.dev>
+Reviewed-by: Dev Jain <dev.jain@arm.com>
+Acked-by: Usama Arif <usamaarif642@gmail.com>
+Reviewed-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1233,6 +1233,7 @@ static void map_anon_folio_pmd(struct fo
+ count_vm_event(THP_FAULT_ALLOC);
+ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
+ count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
++ deferred_split_folio(folio, false);
+ }
+
+ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
+@@ -1273,7 +1274,6 @@ static vm_fault_t __do_huge_pmd_anonymou
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ map_anon_folio_pmd(folio, vmf->pmd, vma, haddr);
+ mm_inc_nr_ptes(vma->vm_mm);
+- deferred_split_folio(folio, false);
+ spin_unlock(vmf->ptl);
+ }
+
--- /dev/null
+From 590c03ca6a3fbb114396673314e2aa483839608b Mon Sep 17 00:00:00 2001
+From: xu xin <xu.xin16@zte.com.cn>
+Date: Tue, 7 Oct 2025 18:28:21 +0800
+Subject: mm/ksm: fix exec/fork inheritance support for prctl
+
+From: xu xin <xu.xin16@zte.com.cn>
+
+commit 590c03ca6a3fbb114396673314e2aa483839608b upstream.
+
+Patch series "ksm: fix exec/fork inheritance", v2.
+
+This series fixes exec/fork inheritance. See the detailed description of
+the issue below.
+
+
+This patch (of 2):
+
+Background
+==========
+
+commit d7597f59d1d33 ("mm: add new api to enable ksm per process")
+introduced MMF_VM_MERGE_ANY for mm->flags and allowed the user to set it
+via prctl() so that the process's VMAs are forcibly scanned by ksmd.
+
+Subsequently, commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
+supported inheriting the MMF_VM_MERGE_ANY flag when a task calls execve().
+
+Finally, commit 3a9e567ca45fb ("mm/ksm: fix ksm exec support for prctl")
+fixed the issue that ksmd doesn't scan the mm_struct with MMF_VM_MERGE_ANY
+by adding the mm_slot to ksm_mm_head in __bprm_mm_init().
+
+Problem
+=======
+
+In some extreme scenarios, however, this inheritance of MMF_VM_MERGE_ANY
+during exec/fork can fail. For example, when the scanning frequency of
+ksmd is tuned extremely high, a process carrying MMF_VM_MERGE_ANY may
+still fail to pass it to the newly exec'd process. This happens because
+ksm_execve() is executed too early in the do_execve flow (prematurely
+adding the new mm_struct to the ksm_mm_slot list).
+
+As a result, before do_execve completes, ksmd may have already performed a
+scan and found that this new mm_struct has no VM_MERGEABLE VMAs, thus
+clearing its MMF_VM_MERGE_ANY flag. Consequently, when the new program
+executes, the MMF_VM_MERGE_ANY flag inheritance is missed.
+
+Root reason
+===========
+
+commit d7597f59d1d33 ("mm: add new api to enable ksm per process") clears
+the flag MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE VMAs.
+
+Solution
+========
+
+Firstly, don't clear MMF_VM_MERGE_ANY when ksmd finds no VM_MERGEABLE
+VMAs, because the mm_struct may have just been added to the ksm_mm_slot
+list while its process has not yet officially started running or has not
+yet performed mmap/brk to allocate anonymous VMAs.
+
+Secondly, recheck MMF_VM_MERGEABLE if a process has MMF_VM_MERGE_ANY
+set, and if it was cleared, create a mm_slot and add it to the
+ksm_scan_list again.
+
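+For context, the prctl() interface whose inheritance is being fixed, as
+a userspace sketch:
+
+    #include <stdio.h>
+    #include <sys/prctl.h>
+
+    #ifndef PR_SET_MEMORY_MERGE
+    #define PR_SET_MEMORY_MERGE 67
+    #endif
+    #ifndef PR_GET_MEMORY_MERGE
+    #define PR_GET_MEMORY_MERGE 68
+    #endif
+
+    int main(void)
+    {
+        if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0))
+            perror("PR_SET_MEMORY_MERGE");
+        /* after fork()/execve() the child should still report 1 */
+        printf("%d\n", prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
+        return 0;
+    }
+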
+Link: https://lkml.kernel.org/r/20251007182504440BJgK8VXRHh8TD7IGSUIY4@zte.com.cn
+Link: https://lkml.kernel.org/r/20251007182821572h_SoFqYZXEP1mvWI4n9VL@zte.com.cn
+Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
+Fixes: d7597f59d1d3 ("mm: add new api to enable ksm per process")
+Signed-off-by: xu xin <xu.xin16@zte.com.cn>
+Cc: Stefan Roesch <shr@devkernel.io>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jinjiang Tu <tujinjiang@huawei.com>
+Cc: Wang Yaxin <wang.yaxin@zte.com.cn>
+Cc: Yang Yang <yang.yang29@zte.com.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ksm.h | 4 ++--
+ mm/ksm.c | 20 +++++++++++++++++---
+ 2 files changed, 19 insertions(+), 5 deletions(-)
+
+--- a/include/linux/ksm.h
++++ b/include/linux/ksm.h
+@@ -17,7 +17,7 @@
+ #ifdef CONFIG_KSM
+ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, int advice, vm_flags_t *vm_flags);
+-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
++vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
+ vm_flags_t vm_flags);
+ int ksm_enable_merge_any(struct mm_struct *mm);
+ int ksm_disable_merge_any(struct mm_struct *mm);
+@@ -103,7 +103,7 @@ bool ksm_process_mergeable(struct mm_str
+
+ #else /* !CONFIG_KSM */
+
+-static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
++static inline vm_flags_t ksm_vma_flags(struct mm_struct *mm,
+ const struct file *file, vm_flags_t vm_flags)
+ {
+ return vm_flags;
+--- a/mm/ksm.c
++++ b/mm/ksm.c
+@@ -2712,8 +2712,14 @@ no_vmas:
+ spin_unlock(&ksm_mmlist_lock);
+
+ mm_slot_free(mm_slot_cache, mm_slot);
++ /*
++ * Only clear MMF_VM_MERGEABLE. We must not clear
++ * MMF_VM_MERGE_ANY, because for those MMF_VM_MERGE_ANY process,
++ * perhaps their mm_struct has just been added to ksm_mm_slot
++ * list, and its process has not yet officially started running
++ * or has not yet performed mmap/brk to allocate anonymous VMAS.
++ */
+ mm_flags_clear(MMF_VM_MERGEABLE, mm);
+- mm_flags_clear(MMF_VM_MERGE_ANY, mm);
+ mmap_read_unlock(mm);
+ mmdrop(mm);
+ } else {
+@@ -2831,12 +2837,20 @@ static int __ksm_del_vma(struct vm_area_
+ *
+ * Returns: @vm_flags possibly updated to mark mergeable.
+ */
+-vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
++vm_flags_t ksm_vma_flags(struct mm_struct *mm, const struct file *file,
+ vm_flags_t vm_flags)
+ {
+ if (mm_flags_test(MMF_VM_MERGE_ANY, mm) &&
+- __ksm_should_add_vma(file, vm_flags))
++ __ksm_should_add_vma(file, vm_flags)) {
+ vm_flags |= VM_MERGEABLE;
++ /*
++ * Generally, the flags here always include MMF_VM_MERGEABLE.
++ * However, in rare cases, this flag may be cleared by ksmd who
++ * scans a cycle without finding any mergeable vma.
++ */
++ if (unlikely(!mm_flags_test(MMF_VM_MERGEABLE, mm)))
++ __ksm_enter(mm);
++ }
+
+ return vm_flags;
+ }
--- /dev/null
+From 0f35040de59371ad542b915d7b91176c9910dadc Mon Sep 17 00:00:00 2001
+From: Harry Yoo <harry.yoo@oracle.com>
+Date: Mon, 8 Dec 2025 00:41:47 +0900
+Subject: mm/slab: introduce kvfree_rcu_barrier_on_cache() for cache destruction
+
+From: Harry Yoo <harry.yoo@oracle.com>
+
+commit 0f35040de59371ad542b915d7b91176c9910dadc upstream.
+
+Currently, kvfree_rcu_barrier() flushes RCU sheaves across all slab
+caches when a cache is destroyed. This is unnecessary; only the RCU
+sheaves belonging to the cache being destroyed need to be flushed.
+
+As suggested by Vlastimil Babka, introduce a weaker form of
+kvfree_rcu_barrier() that operates on a specific slab cache.
+
+Factor out flush_rcu_sheaves_on_cache() from flush_all_rcu_sheaves() and
+call it from flush_all_rcu_sheaves() and kvfree_rcu_barrier_on_cache().
+
+Call kvfree_rcu_barrier_on_cache() instead of kvfree_rcu_barrier() on
+cache destruction.
+
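+A sketch of the path this speeds up (hypothetical module teardown):
+
+    static struct kmem_cache *my_cache;
+
+    static void __exit my_exit(void)
+    {
+        /* now waits only for in-flight kvfree_rcu() objects of
+         * my_cache instead of flushing every cache's RCU sheaves */
+        kmem_cache_destroy(my_cache);
+    }
+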
+The performance benefit is evaluated on a 12 core 24 threads AMD Ryzen
+5900X machine (1 socket), by loading slub_kunit module.
+
+Before:
+ Total calls: 19
+ Average latency (us): 18127
+ Total time (us): 344414
+
+After:
+ Total calls: 19
+ Average latency (us): 10066
+ Total time (us): 191264
+
+Two performance regressions have been reported:
+ - stress module loader test's runtime increases by 50-60% (Daniel)
+ - internal graphics test's runtime on Tegra234 increases by 35% (Jon)
+
+They are fixed by this change.
+
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Fixes: ec66e0d59952 ("slab: add sheaf support for batching kfree_rcu() operations")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/linux-mm/1bda09da-93be-4737-aef0-d47f8c5c9301@suse.cz
+Reported-and-tested-by: Daniel Gomez <da.gomez@samsung.com>
+Closes: https://lore.kernel.org/linux-mm/0406562e-2066-4cf8-9902-b2b0616dd742@kernel.org
+Reported-and-tested-by: Jon Hunter <jonathanh@nvidia.com>
+Closes: https://lore.kernel.org/linux-mm/e988eff6-1287-425e-a06c-805af5bbf262@nvidia.com
+Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
+Link: https://patch.msgid.link/20251207154148.117723-1-harry.yoo@oracle.com
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/slab.h | 7 ++++++
+ mm/slab.h | 1
+ mm/slab_common.c | 52 +++++++++++++++++++++++++++++++++-------------
+ mm/slub.c | 57 +++++++++++++++++++++++++++------------------------
+ 4 files changed, 76 insertions(+), 41 deletions(-)
+
+--- a/include/linux/slab.h
++++ b/include/linux/slab.h
+@@ -1150,10 +1150,17 @@ static inline void kvfree_rcu_barrier(vo
+ rcu_barrier();
+ }
+
++static inline void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
++{
++ rcu_barrier();
++}
++
+ static inline void kfree_rcu_scheduler_running(void) { }
+ #else
+ void kvfree_rcu_barrier(void);
+
++void kvfree_rcu_barrier_on_cache(struct kmem_cache *s);
++
+ void kfree_rcu_scheduler_running(void);
+ #endif
+
+--- a/mm/slab.h
++++ b/mm/slab.h
+@@ -442,6 +442,7 @@ static inline bool is_kmalloc_normal(str
+
+ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
+ void flush_all_rcu_sheaves(void);
++void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
+
+ #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
+ SLAB_CACHE_DMA32 | SLAB_PANIC | \
+--- a/mm/slab_common.c
++++ b/mm/slab_common.c
+@@ -492,7 +492,7 @@ void kmem_cache_destroy(struct kmem_cach
+ return;
+
+ /* in-flight kfree_rcu()'s may include objects from our cache */
+- kvfree_rcu_barrier();
++ kvfree_rcu_barrier_on_cache(s);
+
+ if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
+ (s->flags & SLAB_TYPESAFE_BY_RCU)) {
+@@ -2039,25 +2039,13 @@ unlock_return:
+ }
+ EXPORT_SYMBOL_GPL(kvfree_call_rcu);
+
+-/**
+- * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
+- *
+- * Note that a single argument of kvfree_rcu() call has a slow path that
+- * triggers synchronize_rcu() following by freeing a pointer. It is done
+- * before the return from the function. Therefore for any single-argument
+- * call that will result in a kfree() to a cache that is to be destroyed
+- * during module exit, it is developer's responsibility to ensure that all
+- * such calls have returned before the call to kmem_cache_destroy().
+- */
+-void kvfree_rcu_barrier(void)
++static inline void __kvfree_rcu_barrier(void)
+ {
+ struct kfree_rcu_cpu_work *krwp;
+ struct kfree_rcu_cpu *krcp;
+ bool queued;
+ int i, cpu;
+
+- flush_all_rcu_sheaves();
+-
+ /*
+ * Firstly we detach objects and queue them over an RCU-batch
+ * for all CPUs. Finally queued works are flushed for each CPU.
+@@ -2119,8 +2107,43 @@ void kvfree_rcu_barrier(void)
+ }
+ }
+ }
++
++/**
++ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
++ *
++ * Note that a single argument of kvfree_rcu() call has a slow path that
++ * triggers synchronize_rcu() following by freeing a pointer. It is done
++ * before the return from the function. Therefore for any single-argument
++ * call that will result in a kfree() to a cache that is to be destroyed
++ * during module exit, it is developer's responsibility to ensure that all
++ * such calls have returned before the call to kmem_cache_destroy().
++ */
++void kvfree_rcu_barrier(void)
++{
++ flush_all_rcu_sheaves();
++ __kvfree_rcu_barrier();
++}
+ EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
+
++/**
++ * kvfree_rcu_barrier_on_cache - Wait for in-flight kvfree_rcu() calls on a
++ * specific slab cache.
++ * @s: slab cache to wait for
++ *
++ * See the description of kvfree_rcu_barrier() for details.
++ */
++void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
++{
++ if (s->cpu_sheaves)
++ flush_rcu_sheaves_on_cache(s);
++ /*
++ * TODO: Introduce a version of __kvfree_rcu_barrier() that works
++ * on a specific slab cache.
++ */
++ __kvfree_rcu_barrier();
++}
++EXPORT_SYMBOL_GPL(kvfree_rcu_barrier_on_cache);
++
+ static unsigned long
+ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+ {
+@@ -2216,4 +2239,3 @@ void __init kvfree_rcu_init(void)
+ }
+
+ #endif /* CONFIG_KVFREE_RCU_BATCHED */
+-
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -4118,42 +4118,47 @@ static void flush_rcu_sheaf(struct work_
+
+
+ /* needed for kvfree_rcu_barrier() */
+-void flush_all_rcu_sheaves(void)
++void flush_rcu_sheaves_on_cache(struct kmem_cache *s)
+ {
+ struct slub_flush_work *sfw;
+- struct kmem_cache *s;
+ unsigned int cpu;
+
++ mutex_lock(&flush_lock);
++
++ for_each_online_cpu(cpu) {
++ sfw = &per_cpu(slub_flush, cpu);
++
++ /*
++ * we don't check if rcu_free sheaf exists - racing
++ * __kfree_rcu_sheaf() might have just removed it.
++ * by executing flush_rcu_sheaf() on the cpu we make
++ * sure the __kfree_rcu_sheaf() finished its call_rcu()
++ */
++
++ INIT_WORK(&sfw->work, flush_rcu_sheaf);
++ sfw->s = s;
++ queue_work_on(cpu, flushwq, &sfw->work);
++ }
++
++ for_each_online_cpu(cpu) {
++ sfw = &per_cpu(slub_flush, cpu);
++ flush_work(&sfw->work);
++ }
++
++ mutex_unlock(&flush_lock);
++}
++
++void flush_all_rcu_sheaves(void)
++{
++ struct kmem_cache *s;
++
+ cpus_read_lock();
+ mutex_lock(&slab_mutex);
+
+ list_for_each_entry(s, &slab_caches, list) {
+ if (!s->cpu_sheaves)
+ continue;
+-
+- mutex_lock(&flush_lock);
+-
+- for_each_online_cpu(cpu) {
+- sfw = &per_cpu(slub_flush, cpu);
+-
+- /*
+- * we don't check if rcu_free sheaf exists - racing
+- * __kfree_rcu_sheaf() might have just removed it.
+- * by executing flush_rcu_sheaf() on the cpu we make
+- * sure the __kfree_rcu_sheaf() finished its call_rcu()
+- */
+-
+- INIT_WORK(&sfw->work, flush_rcu_sheaf);
+- sfw->s = s;
+- queue_work_on(cpu, flushwq, &sfw->work);
+- }
+-
+- for_each_online_cpu(cpu) {
+- sfw = &per_cpu(slub_flush, cpu);
+- flush_work(&sfw->work);
+- }
+-
+- mutex_unlock(&flush_lock);
++ flush_rcu_sheaves_on_cache(s);
+ }
+
+ mutex_unlock(&slab_mutex);
--- /dev/null
+From ffb8c27b0539dd90262d1021488e7817fae57c42 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 5 Dec 2025 19:55:17 +0100
+Subject: mptcp: avoid deadlock on fallback while reinjecting
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit ffb8c27b0539dd90262d1021488e7817fae57c42 upstream.
+
+Jakub reported an MPTCP deadlock at fallback time:
+
+ WARNING: possible recursive locking detected
+ 6.18.0-rc7-virtme #1 Not tainted
+ --------------------------------------------
+ mptcp_connect/20858 is trying to acquire lock:
+ ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_try_fallback+0xd8/0x280
+
+ but task is already holding lock:
+ ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0
+
+ other info that might help us debug this:
+ Possible unsafe locking scenario:
+
+ CPU0
+ ----
+ lock(&msk->fallback_lock);
+ lock(&msk->fallback_lock);
+
+ *** DEADLOCK ***
+
+ May be due to missing lock nesting notation
+
+ 3 locks held by mptcp_connect/20858:
+ #0: ff1100001da18290 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x114/0x1bc0
+ #1: ff1100001db40fd0 (k-sk_lock-AF_INET#2){+.+.}-{0:0}, at: __mptcp_retrans+0x2cb/0xaa0
+ #2: ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0
+
+ stack backtrace:
+ CPU: 0 UID: 0 PID: 20858 Comm: mptcp_connect Not tainted 6.18.0-rc7-virtme #1 PREEMPT(full)
+ Hardware name: Bochs, BIOS Bochs 01/01/2011
+ Call Trace:
+ <TASK>
+ dump_stack_lvl+0x6f/0xa0
+ print_deadlock_bug.cold+0xc0/0xcd
+ validate_chain+0x2ff/0x5f0
+ __lock_acquire+0x34c/0x740
+ lock_acquire.part.0+0xbc/0x260
+ _raw_spin_lock_bh+0x38/0x50
+ __mptcp_try_fallback+0xd8/0x280
+ mptcp_sendmsg_frag+0x16c2/0x3050
+ __mptcp_retrans+0x421/0xaa0
+ mptcp_release_cb+0x5aa/0xa70
+ release_sock+0xab/0x1d0
+ mptcp_sendmsg+0xd5b/0x1bc0
+ sock_write_iter+0x281/0x4d0
+ new_sync_write+0x3c5/0x6f0
+ vfs_write+0x65e/0xbb0
+ ksys_write+0x17e/0x200
+ do_syscall_64+0xbb/0xfd0
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ RIP: 0033:0x7fa5627cbc5e
+ Code: 4d 89 d8 e8 14 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 <c9> c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa
+ RSP: 002b:00007fff1fe14700 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
+ RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fa5627cbc5e
+ RDX: 0000000000001f9c RSI: 00007fff1fe16984 RDI: 0000000000000005
+ RBP: 00007fff1fe14710 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff1fe16920
+ R13: 0000000000002000 R14: 0000000000001f9c R15: 0000000000001f9c
+
+The packet scheduler could attempt a reinjection after receiving an
+MP_FAIL and before the infinite map has been transmitted, causing a
+deadlock, since MPTCP needs to do the reinjection atomically with
+respect to fallback.
+
+Address the issue by explicitly avoiding the reinjection in the critical
+scenario. Note that this is the only fallback critical section that
+could potentially send packets and hit the double-lock.
+
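+A condensed sketch of the recursion (simplified call flow, not kernel
+code):
+
+    static void retrans_path(struct mptcp_sock *msk)
+    {
+        spin_lock_bh(&msk->fallback_lock);  /* __mptcp_retrans() */
+        /* ... mptcp_sendmsg_frag() decides to fall back ... */
+        spin_lock_bh(&msk->fallback_lock);  /* __mptcp_try_fallback():
+                                             * same lock, self-deadlock */
+    }
+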
+Reported-by: Jakub Kicinski <kuba@kernel.org>
+Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/mptcp-dbg/results/412720/1-mptcp-join-sh/stderr
+Fixes: f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-4-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2693,10 +2693,13 @@ static void __mptcp_retrans(struct sock
+
+ /*
+ * make the whole retrans decision, xmit, disallow
+- * fallback atomic
++ * fallback atomic, note that we can't retrans even
++ * when an infinite fallback is in progress, i.e. new
++ * subflows are disallowed.
+ */
+ spin_lock_bh(&msk->fallback_lock);
+- if (__mptcp_check_fallback(msk)) {
++ if (__mptcp_check_fallback(msk) ||
++ !msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ release_sock(ssk);
+ goto clear_scheduled;
--- /dev/null
+From 0ace3297a7301911e52d8195cb1006414897c859 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Fri, 5 Dec 2025 19:55:14 +0100
+Subject: mptcp: pm: ignore unknown endpoint flags
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 0ace3297a7301911e52d8195cb1006414897c859 upstream.
+
+Before this patch, the kernel was saving any flags set by the userspace,
+even unknown ones. This doesn't cause critical issues because the kernel
+is only looking at specific ones. But on the other hand, endpoint dumps
+could tell userspace that some recent flags seem to be supported on
+older kernel versions.
+
+Instead, ignore all unknown flags when parsing them. By doing that, the
+userspace can continue to set unsupported flags, but it has a way to
+verify what is supported by the kernel.
+
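+The masking as a sketch: an unknown bit set by userspace is silently
+dropped, so a later dump reflects only what the kernel accepted.
+
+    /* userspace asks for a known flag plus unknown bit 6 */
+    __u32 requested = MPTCP_PM_ADDR_FLAG_SUBFLOW | _BITUL(6);
+    __u32 accepted = requested & MPTCP_PM_ADDR_FLAGS_MASK;
+    /* accepted == MPTCP_PM_ADDR_FLAG_SUBFLOW, bit 6 is gone */
+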
+Note that it seems better to continue accepting unsupported flags so as
+not to change the behaviour; this also eases things on the userspace
+side, since "optional" endpoint types only supported by newer kernel
+versions can be set without having to deal with the different kernel
+versions.
+
+A note for the backports: there will be conflicts in mptcp.h on older
+versions not having the mentioned flags, the new line should still be
+added last, and the '5' needs to be adapted to have the same value as
+the last entry.
+
+Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-1-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/mptcp.h | 1 +
+ net/mptcp/pm_netlink.c | 3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/uapi/linux/mptcp.h
++++ b/include/uapi/linux/mptcp.h
+@@ -40,6 +40,7 @@
+ #define MPTCP_PM_ADDR_FLAG_FULLMESH _BITUL(3)
+ #define MPTCP_PM_ADDR_FLAG_IMPLICIT _BITUL(4)
+ #define MPTCP_PM_ADDR_FLAG_LAMINAR _BITUL(5)
++#define MPTCP_PM_ADDR_FLAGS_MASK GENMASK(5, 0)
+
+ struct mptcp_info {
+ __u8 mptcpi_subflows;
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -119,7 +119,8 @@ int mptcp_pm_parse_entry(struct nlattr *
+ }
+
+ if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
+- entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
++ entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]) &
++ MPTCP_PM_ADDR_FLAGS_MASK;
+
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
--- /dev/null
+From 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 5 Dec 2025 19:55:16 +0100
+Subject: mptcp: schedule rtx timer only after pushing data
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb upstream.
+
+The MPTCP protocol usually schedules the retransmission timer only
+when there is some chance for such retransmissions to happen.
+
+With a notable exception: __mptcp_push_pending() currently schedules
+the timer unconditionally, potentially leading to unnecessary rtx
+timer expirations.
+
+The issue has been present since the blamed commit below, but became
+easily reproducible after commit 27b0e701d387 ("mptcp: drop bogus
+optimization in __mptcp_check_push()").
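+
+A minimal sketch of the resulting logic, using the helper names from
+this patch:
+
+  /* Only arm the rtx timer when data has actually been pushed;
+   * otherwise the timer is updated on positive acks by
+   * __mptcp_cleanup_una().
+   */
+  if (copied) {
+          if (!mptcp_rtx_timer_pending(sk))
+                  mptcp_reset_rtx_timer(sk);
+          mptcp_check_send_data_fin(sk);
+  }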
+
+Fixes: 33d41c9cd74c ("mptcp: more accurate timeout")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-3-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1597,7 +1597,7 @@ void __mptcp_push_pending(struct sock *s
+ struct mptcp_sendmsg_info info = {
+ .flags = flags,
+ };
+- bool do_check_data_fin = false;
++ bool copied = false;
+ int push_count = 1;
+
+ while (mptcp_send_head(sk) && (push_count > 0)) {
+@@ -1639,7 +1639,7 @@ void __mptcp_push_pending(struct sock *s
+ push_count--;
+ continue;
+ }
+- do_check_data_fin = true;
++ copied = true;
+ }
+ }
+ }
+@@ -1648,11 +1648,14 @@ void __mptcp_push_pending(struct sock *s
+ if (ssk)
+ mptcp_push_release(ssk, &info);
+
+- /* ensure the rtx timer is running */
+- if (!mptcp_rtx_timer_pending(sk))
+- mptcp_reset_rtx_timer(sk);
+- if (do_check_data_fin)
++ /* Avoid scheduling the rtx timer if no data has been pushed; the timer
++ * will be updated on positive acks by __mptcp_cleanup_una().
++ */
++ if (copied) {
++ if (!mptcp_rtx_timer_pending(sk))
++ mptcp_reset_rtx_timer(sk);
+ mptcp_check_send_data_fin(sk);
++ }
+ }
+
+ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
--- /dev/null
+From d01ff281bd9b1bfeac9ab98ec8a9ee41da900d5e Mon Sep 17 00:00:00 2001
+From: John Ogness <john.ogness@linutronix.de>
+Date: Thu, 13 Nov 2025 17:09:47 +0106
+Subject: printk: Allow printk_trigger_flush() to flush all types
+
+From: John Ogness <john.ogness@linutronix.de>
+
+commit d01ff281bd9b1bfeac9ab98ec8a9ee41da900d5e upstream.
+
+Currently printk_trigger_flush() only triggers legacy offloaded
+flushing, even though that may not be the appropriate flushing method
+for the currently registered consoles. (The function predates the
+NBCON consoles.)
+
+Since commit 6690d6b52726 ("printk: Add helper for flush type
+logic") there is printk_get_console_flush_type(), which also
+considers NBCON consoles and reports all the flushing methods
+appropriate to the current system state and registered consoles.
+
+Update printk_trigger_flush() to use printk_get_console_flush_type()
+so that all registered consoles are flushed appropriately.
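+
+A minimal sketch of the resulting dispatch (mirroring the new
+printk_trigger_flush() body in the hunk below):
+
+  struct console_flush_type ft;
+
+  printk_get_console_flush_type(&ft);
+  if (ft.nbcon_atomic)          /* flush NBCON consoles directly */
+          nbcon_atomic_flush_pending();
+  if (ft.nbcon_offload)         /* wake the NBCON printer threads */
+          nbcon_kthreads_wake();
+  /* ... the legacy direct and offloaded variants follow the same
+   * pattern, see the hunk below ...
+   */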
+
+Suggested-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: John Ogness <john.ogness@linutronix.de>
+Reviewed-by: Petr Mladek <pmladek@suse.com>
+Link: https://lore.kernel.org/stable/20251113160351.113031-2-john.ogness%40linutronix.de
+Tested-by: Sherry Sun <sherry.sun@nxp.com>
+Link: https://patch.msgid.link/20251113160351.113031-2-john.ogness@linutronix.de
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/printk/nbcon.c | 2 +-
+ kernel/printk/printk.c | 23 ++++++++++++++++++++++-
+ 2 files changed, 23 insertions(+), 2 deletions(-)
+
+--- a/kernel/printk/nbcon.c
++++ b/kernel/printk/nbcon.c
+@@ -1856,7 +1856,7 @@ void nbcon_device_release(struct console
+ if (console_trylock())
+ console_unlock();
+ } else if (ft.legacy_offload) {
+- printk_trigger_flush();
++ defer_console_output();
+ }
+ }
+ console_srcu_read_unlock(cookie);
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -4595,9 +4595,30 @@ void defer_console_output(void)
+ __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
+ }
+
++/**
++ * printk_trigger_flush - Attempt to flush printk buffer to consoles.
++ *
++ * If possible, flush the printk buffer to all consoles in the caller's
++ * context. If offloading is available, trigger deferred printing.
++ *
++ * This is best effort. Depending on the system state, console states,
++ * and caller context, no actual flushing may result from this call.
++ */
+ void printk_trigger_flush(void)
+ {
+- defer_console_output();
++ struct console_flush_type ft;
++
++ printk_get_console_flush_type(&ft);
++ if (ft.nbcon_atomic)
++ nbcon_atomic_flush_pending();
++ if (ft.nbcon_offload)
++ nbcon_kthreads_wake();
++ if (ft.legacy_direct) {
++ if (console_trylock())
++ console_unlock();
++ }
++ if (ft.legacy_offload)
++ defer_console_output();
+ }
+
+ int vprintk_deferred(const char *fmt, va_list args)
--- /dev/null
+From 66e7c1e0ee08cfb6db64f8f3f6e5a3cc930145c8 Mon Sep 17 00:00:00 2001
+From: John Ogness <john.ogness@linutronix.de>
+Date: Fri, 21 Nov 2025 11:26:00 +0106
+Subject: printk: Avoid irq_work for printk_deferred() on suspend
+
+From: John Ogness <john.ogness@linutronix.de>
+
+commit 66e7c1e0ee08cfb6db64f8f3f6e5a3cc930145c8 upstream.
+
+With commit 26873e3e7f0c ("printk: Avoid scheduling irq_work on
+suspend") the implementation of printk_get_console_flush_type() was
+modified to avoid offloading when irq_work should be blocked during
+suspend. Since printk uses the returned flush type to determine which
+flushing methods are used, this was thought to be sufficient for
+avoiding irq_work usage during the suspend phase.
+
+However, vprintk_emit() implements a hack to support
+printk_deferred(). In this hack, the returned flush type is
+adjusted to make sure no legacy direct printing occurs when
+printk_deferred() is used.
+
+Because of this hack, the legacy offloaded flushing method can
+still be chosen, causing irq_work to be queued when it should not
+be.
+
+Adjust the vprintk_emit() hack to also consider
+@console_irqwork_blocked so that legacy offloading will not be
+chosen when irq_work should be blocked.
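+
+A minimal sketch of the adjusted hack (@console_irqwork_blocked is the
+suspend-time gate introduced by the blamed commit):
+
+  /* printk_deferred(): downgrade direct printing to offloading, but
+   * never pick offloading while irq_work is blocked.
+   */
+  if (level == LOGLEVEL_SCHED) {
+          level = LOGLEVEL_DEFAULT;
+          ft.legacy_offload |= ft.legacy_direct &&
+                               !console_irqwork_blocked;
+          ft.legacy_direct = false;
+  }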
+
+Link: https://lore.kernel.org/lkml/87fra90xv4.fsf@jogness.linutronix.de
+Signed-off-by: John Ogness <john.ogness@linutronix.de>
+Fixes: 26873e3e7f0c ("printk: Avoid scheduling irq_work on suspend")
+Reviewed-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/printk/printk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -2393,7 +2393,7 @@ asmlinkage int vprintk_emit(int facility
+ /* If called from the scheduler, we can not call up(). */
+ if (level == LOGLEVEL_SCHED) {
+ level = LOGLEVEL_DEFAULT;
+- ft.legacy_offload |= ft.legacy_direct;
++ ft.legacy_offload |= ft.legacy_direct && !console_irqwork_blocked;
+ ft.legacy_direct = false;
+ }
+
--- /dev/null
+From 29f4801e9c8dfd12bdcb33b61a6ac479c7162bd7 Mon Sep 17 00:00:00 2001
+From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
+Date: Fri, 5 Dec 2025 19:55:15 +0100
+Subject: selftests: mptcp: pm: ensure unknown flags are ignored
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+commit 29f4801e9c8dfd12bdcb33b61a6ac479c7162bd7 upstream.
+
+This validates the previous commit: the userspace can set unknown flags
+-- the 7th bit is currently unused -- without errors, but only the
+supported ones are printed in the endpoint dumps; see the sketch below.
+
+The 'Fixes' tag below is the same as the one from the previous commit:
+this patch does not fix anything wrong in the selftests, but it
+validates the previous fix for an issue introduced by that commit.
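+
+A minimal sketch of the pm_nl_ctl side (mirroring the hunks below; bit
+7 serves as the test's "unknown" flag since the kernel does not define
+it):
+
+  #define MPTCP_PM_ADDR_FLAG_UNKNOWN  _BITUL(7)
+
+  /* "unknown" token: ask the kernel to store a flag it does not know
+   * about; the endpoint dump must not report it back.
+   */
+  else if (!strcmp(tok, "unknown"))
+          flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN;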
+
+Fixes: 01cacb00b35c ("mptcp: add netlink-based PM")
+Cc: stable@vger.kernel.org
+Reviewed-by: Mat Martineau <martineau@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-2-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/pm_netlink.sh | 4 ++++
+ tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 11 +++++++++++
+ 2 files changed, 15 insertions(+)
+
+--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
++++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
+@@ -192,6 +192,10 @@ check "show_endpoints" \
+ flush_endpoint
+ check "show_endpoints" "" "flush addrs"
+
++add_endpoint 10.0.1.1 flags unknown
++check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
++flush_endpoint
++
+ set_limits 9 1 2>/dev/null
+ check "get_limits" "${default_limits}" "rcv addrs above hard limit"
+
+--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
++++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+@@ -24,6 +24,8 @@
+ #define IPPROTO_MPTCP 262
+ #endif
+
++#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7)
++
+ static void syntax(char *argv[])
+ {
+ fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
+@@ -836,6 +838,8 @@ int add_addr(int fd, int pm_family, int
+ flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
++ else if (!strcmp(tok, "unknown"))
++ flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN;
+ else
+ error(1, errno,
+ "unknown flag %s", argv[arg]);
+@@ -1047,6 +1051,13 @@ static void print_addr(struct rtattr *at
+ if (flags)
+ printf(",");
+ }
++
++ if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) {
++ printf("unknown");
++ flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN;
++ if (flags)
++ printf(",");
++ }
+
+ /* bump unknown flags, if any */
+ if (flags)
ktest.pl-fix-uninitialized-var-in-config-bisect.pl.patch
tpm-cap-the-number-of-pcr-banks.patch
fs-pm-fix-reverse-check-in-filesystems_freeze_callback.patch
+printk-allow-printk_trigger_flush-to-flush-all-types.patch
+printk-avoid-irq_work-for-printk_deferred-on-suspend.patch
+ext4-fix-string-copying-in-parse_apply_sb_mount_options.patch
+ext4-check-if-mount_opts-is-nul-terminated-in-ext4_ioctl_set_tune_sb.patch
+ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch
+ext4-clear-i_state_flags-when-alloc-inode.patch
+ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch
+ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch
+jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch
+jbd2-use-a-weaker-annotation-in-journal-handling.patch
+block-remove-queue-freezing-from-several-sysfs-store-callbacks.patch
+mm-ksm-fix-exec-fork-inheritance-support-for-prctl.patch
+media-v4l2-mem2mem-fix-outdated-documentation.patch
+mm-huge_memory-add-pmd-folio-to-ds_queue-in-do_huge_zero_wp_pmd.patch
+tpm2-sessions-fix-out-of-range-indexing-in-name_size.patch
+tpm2-sessions-fix-tpm2_read_public-range-checks.patch
+crash-let-architecture-decide-crash-memory-export-to-iomem_resource.patch
+dma-mapping-fix-dma_bit_mask-macro-being-broken.patch
+mm-slab-introduce-kvfree_rcu_barrier_on_cache-for-cache-destruction.patch
+mptcp-pm-ignore-unknown-endpoint-flags.patch
+selftests-mptcp-pm-ensure-unknown-flags-are-ignored.patch
+mptcp-schedule-rtx-timer-only-after-pushing-data.patch
+mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch
--- /dev/null
+From 6e9722e9a7bfe1bbad649937c811076acf86e1fd Mon Sep 17 00:00:00 2001
+From: Jarkko Sakkinen <jarkko@kernel.org>
+Date: Sun, 30 Nov 2025 21:07:12 +0200
+Subject: tpm2-sessions: Fix out of range indexing in name_size
+
+From: Jarkko Sakkinen <jarkko@kernel.org>
+
+commit 6e9722e9a7bfe1bbad649937c811076acf86e1fd upstream.
+
+'name_size' does not have any range checks: it directly indexes a
+lookup table with the TPM_ALG_ID taken from the buffer, which could
+lead to memory corruption at worst.
+
+Address the issue by only processing known values and returning -EINVAL
+for unrecognized values.
+
+Also make 'tpm_buf_append_name' and 'tpm_buf_fill_hmac_session'
+fallible so that errors are detected before causing any spurious TPM
+traffic.
+
+Also end the authorization session on failure in both functions, as the
+session state is then by definition corrupted.
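+
+A minimal sketch of the safe lookup (mirroring the switch added by this
+patch; the "+ 2" accounts for the algorithm id prefixing the digest):
+
+  static int name_size(const u8 *name)
+  {
+          /* Only accept known algorithms: an attacker-controlled id
+           * must never index past the end of a lookup table.
+           */
+          switch (get_unaligned_be16(name)) {
+          case TPM_ALG_SHA256:
+                  return SHA256_DIGEST_SIZE + 2;
+          /* ... the other supported algorithms ... */
+          default:
+                  return -EINVAL;
+          }
+  }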
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 1085b8276bb4 ("tpm: Add the rest of the session HMAC API")
+Reviewed-by: Jonathan McDowell <noodles@meta.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-cmd.c | 23 ++++-
+ drivers/char/tpm/tpm2-sessions.c | 132 ++++++++++++++++++++----------
+ include/linux/tpm.h | 13 +-
+ security/keys/trusted-keys/trusted_tpm2.c | 29 +++++-
+ 4 files changed, 142 insertions(+), 55 deletions(-)
+
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -187,7 +187,11 @@ int tpm2_pcr_extend(struct tpm_chip *chi
+ }
+
+ if (!disable_pcr_integrity) {
+- tpm_buf_append_name(chip, &buf, pcr_idx, NULL);
++ rc = tpm_buf_append_name(chip, &buf, pcr_idx, NULL);
++ if (rc) {
++ tpm_buf_destroy(&buf);
++ return rc;
++ }
+ tpm_buf_append_hmac_session(chip, &buf, 0, NULL, 0);
+ } else {
+ tpm_buf_append_handle(chip, &buf, pcr_idx);
+@@ -202,8 +206,14 @@ int tpm2_pcr_extend(struct tpm_chip *chi
+ chip->allocated_banks[i].digest_size);
+ }
+
+- if (!disable_pcr_integrity)
+- tpm_buf_fill_hmac_session(chip, &buf);
++ if (!disable_pcr_integrity) {
++ rc = tpm_buf_fill_hmac_session(chip, &buf);
++ if (rc) {
++ tpm_buf_destroy(&buf);
++ return rc;
++ }
++ }
++
+ rc = tpm_transmit_cmd(chip, &buf, 0, "attempting extend a PCR value");
+ if (!disable_pcr_integrity)
+ rc = tpm_buf_check_hmac_response(chip, &buf, rc);
+@@ -261,7 +271,12 @@ int tpm2_get_random(struct tpm_chip *chi
+ | TPM2_SA_CONTINUE_SESSION,
+ NULL, 0);
+ tpm_buf_append_u16(&buf, num_bytes);
+- tpm_buf_fill_hmac_session(chip, &buf);
++ err = tpm_buf_fill_hmac_session(chip, &buf);
++ if (err) {
++ tpm_buf_destroy(&buf);
++ return err;
++ }
++
+ err = tpm_transmit_cmd(chip, &buf,
+ offsetof(struct tpm2_get_random_out,
+ buffer),
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -144,16 +144,23 @@ struct tpm2_auth {
+ /*
+ * Name Size based on TPM algorithm (assumes no hash bigger than 255)
+ */
+-static u8 name_size(const u8 *name)
++static int name_size(const u8 *name)
+ {
+- static u8 size_map[] = {
+- [TPM_ALG_SHA1] = SHA1_DIGEST_SIZE,
+- [TPM_ALG_SHA256] = SHA256_DIGEST_SIZE,
+- [TPM_ALG_SHA384] = SHA384_DIGEST_SIZE,
+- [TPM_ALG_SHA512] = SHA512_DIGEST_SIZE,
+- };
+- u16 alg = get_unaligned_be16(name);
+- return size_map[alg] + 2;
++ u16 hash_alg = get_unaligned_be16(name);
++
++ switch (hash_alg) {
++ case TPM_ALG_SHA1:
++ return SHA1_DIGEST_SIZE + 2;
++ case TPM_ALG_SHA256:
++ return SHA256_DIGEST_SIZE + 2;
++ case TPM_ALG_SHA384:
++ return SHA384_DIGEST_SIZE + 2;
++ case TPM_ALG_SHA512:
++ return SHA512_DIGEST_SIZE + 2;
++ default:
++ pr_warn("tpm: unsupported name algorithm: 0x%04x\n", hash_alg);
++ return -EINVAL;
++ }
+ }
+
+ static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
+@@ -161,6 +168,7 @@ static int tpm2_parse_read_public(char *
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ off_t offset = TPM_HEADER_SIZE;
+ u32 tot_len = be32_to_cpu(head->length);
++ int ret;
+ u32 val;
+
+ /* we're starting after the header so adjust the length */
+@@ -173,8 +181,13 @@ static int tpm2_parse_read_public(char *
+ offset += val;
+ /* name */
+ val = tpm_buf_read_u16(buf, &offset);
+- if (val != name_size(&buf->data[offset]))
++ ret = name_size(&buf->data[offset]);
++ if (ret < 0)
++ return ret;
++
++ if (val != ret)
+ return -EINVAL;
++
+ memcpy(name, &buf->data[offset], val);
+ /* forget the rest */
+ return 0;
+@@ -221,46 +234,72 @@ static int tpm2_read_public(struct tpm_c
+ * As with most tpm_buf operations, success is assumed because failure
+ * will be caused by an incorrect programming model and indicated by a
+ * kernel message.
++ *
++ * Ends the authorization session on failure.
+ */
+-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+- u32 handle, u8 *name)
++int tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
++ u32 handle, u8 *name)
+ {
+ #ifdef CONFIG_TCG_TPM2_HMAC
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth;
+ int slot;
++ int ret;
+ #endif
+
+ if (!tpm2_chip_auth(chip)) {
+ tpm_buf_append_handle(chip, buf, handle);
+- return;
++ return 0;
+ }
+
+ #ifdef CONFIG_TCG_TPM2_HMAC
+ slot = (tpm_buf_length(buf) - TPM_HEADER_SIZE) / 4;
+ if (slot >= AUTH_MAX_NAMES) {
+- dev_err(&chip->dev, "TPM: too many handles\n");
+- return;
++ dev_err(&chip->dev, "too many handles\n");
++ ret = -EIO;
++ goto err;
+ }
+ auth = chip->auth;
+- WARN(auth->session != tpm_buf_length(buf),
+- "name added in wrong place\n");
++ if (auth->session != tpm_buf_length(buf)) {
++ dev_err(&chip->dev, "session state malformed");
++ ret = -EIO;
++ goto err;
++ }
+ tpm_buf_append_u32(buf, handle);
+ auth->session += 4;
+
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+- if (!name)
+- tpm2_read_public(chip, handle, auth->name[slot]);
++ if (!name) {
++ ret = tpm2_read_public(chip, handle, auth->name[slot]);
++ if (ret)
++ goto err;
++ }
+ } else {
+- if (name)
+- dev_err(&chip->dev, "TPM: Handle does not require name but one is specified\n");
++ if (name) {
++ dev_err(&chip->dev, "handle 0x%08x does not use a name\n",
++ handle);
++ ret = -EIO;
++ goto err;
++ }
+ }
+
+ auth->name_h[slot] = handle;
+- if (name)
+- memcpy(auth->name[slot], name, name_size(name));
++ if (name) {
++ ret = name_size(name);
++ if (ret < 0)
++ goto err;
++
++ memcpy(auth->name[slot], name, ret);
++ }
++#endif
++ return 0;
++
++#ifdef CONFIG_TCG_TPM2_HMAC
++err:
++ tpm2_end_auth_session(chip);
++ return tpm_ret_to_err(ret);
+ #endif
+ }
+ EXPORT_SYMBOL_GPL(tpm_buf_append_name);
+@@ -533,11 +572,9 @@ static void tpm_buf_append_salt(struct t
+ * encryption key and encrypts the first parameter of the command
+ * buffer with it.
+ *
+- * As with most tpm_buf operations, success is assumed because failure
+- * will be caused by an incorrect programming model and indicated by a
+- * kernel message.
++ * Ends the authorization session on failure.
+ */
+-void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
++int tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf)
+ {
+ u32 cc, handles, val;
+ struct tpm2_auth *auth = chip->auth;
+@@ -549,9 +586,12 @@ void tpm_buf_fill_hmac_session(struct tp
+ u8 cphash[SHA256_DIGEST_SIZE];
+ struct sha256_ctx sctx;
+ struct hmac_sha256_ctx hctx;
++ int ret;
+
+- if (!auth)
+- return;
++ if (!auth) {
++ ret = -EIO;
++ goto err;
++ }
+
+ /* save the command code in BE format */
+ auth->ordinal = head->ordinal;
+@@ -560,9 +600,11 @@ void tpm_buf_fill_hmac_session(struct tp
+
+ i = tpm2_find_cc(chip, cc);
+ if (i < 0) {
+- dev_err(&chip->dev, "Command 0x%x not found in TPM\n", cc);
+- return;
++ dev_err(&chip->dev, "command 0x%08x not found\n", cc);
++ ret = -EIO;
++ goto err;
+ }
++
+ attrs = chip->cc_attrs_tbl[i];
+
+ handles = (attrs >> TPM2_CC_ATTR_CHANDLES) & GENMASK(2, 0);
+@@ -576,9 +618,9 @@ void tpm_buf_fill_hmac_session(struct tp
+ u32 handle = tpm_buf_read_u32(buf, &offset_s);
+
+ if (auth->name_h[i] != handle) {
+- dev_err(&chip->dev, "TPM: handle %d wrong for name\n",
+- i);
+- return;
++ dev_err(&chip->dev, "invalid handle 0x%08x\n", handle);
++ ret = -EIO;
++ goto err;
+ }
+ }
+ /* point offset_s to the start of the sessions */
+@@ -609,12 +651,14 @@ void tpm_buf_fill_hmac_session(struct tp
+ offset_s += len;
+ }
+ if (offset_s != offset_p) {
+- dev_err(&chip->dev, "TPM session length is incorrect\n");
+- return;
++ dev_err(&chip->dev, "session length is incorrect\n");
++ ret = -EIO;
++ goto err;
+ }
+ if (!hmac) {
+- dev_err(&chip->dev, "TPM could not find HMAC session\n");
+- return;
++ dev_err(&chip->dev, "could not find HMAC session\n");
++ ret = -EIO;
++ goto err;
+ }
+
+ /* encrypt before HMAC */
+@@ -646,8 +690,11 @@ void tpm_buf_fill_hmac_session(struct tp
+ if (mso == TPM2_MSO_PERSISTENT ||
+ mso == TPM2_MSO_VOLATILE ||
+ mso == TPM2_MSO_NVRAM) {
+- sha256_update(&sctx, auth->name[i],
+- name_size(auth->name[i]));
++ ret = name_size(auth->name[i]);
++ if (ret < 0)
++ goto err;
++
++ sha256_update(&sctx, auth->name[i], ret);
+ } else {
+ __be32 h = cpu_to_be32(auth->name_h[i]);
+
+@@ -668,6 +715,11 @@ void tpm_buf_fill_hmac_session(struct tp
+ hmac_sha256_update(&hctx, auth->tpm_nonce, sizeof(auth->tpm_nonce));
+ hmac_sha256_update(&hctx, &auth->attrs, 1);
+ hmac_sha256_final(&hctx, hmac);
++ return 0;
++
++err:
++ tpm2_end_auth_session(chip);
++ return ret;
+ }
+ EXPORT_SYMBOL(tpm_buf_fill_hmac_session);
+
+--- a/include/linux/tpm.h
++++ b/include/linux/tpm.h
+@@ -526,8 +526,8 @@ static inline struct tpm2_auth *tpm2_chi
+ #endif
+ }
+
+-void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
+- u32 handle, u8 *name);
++int tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
++ u32 handle, u8 *name);
+ void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase,
+ int passphraselen);
+@@ -560,7 +560,7 @@ static inline void tpm_buf_append_hmac_s
+ #ifdef CONFIG_TCG_TPM2_HMAC
+
+ int tpm2_start_auth_session(struct tpm_chip *chip);
+-void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
++int tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf);
+ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
+ int rc);
+ void tpm2_end_auth_session(struct tpm_chip *chip);
+@@ -574,10 +574,13 @@ static inline int tpm2_start_auth_sessio
+ static inline void tpm2_end_auth_session(struct tpm_chip *chip)
+ {
+ }
+-static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip,
+- struct tpm_buf *buf)
++
++static inline int tpm_buf_fill_hmac_session(struct tpm_chip *chip,
++ struct tpm_buf *buf)
+ {
++ return 0;
+ }
++
+ static inline int tpm_buf_check_hmac_response(struct tpm_chip *chip,
+ struct tpm_buf *buf,
+ int rc)
+--- a/security/keys/trusted-keys/trusted_tpm2.c
++++ b/security/keys/trusted-keys/trusted_tpm2.c
+@@ -283,7 +283,10 @@ int tpm2_seal_trusted(struct tpm_chip *c
+ goto out_put;
+ }
+
+- tpm_buf_append_name(chip, &buf, options->keyhandle, NULL);
++ rc = tpm_buf_append_name(chip, &buf, options->keyhandle, NULL);
++ if (rc)
++ goto out;
++
+ tpm_buf_append_hmac_session(chip, &buf, TPM2_SA_DECRYPT,
+ options->keyauth, TPM_DIGEST_SIZE);
+
+@@ -331,7 +334,10 @@ int tpm2_seal_trusted(struct tpm_chip *c
+ goto out;
+ }
+
+- tpm_buf_fill_hmac_session(chip, &buf);
++ rc = tpm_buf_fill_hmac_session(chip, &buf);
++ if (rc)
++ goto out;
++
+ rc = tpm_transmit_cmd(chip, &buf, 4, "sealing data");
+ rc = tpm_buf_check_hmac_response(chip, &buf, rc);
+ if (rc)
+@@ -448,7 +454,10 @@ static int tpm2_load_cmd(struct tpm_chip
+ return rc;
+ }
+
+- tpm_buf_append_name(chip, &buf, options->keyhandle, NULL);
++ rc = tpm_buf_append_name(chip, &buf, options->keyhandle, NULL);
++ if (rc)
++ goto out;
++
+ tpm_buf_append_hmac_session(chip, &buf, 0, options->keyauth,
+ TPM_DIGEST_SIZE);
+
+@@ -460,7 +469,10 @@ static int tpm2_load_cmd(struct tpm_chip
+ goto out;
+ }
+
+- tpm_buf_fill_hmac_session(chip, &buf);
++ rc = tpm_buf_fill_hmac_session(chip, &buf);
++ if (rc)
++ goto out;
++
+ rc = tpm_transmit_cmd(chip, &buf, 4, "loading blob");
+ rc = tpm_buf_check_hmac_response(chip, &buf, rc);
+ if (!rc)
+@@ -508,7 +520,9 @@ static int tpm2_unseal_cmd(struct tpm_ch
+ return rc;
+ }
+
+- tpm_buf_append_name(chip, &buf, blob_handle, NULL);
++ rc = tpm_buf_append_name(chip, &buf, options->keyhandle, NULL);
++ if (rc)
++ goto out;
+
+ if (!options->policyhandle) {
+ tpm_buf_append_hmac_session(chip, &buf, TPM2_SA_ENCRYPT,
+@@ -533,7 +547,10 @@ static int tpm2_unseal_cmd(struct tpm_ch
+ NULL, 0);
+ }
+
+- tpm_buf_fill_hmac_session(chip, &buf);
++ rc = tpm_buf_fill_hmac_session(chip, &buf);
++ if (rc)
++ goto out;
++
+ rc = tpm_transmit_cmd(chip, &buf, 6, "unsealing");
+ rc = tpm_buf_check_hmac_response(chip, &buf, rc);
+ if (rc > 0)
--- /dev/null
+From bda1cbf73c6e241267c286427f2ed52b5735d872 Mon Sep 17 00:00:00 2001
+From: Jarkko Sakkinen <jarkko@kernel.org>
+Date: Mon, 1 Dec 2025 15:38:02 +0200
+Subject: tpm2-sessions: Fix tpm2_read_public range checks
+
+From: Jarkko Sakkinen <jarkko@kernel.org>
+
+commit bda1cbf73c6e241267c286427f2ed52b5735d872 upstream.
+
+tpm2_read_public() has some rudimentary range checks, but the function
+does not ensure that the response buffer has enough bytes for the full
+TPMT_HA payload.
+
+Re-implement the function with the necessary checks and validation, and
+return the name and its size for all handle types back to the caller.
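+
+A minimal sketch of the bounds check added before the name is parsed
+(four bytes cover the TPM2B_NAME size field plus the TPMT_HA hashAlg
+field):
+
+  /* Never read the name size or the hash algorithm id from past the
+   * end of the response buffer.
+   */
+  if (offset + 4 > tpm_buf_length(&buf)) {
+          tpm_buf_destroy(&buf);
+          return -EIO;
+  }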
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: d0a25bb961e6 ("tpm: Add HMAC session name/handle append")
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Reviewed-by: Jonathan McDowell <noodles@meta.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-cmd.c | 3 +
+ drivers/char/tpm/tpm2-sessions.c | 94 ++++++++++++++++++++-------------------
+ 2 files changed, 53 insertions(+), 44 deletions(-)
+
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -11,8 +11,11 @@
+ * used by the kernel internally.
+ */
+
++#include "linux/dev_printk.h"
++#include "linux/tpm.h"
+ #include "tpm.h"
+ #include <crypto/hash_info.h>
++#include <linux/unaligned.h>
+
+ static bool disable_pcr_integrity;
+ module_param(disable_pcr_integrity, bool, 0444);
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -163,53 +163,61 @@ static int name_size(const u8 *name)
+ }
+ }
+
+-static int tpm2_parse_read_public(char *name, struct tpm_buf *buf)
++static int tpm2_read_public(struct tpm_chip *chip, u32 handle, void *name)
+ {
+- struct tpm_header *head = (struct tpm_header *)buf->data;
++ u32 mso = tpm2_handle_mso(handle);
+ off_t offset = TPM_HEADER_SIZE;
+- u32 tot_len = be32_to_cpu(head->length);
+- int ret;
+- u32 val;
+-
+- /* we're starting after the header so adjust the length */
+- tot_len -= TPM_HEADER_SIZE;
+-
+- /* skip public */
+- val = tpm_buf_read_u16(buf, &offset);
+- if (val > tot_len)
+- return -EINVAL;
+- offset += val;
+- /* name */
+- val = tpm_buf_read_u16(buf, &offset);
+- ret = name_size(&buf->data[offset]);
+- if (ret < 0)
+- return ret;
+-
+- if (val != ret)
+- return -EINVAL;
+-
+- memcpy(name, &buf->data[offset], val);
+- /* forget the rest */
+- return 0;
+-}
+-
+-static int tpm2_read_public(struct tpm_chip *chip, u32 handle, char *name)
+-{
++ int rc, name_size_alg;
+ struct tpm_buf buf;
+- int rc;
++
++ if (mso != TPM2_MSO_PERSISTENT && mso != TPM2_MSO_VOLATILE &&
++ mso != TPM2_MSO_NVRAM) {
++ memcpy(name, &handle, sizeof(u32));
++ return sizeof(u32);
++ }
+
+ rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_READ_PUBLIC);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, handle);
+- rc = tpm_transmit_cmd(chip, &buf, 0, "read public");
+- if (rc == TPM2_RC_SUCCESS)
+- rc = tpm2_parse_read_public(name, &buf);
+
+- tpm_buf_destroy(&buf);
++ rc = tpm_transmit_cmd(chip, &buf, 0, "TPM2_ReadPublic");
++ if (rc) {
++ tpm_buf_destroy(&buf);
++ return tpm_ret_to_err(rc);
++ }
++
++ /* Skip TPMT_PUBLIC: */
++ offset += tpm_buf_read_u16(&buf, &offset);
+
+- return rc;
++ /*
++ * Ensure space for the length field of TPM2B_NAME and hashAlg field of
++ * TPMT_HA (the extra four bytes).
++ */
++ if (offset + 4 > tpm_buf_length(&buf)) {
++ tpm_buf_destroy(&buf);
++ return -EIO;
++ }
++
++ rc = tpm_buf_read_u16(&buf, &offset);
++ name_size_alg = name_size(&buf.data[offset]);
++
++ if (name_size_alg < 0)
++ return name_size_alg;
++
++ if (rc != name_size_alg) {
++ tpm_buf_destroy(&buf);
++ return -EIO;
++ }
++
++ if (offset + rc > tpm_buf_length(&buf)) {
++ tpm_buf_destroy(&buf);
++ return -EIO;
++ }
++
++ memcpy(name, &buf.data[offset], rc);
++ return name_size_alg;
+ }
+ #endif /* CONFIG_TCG_TPM2_HMAC */
+
+@@ -243,6 +251,7 @@ int tpm_buf_append_name(struct tpm_chip
+ #ifdef CONFIG_TCG_TPM2_HMAC
+ enum tpm2_mso_type mso = tpm2_handle_mso(handle);
+ struct tpm2_auth *auth;
++ u16 name_size_alg;
+ int slot;
+ int ret;
+ #endif
+@@ -273,8 +282,10 @@ int tpm_buf_append_name(struct tpm_chip
+ mso == TPM2_MSO_NVRAM) {
+ if (!name) {
+ ret = tpm2_read_public(chip, handle, auth->name[slot]);
+- if (ret)
++ if (ret < 0)
+ goto err;
++
++ name_size_alg = ret;
+ }
+ } else {
+ if (name) {
+@@ -286,13 +297,8 @@ int tpm_buf_append_name(struct tpm_chip
+ }
+
+ auth->name_h[slot] = handle;
+- if (name) {
+- ret = name_size(name);
+- if (ret < 0)
+- goto err;
+-
+- memcpy(auth->name[slot], name, ret);
+- }
++ if (name)
++ memcpy(auth->name[slot], name, name_size_alg);
+ #endif
+ return 0;
+