git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for all trees
author Sasha Levin <sashal@kernel.org>
Fri, 17 Oct 2025 12:42:10 +0000 (08:42 -0400)
committer Sasha Levin <sashal@kernel.org>
Fri, 17 Oct 2025 12:42:10 +0000 (08:42 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
63 files changed:
queue-5.10/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-5.10/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-5.10/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-5.10/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-5.10/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch [new file with mode: 0644]
queue-5.10/series
queue-5.15/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-5.15/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-5.15/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-5.15/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-5.15/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/writeback-avoid-excessively-long-inode-switching-tim.patch [new file with mode: 0644]
queue-5.15/writeback-avoid-softlockup-when-switching-many-inode.patch [new file with mode: 0644]
queue-5.4/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-5.4/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-5.4/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-5.4/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-5.4/series
queue-6.1/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.1/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-6.1/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.1/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-6.1/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/writeback-avoid-excessively-long-inode-switching-tim.patch [new file with mode: 0644]
queue-6.1/writeback-avoid-softlockup-when-switching-many-inode.patch [new file with mode: 0644]
queue-6.12/copy_file_range-limit-size-if-in-compat-mode.patch [new file with mode: 0644]
queue-6.12/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.12/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-6.12/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch [new file with mode: 0644]
queue-6.12/irqchip-sifive-plic-make-use-of-__assign_bit.patch [new file with mode: 0644]
queue-6.12/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.12/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-6.12/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/writeback-avoid-excessively-long-inode-switching-tim.patch [new file with mode: 0644]
queue-6.12/writeback-avoid-softlockup-when-switching-many-inode.patch [new file with mode: 0644]
queue-6.17/copy_file_range-limit-size-if-in-compat-mode.patch [new file with mode: 0644]
queue-6.17/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.17/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-6.17/iomap-error-out-on-file-io-when-there-is-no-inline_d.patch [new file with mode: 0644]
queue-6.17/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch [new file with mode: 0644]
queue-6.17/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.17/mnt_ns_tree_remove-dtrt-if-mnt_ns-had-never-been-add.patch [new file with mode: 0644]
queue-6.17/mount-handle-null-values-in-mnt_ns_release.patch [new file with mode: 0644]
queue-6.17/nsfs-validate-extensible-ioctls.patch [new file with mode: 0644]
queue-6.17/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-6.17/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch [new file with mode: 0644]
queue-6.17/pidfs-validate-extensible-ioctls.patch [new file with mode: 0644]
queue-6.17/series
queue-6.17/writeback-avoid-excessively-long-inode-switching-tim.patch [new file with mode: 0644]
queue-6.17/writeback-avoid-softlockup-when-switching-many-inode.patch [new file with mode: 0644]
queue-6.6/cramfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.6/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch [new file with mode: 0644]
queue-6.6/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch [new file with mode: 0644]
queue-6.6/irqchip-sifive-plic-make-use-of-__assign_bit.patch [new file with mode: 0644]
queue-6.6/minixfs-verify-inode-mode-when-loading-from-disk.patch [new file with mode: 0644]
queue-6.6/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch [new file with mode: 0644]
queue-6.6/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/writeback-avoid-excessively-long-inode-switching-tim.patch [new file with mode: 0644]
queue-6.6/writeback-avoid-softlockup-when-switching-many-inode.patch [new file with mode: 0644]

diff --git a/queue-5.10/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-5.10/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..c62b1db
--- /dev/null
@@ -0,0 +1,51 @@
+From 89480951e6756b2332eac1ec7f21d95d4951d528 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 4b90cfd1ec360..fda92e56ccd69 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-5.10/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-5.10/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..8663437
--- /dev/null
@@ -0,0 +1,116 @@
+From be98da7c43d8586c95a63076c6b69a07923ee23e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index bac4b1493222a..0f1605d78e83d 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4846,6 +4846,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
++                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index d1751f9b6f1ce..e9b8d516f1919 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -62,6 +62,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_IDA(mnt_id_ida);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -3913,7 +3922,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-5.10/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-5.10/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..7c7afda
--- /dev/null
@@ -0,0 +1,46 @@
+From 8d17883806bb2f9429da4ecb4c13c8b45dc8c4af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index e938f5b1e4b94..7636e789eb49b 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -469,8 +469,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-5.10/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-5.10/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..862947a
--- /dev/null
@@ -0,0 +1,95 @@
+From 7b31773999c2f0b5a8192246075c7c97ec1efa96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+Sometimes null is returned for task_active_pid_ns. Then it will trigger kernel panic in pid_nr_ns.
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 0820f2c50bb0c..a5a08476f3756 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -474,7 +474,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
diff --git a/queue-5.10/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch b/queue-5.10/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
new file mode 100644 (file)
index 0000000..bf1bcf3
--- /dev/null
@@ -0,0 +1,48 @@
+From f713b9c102237b7107812f051d9be3bf7fd94baa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Aug 2025 19:36:04 +0200
+Subject: pid: make __task_pid_nr_ns(ns => NULL) safe for zombie callers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit abdfd4948e45c51b19162cf8b3f5003f8f53c9b9 ]
+
+task_pid_vnr(another_task) will crash if the caller was already reaped.
+The pid_alive(current) check can't really help, the parent/debugger can
+call release_task() right after this check.
+
+This also means that even task_ppid_nr_ns(current, NULL) is not safe,
+pid_alive() only ensures that it is safe to dereference ->real_parent.
+
+Change __task_pid_nr_ns() to ensure ns != NULL.
+
+Originally-by: 高翔 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/all/20250802022123.3536934-1-gxxa03070307@gmail.com/
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/20250810173604.GA19991@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index a5a08476f3756..6dfec6bef6ab3 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -497,7 +497,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+       rcu_read_lock();
+       if (!ns)
+               ns = task_active_pid_ns(current);
+-      nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
++      if (ns)
++              nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       rcu_read_unlock();
+       return nr;
+-- 
+2.51.0
+
index 6baeadbb8092cddad858aef69a421cb6b30ff07f..5cc570dc1c61630ebba0101afea8cc63382b1956 100644 (file)
@@ -186,3 +186,8 @@ asoc-codecs-wcd934x-simplify-with-dev_err_probe.patch
 asoc-wcd934x-fix-error-handling-in-wcd934x_codec_parse_data.patch
 kvm-x86-don-t-re-check-l1-intercepts-when-completing-userspace-i-o.patch
 net-9p-fix-double-req-put-in-p9_fd_cancelled.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
diff --git a/queue-5.15/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-5.15/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..3c7419f
--- /dev/null
@@ -0,0 +1,51 @@
+From 5dd72f483fef349192cfc9aa03658bd4d7983bf9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 2be65269a987c..c893066e77ab4 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-5.15/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-5.15/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..42d4c69
--- /dev/null
@@ -0,0 +1,116 @@
+From b1daf8b079662619a11b485e5bc2c63431d58fae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 25e07ac5c1caf..ae09a6c701f02 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5148,6 +5148,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
++                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 35d63bb3b22dc..ae1b8530eb939 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -64,6 +64,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_IDA(mnt_id_ida);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -4352,7 +4361,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-5.15/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-5.15/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..8b2c07d
--- /dev/null
@@ -0,0 +1,46 @@
+From 4c54504b2b04bf7ba7e1b330fc715a8490170f7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index d4bd94234ef73..807ae40b64b06 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -470,8 +470,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-5.15/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-5.15/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..572edf4
--- /dev/null
@@ -0,0 +1,95 @@
+From 0f279b5b334740e8c06827f36456a21c1ed8a5ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+Sometimes null is returned for task_active_pid_ns. Then it will trigger kernel panic in pid_nr_ns.
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index efe87db446836..61f6649568b25 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -474,7 +474,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
diff --git a/queue-5.15/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch b/queue-5.15/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
new file mode 100644 (file)
index 0000000..ba4572d
--- /dev/null
@@ -0,0 +1,48 @@
+From fc8d3c7d133deb82d720bed679b17f22d97c0d81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Aug 2025 19:36:04 +0200
+Subject: pid: make __task_pid_nr_ns(ns => NULL) safe for zombie callers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit abdfd4948e45c51b19162cf8b3f5003f8f53c9b9 ]
+
+task_pid_vnr(another_task) will crash if the caller was already reaped.
+The pid_alive(current) check can't really help, the parent/debugger can
+call release_task() right after this check.
+
+This also means that even task_ppid_nr_ns(current, NULL) is not safe,
+pid_alive() only ensures that it is safe to dereference ->real_parent.
+
+Change __task_pid_nr_ns() to ensure ns != NULL.
+
+Originally-by: 高翔 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/all/20250802022123.3536934-1-gxxa03070307@gmail.com/
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/20250810173604.GA19991@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 61f6649568b25..18f67751d0a51 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -497,7 +497,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+       rcu_read_lock();
+       if (!ns)
+               ns = task_active_pid_ns(current);
+-      nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
++      if (ns)
++              nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       rcu_read_unlock();
+       return nr;
+-- 
+2.51.0
+
index a84e9b8572c04e0ac94577c5ea3d6d8141af1135..92306577650b159ea1b082ff484904e5d503db0b 100644 (file)
@@ -258,3 +258,10 @@ minmax.h-use-build_bug_on_msg-for-the-lo-hi-test-in-clamp.patch
 minmax.h-move-all-the-clamp-definitions-after-the-min-max-ones.patch
 minmax.h-simplify-the-variants-of-clamp.patch
 minmax.h-remove-some-defines-that-are-only-expanded-once.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
+writeback-avoid-softlockup-when-switching-many-inode.patch
+writeback-avoid-excessively-long-inode-switching-tim.patch
diff --git a/queue-5.15/writeback-avoid-excessively-long-inode-switching-tim.patch b/queue-5.15/writeback-avoid-excessively-long-inode-switching-tim.patch
new file mode 100644 (file)
index 0000000..c319a7f
--- /dev/null
@@ -0,0 +1,102 @@
+From 255bfd15a1f4ee94521f194b99f4f247d5425302 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:37 +0200
+Subject: writeback: Avoid excessively long inode switching times
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 9a6ebbdbd41235ea3bc0c4f39e2076599b8113cc ]
+
+With lazytime mount option enabled we can be switching many dirty inodes
+on cgroup exit to the parent cgroup. The numbers observed in practice
+when systemd slice of a large cron job exits can easily reach hundreds
+of thousands or millions. The logic in inode_do_switch_wbs() which sorts
+the inode into appropriate place in b_dirty list of the target wb
+however has linear complexity in the number of dirty inodes thus overall
+time complexity of switching all the inodes is quadratic leading to
+workers being pegged for hours consuming 100% of the CPU and switching
+inodes to the parent wb.
+
+Simple reproducer of the issue:
+  FILES=10000
+  # Filesystem mounted with lazytime mount option
+  MNT=/mnt/
+  echo "Creating files and switching timestamps"
+  for (( j = 0; j < 50; j ++ )); do
+      mkdir $MNT/dir$j
+      for (( i = 0; i < $FILES; i++ )); do
+          echo "foo" >$MNT/dir$j/file$i
+      done
+      touch -a -t 202501010000 $MNT/dir$j/file*
+  done
+  wait
+  echo "Syncing and flushing"
+  sync
+  echo 3 >/proc/sys/vm/drop_caches
+
+  echo "Reading all files from a cgroup"
+  mkdir /sys/fs/cgroup/unified/mycg1 || exit
+  echo $$ >/sys/fs/cgroup/unified/mycg1/cgroup.procs || exit
+  for (( j = 0; j < 50; j ++ )); do
+      cat /mnt/dir$j/file* >/dev/null &
+  done
+  wait
+  echo "Switching wbs"
+  # Now rmdir the cgroup after the script exits
+
+We need to maintain b_dirty list ordering to keep writeback happy so
+instead of sorting inode into appropriate place just append it at the
+end of the list and clobber dirtied_time_when. This may result in inode
+writeback starting later after cgroup switch however cgroup switches are
+rare so it shouldn't matter much. Since the cgroup had write access to
+the inode, there are no practical concerns of the possible DoS issues.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 3b002ac407434..095eaa896cbe2 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -418,22 +418,23 @@ static bool inode_do_switch_wbs(struct inode *inode,
+        * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+        * the specific list @inode was on is ignored and the @inode is put on
+        * ->b_dirty which is always correct including from ->b_dirty_time.
+-       * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+-       * was clean, it means it was on the b_attached list, so move it onto
+-       * the b_attached list of @new_wb.
++       * If the @inode was clean, it means it was on the b_attached list, so
++       * move it onto the b_attached list of @new_wb.
+        */
+       if (!list_empty(&inode->i_io_list)) {
+               inode->i_wb = new_wb;
+               if (inode->i_state & I_DIRTY_ALL) {
+-                      struct inode *pos;
+-
+-                      list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+-                              if (time_after_eq(inode->dirtied_when,
+-                                                pos->dirtied_when))
+-                                      break;
++                      /*
++                       * We need to keep b_dirty list sorted by
++                       * dirtied_time_when. However properly sorting the
++                       * inode in the list gets too expensive when switching
++                       * many inodes. So just attach inode at the end of the
++                       * dirty list and clobber the dirtied_time_when.
++                       */
++                      inode->dirtied_time_when = jiffies;
+                       inode_io_list_move_locked(inode, new_wb,
+-                                                pos->i_io_list.prev);
++                                                &new_wb->b_dirty);
+               } else {
+                       inode_cgwb_move_to_attached(inode, new_wb);
+               }
+-- 
+2.51.0
+
diff --git a/queue-5.15/writeback-avoid-softlockup-when-switching-many-inode.patch b/queue-5.15/writeback-avoid-softlockup-when-switching-many-inode.patch
new file mode 100644 (file)
index 0000000..3ee46ad
--- /dev/null
@@ -0,0 +1,65 @@
+From 4d825e5cd058cfb89aab66765f65f134a245b793 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:36 +0200
+Subject: writeback: Avoid softlockup when switching many inodes
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 66c14dccd810d42ec5c73bb8a9177489dfd62278 ]
+
+process_inode_switch_wbs_work() can be switching over 100 inodes to a
+different cgroup. Since switching an inode requires counting all dirty &
+under-writeback pages in the address space of each inode, this can take
+a significant amount of time. Add a possibility to reschedule after
+processing each inode to avoid softlockups.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index cb3f1790a296e..3b002ac407434 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -475,6 +475,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        */
+       down_read(&bdi->wb_switch_rwsem);
++      inodep = isw->inodes;
+       /*
+        * By the time control reaches here, RCU grace period has passed
+        * since I_WB_SWITCH assertion and all wb stat update transactions
+@@ -485,6 +486,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        * gives us exclusion against all wb related operations on @inode
+        * including IO list manipulations and stat updates.
+        */
++relock:
+       if (old_wb < new_wb) {
+               spin_lock(&old_wb->list_lock);
+               spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+@@ -493,10 +495,17 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+               spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+       }
+-      for (inodep = isw->inodes; *inodep; inodep++) {
++      while (*inodep) {
+               WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+               if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+                       nr_switched++;
++              inodep++;
++              if (*inodep && need_resched()) {
++                      spin_unlock(&new_wb->list_lock);
++                      spin_unlock(&old_wb->list_lock);
++                      cond_resched();
++                      goto relock;
++              }
+       }
+       spin_unlock(&new_wb->list_lock);
+-- 
+2.51.0
+
diff --git a/queue-5.4/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-5.4/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..c8d7d65
--- /dev/null
@@ -0,0 +1,51 @@
+From 1992ae4798a422a35ace1530e04c7e7b6e96bc2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 2f04024c3588e..82c45ca453216 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-5.4/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-5.4/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..f70bf95
--- /dev/null
@@ -0,0 +1,116 @@
+From a53cc9813a0ede779e5d69661df27c6461368baa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 9975dcab99c35..5c2594d7c9ac9 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4409,6 +4409,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
++                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index c87f847c959d9..3c6f0586ae218 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -62,6 +62,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_IDA(mnt_id_ida);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -3829,7 +3838,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-5.4/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-5.4/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..7b123bd
--- /dev/null
@@ -0,0 +1,46 @@
+From d7406d8bc361b52e4a3c69f7a334d6601b2ebf29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index 3fffc709afd43..c026706aec0cc 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -470,8 +470,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-5.4/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-5.4/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..23be414
--- /dev/null
@@ -0,0 +1,95 @@
+From 2ed2880b4416be2bf1e542d96667d589becdb374 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+Sometimes null is returned for task_active_pid_ns. Then it will trigger kernel panic in pid_nr_ns.
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 0a9f2e4372176..3a7b71258047f 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -407,7 +407,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
index 71c6f097a60df1c22d869564e2151d06e36c965a..27bac166d6c6f3afb756d9a7aee81308379c924e 100644 (file)
@@ -142,3 +142,7 @@ mfd-intel_soc_pmic_chtdc_ti-drop-unneeded-assignment-for-cache_type.patch
 mfd-intel_soc_pmic_chtdc_ti-set-use_single_read-regmap_config-flag.patch
 dm-fix-null-pointer-dereference-in-__dm_suspend.patch
 tracing-fix-race-condition-in-kprobe-initialization-causing-null-pointer-dereference.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
diff --git a/queue-6.1/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.1/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..e110ab5
--- /dev/null
@@ -0,0 +1,51 @@
+From 0f3e4f92ce8ab5ba39827abbc2ee28eb560949f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 6dae27d6f553f..9979187a4b3c5 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-6.1/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-6.1/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..a7c5118
--- /dev/null
@@ -0,0 +1,116 @@
+From f1eec19b191a431847c3444f440eeead3ec1c9e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index cce2731727392..05ab068c1cc6d 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5446,6 +5446,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
++                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 2a76269f2a4e7..f22f76d9c22f9 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -64,6 +64,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_IDA(mnt_id_ida);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -4414,7 +4423,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-6.1/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.1/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..56105de
--- /dev/null
@@ -0,0 +1,46 @@
+From 4e45893bcda29bb2a129e7992345e4a15ba240a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index da8bdd1712a70..9da6903e306c6 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -470,8 +470,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-6.1/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-6.1/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..011cf90
--- /dev/null
@@ -0,0 +1,95 @@
+From fdc9413906a876918f08ce133c1995c2f7dee73c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+Sometimes null is returned for task_active_pid_ns. Then it will trigger kernel panic in pid_nr_ns.
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 8bce3aebc949f..e1d0c9d952278 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -474,7 +474,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
diff --git a/queue-6.1/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch b/queue-6.1/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
new file mode 100644 (file)
index 0000000..095135e
--- /dev/null
@@ -0,0 +1,48 @@
+From ecf91d0814b73954603515999846941820223193 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Aug 2025 19:36:04 +0200
+Subject: pid: make __task_pid_nr_ns(ns => NULL) safe for zombie callers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit abdfd4948e45c51b19162cf8b3f5003f8f53c9b9 ]
+
+task_pid_vnr(another_task) will crash if the caller was already reaped.
+The pid_alive(current) check can't really help, the parent/debugger can
+call release_task() right after this check.
+
+This also means that even task_ppid_nr_ns(current, NULL) is not safe,
+pid_alive() only ensures that it is safe to dereference ->real_parent.
+
+Change __task_pid_nr_ns() to ensure ns != NULL.
+
+Originally-by: 高翔 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/all/20250802022123.3536934-1-gxxa03070307@gmail.com/
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/20250810173604.GA19991@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index e1d0c9d952278..62a8349267de1 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -497,7 +497,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+       rcu_read_lock();
+       if (!ns)
+               ns = task_active_pid_ns(current);
+-      nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
++      if (ns)
++              nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       rcu_read_unlock();
+       return nr;
+-- 
+2.51.0
+
index 18f31b8c9c853047212e6dad79499cf1fde62d9f..fe1b0e105054383145c9da98958b76a5dd2ea9b3 100644 (file)
@@ -158,3 +158,10 @@ asm-generic-io-add-_ret_ip_-to-mmio-trace-for-more-accurate-debug-info.patch
 asm-generic-io.h-suppress-endianness-warnings-for-relaxed-accessors.patch
 asm-generic-io.h-skip-trace-helpers-if-rwmmio-events-are-disabled.patch
 mptcp-pm-in-kernel-usable-client-side-with-c-flag.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
+writeback-avoid-softlockup-when-switching-many-inode.patch
+writeback-avoid-excessively-long-inode-switching-tim.patch
diff --git a/queue-6.1/writeback-avoid-excessively-long-inode-switching-tim.patch b/queue-6.1/writeback-avoid-excessively-long-inode-switching-tim.patch
new file mode 100644 (file)
index 0000000..904555e
--- /dev/null
@@ -0,0 +1,102 @@
+From 72326df292a4827988a5837c9a54c38ca3b6ec36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:37 +0200
+Subject: writeback: Avoid excessively long inode switching times
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 9a6ebbdbd41235ea3bc0c4f39e2076599b8113cc ]
+
+With lazytime mount option enabled we can be switching many dirty inodes
+on cgroup exit to the parent cgroup. The numbers observed in practice
+when systemd slice of a large cron job exits can easily reach hundreds
+of thousands or millions. The logic in inode_do_switch_wbs() which sorts
+the inode into appropriate place in b_dirty list of the target wb
+however has linear complexity in the number of dirty inodes thus overall
+time complexity of switching all the inodes is quadratic leading to
+workers being pegged for hours consuming 100% of the CPU and switching
+inodes to the parent wb.
+
+Simple reproducer of the issue:
+  FILES=10000
+  # Filesystem mounted with lazytime mount option
+  MNT=/mnt/
+  echo "Creating files and switching timestamps"
+  for (( j = 0; j < 50; j ++ )); do
+      mkdir $MNT/dir$j
+      for (( i = 0; i < $FILES; i++ )); do
+          echo "foo" >$MNT/dir$j/file$i
+      done
+      touch -a -t 202501010000 $MNT/dir$j/file*
+  done
+  wait
+  echo "Syncing and flushing"
+  sync
+  echo 3 >/proc/sys/vm/drop_caches
+
+  echo "Reading all files from a cgroup"
+  mkdir /sys/fs/cgroup/unified/mycg1 || exit
+  echo $$ >/sys/fs/cgroup/unified/mycg1/cgroup.procs || exit
+  for (( j = 0; j < 50; j ++ )); do
+      cat /mnt/dir$j/file* >/dev/null &
+  done
+  wait
+  echo "Switching wbs"
+  # Now rmdir the cgroup after the script exits
+
+We need to maintain b_dirty list ordering to keep writeback happy so
+instead of sorting inode into appropriate place just append it at the
+end of the list and clobber dirtied_time_when. This may result in inode
+writeback starting later after cgroup switch however cgroup switches are
+rare so it shouldn't matter much. Since the cgroup had write access to
+the inode, there are no practical concerns of the possible DoS issues.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 07473cf2a7c9b..75e8c102c5eef 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -420,22 +420,23 @@ static bool inode_do_switch_wbs(struct inode *inode,
+        * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+        * the specific list @inode was on is ignored and the @inode is put on
+        * ->b_dirty which is always correct including from ->b_dirty_time.
+-       * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+-       * was clean, it means it was on the b_attached list, so move it onto
+-       * the b_attached list of @new_wb.
++       * If the @inode was clean, it means it was on the b_attached list, so
++       * move it onto the b_attached list of @new_wb.
+        */
+       if (!list_empty(&inode->i_io_list)) {
+               inode->i_wb = new_wb;
+               if (inode->i_state & I_DIRTY_ALL) {
+-                      struct inode *pos;
+-
+-                      list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+-                              if (time_after_eq(inode->dirtied_when,
+-                                                pos->dirtied_when))
+-                                      break;
++                      /*
++                       * We need to keep b_dirty list sorted by
++                       * dirtied_time_when. However properly sorting the
++                       * inode in the list gets too expensive when switching
++                       * many inodes. So just attach inode at the end of the
++                       * dirty list and clobber the dirtied_time_when.
++                       */
++                      inode->dirtied_time_when = jiffies;
+                       inode_io_list_move_locked(inode, new_wb,
+-                                                pos->i_io_list.prev);
++                                                &new_wb->b_dirty);
+               } else {
+                       inode_cgwb_move_to_attached(inode, new_wb);
+               }
+-- 
+2.51.0
+
diff --git a/queue-6.1/writeback-avoid-softlockup-when-switching-many-inode.patch b/queue-6.1/writeback-avoid-softlockup-when-switching-many-inode.patch
new file mode 100644 (file)
index 0000000..dd41341
--- /dev/null
@@ -0,0 +1,65 @@
+From 5cfd0e6adfdedb4d45aeeb61ffcbdd1acb9e8d8e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:36 +0200
+Subject: writeback: Avoid softlockup when switching many inodes
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 66c14dccd810d42ec5c73bb8a9177489dfd62278 ]
+
+process_inode_switch_wbs_work() can be switching over 100 inodes to a
+different cgroup. Since switching an inode requires counting all dirty &
+under-writeback pages in the address space of each inode, this can take
+a significant amount of time. Add a possibility to reschedule after
+processing each inode to avoid softlockups.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 41f8ae8a416fb..07473cf2a7c9b 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -477,6 +477,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        */
+       down_read(&bdi->wb_switch_rwsem);
++      inodep = isw->inodes;
+       /*
+        * By the time control reaches here, RCU grace period has passed
+        * since I_WB_SWITCH assertion and all wb stat update transactions
+@@ -487,6 +488,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        * gives us exclusion against all wb related operations on @inode
+        * including IO list manipulations and stat updates.
+        */
++relock:
+       if (old_wb < new_wb) {
+               spin_lock(&old_wb->list_lock);
+               spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+@@ -495,10 +497,17 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+               spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+       }
+-      for (inodep = isw->inodes; *inodep; inodep++) {
++      while (*inodep) {
+               WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+               if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+                       nr_switched++;
++              inodep++;
++              if (*inodep && need_resched()) {
++                      spin_unlock(&new_wb->list_lock);
++                      spin_unlock(&old_wb->list_lock);
++                      cond_resched();
++                      goto relock;
++              }
+       }
+       spin_unlock(&new_wb->list_lock);
+-- 
+2.51.0
+
diff --git a/queue-6.12/copy_file_range-limit-size-if-in-compat-mode.patch b/queue-6.12/copy_file_range-limit-size-if-in-compat-mode.patch
new file mode 100644 (file)
index 0000000..080a2af
--- /dev/null
@@ -0,0 +1,66 @@
+From 2f9b6661cd9ebb2b99dc7a3584f5eeeab267e370 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 17:11:05 +0200
+Subject: copy_file_range: limit size if in compat mode
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit f8f59a2c05dc16d19432e3154a9ac7bc385f4b92 ]
+
+If the process runs in 32-bit compat mode, copy_file_range results can be
+in the in-band error range.  In this case limit copy length to MAX_RW_COUNT
+to prevent a signed overflow.
+
+Reported-by: Florian Weimer <fweimer@redhat.com>
+Closes: https://lore.kernel.org/all/lhuh5ynl8z5.fsf@oldenburg.str.redhat.com/
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Link: https://lore.kernel.org/20250813151107.99856-1-mszeredi@redhat.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/read_write.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/fs/read_write.c b/fs/read_write.c
+index befec0b5c537a..46408bab92385 100644
+--- a/fs/read_write.c
++++ b/fs/read_write.c
+@@ -1600,6 +1600,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+       if (len == 0)
+               return 0;
++      /*
++       * Make sure return value doesn't overflow in 32bit compat mode.  Also
++       * limit the size for all cases except when calling ->copy_file_range().
++       */
++      if (splice || !file_out->f_op->copy_file_range || in_compat_syscall())
++              len = min_t(size_t, MAX_RW_COUNT, len);
++
+       file_start_write(file_out);
+       /*
+@@ -1613,9 +1620,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+                                                     len, flags);
+       } else if (!splice && file_in->f_op->remap_file_range && samesb) {
+               ret = file_in->f_op->remap_file_range(file_in, pos_in,
+-                              file_out, pos_out,
+-                              min_t(loff_t, MAX_RW_COUNT, len),
+-                              REMAP_FILE_CAN_SHORTEN);
++                              file_out, pos_out, len, REMAP_FILE_CAN_SHORTEN);
+               /* fallback to splice */
+               if (ret <= 0)
+                       splice = true;
+@@ -1648,8 +1653,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+        * to splicing from input file, while file_start_write() is held on
+        * the output file on a different sb.
+        */
+-      ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+-                             min_t(size_t, len, MAX_RW_COUNT), 0);
++      ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len, 0);
+ done:
+       if (ret > 0) {
+               fsnotify_access(file_in);
+-- 
+2.51.0
+
diff --git a/queue-6.12/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.12/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..ef148d1
--- /dev/null
@@ -0,0 +1,51 @@
+From 1f031cd7b076a4542583c81a7517497430ae246b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index b84d1747a0205..e7d192f7ab3b4 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-6.12/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-6.12/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..30ab747
--- /dev/null
@@ -0,0 +1,116 @@
+From 0c73650be4836726549af5eff48302970c99a5f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 8724c2c580b88..e88505e945d52 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5923,6 +5923,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
++                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 7606969412493..f5e46c2595b11 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -65,6 +65,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_IDA(mnt_id_ida);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -5566,7 +5575,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-6.12/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch b/queue-6.12/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch
new file mode 100644 (file)
index 0000000..773d171
--- /dev/null
@@ -0,0 +1,64 @@
+From 058647f6c5b3d0a4209840a51e23fa14443ee006 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Sep 2025 15:43:19 +0100
+Subject: irqchip/sifive-plic: Avoid interrupt ID 0 handling during
+ suspend/resume
+
+From: Lucas Zampieri <lzampier@redhat.com>
+
+[ Upstream commit f75e07bf5226da640fa99a0594687c780d9bace4 ]
+
+According to the PLIC specification[1], global interrupt sources are
+assigned small unsigned integer identifiers beginning at the value 1.
+An interrupt ID of 0 is reserved to mean "no interrupt".
+
+The current plic_irq_resume() and plic_irq_suspend() functions incorrectly
+start the loop from index 0, which accesses the register space for the
+reserved interrupt ID 0.
+
+Change the loop to start from index 1, skipping the reserved
+interrupt ID 0 as per the PLIC specification.
+
+This prevents potential undefined behavior when accessing the reserved
+register space during suspend/resume cycles.
+
+Fixes: e80f0b6a2cf3 ("irqchip/irq-sifive-plic: Add syscore callbacks for hibernation")
+Co-developed-by: Jia Wang <wangjia@ultrarisc.com>
+Signed-off-by: Jia Wang <wangjia@ultrarisc.com>
+Co-developed-by: Charles Mirabile <cmirabil@redhat.com>
+Signed-off-by: Charles Mirabile <cmirabil@redhat.com>
+Signed-off-by: Lucas Zampieri <lzampier@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://github.com/riscv/riscv-plic-spec/releases/tag/1.0.0
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-sifive-plic.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
+index bf69a4802b71e..9c4af7d588463 100644
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -252,7 +252,8 @@ static int plic_irq_suspend(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++) {
++      /* irq ID 0 is reserved */
++      for (i = 1; i < priv->nr_irqs; i++) {
+               __assign_bit(i, priv->prio_save,
+                            readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID));
+       }
+@@ -283,7 +284,8 @@ static void plic_irq_resume(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++) {
++      /* irq ID 0 is reserved */
++      for (i = 1; i < priv->nr_irqs; i++) {
+               index = BIT_WORD(i);
+               writel((priv->prio_save[index] & BIT_MASK(i)) ? 1 : 0,
+                      priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID);
+-- 
+2.51.0
+
diff --git a/queue-6.12/irqchip-sifive-plic-make-use-of-__assign_bit.patch b/queue-6.12/irqchip-sifive-plic-make-use-of-__assign_bit.patch
new file mode 100644 (file)
index 0000000..caf7ade
--- /dev/null
@@ -0,0 +1,51 @@
+From c65fedccb3cc2eed80b6eb8cfe52677a574b2a08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 21:08:24 +0800
+Subject: irqchip/sifive-plic: Make use of __assign_bit()
+
+From: Hongbo Li <lihongbo22@huawei.com>
+
+[ Upstream commit 40d7af5375a4e27d8576d9d11954ac213d06f09e ]
+
+Replace the open coded
+
+if (foo)
+        __set_bit(n, bar);
+    else
+        __clear_bit(n, bar);
+
+with __assign_bit(). No functional change intended.
+
+Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Palmer Dabbelt <palmer@rivosinc.com>
+Link: https://lore.kernel.org/all/20240902130824.2878644-1-lihongbo22@huawei.com
+Stable-dep-of: f75e07bf5226 ("irqchip/sifive-plic: Avoid interrupt ID 0 handling during suspend/resume")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-sifive-plic.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
+index 36dbcf2d728a5..bf69a4802b71e 100644
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -252,11 +252,10 @@ static int plic_irq_suspend(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++)
+-              if (readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID))
+-                      __set_bit(i, priv->prio_save);
+-              else
+-                      __clear_bit(i, priv->prio_save);
++      for (i = 0; i < priv->nr_irqs; i++) {
++              __assign_bit(i, priv->prio_save,
++                           readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID));
++      }
+       for_each_cpu(cpu, cpu_present_mask) {
+               struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu);
+-- 
+2.51.0
+
diff --git a/queue-6.12/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.12/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..cfcca5d
--- /dev/null
@@ -0,0 +1,46 @@
+From 087bb55affd7ea5762b35c3b173cd2b283945ed6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index f007e389d5d29..fc01f9dc8c391 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -491,8 +491,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-6.12/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-6.12/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..44cc37c
--- /dev/null
@@ -0,0 +1,95 @@
+From 3f235527ebaae3b7942faa087d4f67ae5f839dfc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+Sometimes null is returned for task_active_pid_ns. Then it will trigger kernel panic in pid_nr_ns.
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 2715afb77eab8..b80c3bfb58d07 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -487,7 +487,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
diff --git a/queue-6.12/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch b/queue-6.12/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
new file mode 100644 (file)
index 0000000..d9c61ad
--- /dev/null
@@ -0,0 +1,48 @@
+From d0a57bd87b2bc11529d3849fe657d04b5de485d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Aug 2025 19:36:04 +0200
+Subject: pid: make __task_pid_nr_ns(ns => NULL) safe for zombie callers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit abdfd4948e45c51b19162cf8b3f5003f8f53c9b9 ]
+
+task_pid_vnr(another_task) will crash if the caller was already reaped.
+The pid_alive(current) check can't really help, the parent/debugger can
+call release_task() right after this check.
+
+This also means that even task_ppid_nr_ns(current, NULL) is not safe,
+pid_alive() only ensures that it is safe to dereference ->real_parent.
+
+Change __task_pid_nr_ns() to ensure ns != NULL.
+
+Originally-by: 高翔 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/all/20250802022123.3536934-1-gxxa03070307@gmail.com/
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/20250810173604.GA19991@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index b80c3bfb58d07..8fdc3a5f87c7d 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -510,7 +510,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+       rcu_read_lock();
+       if (!ns)
+               ns = task_active_pid_ns(current);
+-      nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
++      if (ns)
++              nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       rcu_read_unlock();
+       return nr;
+-- 
+2.51.0
+
index 95bb15cb5e1a243a8dab69acc46da5ca9318164c..951af66de6d4581c563a4e92039b8aea9bfb6554 100644 (file)
@@ -260,3 +260,13 @@ s390-bpf-centralize-frame-offset-calculations.patch
 s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
 s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
 s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch
+irqchip-sifive-plic-make-use-of-__assign_bit.patch
+irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch
+copy_file_range-limit-size-if-in-compat-mode.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
+writeback-avoid-softlockup-when-switching-many-inode.patch
+writeback-avoid-excessively-long-inode-switching-tim.patch
diff --git a/queue-6.12/writeback-avoid-excessively-long-inode-switching-tim.patch b/queue-6.12/writeback-avoid-excessively-long-inode-switching-tim.patch
new file mode 100644 (file)
index 0000000..f2ca30c
--- /dev/null
@@ -0,0 +1,102 @@
+From 610a1224b6c329c6463202ca1bee301c9b4502d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:37 +0200
+Subject: writeback: Avoid excessively long inode switching times
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 9a6ebbdbd41235ea3bc0c4f39e2076599b8113cc ]
+
+With lazytime mount option enabled we can be switching many dirty inodes
+on cgroup exit to the parent cgroup. The numbers observed in practice
+when systemd slice of a large cron job exits can easily reach hundreds
+of thousands or millions. The logic in inode_do_switch_wbs() which sorts
+the inode into appropriate place in b_dirty list of the target wb
+however has linear complexity in the number of dirty inodes thus overall
+time complexity of switching all the inodes is quadratic leading to
+workers being pegged for hours consuming 100% of the CPU and switching
+inodes to the parent wb.
+
+Simple reproducer of the issue:
+  FILES=10000
+  # Filesystem mounted with lazytime mount option
+  MNT=/mnt/
+  echo "Creating files and switching timestamps"
+  for (( j = 0; j < 50; j ++ )); do
+      mkdir $MNT/dir$j
+      for (( i = 0; i < $FILES; i++ )); do
+          echo "foo" >$MNT/dir$j/file$i
+      done
+      touch -a -t 202501010000 $MNT/dir$j/file*
+  done
+  wait
+  echo "Syncing and flushing"
+  sync
+  echo 3 >/proc/sys/vm/drop_caches
+
+  echo "Reading all files from a cgroup"
+  mkdir /sys/fs/cgroup/unified/mycg1 || exit
+  echo $$ >/sys/fs/cgroup/unified/mycg1/cgroup.procs || exit
+  for (( j = 0; j < 50; j ++ )); do
+      cat /mnt/dir$j/file* >/dev/null &
+  done
+  wait
+  echo "Switching wbs"
+  # Now rmdir the cgroup after the script exits
+
+We need to maintain b_dirty list ordering to keep writeback happy so
+instead of sorting inode into appropriate place just append it at the
+end of the list and clobber dirtied_time_when. This may result in inode
+writeback starting later after cgroup switch however cgroup switches are
+rare so it shouldn't matter much. Since the cgroup had write access to
+the inode, there are no practical concerns of the possible DoS issues.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index eff778dc0386c..28edfad85c628 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -446,22 +446,23 @@ static bool inode_do_switch_wbs(struct inode *inode,
+        * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+        * the specific list @inode was on is ignored and the @inode is put on
+        * ->b_dirty which is always correct including from ->b_dirty_time.
+-       * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+-       * was clean, it means it was on the b_attached list, so move it onto
+-       * the b_attached list of @new_wb.
++       * If the @inode was clean, it means it was on the b_attached list, so
++       * move it onto the b_attached list of @new_wb.
+        */
+       if (!list_empty(&inode->i_io_list)) {
+               inode->i_wb = new_wb;
+               if (inode->i_state & I_DIRTY_ALL) {
+-                      struct inode *pos;
+-
+-                      list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+-                              if (time_after_eq(inode->dirtied_when,
+-                                                pos->dirtied_when))
+-                                      break;
++                      /*
++                       * We need to keep b_dirty list sorted by
++                       * dirtied_time_when. However properly sorting the
++                       * inode in the list gets too expensive when switching
++                       * many inodes. So just attach inode at the end of the
++                       * dirty list and clobber the dirtied_time_when.
++                       */
++                      inode->dirtied_time_when = jiffies;
+                       inode_io_list_move_locked(inode, new_wb,
+-                                                pos->i_io_list.prev);
++                                                &new_wb->b_dirty);
+               } else {
+                       inode_cgwb_move_to_attached(inode, new_wb);
+               }
+-- 
+2.51.0
+
diff --git a/queue-6.12/writeback-avoid-softlockup-when-switching-many-inode.patch b/queue-6.12/writeback-avoid-softlockup-when-switching-many-inode.patch
new file mode 100644 (file)
index 0000000..77c42e8
--- /dev/null
@@ -0,0 +1,65 @@
+From b730971ba2912b8af3125a63069f27aa7da3826c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:36 +0200
+Subject: writeback: Avoid softlockup when switching many inodes
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 66c14dccd810d42ec5c73bb8a9177489dfd62278 ]
+
+process_inode_switch_wbs_work() can be switching over 100 inodes to a
+different cgroup. Since switching an inode requires counting all dirty &
+under-writeback pages in the address space of each inode, this can take
+a significant amount of time. Add a possibility to reschedule after
+processing each inode to avoid softlockups.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 4ae226778d646..eff778dc0386c 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -503,6 +503,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        */
+       down_read(&bdi->wb_switch_rwsem);
++      inodep = isw->inodes;
+       /*
+        * By the time control reaches here, RCU grace period has passed
+        * since I_WB_SWITCH assertion and all wb stat update transactions
+@@ -513,6 +514,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        * gives us exclusion against all wb related operations on @inode
+        * including IO list manipulations and stat updates.
+        */
++relock:
+       if (old_wb < new_wb) {
+               spin_lock(&old_wb->list_lock);
+               spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+@@ -521,10 +523,17 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+               spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+       }
+-      for (inodep = isw->inodes; *inodep; inodep++) {
++      while (*inodep) {
+               WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+               if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+                       nr_switched++;
++              inodep++;
++              if (*inodep && need_resched()) {
++                      spin_unlock(&new_wb->list_lock);
++                      spin_unlock(&old_wb->list_lock);
++                      cond_resched();
++                      goto relock;
++              }
+       }
+       spin_unlock(&new_wb->list_lock);
+-- 
+2.51.0
+
diff --git a/queue-6.17/copy_file_range-limit-size-if-in-compat-mode.patch b/queue-6.17/copy_file_range-limit-size-if-in-compat-mode.patch
new file mode 100644 (file)
index 0000000..09e4e4c
--- /dev/null
@@ -0,0 +1,66 @@
+From 239d59f3832f0839ae7db629aaa876403b2deb28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 17:11:05 +0200
+Subject: copy_file_range: limit size if in compat mode
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit f8f59a2c05dc16d19432e3154a9ac7bc385f4b92 ]
+
+If the process runs in 32-bit compat mode, copy_file_range results can be
+in the in-band error range.  In this case limit copy length to MAX_RW_COUNT
+to prevent a signed overflow.
+
+Reported-by: Florian Weimer <fweimer@redhat.com>
+Closes: https://lore.kernel.org/all/lhuh5ynl8z5.fsf@oldenburg.str.redhat.com/
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Link: https://lore.kernel.org/20250813151107.99856-1-mszeredi@redhat.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/read_write.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/fs/read_write.c b/fs/read_write.c
+index c5b6265d984ba..833bae068770a 100644
+--- a/fs/read_write.c
++++ b/fs/read_write.c
+@@ -1576,6 +1576,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+       if (len == 0)
+               return 0;
++      /*
++       * Make sure return value doesn't overflow in 32bit compat mode.  Also
++       * limit the size for all cases except when calling ->copy_file_range().
++       */
++      if (splice || !file_out->f_op->copy_file_range || in_compat_syscall())
++              len = min_t(size_t, MAX_RW_COUNT, len);
++
+       file_start_write(file_out);
+       /*
+@@ -1589,9 +1596,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+                                                     len, flags);
+       } else if (!splice && file_in->f_op->remap_file_range && samesb) {
+               ret = file_in->f_op->remap_file_range(file_in, pos_in,
+-                              file_out, pos_out,
+-                              min_t(loff_t, MAX_RW_COUNT, len),
+-                              REMAP_FILE_CAN_SHORTEN);
++                              file_out, pos_out, len, REMAP_FILE_CAN_SHORTEN);
+               /* fallback to splice */
+               if (ret <= 0)
+                       splice = true;
+@@ -1624,8 +1629,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+        * to splicing from input file, while file_start_write() is held on
+        * the output file on a different sb.
+        */
+-      ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+-                             min_t(size_t, len, MAX_RW_COUNT), 0);
++      ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, len, 0);
+ done:
+       if (ret > 0) {
+               fsnotify_access(file_in);
+-- 
+2.51.0
+
diff --git a/queue-6.17/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.17/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..6a86650
--- /dev/null
@@ -0,0 +1,51 @@
+From aed3cc20fd7d92905a3bab5081abb6a58ddb7371 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 56c8005b24a34..ca54bf24b719f 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -116,9 +116,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-6.17/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-6.17/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..8830757
--- /dev/null
@@ -0,0 +1,116 @@
+From caf84963cbd30f5b57f96f509db7fb5a213b7cdc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 5a7a83c411e9c..e92c0056e4e0a 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -6429,6 +6429,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
++                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 46e654247274f..38609066cf330 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -65,6 +65,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -6127,7 +6136,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-6.17/iomap-error-out-on-file-io-when-there-is-no-inline_d.patch b/queue-6.17/iomap-error-out-on-file-io-when-there-is-no-inline_d.patch
new file mode 100644 (file)
index 0000000..5cbb422
--- /dev/null
@@ -0,0 +1,95 @@
+From f72becab157a099604e0cbcb6fcb3b8283f2c872 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Sep 2025 08:00:45 -0700
+Subject: iomap: error out on file IO when there is no inline_data buffer
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+[ Upstream commit 6a96fb653b6481ec73e9627ade216b299e4de9ea ]
+
+Return IO errors if an ->iomap_begin implementation returns an
+IOMAP_INLINE buffer but forgets to set the inline_data pointer.
+Filesystems should never do this, but we could help fs developers (me)
+fix their bugs by handling this more gracefully than crashing the
+kernel.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Link: https://lore.kernel.org/175803480324.966383.7414345025943296442.stgit@frogsfrogsfrogs
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/iomap/buffered-io.c | 15 ++++++++++-----
+ fs/iomap/direct-io.c   |  3 +++
+ 2 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
+index fd827398afd2f..6fa653d83f703 100644
+--- a/fs/iomap/buffered-io.c
++++ b/fs/iomap/buffered-io.c
+@@ -304,6 +304,9 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
+       size_t size = i_size_read(iter->inode) - iomap->offset;
+       size_t offset = offset_in_folio(folio, iomap->offset);
++      if (WARN_ON_ONCE(!iomap->inline_data))
++              return -EIO;
++
+       if (folio_test_uptodate(folio))
+               return 0;
+@@ -894,7 +897,7 @@ static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
+       return true;
+ }
+-static void iomap_write_end_inline(const struct iomap_iter *iter,
++static bool iomap_write_end_inline(const struct iomap_iter *iter,
+               struct folio *folio, loff_t pos, size_t copied)
+ {
+       const struct iomap *iomap = &iter->iomap;
+@@ -903,12 +906,16 @@ static void iomap_write_end_inline(const struct iomap_iter *iter,
+       WARN_ON_ONCE(!folio_test_uptodate(folio));
+       BUG_ON(!iomap_inline_data_valid(iomap));
++      if (WARN_ON_ONCE(!iomap->inline_data))
++              return false;
++
+       flush_dcache_folio(folio);
+       addr = kmap_local_folio(folio, pos);
+       memcpy(iomap_inline_data(iomap, pos), addr, copied);
+       kunmap_local(addr);
+       mark_inode_dirty(iter->inode);
++      return true;
+ }
+ /*
+@@ -921,10 +928,8 @@ static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
+       const struct iomap *srcmap = iomap_iter_srcmap(iter);
+       loff_t pos = iter->pos;
+-      if (srcmap->type == IOMAP_INLINE) {
+-              iomap_write_end_inline(iter, folio, pos, copied);
+-              return true;
+-      }
++      if (srcmap->type == IOMAP_INLINE)
++              return iomap_write_end_inline(iter, folio, pos, copied);
+       if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
+               size_t bh_written;
+diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
+index b84f6af2eb4c8..46aa85af13dc5 100644
+--- a/fs/iomap/direct-io.c
++++ b/fs/iomap/direct-io.c
+@@ -519,6 +519,9 @@ static int iomap_dio_inline_iter(struct iomap_iter *iomi, struct iomap_dio *dio)
+       loff_t pos = iomi->pos;
+       u64 copied;
++      if (WARN_ON_ONCE(!inline_data))
++              return -EIO;
++
+       if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap)))
+               return -EIO;
+-- 
+2.51.0
+
diff --git a/queue-6.17/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch b/queue-6.17/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch
new file mode 100644 (file)
index 0000000..3361ef2
--- /dev/null
@@ -0,0 +1,64 @@
+From aab342170636f6a2ce9aa86e3821cb205d32bc2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Sep 2025 15:43:19 +0100
+Subject: irqchip/sifive-plic: Avoid interrupt ID 0 handling during
+ suspend/resume
+
+From: Lucas Zampieri <lzampier@redhat.com>
+
+[ Upstream commit f75e07bf5226da640fa99a0594687c780d9bace4 ]
+
+According to the PLIC specification[1], global interrupt sources are
+assigned small unsigned integer identifiers beginning at the value 1.
+An interrupt ID of 0 is reserved to mean "no interrupt".
+
+The current plic_irq_resume() and plic_irq_suspend() functions incorrectly
+start the loop from index 0, which accesses the register space for the
+reserved interrupt ID 0.
+
+Change the loop to start from index 1, skipping the reserved
+interrupt ID 0 as per the PLIC specification.
+
+This prevents potential undefined behavior when accessing the reserved
+register space during suspend/resume cycles.
+
+Fixes: e80f0b6a2cf3 ("irqchip/irq-sifive-plic: Add syscore callbacks for hibernation")
+Co-developed-by: Jia Wang <wangjia@ultrarisc.com>
+Signed-off-by: Jia Wang <wangjia@ultrarisc.com>
+Co-developed-by: Charles Mirabile <cmirabil@redhat.com>
+Signed-off-by: Charles Mirabile <cmirabil@redhat.com>
+Signed-off-by: Lucas Zampieri <lzampier@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://github.com/riscv/riscv-plic-spec/releases/tag/1.0.0
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-sifive-plic.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
+index bf69a4802b71e..9c4af7d588463 100644
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -252,7 +252,8 @@ static int plic_irq_suspend(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++) {
++      /* irq ID 0 is reserved */
++      for (i = 1; i < priv->nr_irqs; i++) {
+               __assign_bit(i, priv->prio_save,
+                            readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID));
+       }
+@@ -283,7 +284,8 @@ static void plic_irq_resume(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++) {
++      /* irq ID 0 is reserved */
++      for (i = 1; i < priv->nr_irqs; i++) {
+               index = BIT_WORD(i);
+               writel((priv->prio_save[index] & BIT_MASK(i)) ? 1 : 0,
+                      priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID);
+-- 
+2.51.0
+
diff --git a/queue-6.17/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.17/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..fc4cf3e
--- /dev/null
@@ -0,0 +1,46 @@
+From 437eff349e57c5cfea1494e285a2389f6a18f651 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index df9d11479caf1..32db676127a9e 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -492,8 +492,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-6.17/mnt_ns_tree_remove-dtrt-if-mnt_ns-had-never-been-add.patch b/queue-6.17/mnt_ns_tree_remove-dtrt-if-mnt_ns-had-never-been-add.patch
new file mode 100644 (file)
index 0000000..a8def39
--- /dev/null
@@ -0,0 +1,40 @@
+From 534f26572b7d52ed25e7ce4a308b6b573a791d66 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Aug 2025 16:35:55 -0400
+Subject: mnt_ns_tree_remove(): DTRT if mnt_ns had never been added to
+ mnt_ns_list
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit 38f4885088fc5ad41b8b0a2a2cfc73d01e709e5c ]
+
+Actual removal is done under the lock, but for checking if we need to bother
+the lockless RB_EMPTY_NODE() is safe - either that namespace had never
+been added to mnt_ns_tree, in which case the node will stay empty, or
+whoever had allocated it has called mnt_ns_tree_add() and it has already
+run to completion.  After that point RB_EMPTY_NODE() will become false and
+will remain false, no matter what we do with other nodes in the tree.
+
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 38609066cf330..fc4cbbefa70e2 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -196,7 +196,7 @@ static void mnt_ns_release_rcu(struct rcu_head *rcu)
+ static void mnt_ns_tree_remove(struct mnt_namespace *ns)
+ {
+       /* remove from global mount namespace list */
+-      if (!is_anon_ns(ns)) {
++      if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) {
+               mnt_ns_tree_write_lock();
+               rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
+               list_bidir_del_rcu(&ns->mnt_ns_list);
+-- 
+2.51.0
+
diff --git a/queue-6.17/mount-handle-null-values-in-mnt_ns_release.patch b/queue-6.17/mount-handle-null-values-in-mnt_ns_release.patch
new file mode 100644 (file)
index 0000000..53cf94e
--- /dev/null
@@ -0,0 +1,35 @@
+From d9058cb0a4d40a4c557fc71b42b59eaa328f78aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Sep 2025 11:41:16 +0200
+Subject: mount: handle NULL values in mnt_ns_release()
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 6c7ca6a02f8f9549a438a08a23c6327580ecf3d6 ]
+
+When called from listmount(), mnt_ns_release() may be passed a NULL
+pointer. Handle that case gracefully.
+
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index fc4cbbefa70e2..c8c2376bb2424 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -180,7 +180,7 @@ static void mnt_ns_tree_add(struct mnt_namespace *ns)
+ static void mnt_ns_release(struct mnt_namespace *ns)
+ {
+       /* keep alive for {list,stat}mount() */
+-      if (refcount_dec_and_test(&ns->passive)) {
++      if (ns && refcount_dec_and_test(&ns->passive)) {
+               fsnotify_mntns_delete(ns);
+               put_user_ns(ns->user_ns);
+               kfree(ns);
+-- 
+2.51.0
+
diff --git a/queue-6.17/nsfs-validate-extensible-ioctls.patch b/queue-6.17/nsfs-validate-extensible-ioctls.patch
new file mode 100644 (file)
index 0000000..001099d
--- /dev/null
@@ -0,0 +1,38 @@
+From c95d11d21c5a2c1e5d66978735cec7e89395caa5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 13:52:26 +0200
+Subject: nsfs: validate extensible ioctls
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit f8527a29f4619f74bc30a9845ea87abb9a6faa1e ]
+
+Validate extensible ioctls stricter than we do now.
+
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nsfs.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nsfs.c b/fs/nsfs.c
+index 59aa801347a7d..34f0b35d3ead7 100644
+--- a/fs/nsfs.c
++++ b/fs/nsfs.c
+@@ -169,9 +169,11 @@ static bool nsfs_ioctl_valid(unsigned int cmd)
+       /* Extensible ioctls require some extra handling. */
+       switch (_IOC_NR(cmd)) {
+       case _IOC_NR(NS_MNT_GET_INFO):
++              return extensible_ioctl_valid(cmd, NS_MNT_GET_INFO, MNT_NS_INFO_SIZE_VER0);
+       case _IOC_NR(NS_MNT_GET_NEXT):
++              return extensible_ioctl_valid(cmd, NS_MNT_GET_NEXT, MNT_NS_INFO_SIZE_VER0);
+       case _IOC_NR(NS_MNT_GET_PREV):
+-              return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
++              return extensible_ioctl_valid(cmd, NS_MNT_GET_PREV, MNT_NS_INFO_SIZE_VER0);
+       }
+       return false;
+-- 
+2.51.0
+
diff --git a/queue-6.17/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-6.17/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..881d366
--- /dev/null
@@ -0,0 +1,95 @@
+From 611e274ae9760c9a32ab1e615ce9c761690b92d7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+Sometimes task_active_pid_ns() returns NULL, which then triggers a kernel panic in pid_nr_ns().
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index d94ce02505012..296cd04c24bae 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -491,7 +491,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
diff --git a/queue-6.17/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch b/queue-6.17/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
new file mode 100644 (file)
index 0000000..8b359d7
--- /dev/null
@@ -0,0 +1,48 @@
+From 00e585e46d9e321e24f568a4dc53de36a3dd0735 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Aug 2025 19:36:04 +0200
+Subject: pid: make __task_pid_nr_ns(ns => NULL) safe for zombie callers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit abdfd4948e45c51b19162cf8b3f5003f8f53c9b9 ]
+
+task_pid_vnr(another_task) will crash if the caller was already reaped.
+The pid_alive(current) check can't really help, the parent/debugger can
+call release_task() right after this check.
+
+This also means that even task_ppid_nr_ns(current, NULL) is not safe,
+pid_alive() only ensures that it is safe to dereference ->real_parent.
+
+Change __task_pid_nr_ns() to ensure ns != NULL.
+
+Originally-by: 高翔 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/all/20250802022123.3536934-1-gxxa03070307@gmail.com/
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/20250810173604.GA19991@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 296cd04c24bae..2dbcc4dd90cc0 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -514,7 +514,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+       rcu_read_lock();
+       if (!ns)
+               ns = task_active_pid_ns(current);
+-      nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
++      if (ns)
++              nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       rcu_read_unlock();
+       return nr;
+-- 
+2.51.0
+
diff --git a/queue-6.17/pidfs-validate-extensible-ioctls.patch b/queue-6.17/pidfs-validate-extensible-ioctls.patch
new file mode 100644 (file)
index 0000000..c9a6e4c
--- /dev/null
@@ -0,0 +1,59 @@
+From 9e028a4439f7572bba16a80d460649e02433ce17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 13:52:24 +0200
+Subject: pidfs: validate extensible ioctls
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 3c17001b21b9f168c957ced9384abe969019b609 ]
+
+Validate extensible ioctls stricter than we do now.
+
+Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/pidfs.c         |  2 +-
+ include/linux/fs.h | 14 ++++++++++++++
+ 2 files changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/fs/pidfs.c b/fs/pidfs.c
+index 108e7527f837f..2c9c7636253af 100644
+--- a/fs/pidfs.c
++++ b/fs/pidfs.c
+@@ -440,7 +440,7 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
+                * erronously mistook the file descriptor for a pidfd.
+                * This is not perfect but will catch most cases.
+                */
+-              return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO));
++              return extensible_ioctl_valid(cmd, PIDFD_GET_INFO, PIDFD_INFO_SIZE_VER0);
+       }
+       return false;
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 74f2bfc519263..ed02715261036 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -4025,4 +4025,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
+ int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
++static inline bool extensible_ioctl_valid(unsigned int cmd_a,
++                                        unsigned int cmd_b, size_t min_size)
++{
++      if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b))
++              return false;
++      if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b))
++              return false;
++      if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b))
++              return false;
++      if (_IOC_SIZE(cmd_a) < min_size)
++              return false;
++      return true;
++}
++
+ #endif /* _LINUX_FS_H */
+-- 
+2.51.0
+
index 1de0ca9ed3d4309f8b223809315c57b6d5b115c8..66963d4e26e7238d4869103cb0acd5d656e2c7dc 100644 (file)
@@ -355,3 +355,17 @@ arm64-dts-qcom-qcs615-add-missing-dt-property-in-qup-ses.patch
 acpi-property-disregard-references-in-data-only-subnode-lists.patch
 acpi-property-add-code-comments-explaining-what-is-going-on.patch
 acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch
+irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch
+copy_file_range-limit-size-if-in-compat-mode.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
+nsfs-validate-extensible-ioctls.patch
+mnt_ns_tree_remove-dtrt-if-mnt_ns-had-never-been-add.patch
+writeback-avoid-softlockup-when-switching-many-inode.patch
+writeback-avoid-excessively-long-inode-switching-tim.patch
+iomap-error-out-on-file-io-when-there-is-no-inline_d.patch
+pidfs-validate-extensible-ioctls.patch
+mount-handle-null-values-in-mnt_ns_release.patch
diff --git a/queue-6.17/writeback-avoid-excessively-long-inode-switching-tim.patch b/queue-6.17/writeback-avoid-excessively-long-inode-switching-tim.patch
new file mode 100644 (file)
index 0000000..9009dfb
--- /dev/null
@@ -0,0 +1,102 @@
+From 020fc909cdeb18a6098135b891ea7a85fd31e3c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:37 +0200
+Subject: writeback: Avoid excessively long inode switching times
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 9a6ebbdbd41235ea3bc0c4f39e2076599b8113cc ]
+
+With lazytime mount option enabled we can be switching many dirty inodes
+on cgroup exit to the parent cgroup. The numbers observed in practice
+when systemd slice of a large cron job exits can easily reach hundreds
+of thousands or millions. The logic in inode_do_switch_wbs() which sorts
+the inode into appropriate place in b_dirty list of the target wb
+however has linear complexity in the number of dirty inodes thus overall
+time complexity of switching all the inodes is quadratic leading to
+workers being pegged for hours consuming 100% of the CPU and switching
+inodes to the parent wb.
+
+Simple reproducer of the issue:
+  FILES=10000
+  # Filesystem mounted with lazytime mount option
+  MNT=/mnt/
+  echo "Creating files and switching timestamps"
+  for (( j = 0; j < 50; j ++ )); do
+      mkdir $MNT/dir$j
+      for (( i = 0; i < $FILES; i++ )); do
+          echo "foo" >$MNT/dir$j/file$i
+      done
+      touch -a -t 202501010000 $MNT/dir$j/file*
+  done
+  wait
+  echo "Syncing and flushing"
+  sync
+  echo 3 >/proc/sys/vm/drop_caches
+
+  echo "Reading all files from a cgroup"
+  mkdir /sys/fs/cgroup/unified/mycg1 || exit
+  echo $$ >/sys/fs/cgroup/unified/mycg1/cgroup.procs || exit
+  for (( j = 0; j < 50; j ++ )); do
+      cat /mnt/dir$j/file* >/dev/null &
+  done
+  wait
+  echo "Switching wbs"
+  # Now rmdir the cgroup after the script exits
+
+We need to maintain b_dirty list ordering to keep writeback happy so
+instead of sorting inode into appropriate place just append it at the
+end of the list and clobber dirtied_time_when. This may result in inode
+writeback starting later after cgroup switch however cgroup switches are
+rare so it shouldn't matter much. Since the cgroup had write access to
+the inode, there are no practical concerns of the possible DoS issues.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index b4aa78da7d94e..3bfc430ef74dc 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -445,22 +445,23 @@ static bool inode_do_switch_wbs(struct inode *inode,
+        * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+        * the specific list @inode was on is ignored and the @inode is put on
+        * ->b_dirty which is always correct including from ->b_dirty_time.
+-       * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+-       * was clean, it means it was on the b_attached list, so move it onto
+-       * the b_attached list of @new_wb.
++       * If the @inode was clean, it means it was on the b_attached list, so
++       * move it onto the b_attached list of @new_wb.
+        */
+       if (!list_empty(&inode->i_io_list)) {
+               inode->i_wb = new_wb;
+               if (inode->i_state & I_DIRTY_ALL) {
+-                      struct inode *pos;
+-
+-                      list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+-                              if (time_after_eq(inode->dirtied_when,
+-                                                pos->dirtied_when))
+-                                      break;
++                      /*
++                       * We need to keep b_dirty list sorted by
++                       * dirtied_time_when. However properly sorting the
++                       * inode in the list gets too expensive when switching
++                       * many inodes. So just attach inode at the end of the
++                       * dirty list and clobber the dirtied_time_when.
++                       */
++                      inode->dirtied_time_when = jiffies;
+                       inode_io_list_move_locked(inode, new_wb,
+-                                                pos->i_io_list.prev);
++                                                &new_wb->b_dirty);
+               } else {
+                       inode_cgwb_move_to_attached(inode, new_wb);
+               }
+-- 
+2.51.0
+
diff --git a/queue-6.17/writeback-avoid-softlockup-when-switching-many-inode.patch b/queue-6.17/writeback-avoid-softlockup-when-switching-many-inode.patch
new file mode 100644 (file)
index 0000000..07f180c
--- /dev/null
@@ -0,0 +1,65 @@
+From 87fc87f4e4b1fe44ad237c159d2bebfb3e7ea99c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:36 +0200
+Subject: writeback: Avoid softlockup when switching many inodes
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 66c14dccd810d42ec5c73bb8a9177489dfd62278 ]
+
+process_inode_switch_wbs_work() can be switching over 100 inodes to a
+different cgroup. Since switching an inode requires counting all dirty &
+under-writeback pages in the address space of each inode, this can take
+a significant amount of time. Add a possibility to reschedule after
+processing each inode to avoid softlockups.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index a07b8cf73ae27..b4aa78da7d94e 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -502,6 +502,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        */
+       down_read(&bdi->wb_switch_rwsem);
++      inodep = isw->inodes;
+       /*
+        * By the time control reaches here, RCU grace period has passed
+        * since I_WB_SWITCH assertion and all wb stat update transactions
+@@ -512,6 +513,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        * gives us exclusion against all wb related operations on @inode
+        * including IO list manipulations and stat updates.
+        */
++relock:
+       if (old_wb < new_wb) {
+               spin_lock(&old_wb->list_lock);
+               spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+@@ -520,10 +522,17 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+               spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+       }
+-      for (inodep = isw->inodes; *inodep; inodep++) {
++      while (*inodep) {
+               WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+               if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+                       nr_switched++;
++              inodep++;
++              if (*inodep && need_resched()) {
++                      spin_unlock(&new_wb->list_lock);
++                      spin_unlock(&old_wb->list_lock);
++                      cond_resched();
++                      goto relock;
++              }
+       }
+       spin_unlock(&new_wb->list_lock);
+-- 
+2.51.0
+
diff --git a/queue-6.6/cramfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.6/cramfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..af2ec57
--- /dev/null
@@ -0,0 +1,51 @@
+From d6191824ac6e01b466929288aa2b099570f46283 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Aug 2025 19:01:01 +0900
+Subject: cramfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 7f9d34b0a7cb93d678ee7207f0634dbf79e47fe5 ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/429b3ef1-13de-4310-9a8e-c2dc9a36234a@I-love.SAKURA.ne.jp
+Acked-by: Nicolas Pitre <nico@fluxnic.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cramfs/inode.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
+index 2fbf97077ce91..3f06362985b5a 100644
+--- a/fs/cramfs/inode.c
++++ b/fs/cramfs/inode.c
+@@ -117,9 +117,18 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
+               inode_nohighmem(inode);
+               inode->i_data.a_ops = &cramfs_aops;
+               break;
+-      default:
++      case S_IFCHR:
++      case S_IFBLK:
++      case S_IFIFO:
++      case S_IFSOCK:
+               init_special_inode(inode, cramfs_inode->mode,
+                               old_decode_dev(cramfs_inode->size));
++              break;
++      default:
++              printk(KERN_DEBUG "CRAMFS: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              iget_failed(inode);
++              return ERR_PTR(-EIO);
+       }
+       inode->i_mode = cramfs_inode->mode;
+-- 
+2.51.0
+
diff --git a/queue-6.6/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch b/queue-6.6/fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
new file mode 100644 (file)
index 0000000..d94c759
--- /dev/null
@@ -0,0 +1,116 @@
+From 516dcf829e0b2836fc94885dec4c34f96bb89804 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Aug 2025 20:14:59 +0800
+Subject: fs: Add 'initramfs_options' to set initramfs mount options
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lichen Liu <lichliu@redhat.com>
+
+[ Upstream commit 278033a225e13ec21900f0a92b8351658f5377f2 ]
+
+When CONFIG_TMPFS is enabled, the initial root filesystem is a tmpfs.
+By default, a tmpfs mount is limited to using 50% of the available RAM
+for its content. This can be problematic in memory-constrained
+environments, particularly during a kdump capture.
+
+In a kdump scenario, the capture kernel boots with a limited amount of
+memory specified by the 'crashkernel' parameter. If the initramfs is
+large, it may fail to unpack into the tmpfs rootfs due to insufficient
+space. This is because to get X MB of usable space in tmpfs, 2*X MB of
+memory must be available for the mount. This leads to an OOM failure
+during the early boot process, preventing a successful crash dump.
+
+This patch introduces a new kernel command-line parameter,
+initramfs_options, which allows passing specific mount options directly
+to the rootfs when it is first mounted. This gives users control over
+the rootfs behavior.
+
+For example, a user can now specify initramfs_options=size=75% to allow
+the tmpfs to use up to 75% of the available memory. This can
+significantly reduce the memory pressure for kdump.
+
+Consider a practical example:
+
+To unpack a 48MB initramfs, the tmpfs needs 48MB of usable space. With
+the default 50% limit, this requires a memory pool of 96MB to be
+available for the tmpfs mount. The total memory requirement is therefore
+approximately: 16MB (vmlinuz) + 48MB (loaded initramfs) + 48MB (unpacked
+kernel) + 96MB (for tmpfs) + 12MB (runtime overhead) ≈ 220MB.
+
+By using initramfs_options=size=75%, the memory pool required for the
+48MB tmpfs is reduced to 48MB / 0.75 = 64MB. This reduces the total
+memory requirement by 32MB (96MB - 64MB), allowing the kdump to succeed
+with a smaller crashkernel size, such as 192MB.
+
+An alternative approach of reusing the existing rootflags parameter was
+considered. However, a new, dedicated initramfs_options parameter was
+chosen to avoid altering the current behavior of rootflags (which
+applies to the final root filesystem) and to prevent any potential
+regressions.
+
+Also add documentation for the new kernel parameter "initramfs_options"
+
+This approach is inspired by prior discussions and patches on the topic.
+Ref: https://www.lightofdawn.org/blog/?viewDetailed=00128
+Ref: https://landley.net/notes-2015.html#01-01-2015
+Ref: https://lkml.org/lkml/2021/6/29/783
+Ref: https://www.kernel.org/doc/html/latest/filesystems/ramfs-rootfs-initramfs.html#what-is-rootfs
+
+Signed-off-by: Lichen Liu <lichliu@redhat.com>
+Link: https://lore.kernel.org/20250815121459.3391223-1-lichliu@redhat.com
+Tested-by: Rob Landley <rob@landley.net>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ fs/namespace.c                                  | 11 ++++++++++-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 60d48ebbc2cb0..fff3ca50c6c26 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5638,6 +5638,9 @@
+       rootflags=      [KNL] Set root filesystem mount option string
++      initramfs_options= [KNL]
+                        Specify mount options for the initramfs mount.
++
+       rootfstype=     [KNL] Set root filesystem type
+       rootwait        [KNL] Wait (indefinitely) for root device to show up.
+diff --git a/fs/namespace.c b/fs/namespace.c
+index f79226472251b..646d9e7d41ee8 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -64,6 +64,15 @@ static int __init set_mphash_entries(char *str)
+ }
+ __setup("mphash_entries=", set_mphash_entries);
++static char * __initdata initramfs_options;
++static int __init initramfs_options_setup(char *str)
++{
++      initramfs_options = str;
++      return 1;
++}
++
++__setup("initramfs_options=", initramfs_options_setup);
++
+ static u64 event;
+ static DEFINE_IDA(mnt_id_ida);
+ static DEFINE_IDA(mnt_group_ida);
+@@ -4728,7 +4737,7 @@ static void __init init_mount_tree(void)
+       struct mnt_namespace *ns;
+       struct path root;
+-      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
++      mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", initramfs_options);
+       if (IS_ERR(mnt))
+               panic("Can't create rootfs");
+-- 
+2.51.0
+
diff --git a/queue-6.6/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch b/queue-6.6/irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch
new file mode 100644 (file)
index 0000000..8235fa3
--- /dev/null
@@ -0,0 +1,64 @@
+From bf7a84c0673734f3223aeab43a839908ceac24db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Sep 2025 15:43:19 +0100
+Subject: irqchip/sifive-plic: Avoid interrupt ID 0 handling during
+ suspend/resume
+
+From: Lucas Zampieri <lzampier@redhat.com>
+
+[ Upstream commit f75e07bf5226da640fa99a0594687c780d9bace4 ]
+
+According to the PLIC specification[1], global interrupt sources are
+assigned small unsigned integer identifiers beginning at the value 1.
+An interrupt ID of 0 is reserved to mean "no interrupt".
+
+The current plic_irq_resume() and plic_irq_suspend() functions incorrectly
+start the loop from index 0, which accesses the register space for the
+reserved interrupt ID 0.
+
+Change the loop to start from index 1, skipping the reserved
+interrupt ID 0 as per the PLIC specification.
+
+This prevents potential undefined behavior when accessing the reserved
+register space during suspend/resume cycles.
+
+Fixes: e80f0b6a2cf3 ("irqchip/irq-sifive-plic: Add syscore callbacks for hibernation")
+Co-developed-by: Jia Wang <wangjia@ultrarisc.com>
+Signed-off-by: Jia Wang <wangjia@ultrarisc.com>
+Co-developed-by: Charles Mirabile <cmirabil@redhat.com>
+Signed-off-by: Charles Mirabile <cmirabil@redhat.com>
+Signed-off-by: Lucas Zampieri <lzampier@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://github.com/riscv/riscv-plic-spec/releases/tag/1.0.0
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-sifive-plic.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
+index 0fcd37108b67e..2d20cf9d84cea 100644
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -248,7 +248,8 @@ static int plic_irq_suspend(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++) {
++      /* irq ID 0 is reserved */
++      for (i = 1; i < priv->nr_irqs; i++) {
+               __assign_bit(i, priv->prio_save,
+                            readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID));
+       }
+@@ -278,7 +279,8 @@ static void plic_irq_resume(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++) {
++      /* irq ID 0 is reserved */
++      for (i = 1; i < priv->nr_irqs; i++) {
+               index = BIT_WORD(i);
+               writel((priv->prio_save[index] & BIT_MASK(i)) ? 1 : 0,
+                      priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID);
+-- 
+2.51.0
+
diff --git a/queue-6.6/irqchip-sifive-plic-make-use-of-__assign_bit.patch b/queue-6.6/irqchip-sifive-plic-make-use-of-__assign_bit.patch
new file mode 100644 (file)
index 0000000..c752824
--- /dev/null
@@ -0,0 +1,51 @@
+From ada1def3fe5a7e2bd853ee04dc0d0e4287fbc9d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 21:08:24 +0800
+Subject: irqchip/sifive-plic: Make use of __assign_bit()
+
+From: Hongbo Li <lihongbo22@huawei.com>
+
+[ Upstream commit 40d7af5375a4e27d8576d9d11954ac213d06f09e ]
+
+Replace the open coded
+
+    if (foo)
+        __set_bit(n, bar);
+    else
+        __clear_bit(n, bar);
+
+with __assign_bit(). No functional change intended.
+
+Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Palmer Dabbelt <palmer@rivosinc.com>
+Link: https://lore.kernel.org/all/20240902130824.2878644-1-lihongbo22@huawei.com
+Stable-dep-of: f75e07bf5226 ("irqchip/sifive-plic: Avoid interrupt ID 0 handling during suspend/resume")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-sifive-plic.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
+index 5728996691549..0fcd37108b67e 100644
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -248,11 +248,10 @@ static int plic_irq_suspend(void)
+       priv = per_cpu_ptr(&plic_handlers, smp_processor_id())->priv;
+-      for (i = 0; i < priv->nr_irqs; i++)
+-              if (readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID))
+-                      __set_bit(i, priv->prio_save);
+-              else
+-                      __clear_bit(i, priv->prio_save);
++      for (i = 0; i < priv->nr_irqs; i++) {
++              __assign_bit(i, priv->prio_save,
++                           readl(priv->regs + PRIORITY_BASE + i * PRIORITY_PER_ID));
++      }
+       for_each_cpu(cpu, cpu_present_mask) {
+               struct plic_handler *handler = per_cpu_ptr(&plic_handlers, cpu);
+-- 
+2.51.0
+
diff --git a/queue-6.6/minixfs-verify-inode-mode-when-loading-from-disk.patch b/queue-6.6/minixfs-verify-inode-mode-when-loading-from-disk.patch
new file mode 100644 (file)
index 0000000..cd9c539
--- /dev/null
@@ -0,0 +1,46 @@
+From 5d53e0ce33564aebcf764c0ed37fb03ce36a965d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Aug 2025 00:17:44 +0900
+Subject: minixfs: Verify inode mode when loading from disk
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit 73861970938ad1323eb02bbbc87f6fbd1e5bacca ]
+
+The inode mode loaded from corrupted disk can be invalid. Do like what
+commit 0a9e74051313 ("isofs: Verify inode mode when loading from disk")
+does.
+
+Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
+Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/ec982681-84b8-4624-94fa-8af15b77cbd2@I-love.SAKURA.ne.jp
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/minix/inode.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/minix/inode.c b/fs/minix/inode.c
+index df575473c1cc0..ee8a6fe360e72 100644
+--- a/fs/minix/inode.c
++++ b/fs/minix/inode.c
+@@ -470,8 +470,14 @@ void minix_set_inode(struct inode *inode, dev_t rdev)
+               inode->i_op = &minix_symlink_inode_operations;
+               inode_nohighmem(inode);
+               inode->i_mapping->a_ops = &minix_aops;
+-      } else
++      } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+               init_special_inode(inode, inode->i_mode, rdev);
++      } else {
++              printk(KERN_DEBUG "MINIX-fs: Invalid file type 0%04o for inode %lu.\n",
++                     inode->i_mode, inode->i_ino);
++              make_bad_inode(inode);
++      }
+ }
+ /*
+-- 
+2.51.0
+
diff --git a/queue-6.6/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch b/queue-6.6/pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
new file mode 100644 (file)
index 0000000..385515b
--- /dev/null
@@ -0,0 +1,95 @@
+From 9694c9b5a0f7267934f619b147c175af4bf654b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Aug 2025 10:21:23 +0800
+Subject: pid: Add a judgment for ns null in pid_nr_ns
+
+From: gaoxiang17 <gaoxiang17@xiaomi.com>
+
+[ Upstream commit 006568ab4c5ca2309ceb36fa553e390b4aa9c0c7 ]
+
+__task_pid_nr_ns
+        ns = task_active_pid_ns(current);
+        pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+                if (pid && ns->level <= pid->level) {
+
+task_active_pid_ns() sometimes returns NULL; pid_nr_ns() then dereferences the NULL ns and triggers a kernel panic.
+
+For example:
+       Unable to handle kernel NULL pointer dereference at virtual address 0000000000000058
+       Mem abort info:
+       ESR = 0x0000000096000007
+       EC = 0x25: DABT (current EL), IL = 32 bits
+       SET = 0, FnV = 0
+       EA = 0, S1PTW = 0
+       FSC = 0x07: level 3 translation fault
+       Data abort info:
+       ISV = 0, ISS = 0x00000007, ISS2 = 0x00000000
+       CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+       GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+       user pgtable: 4k pages, 39-bit VAs, pgdp=00000002175aa000
+       [0000000000000058] pgd=08000002175ab003, p4d=08000002175ab003, pud=08000002175ab003, pmd=08000002175be003, pte=0000000000000000
+       pstate: 834000c5 (Nzcv daIF +PAN -UAO +TCO +DIT -SSBS BTYPE=--)
+       pc : __task_pid_nr_ns+0x74/0xd0
+       lr : __task_pid_nr_ns+0x24/0xd0
+       sp : ffffffc08001bd10
+       x29: ffffffc08001bd10 x28: ffffffd4422b2000 x27: 0000000000000001
+       x26: ffffffd442821168 x25: ffffffd442821000 x24: 00000f89492eab31
+       x23: 00000000000000c0 x22: ffffff806f5693c0 x21: ffffff806f5693c0
+       x20: 0000000000000001 x19: 0000000000000000 x18: 0000000000000000
+       x17: 00000000529c6ef0 x16: 00000000529c6ef0 x15: 00000000023a1adc
+       x14: 0000000000000003 x13: 00000000007ef6d8 x12: 001167c391c78800
+       x11: 00ffffffffffffff x10: 0000000000000000 x9 : 0000000000000001
+       x8 : ffffff80816fa3c0 x7 : 0000000000000000 x6 : 49534d702d535449
+       x5 : ffffffc080c4c2c0 x4 : ffffffd43ee128c8 x3 : ffffffd43ee124dc
+       x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffffff806f5693c0
+       Call trace:
+       __task_pid_nr_ns+0x74/0xd0
+       ...
+       __handle_irq_event_percpu+0xd4/0x284
+       handle_irq_event+0x48/0xb0
+       handle_fasteoi_irq+0x160/0x2d8
+       generic_handle_domain_irq+0x44/0x60
+       gic_handle_irq+0x4c/0x114
+       call_on_irq_stack+0x3c/0x74
+       do_interrupt_handler+0x4c/0x84
+       el1_interrupt+0x34/0x58
+       el1h_64_irq_handler+0x18/0x24
+       el1h_64_irq+0x68/0x6c
+       account_kernel_stack+0x60/0x144
+       exit_task_stack_account+0x1c/0x80
+       do_exit+0x7e4/0xaf8
+       ...
+       get_signal+0x7bc/0x8d8
+       do_notify_resume+0x128/0x828
+       el0_svc+0x6c/0x70
+       el0t_64_sync_handler+0x68/0xbc
+       el0t_64_sync+0x1a8/0x1ac
+       Code: 35fffe54 911a02a8 f9400108 b4000128 (b9405a69)
+       ---[ end trace 0000000000000000 ]---
+       Kernel panic - not syncing: Oops: Fatal exception in interrupt
+
+Signed-off-by: gaoxiang17 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/20250802022123.3536934-1-gxxa03070307@gmail.com
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 6500ef956f2f8..e57adc00cb779 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -477,7 +477,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+       struct upid *upid;
+       pid_t nr = 0;
+-      if (pid && ns->level <= pid->level) {
++      if (pid && ns && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+-- 
+2.51.0
+
diff --git a/queue-6.6/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch b/queue-6.6/pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
new file mode 100644 (file)
index 0000000..71581b2
--- /dev/null
@@ -0,0 +1,48 @@
+From 94ebb937895b3a258a3422b9afee350632d69ff9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Aug 2025 19:36:04 +0200
+Subject: pid: make __task_pid_nr_ns(ns => NULL) safe for zombie callers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+[ Upstream commit abdfd4948e45c51b19162cf8b3f5003f8f53c9b9 ]
+
+task_pid_vnr(another_task) will crash if the caller was already reaped.
+The pid_alive(current) check can't really help, the parent/debugger can
+call release_task() right after this check.
+
+This also means that even task_ppid_nr_ns(current, NULL) is not safe,
+pid_alive() only ensures that it is safe to dereference ->real_parent.
+
+Change __task_pid_nr_ns() to ensure ns != NULL.
+
+Originally-by: 高翔 <gaoxiang17@xiaomi.com>
+Link: https://lore.kernel.org/all/20250802022123.3536934-1-gxxa03070307@gmail.com/
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/20250810173604.GA19991@redhat.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/pid.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/pid.c b/kernel/pid.c
+index e57adc00cb779..69922b2e7ed15 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -500,7 +500,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+       rcu_read_lock();
+       if (!ns)
+               ns = task_active_pid_ns(current);
+-      nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
++      if (ns)
++              nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
+       rcu_read_unlock();
+       return nr;
+-- 
+2.51.0
+
index 6f7c96df797e79d00043876036bf78b18167c50c..dfc2ca96c84b3374f56288b7fcdcd25d75f0b5fd 100644 (file)
@@ -189,3 +189,12 @@ s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
 s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch
 selftests-mm-skip-soft-dirty-tests-when-config_mem_soft_dirty-is-disabled.patch
 mptcp-pm-in-kernel-usable-client-side-with-c-flag.patch
+irqchip-sifive-plic-make-use-of-__assign_bit.patch
+irqchip-sifive-plic-avoid-interrupt-id-0-handling-du.patch
+minixfs-verify-inode-mode-when-loading-from-disk.patch
+pid-add-a-judgment-for-ns-null-in-pid_nr_ns.patch
+pid-make-__task_pid_nr_ns-ns-null-safe-for-zombie-ca.patch
+fs-add-initramfs_options-to-set-initramfs-mount-opti.patch
+cramfs-verify-inode-mode-when-loading-from-disk.patch
+writeback-avoid-softlockup-when-switching-many-inode.patch
+writeback-avoid-excessively-long-inode-switching-tim.patch
diff --git a/queue-6.6/writeback-avoid-excessively-long-inode-switching-tim.patch b/queue-6.6/writeback-avoid-excessively-long-inode-switching-tim.patch
new file mode 100644 (file)
index 0000000..145a205
--- /dev/null
@@ -0,0 +1,102 @@
+From 11c76462dd44df84c01eb56f712a52f07819be26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:37 +0200
+Subject: writeback: Avoid excessively long inode switching times
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 9a6ebbdbd41235ea3bc0c4f39e2076599b8113cc ]
+
+With lazytime mount option enabled we can be switching many dirty inodes
+on cgroup exit to the parent cgroup. The numbers observed in practice
+when systemd slice of a large cron job exits can easily reach hundreds
+of thousands or millions. The logic in inode_do_switch_wbs() which sorts
+the inode into appropriate place in b_dirty list of the target wb
+however has linear complexity in the number of dirty inodes thus overall
+time complexity of switching all the inodes is quadratic leading to
+workers being pegged for hours consuming 100% of the CPU and switching
+inodes to the parent wb.
+
+Simple reproducer of the issue:
+  FILES=10000
+  # Filesystem mounted with lazytime mount option
+  MNT=/mnt/
+  echo "Creating files and switching timestamps"
+  for (( j = 0; j < 50; j ++ )); do
+      mkdir $MNT/dir$j
+      for (( i = 0; i < $FILES; i++ )); do
+          echo "foo" >$MNT/dir$j/file$i
+      done
+      touch -a -t 202501010000 $MNT/dir$j/file*
+  done
+  wait
+  echo "Syncing and flushing"
+  sync
+  echo 3 >/proc/sys/vm/drop_caches
+
+  echo "Reading all files from a cgroup"
+  mkdir /sys/fs/cgroup/unified/mycg1 || exit
+  echo $$ >/sys/fs/cgroup/unified/mycg1/cgroup.procs || exit
+  for (( j = 0; j < 50; j ++ )); do
+      cat /mnt/dir$j/file* >/dev/null &
+  done
+  wait
+  echo "Switching wbs"
+  # Now rmdir the cgroup after the script exits
+
+We need to maintain b_dirty list ordering to keep writeback happy so
+instead of sorting inode into appropriate place just append it at the
+end of the list and clobber dirtied_time_when. This may result in inode
+writeback starting later after cgroup switch however cgroup switches are
+rare so it shouldn't matter much. Since the cgroup had write access to
+the inode, there are no practical concerns of the possible DoS issues.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 0454a1f0fc636..274fae88b498e 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -422,22 +422,23 @@ static bool inode_do_switch_wbs(struct inode *inode,
+        * Transfer to @new_wb's IO list if necessary.  If the @inode is dirty,
+        * the specific list @inode was on is ignored and the @inode is put on
+        * ->b_dirty which is always correct including from ->b_dirty_time.
+-       * The transfer preserves @inode->dirtied_when ordering.  If the @inode
+-       * was clean, it means it was on the b_attached list, so move it onto
+-       * the b_attached list of @new_wb.
++       * If the @inode was clean, it means it was on the b_attached list, so
++       * move it onto the b_attached list of @new_wb.
+        */
+       if (!list_empty(&inode->i_io_list)) {
+               inode->i_wb = new_wb;
+               if (inode->i_state & I_DIRTY_ALL) {
+-                      struct inode *pos;
+-
+-                      list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+-                              if (time_after_eq(inode->dirtied_when,
+-                                                pos->dirtied_when))
+-                                      break;
++                      /*
++                       * We need to keep b_dirty list sorted by
++                       * dirtied_time_when. However properly sorting the
++                       * inode in the list gets too expensive when switching
++                       * many inodes. So just attach inode at the end of the
++                       * dirty list and clobber the dirtied_time_when.
++                       */
++                      inode->dirtied_time_when = jiffies;
+                       inode_io_list_move_locked(inode, new_wb,
+-                                                pos->i_io_list.prev);
++                                                &new_wb->b_dirty);
+               } else {
+                       inode_cgwb_move_to_attached(inode, new_wb);
+               }
+-- 
+2.51.0
+
diff --git a/queue-6.6/writeback-avoid-softlockup-when-switching-many-inode.patch b/queue-6.6/writeback-avoid-softlockup-when-switching-many-inode.patch
new file mode 100644 (file)
index 0000000..9894c80
--- /dev/null
@@ -0,0 +1,65 @@
+From 1823f0dc3123706c07e1aeb86cf0175ac090becd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 Sep 2025 12:38:36 +0200
+Subject: writeback: Avoid softlockup when switching many inodes
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 66c14dccd810d42ec5c73bb8a9177489dfd62278 ]
+
+process_inode_switch_wbs_work() can be switching over 100 inodes to a
+different cgroup. Since switching an inode requires counting all dirty &
+under-writeback pages in the address space of each inode, this can take
+a significant amount of time. Add a possibility to reschedule after
+processing each inode to avoid softlockups.
+
+Acked-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fs-writeback.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index ed110568d6127..0454a1f0fc636 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -479,6 +479,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        */
+       down_read(&bdi->wb_switch_rwsem);
++      inodep = isw->inodes;
+       /*
+        * By the time control reaches here, RCU grace period has passed
+        * since I_WB_SWITCH assertion and all wb stat update transactions
+@@ -489,6 +490,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+        * gives us exclusion against all wb related operations on @inode
+        * including IO list manipulations and stat updates.
+        */
++relock:
+       if (old_wb < new_wb) {
+               spin_lock(&old_wb->list_lock);
+               spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+@@ -497,10 +499,17 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
+               spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+       }
+-      for (inodep = isw->inodes; *inodep; inodep++) {
++      while (*inodep) {
+               WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+               if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+                       nr_switched++;
++              inodep++;
++              if (*inodep && need_resched()) {
++                      spin_unlock(&new_wb->list_lock);
++                      spin_unlock(&old_wb->list_lock);
++                      cond_resched();
++                      goto relock;
++              }
+       }
+       spin_unlock(&new_wb->list_lock);
+-- 
+2.51.0
+