]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for all trees
authorSasha Levin <sashal@kernel.org>
Sat, 23 Aug 2025 14:02:46 +0000 (10:02 -0400)
committerSasha Levin <sashal@kernel.org>
Sat, 23 Aug 2025 14:02:46 +0000 (10:02 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
35 files changed:
queue-5.10/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-5.10/move_mount-allow-to-add-a-mount-into-an-existing-gro.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]
queue-5.15/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]
queue-5.4/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-5.4/move_mount-allow-to-add-a-mount-into-an-existing-gro.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]
queue-6.1/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch [new file with mode: 0644]
queue-6.1/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]
queue-6.12/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch [new file with mode: 0644]
queue-6.12/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]
queue-6.16/btrfs-zoned-fix-data-relocation-block-group-reservat.patch [new file with mode: 0644]
queue-6.16/fhandle-do_handle_open-should-get-fd-with-user-flags.patch [new file with mode: 0644]
queue-6.16/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-6.16/fs-fix-incorrect-lflags-value-in-the-move_mount-sysc.patch [new file with mode: 0644]
queue-6.16/libfs-massage-path_from_stashed-to-allow-custom-stas.patch [new file with mode: 0644]
queue-6.16/pidfs-fix-memory-leak-in-pidfd_info.patch [new file with mode: 0644]
queue-6.16/pidfs-move-to-anonymous-struct.patch [new file with mode: 0644]
queue-6.16/pidfs-persist-information.patch [new file with mode: 0644]
queue-6.16/series
queue-6.16/signal-fix-memory-leak-for-pidfd_self-sentinels.patch [new file with mode: 0644]
queue-6.16/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch [new file with mode: 0644]
queue-6.16/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]
queue-6.6/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch [new file with mode: 0644]
queue-6.6/use-uniform-permission-checks-for-all-mount-propagat.patch [new file with mode: 0644]

diff --git a/queue-5.10/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-5.10/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..a2d18c5
--- /dev/null
@@ -0,0 +1,96 @@
+From 72224bc8cdf03b97d5429ff1627a2c81f6f6299a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may occur
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index ee66abadcbc2..9c41306e8d82 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -156,8 +156,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
diff --git a/queue-5.10/move_mount-allow-to-add-a-mount-into-an-existing-gro.patch b/queue-5.10/move_mount-allow-to-add-a-mount-into-an-existing-gro.patch
new file mode 100644 (file)
index 0000000..0930387
--- /dev/null
@@ -0,0 +1,173 @@
+From d8a12919fd4a2b9a14adbe4a52c3bd287cfcea92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jul 2021 13:07:13 +0300
+Subject: move_mount: allow to add a mount into an existing group
+
+From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+
+[ Upstream commit 9ffb14ef61bab83fa818736bf3e7e6b6e182e8e2 ]
+
+Previously a sharing group (shared and master ids pair) can be only
+inherited when mount is created via bindmount. This patch adds an
+ability to add an existing private mount into an existing sharing group.
+
+With this functionality one can first create the desired mount tree from
+only private mounts (without the need to care about undesired mount
+propagation or mount creation order implied by sharing group
+dependencies), and then set up any desired mount sharing between
+those mounts in tree as needed.
+
+This allows CRIU to restore any set of mount namespaces, mount trees and
+sharing group trees for a container.
+
+We have many issues with restoring mounts in CRIU related to sharing
+groups and propagation:
+- reverse sharing groups vs mount tree order requires complex mounts
+  reordering which mostly implies also using some temporary mounts
+(please see https://lkml.org/lkml/2021/3/23/569 for more info)
+
+- mount() syscall creates tons of mounts due to propagation
+- mount re-parenting due to propagation
+- "Mount Trap" due to propagation
+- "Non Uniform" propagation, meaning that with different tricks with
+  mount order and temporary children-"lock" mounts one can create mount
+  trees which can't be restored without those tricks
+(see https://www.linuxplumbersconf.org/event/7/contributions/640/)
+
+With this new functionality we can resolve all the problems with
+propagation at once.
+
+Link: https://lore.kernel.org/r/20210715100714.120228-1-ptikhomirov@virtuozzo.com
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <christian.brauner@ubuntu.com>
+Cc: Mattias Nissler <mnissler@chromium.org>
+Cc: Aleksa Sarai <cyphar@cyphar.com>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: linux-fsdevel@vger.kernel.org
+Cc: linux-api@vger.kernel.org
+Cc: lkml <linux-kernel@vger.kernel.org>
+Co-developed-by: Andrei Vagin <avagin@gmail.com>
+Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Signed-off-by: Andrei Vagin <avagin@gmail.com>
+Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
+Stable-dep-of: cffd0441872e ("use uniform permission checks for all mount propagation changes")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c             | 77 +++++++++++++++++++++++++++++++++++++-
+ include/uapi/linux/mount.h |  3 +-
+ 2 files changed, 78 insertions(+), 2 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index ee6d139f7529..7f7ccc9e53b8 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2692,6 +2692,78 @@ static bool check_for_nsfs_mounts(struct mount *subtree)
+       return ret;
+ }
++static int do_set_group(struct path *from_path, struct path *to_path)
++{
++      struct mount *from, *to;
++      int err;
++
++      from = real_mount(from_path->mnt);
++      to = real_mount(to_path->mnt);
++
++      namespace_lock();
++
++      err = -EINVAL;
++      /* To and From must be mounted */
++      if (!is_mounted(&from->mnt))
++              goto out;
++      if (!is_mounted(&to->mnt))
++              goto out;
++
++      err = -EPERM;
++      /* We should be allowed to modify mount namespaces of both mounts */
++      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++              goto out;
++      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++              goto out;
++
++      err = -EINVAL;
++      /* To and From paths should be mount roots */
++      if (from_path->dentry != from_path->mnt->mnt_root)
++              goto out;
++      if (to_path->dentry != to_path->mnt->mnt_root)
++              goto out;
++
++      /* Setting sharing groups is only allowed across same superblock */
++      if (from->mnt.mnt_sb != to->mnt.mnt_sb)
++              goto out;
++
++      /* From mount root should be wider than To mount root */
++      if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
++              goto out;
++
++      /* From mount should not have locked children in place of To's root */
++      if (has_locked_children(from, to->mnt.mnt_root))
++              goto out;
++
++      /* Setting sharing groups is only allowed on private mounts */
++      if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
++              goto out;
++
++      /* From should not be private */
++      if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
++              goto out;
++
++      if (IS_MNT_SLAVE(from)) {
++              struct mount *m = from->mnt_master;
++
++              list_add(&to->mnt_slave, &m->mnt_slave_list);
++              to->mnt_master = m;
++      }
++
++      if (IS_MNT_SHARED(from)) {
++              to->mnt_group_id = from->mnt_group_id;
++              list_add(&to->mnt_share, &from->mnt_share);
++              lock_mount_hash();
++              set_mnt_shared(to);
++              unlock_mount_hash();
++      }
++
++      err = 0;
++out:
++      namespace_unlock();
++      return err;
++}
++
+ static int do_move_mount(struct path *old_path, struct path *new_path)
+ {
+       struct mnt_namespace *ns;
+@@ -3667,7 +3739,10 @@ SYSCALL_DEFINE5(move_mount,
+       if (ret < 0)
+               goto out_to;
+-      ret = do_move_mount(&from_path, &to_path);
++      if (flags & MOVE_MOUNT_SET_GROUP)
++              ret = do_set_group(&from_path, &to_path);
++      else
++              ret = do_move_mount(&from_path, &to_path);
+ out_to:
+       path_put(&to_path);
+diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
+index dd8306ea336c..fc6a2e63130b 100644
+--- a/include/uapi/linux/mount.h
++++ b/include/uapi/linux/mount.h
+@@ -71,7 +71,8 @@
+ #define MOVE_MOUNT_T_SYMLINKS         0x00000010 /* Follow symlinks on to path */
+ #define MOVE_MOUNT_T_AUTOMOUNTS               0x00000020 /* Follow automounts on to path */
+ #define MOVE_MOUNT_T_EMPTY_PATH               0x00000040 /* Empty to path permitted */
+-#define MOVE_MOUNT__MASK              0x00000077
++#define MOVE_MOUNT_SET_GROUP          0x00000100 /* Set sharing group instead */
++#define MOVE_MOUNT__MASK              0x00000177
+ /*
+  * fsopen() flags.
+-- 
+2.50.1
+
index d260a6a7af7200cc2fa39b7f92832917a6add3fc..d7396de2b9d8c876d912bcd1b2cf1428b0bf6934 100644 (file)
@@ -398,3 +398,6 @@ drm-amd-display-fix-dp-audio-dto1-clock-source-on-dce-6.patch
 drm-amd-display-find-first-crtc-and-its-line-time-in-dce110_fill_display_configs.patch
 drm-amd-display-fill-display-clock-and-vblank-time-in-dce110_fill_display_configs.patch
 selftests-mptcp-connect-also-cover-alt-modes.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+move_mount-allow-to-add-a-mount-into-an-existing-gro.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-5.10/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-5.10/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..7b58b3c
--- /dev/null
@@ -0,0 +1,103 @@
+From fbf502f4b777b86bc1201f04b71677f2905ba243 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow to mess with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de ("do_change_type(): refuse to operate on unmounted/not ours mounts")
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 7f7ccc9e53b8..d1751f9b6f1c 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2272,6 +2272,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, false);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2308,10 +2321,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -2702,18 +2715,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+
diff --git a/queue-5.15/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-5.15/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..0cfd087
--- /dev/null
@@ -0,0 +1,96 @@
+From 2df7766eba5764ddaf794b7b09ddeca65ef2bd83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may occur
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 1960e2d43ae2..87fcbb725241 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -156,8 +156,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
index 973391eadcf7e6f17b8a6c87ca6d7c1ebbd208fc..e903e90baaf935c447a67121e11c3f14fcb9a351 100644 (file)
@@ -571,3 +571,5 @@ drm-amd-display-fix-fractional-fb-divider-in-set_pixel_clock_v3.patch
 drm-amd-display-fix-dp-audio-dto1-clock-source-on-dce-6.patch
 drm-amd-display-find-first-crtc-and-its-line-time-in-dce110_fill_display_configs.patch
 drm-amd-display-fill-display-clock-and-vblank-time-in-dce110_fill_display_configs.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-5.15/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-5.15/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..949e843
--- /dev/null
@@ -0,0 +1,103 @@
+From c1b868951f6c0b271848d762a3111763d16c03d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow to mess with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de ("do_change_type(): refuse to operate on unmounted/not ours mounts")
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 9e1717692be3..35d63bb3b22d 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2296,6 +2296,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, false);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2332,10 +2345,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -2730,18 +2743,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+
diff --git a/queue-5.4/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-5.4/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..a52c2aa
--- /dev/null
@@ -0,0 +1,96 @@
+From 4176fbb2c1bd40b245ac155f5142eb62e8795b35 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may occur
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 4ec88d08d04e..e0da5e56e499 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -161,8 +161,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
diff --git a/queue-5.4/move_mount-allow-to-add-a-mount-into-an-existing-gro.patch b/queue-5.4/move_mount-allow-to-add-a-mount-into-an-existing-gro.patch
new file mode 100644 (file)
index 0000000..7993182
--- /dev/null
@@ -0,0 +1,173 @@
+From b9e00646381d1e68b08918d0ac52799e5ce23761 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jul 2021 13:07:13 +0300
+Subject: move_mount: allow to add a mount into an existing group
+
+From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+
+[ Upstream commit 9ffb14ef61bab83fa818736bf3e7e6b6e182e8e2 ]
+
+Previously a sharing group (shared and master ids pair) could only be
+inherited when a mount is created via bindmount. This patch adds the
+ability to add an existing private mount into an existing sharing group.
+
+With this functionality one can first create the desired mount tree from
+only private mounts (without the need to care about undesired mount
+propagation or mount creation order implied by sharing group
+dependencies), and then set up any desired mount sharing between
+those mounts in the tree as needed.
+
+This allows CRIU to restore any set of mount namespaces, mount trees and
+sharing group trees for a container.
+
+We have many issues with restoring mounts in CRIU related to sharing
+groups and propagation:
+- reverse sharing groups vs mount tree order requires complex mounts
+  reordering which mostly implies also using some temporary mounts
+(please see https://lkml.org/lkml/2021/3/23/569 for more info)
+
+- mount() syscall creates tons of mounts due to propagation
+- mount re-parenting due to propagation
+- "Mount Trap" due to propagation
+- "Non Uniform" propagation, meaning that with different tricks with
+  mount order and temporary children-"lock" mounts one can create mount
+  trees which can't be restored without those tricks
+(see https://www.linuxplumbersconf.org/event/7/contributions/640/)
+
+With this new functionality we can resolve all the problems with
+propagation at once.
+
+Link: https://lore.kernel.org/r/20210715100714.120228-1-ptikhomirov@virtuozzo.com
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <christian.brauner@ubuntu.com>
+Cc: Mattias Nissler <mnissler@chromium.org>
+Cc: Aleksa Sarai <cyphar@cyphar.com>
+Cc: Andrei Vagin <avagin@gmail.com>
+Cc: linux-fsdevel@vger.kernel.org
+Cc: linux-api@vger.kernel.org
+Cc: lkml <linux-kernel@vger.kernel.org>
+Co-developed-by: Andrei Vagin <avagin@gmail.com>
+Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Signed-off-by: Andrei Vagin <avagin@gmail.com>
+Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
+Stable-dep-of: cffd0441872e ("use uniform permission checks for all mount propagation changes")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c             | 77 +++++++++++++++++++++++++++++++++++++-
+ include/uapi/linux/mount.h |  3 +-
+ 2 files changed, 78 insertions(+), 2 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index ee5a87061f20..3c1afe60d438 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2624,6 +2624,78 @@ static bool check_for_nsfs_mounts(struct mount *subtree)
+       return ret;
+ }
++static int do_set_group(struct path *from_path, struct path *to_path)
++{
++      struct mount *from, *to;
++      int err;
++
++      from = real_mount(from_path->mnt);
++      to = real_mount(to_path->mnt);
++
++      namespace_lock();
++
++      err = -EINVAL;
++      /* To and From must be mounted */
++      if (!is_mounted(&from->mnt))
++              goto out;
++      if (!is_mounted(&to->mnt))
++              goto out;
++
++      err = -EPERM;
++      /* We should be allowed to modify mount namespaces of both mounts */
++      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++              goto out;
++      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++              goto out;
++
++      err = -EINVAL;
++      /* To and From paths should be mount roots */
++      if (from_path->dentry != from_path->mnt->mnt_root)
++              goto out;
++      if (to_path->dentry != to_path->mnt->mnt_root)
++              goto out;
++
++      /* Setting sharing groups is only allowed across same superblock */
++      if (from->mnt.mnt_sb != to->mnt.mnt_sb)
++              goto out;
++
++      /* From mount root should be wider than To mount root */
++      if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
++              goto out;
++
++      /* From mount should not have locked children in place of To's root */
++      if (has_locked_children(from, to->mnt.mnt_root))
++              goto out;
++
++      /* Setting sharing groups is only allowed on private mounts */
++      if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
++              goto out;
++
++      /* From should not be private */
++      if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
++              goto out;
++
++      if (IS_MNT_SLAVE(from)) {
++              struct mount *m = from->mnt_master;
++
++              list_add(&to->mnt_slave, &m->mnt_slave_list);
++              to->mnt_master = m;
++      }
++
++      if (IS_MNT_SHARED(from)) {
++              to->mnt_group_id = from->mnt_group_id;
++              list_add(&to->mnt_share, &from->mnt_share);
++              lock_mount_hash();
++              set_mnt_shared(to);
++              unlock_mount_hash();
++      }
++
++      err = 0;
++out:
++      namespace_unlock();
++      return err;
++}
++
+ static int do_move_mount(struct path *old_path, struct path *new_path)
+ {
+       struct mnt_namespace *ns;
+@@ -3583,7 +3655,10 @@ SYSCALL_DEFINE5(move_mount,
+       if (ret < 0)
+               goto out_to;
+-      ret = do_move_mount(&from_path, &to_path);
++      if (flags & MOVE_MOUNT_SET_GROUP)
++              ret = do_set_group(&from_path, &to_path);
++      else
++              ret = do_move_mount(&from_path, &to_path);
+ out_to:
+       path_put(&to_path);
+diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
+index 96a0240f23fe..535ca707dfd7 100644
+--- a/include/uapi/linux/mount.h
++++ b/include/uapi/linux/mount.h
+@@ -70,7 +70,8 @@
+ #define MOVE_MOUNT_T_SYMLINKS         0x00000010 /* Follow symlinks on to path */
+ #define MOVE_MOUNT_T_AUTOMOUNTS               0x00000020 /* Follow automounts on to path */
+ #define MOVE_MOUNT_T_EMPTY_PATH               0x00000040 /* Empty to path permitted */
+-#define MOVE_MOUNT__MASK              0x00000077
++#define MOVE_MOUNT_SET_GROUP          0x00000100 /* Set sharing group instead */
++#define MOVE_MOUNT__MASK              0x00000177
+ /*
+  * fsopen() flags.
+-- 
+2.50.1
+
index 317b187cddb7683654d5bae3ecc5c6cd8e4b1618..6d60daa24a308bfbdb8b9934d8a037bbd5343ee2 100644 (file)
@@ -309,3 +309,6 @@ memstick-fix-deadlock-by-moving-removing-flag-earlier.patch
 squashfs-fix-memory-leak-in-squashfs_fill_super.patch
 drm-amd-display-fix-fractional-fb-divider-in-set_pixel_clock_v3.patch
 drm-amd-display-find-first-crtc-and-its-line-time-in-dce110_fill_display_configs.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+move_mount-allow-to-add-a-mount-into-an-existing-gro.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-5.4/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-5.4/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..d4dd399
--- /dev/null
@@ -0,0 +1,103 @@
+From 7443bb571f9c649e0d9792239c64937e042a7627 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow to mess with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 3c1afe60d438..c87f847c959d 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2210,6 +2210,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, false);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2246,10 +2259,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -2634,18 +2647,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+
diff --git a/queue-6.1/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-6.1/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..f1c4b8b
--- /dev/null
@@ -0,0 +1,96 @@
+From 39b77aa67201df647c34cd59d219c296bfd0d2ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may occur
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index d9c6d1fbb6dd..3033a937e3a5 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -156,8 +156,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
index 0d081a289c3bede2fe7fc520d949278f12aed4f2..36d8f29b9ea3edcbf457c5eaa0bd34673f9e72ba 100644 (file)
@@ -402,3 +402,6 @@ drm-amd-display-fix-fractional-fb-divider-in-set_pixel_clock_v3.patch
 drm-amd-display-fix-dp-audio-dto1-clock-source-on-dce-6.patch
 drm-amd-display-find-first-crtc-and-its-line-time-in-dce110_fill_display_configs.patch
 drm-amd-display-fill-display-clock-and-vblank-time-in-dce110_fill_display_configs.patch
+smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-6.1/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch b/queue-6.1/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
new file mode 100644 (file)
index 0000000..d30ae3a
--- /dev/null
@@ -0,0 +1,93 @@
+From 644638b631c35db9fc1b31ba66478654cef415e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Aug 2025 18:45:46 +0200
+Subject: smb: server: split ksmbd_rdma_stop_listening() out of
+ ksmbd_rdma_destroy()
+
+From: Stefan Metzmacher <metze@samba.org>
+
+[ Upstream commit bac7b996d42e458a94578f4227795a0d4deef6fa ]
+
+We can't call destroy_workqueue(smb_direct_wq); before stop_sessions()!
+
+Otherwise already existing connections try to use smb_direct_wq as
+a NULL pointer.
+
+Cc: Namjae Jeon <linkinjeon@kernel.org>
+Cc: Steve French <smfrench@gmail.com>
+Cc: Tom Talpey <tom@talpey.com>
+Cc: linux-cifs@vger.kernel.org
+Cc: samba-technical@lists.samba.org
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Signed-off-by: Stefan Metzmacher <metze@samba.org>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/connection.c     | 3 ++-
+ fs/smb/server/transport_rdma.c | 5 ++++-
+ fs/smb/server/transport_rdma.h | 4 +++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
+index 09e1e7771592..92d8a0d898eb 100644
+--- a/fs/smb/server/connection.c
++++ b/fs/smb/server/connection.c
+@@ -436,7 +436,8 @@ void ksmbd_conn_transport_destroy(void)
+ {
+       mutex_lock(&init_lock);
+       ksmbd_tcp_destroy();
+-      ksmbd_rdma_destroy();
++      ksmbd_rdma_stop_listening();
+       stop_sessions();
++      ksmbd_rdma_destroy();
+       mutex_unlock(&init_lock);
+ }
+diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
+index 7d59ed6e1383..3006d76d8059 100644
+--- a/fs/smb/server/transport_rdma.c
++++ b/fs/smb/server/transport_rdma.c
+@@ -2188,7 +2188,7 @@ int ksmbd_rdma_init(void)
+       return 0;
+ }
+-void ksmbd_rdma_destroy(void)
++void ksmbd_rdma_stop_listening(void)
+ {
+       if (!smb_direct_listener.cm_id)
+               return;
+@@ -2197,7 +2197,10 @@ void ksmbd_rdma_destroy(void)
+       rdma_destroy_id(smb_direct_listener.cm_id);
+       smb_direct_listener.cm_id = NULL;
++}
++void ksmbd_rdma_destroy(void)
++{
+       if (smb_direct_wq) {
+               destroy_workqueue(smb_direct_wq);
+               smb_direct_wq = NULL;
+diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h
+index 77aee4e5c9dc..a2291b77488a 100644
+--- a/fs/smb/server/transport_rdma.h
++++ b/fs/smb/server/transport_rdma.h
+@@ -54,13 +54,15 @@ struct smb_direct_data_transfer {
+ #ifdef CONFIG_SMB_SERVER_SMBDIRECT
+ int ksmbd_rdma_init(void);
++void ksmbd_rdma_stop_listening(void);
+ void ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+ void init_smbd_max_io_size(unsigned int sz);
+ unsigned int get_smbd_max_read_write_size(void);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+-static inline int ksmbd_rdma_destroy(void) { return 0; }
++static inline void ksmbd_rdma_stop_listening(void) { }
++static inline void ksmbd_rdma_destroy(void) { }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+ static inline void init_smbd_max_io_size(unsigned int sz) { }
+ static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
+-- 
+2.50.1
+
diff --git a/queue-6.1/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-6.1/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..c4dbff2
--- /dev/null
@@ -0,0 +1,103 @@
+From e42ab06efafdf77b326724a7158305e8549d6a2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow to mess with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index f0fa2a1a6b05..2a76269f2a4e 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2340,6 +2340,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, false);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2376,10 +2389,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -2774,18 +2787,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+
diff --git a/queue-6.12/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-6.12/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..95f6ff7
--- /dev/null
@@ -0,0 +1,96 @@
+From 232b188de7b1415fedb27dfaa86937f1c8fcf7f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may occur
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index e9e84512a027..79c19ffa4401 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
index 6600e62bc45d8d84fffdda2586183cfde764e166..40e4ba70f3f7ca092a5ebb135b85c12676009ae5 100644 (file)
@@ -203,3 +203,6 @@ arm64-dts-ti-k3-am6-add-boot-phase-flag-to-support-mmc-boot.patch
 arm64-dts-ti-k3-am62-add-non-removable-flag-for-emmc.patch
 arm64-dts-ti-k3-am6-remove-disable-wp-for-emmc.patch
 arm64-dts-ti-k3-am62-move-emmc-pinmux-to-top-level-board-file.patch
+smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-6.12/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch b/queue-6.12/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
new file mode 100644 (file)
index 0000000..da3fb5f
--- /dev/null
@@ -0,0 +1,93 @@
+From 6b1aed9c11c28b5959e9edc4cfcab9f53f06a8df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Aug 2025 18:45:46 +0200
+Subject: smb: server: split ksmbd_rdma_stop_listening() out of
+ ksmbd_rdma_destroy()
+
+From: Stefan Metzmacher <metze@samba.org>
+
+[ Upstream commit bac7b996d42e458a94578f4227795a0d4deef6fa ]
+
+We can't call destroy_workqueue(smb_direct_wq); before stop_sessions()!
+
+Otherwise already existing connections try to use smb_direct_wq as
+a NULL pointer.
+
+Cc: Namjae Jeon <linkinjeon@kernel.org>
+Cc: Steve French <smfrench@gmail.com>
+Cc: Tom Talpey <tom@talpey.com>
+Cc: linux-cifs@vger.kernel.org
+Cc: samba-technical@lists.samba.org
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Signed-off-by: Stefan Metzmacher <metze@samba.org>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/connection.c     | 3 ++-
+ fs/smb/server/transport_rdma.c | 5 ++++-
+ fs/smb/server/transport_rdma.h | 4 +++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
+index 9eb3e6010aa6..1c37d1e9aef3 100644
+--- a/fs/smb/server/connection.c
++++ b/fs/smb/server/connection.c
+@@ -503,7 +503,8 @@ void ksmbd_conn_transport_destroy(void)
+ {
+       mutex_lock(&init_lock);
+       ksmbd_tcp_destroy();
+-      ksmbd_rdma_destroy();
++      ksmbd_rdma_stop_listening();
+       stop_sessions();
++      ksmbd_rdma_destroy();
+       mutex_unlock(&init_lock);
+ }
+diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
+index 805c20f619b0..67c989e5ddaa 100644
+--- a/fs/smb/server/transport_rdma.c
++++ b/fs/smb/server/transport_rdma.c
+@@ -2193,7 +2193,7 @@ int ksmbd_rdma_init(void)
+       return 0;
+ }
+-void ksmbd_rdma_destroy(void)
++void ksmbd_rdma_stop_listening(void)
+ {
+       if (!smb_direct_listener.cm_id)
+               return;
+@@ -2202,7 +2202,10 @@ void ksmbd_rdma_destroy(void)
+       rdma_destroy_id(smb_direct_listener.cm_id);
+       smb_direct_listener.cm_id = NULL;
++}
++void ksmbd_rdma_destroy(void)
++{
+       if (smb_direct_wq) {
+               destroy_workqueue(smb_direct_wq);
+               smb_direct_wq = NULL;
+diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h
+index 77aee4e5c9dc..a2291b77488a 100644
+--- a/fs/smb/server/transport_rdma.h
++++ b/fs/smb/server/transport_rdma.h
+@@ -54,13 +54,15 @@ struct smb_direct_data_transfer {
+ #ifdef CONFIG_SMB_SERVER_SMBDIRECT
+ int ksmbd_rdma_init(void);
++void ksmbd_rdma_stop_listening(void);
+ void ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+ void init_smbd_max_io_size(unsigned int sz);
+ unsigned int get_smbd_max_read_write_size(void);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+-static inline int ksmbd_rdma_destroy(void) { return 0; }
++static inline void ksmbd_rdma_stop_listening(void) { }
++static inline void ksmbd_rdma_destroy(void) { }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+ static inline void init_smbd_max_io_size(unsigned int sz) { }
+ static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
+-- 
+2.50.1
+
diff --git a/queue-6.12/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-6.12/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..e936f7c
--- /dev/null
@@ -0,0 +1,103 @@
+From 9e3bdb957325f6d7f89af0e33eb0a1e078c6e363 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow to mess with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index bb1560b0d25c..962fda4fa246 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2683,6 +2683,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, 0);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2719,10 +2732,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -3116,18 +3129,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+
diff --git a/queue-6.16/btrfs-zoned-fix-data-relocation-block-group-reservat.patch b/queue-6.16/btrfs-zoned-fix-data-relocation-block-group-reservat.patch
new file mode 100644 (file)
index 0000000..d621050
--- /dev/null
@@ -0,0 +1,140 @@
+From 857051bf7c514f4842da987b5df5e02e19c8aa62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Jul 2025 16:59:53 +0900
+Subject: btrfs: zoned: fix data relocation block group reservation
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit daa0fde322350b467bc62bc1b141bf62df6123f8 ]
+
+btrfs_zoned_reserve_data_reloc_bg() is called on mount and at that point,
+all data block groups belong to the primary data space_info. So, we don't
+find anything in the data relocation space_info.
+
+Also, the condition "bg->used > 0" can select a block group full of
+zone_unusable bytes for the candidate. As we cannot allocate from the block
+group, it is useless to reserve it as the data relocation block group.
+
+Furthermore, because of the space_info separation, we need to migrate the
+selected block group to the data relocation space_info. If not, the extent
+allocator cannot use the block group to do the allocation.
+
+This commit fixes these three issues.
+
+Fixes: e606ff985ec7 ("btrfs: zoned: reserve data_reloc block group on mount")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/zoned.c | 55 +++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 47 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 936448b1f716..af5ba3ad2eb8 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -18,6 +18,7 @@
+ #include "accessors.h"
+ #include "bio.h"
+ #include "transaction.h"
++#include "sysfs.h"
+ /* Maximum number of zones to report per blkdev_report_zones() call */
+ #define BTRFS_REPORT_NR_ZONES   4096
+@@ -2510,12 +2511,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
+ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
+ {
+       struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
+-      struct btrfs_space_info *space_info = data_sinfo->sub_group[0];
++      struct btrfs_space_info *space_info = data_sinfo;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_block_group *bg;
+       struct list_head *bg_list;
+       u64 alloc_flags;
+-      bool initial = false;
++      bool first = true;
+       bool did_chunk_alloc = false;
+       int index;
+       int ret;
+@@ -2529,21 +2530,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
+       if (sb_rdonly(fs_info->sb))
+               return;
+-      ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+       alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
+       index = btrfs_bg_flags_to_raid_index(alloc_flags);
+-      bg_list = &data_sinfo->block_groups[index];
++      /* Scan the data space_info to find empty block groups. Take the second one. */
+ again:
++      bg_list = &space_info->block_groups[index];
+       list_for_each_entry(bg, bg_list, list) {
+-              if (bg->used > 0)
++              if (bg->alloc_offset != 0)
+                       continue;
+-              if (!initial) {
+-                      initial = true;
++              if (first) {
++                      first = false;
+                       continue;
+               }
++              if (space_info == data_sinfo) {
++                      /* Migrate the block group to the data relocation space_info. */
++                      struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
++                      int factor;
++
++                      ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
++                      factor = btrfs_bg_type_to_factor(bg->flags);
++
++                      down_write(&space_info->groups_sem);
++                      list_del_init(&bg->list);
++                      /* We can assume this as we choose the second empty one. */
++                      ASSERT(!list_empty(&space_info->block_groups[index]));
++                      up_write(&space_info->groups_sem);
++
++                      spin_lock(&space_info->lock);
++                      space_info->total_bytes -= bg->length;
++                      space_info->disk_total -= bg->length * factor;
++                      /* There is no allocation ever happened. */
++                      ASSERT(bg->used == 0);
++                      ASSERT(bg->zone_unusable == 0);
++                      /* No super block in a block group on the zoned setup. */
++                      ASSERT(bg->bytes_super == 0);
++                      spin_unlock(&space_info->lock);
++
++                      bg->space_info = reloc_sinfo;
++                      if (reloc_sinfo->block_group_kobjs[index] == NULL)
++                              btrfs_sysfs_add_block_group_type(bg);
++
++                      btrfs_add_bg_to_space_info(fs_info, bg);
++              }
++
+               fs_info->data_reloc_bg = bg->start;
+               set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
+               btrfs_zone_activate(bg);
+@@ -2558,11 +2590,18 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
+       if (IS_ERR(trans))
+               return;
++      /* Allocate new BG in the data relocation space_info. */
++      space_info = data_sinfo->sub_group[0];
++      ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+       ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
+       btrfs_end_transaction(trans);
+       if (ret == 1) {
++              /*
++               * We allocated a new block group in the data relocation space_info. We
++               * can take that one.
++               */
++              first = false;
+               did_chunk_alloc = true;
+-              bg_list = &space_info->block_groups[index];
+               goto again;
+       }
+ }
+-- 
+2.50.1
+
diff --git a/queue-6.16/fhandle-do_handle_open-should-get-fd-with-user-flags.patch b/queue-6.16/fhandle-do_handle_open-should-get-fd-with-user-flags.patch
new file mode 100644 (file)
index 0000000..ff53de8
--- /dev/null
@@ -0,0 +1,46 @@
+From 19d9401d6ff9f1623822e7faa3f3abd037090aa1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 17:54:28 -0600
+Subject: fhandle: do_handle_open() should get FD with user flags
+
+From: Thomas Bertschinger <tahbertschinger@gmail.com>
+
+[ Upstream commit b5ca88927e353185b3d9ac4362d33e5aeb25771f ]
+
+In f07c7cc4684a, do_handle_open() was switched to use the automatic
+cleanup method for getting a FD. In that change it was also switched
+to pass O_CLOEXEC unconditionally to get_unused_fd_flags() instead
+of passing the user-specified flags.
+
+I don't see anything in that commit description that indicates this was
+intentional, so I am assuming it was an oversight.
+
+With this fix, the FD will again be opened with, or without, O_CLOEXEC
+according to what the user requested.
+
+Fixes: f07c7cc4684a ("fhandle: simplify error handling")
+Signed-off-by: Thomas Bertschinger <tahbertschinger@gmail.com>
+Link: https://lore.kernel.org/20250814235431.995876-4-tahbertschinger@gmail.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fhandle.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/fhandle.c b/fs/fhandle.c
+index 66ff60591d17..e21ec857f2ab 100644
+--- a/fs/fhandle.c
++++ b/fs/fhandle.c
+@@ -404,7 +404,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
+       if (retval)
+               return retval;
+-      CLASS(get_unused_fd, fd)(O_CLOEXEC);
++      CLASS(get_unused_fd, fd)(open_flag);
+       if (fd < 0)
+               return fd;
+-- 
+2.50.1
+
diff --git a/queue-6.16/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-6.16/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..e1df1b6
--- /dev/null
@@ -0,0 +1,96 @@
+From 799f239fcd810facce9de91101b83a5e30b93c9f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's an issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may happen
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 8cf4a1dc481e..eb6d85edc37a 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
diff --git a/queue-6.16/fs-fix-incorrect-lflags-value-in-the-move_mount-sysc.patch b/queue-6.16/fs-fix-incorrect-lflags-value-in-the-move_mount-sysc.patch
new file mode 100644 (file)
index 0000000..48e5dc3
--- /dev/null
@@ -0,0 +1,91 @@
+From 55f69d678046f9fcd348e29c10720fd2844c6aff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 13:24:26 +0800
+Subject: fs: fix incorrect lflags value in the move_mount syscall
+
+From: Yuntao Wang <yuntao.wang@linux.dev>
+
+[ Upstream commit 593d9e4c3d634c370f226f55453c376bf43b3684 ]
+
+The lflags value used to look up from_path was overwritten by the one used
+to look up to_path.
+
+In other words, from_path was looked up with the wrong lflags value. Fix it.
+
+Fixes: f9fde814de37 ("fs: support getname_maybe_null() in move_mount()")
+Signed-off-by: Yuntao Wang <yuntao.wang@linux.dev>
+Link: https://lore.kernel.org/20250811052426.129188-1-yuntao.wang@linux.dev
+[Christian Brauner <brauner@kernel.org>: massage patch]
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 32 ++++++++++++++++++++------------
+ 1 file changed, 20 insertions(+), 12 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index ea724ad3d113..49d016711469 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -4657,20 +4657,10 @@ SYSCALL_DEFINE5(move_mount,
+       if (flags & MOVE_MOUNT_SET_GROUP)       mflags |= MNT_TREE_PROPAGATION;
+       if (flags & MOVE_MOUNT_BENEATH)         mflags |= MNT_TREE_BENEATH;
+-      lflags = 0;
+-      if (flags & MOVE_MOUNT_F_SYMLINKS)      lflags |= LOOKUP_FOLLOW;
+-      if (flags & MOVE_MOUNT_F_AUTOMOUNTS)    lflags |= LOOKUP_AUTOMOUNT;
+       uflags = 0;
+-      if (flags & MOVE_MOUNT_F_EMPTY_PATH)    uflags = AT_EMPTY_PATH;
+-      from_name = getname_maybe_null(from_pathname, uflags);
+-      if (IS_ERR(from_name))
+-              return PTR_ERR(from_name);
++      if (flags & MOVE_MOUNT_T_EMPTY_PATH)
++              uflags = AT_EMPTY_PATH;
+-      lflags = 0;
+-      if (flags & MOVE_MOUNT_T_SYMLINKS)      lflags |= LOOKUP_FOLLOW;
+-      if (flags & MOVE_MOUNT_T_AUTOMOUNTS)    lflags |= LOOKUP_AUTOMOUNT;
+-      uflags = 0;
+-      if (flags & MOVE_MOUNT_T_EMPTY_PATH)    uflags = AT_EMPTY_PATH;
+       to_name = getname_maybe_null(to_pathname, uflags);
+       if (IS_ERR(to_name))
+               return PTR_ERR(to_name);
+@@ -4683,11 +4673,24 @@ SYSCALL_DEFINE5(move_mount,
+               to_path = fd_file(f_to)->f_path;
+               path_get(&to_path);
+       } else {
++              lflags = 0;
++              if (flags & MOVE_MOUNT_T_SYMLINKS)
++                      lflags |= LOOKUP_FOLLOW;
++              if (flags & MOVE_MOUNT_T_AUTOMOUNTS)
++                      lflags |= LOOKUP_AUTOMOUNT;
+               ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL);
+               if (ret)
+                       return ret;
+       }
++      uflags = 0;
++      if (flags & MOVE_MOUNT_F_EMPTY_PATH)
++              uflags = AT_EMPTY_PATH;
++
++      from_name = getname_maybe_null(from_pathname, uflags);
++      if (IS_ERR(from_name))
++              return PTR_ERR(from_name);
++
+       if (!from_name && from_dfd >= 0) {
+               CLASS(fd_raw, f_from)(from_dfd);
+               if (fd_empty(f_from))
+@@ -4696,6 +4699,11 @@ SYSCALL_DEFINE5(move_mount,
+               return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags);
+       }
++      lflags = 0;
++      if (flags & MOVE_MOUNT_F_SYMLINKS)
++              lflags |= LOOKUP_FOLLOW;
++      if (flags & MOVE_MOUNT_F_AUTOMOUNTS)
++              lflags |= LOOKUP_AUTOMOUNT;
+       ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL);
+       if (ret)
+               return ret;
+-- 
+2.50.1
+
diff --git a/queue-6.16/libfs-massage-path_from_stashed-to-allow-custom-stas.patch b/queue-6.16/libfs-massage-path_from_stashed-to-allow-custom-stas.patch
new file mode 100644 (file)
index 0000000..6b3958e
--- /dev/null
@@ -0,0 +1,109 @@
+From 699705a1913035b03cf9d135a5c32411821f3be0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jun 2025 22:53:36 +0200
+Subject: libfs: massage path_from_stashed() to allow custom stashing behavior
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit bda3f1608d993419fa247dc11263fc931ceca58a ]
+
+* Add a callback to struct stashed_operations so it's possible to
+  implement custom behavior for pidfs and allow for it to return errors.
+
+* Teach stashed_dentry_get() to handle error pointers.
+
+Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-2-98f3456fd552@kernel.org
+Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 0b2d71a7c826 ("pidfs: Fix memory leak in pidfd_info()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/internal.h |  3 +++
+ fs/libfs.c    | 27 ++++++++++++++++++++-------
+ 2 files changed, 23 insertions(+), 7 deletions(-)
+
+diff --git a/fs/internal.h b/fs/internal.h
+index 393f6c5c24f6..22ba066d1dba 100644
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -322,12 +322,15 @@ struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns);
+ struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
+ void mnt_idmap_put(struct mnt_idmap *idmap);
+ struct stashed_operations {
++      struct dentry *(*stash_dentry)(struct dentry **stashed,
++                                     struct dentry *dentry);
+       void (*put_data)(void *data);
+       int (*init_inode)(struct inode *inode, void *data);
+ };
+ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
+                     struct path *path);
+ void stashed_dentry_prune(struct dentry *dentry);
++struct dentry *stash_dentry(struct dentry **stashed, struct dentry *dentry);
+ struct dentry *stashed_dentry_get(struct dentry **stashed);
+ /**
+  * path_mounted - check whether path is mounted
+diff --git a/fs/libfs.c b/fs/libfs.c
+index 972b95cc7433..5b936ee71892 100644
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -2126,6 +2126,8 @@ struct dentry *stashed_dentry_get(struct dentry **stashed)
+       dentry = rcu_dereference(*stashed);
+       if (!dentry)
+               return NULL;
++      if (IS_ERR(dentry))
++              return dentry;
+       if (!lockref_get_not_dead(&dentry->d_lockref))
+               return NULL;
+       return dentry;
+@@ -2174,8 +2176,7 @@ static struct dentry *prepare_anon_dentry(struct dentry **stashed,
+       return dentry;
+ }
+-static struct dentry *stash_dentry(struct dentry **stashed,
+-                                 struct dentry *dentry)
++struct dentry *stash_dentry(struct dentry **stashed, struct dentry *dentry)
+ {
+       guard(rcu)();
+       for (;;) {
+@@ -2216,12 +2217,15 @@ static struct dentry *stash_dentry(struct dentry **stashed,
+ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
+                     struct path *path)
+ {
+-      struct dentry *dentry;
++      struct dentry *dentry, *res;
+       const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info;
+       /* See if dentry can be reused. */
+-      path->dentry = stashed_dentry_get(stashed);
+-      if (path->dentry) {
++      res = stashed_dentry_get(stashed);
++      if (IS_ERR(res))
++              return PTR_ERR(res);
++      if (res) {
++              path->dentry = res;
+               sops->put_data(data);
+               goto out_path;
+       }
+@@ -2232,8 +2236,17 @@ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
+               return PTR_ERR(dentry);
+       /* Added a new dentry. @data is now owned by the filesystem. */
+-      path->dentry = stash_dentry(stashed, dentry);
+-      if (path->dentry != dentry)
++      if (sops->stash_dentry)
++              res = sops->stash_dentry(stashed, dentry);
++      else
++              res = stash_dentry(stashed, dentry);
++      if (IS_ERR(res)) {
++              dput(dentry);
++              return PTR_ERR(res);
++      }
++      path->dentry = res;
++      /* A dentry was reused. */
++      if (res != dentry)
+               dput(dentry);
+ out_path:
+-- 
+2.50.1
+
diff --git a/queue-6.16/pidfs-fix-memory-leak-in-pidfd_info.patch b/queue-6.16/pidfs-fix-memory-leak-in-pidfd_info.patch
new file mode 100644 (file)
index 0000000..45d99c4
--- /dev/null
@@ -0,0 +1,79 @@
+From cc1498c01e878471397ca1bf180751c274c0e168 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 17:44:53 +0800
+Subject: pidfs: Fix memory leak in pidfd_info()
+
+From: Adrian Huang (Lenovo) <adrianhuang0701@gmail.com>
+
+[ Upstream commit 0b2d71a7c82628bb36fd43e80193bcc2693c239a ]
+
+After running the program 'ioctl_pidfd03' of Linux Test Project (LTP) or
+the program 'pidfd_info_test' in 'tools/testing/selftests/pidfd' of the
+kernel source, kmemleak reports the following memory leaks:
+
+  # cat /sys/kernel/debug/kmemleak
+  unreferenced object 0xff110020e5988000 (size 8216):
+    comm "ioctl_pidfd03", pid 10853, jiffies 4294800031
+    hex dump (first 32 bytes):
+      02 40 00 00 00 00 00 00 10 00 00 00 00 00 00 00  .@..............
+      00 00 00 00 af 01 00 00 80 00 00 00 00 00 00 00  ................
+    backtrace (crc 69483047):
+      kmem_cache_alloc_node_noprof+0x2fb/0x410
+      copy_process+0x178/0x1740
+      kernel_clone+0x99/0x3b0
+      __do_sys_clone3+0xbe/0x100
+      do_syscall_64+0x7b/0x2c0
+      entry_SYSCALL_64_after_hwframe+0x76/0x7e
+  ...
+  unreferenced object 0xff11002097b70000 (size 8216):
+  comm "pidfd_info_test", pid 11840, jiffies 4294889165
+  hex dump (first 32 bytes):
+    06 40 00 00 00 00 00 00 10 00 00 00 00 00 00 00  .@..............
+    00 00 00 00 b5 00 00 00 80 00 00 00 00 00 00 00  ................
+  backtrace (crc a6286bb7):
+    kmem_cache_alloc_node_noprof+0x2fb/0x410
+    copy_process+0x178/0x1740
+    kernel_clone+0x99/0x3b0
+    __do_sys_clone3+0xbe/0x100
+    do_syscall_64+0x7b/0x2c0
+    entry_SYSCALL_64_after_hwframe+0x76/0x7e
+  ...
+
+The leak occurs because pidfd_info() obtains a task_struct via
+get_pid_task() but never calls put_task_struct() to drop the reference,
+leaving task->usage unbalanced.
+
+Fix the issue by adding '__free(put_task) = NULL' to the local variable
+'task', ensuring that put_task_struct() is automatically invoked when
+the variable goes out of scope.
+
+Fixes: 7477d7dce48a ("pidfs: allow to retrieve exit information")
+Signed-off-by: Adrian Huang (Lenovo) <adrianhuang0701@gmail.com>
+Link: https://lore.kernel.org/20250814094453.15232-1-adrianhuang0701@gmail.com
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/pidfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/pidfs.c b/fs/pidfs.c
+index 568574fed576..1266af5f3a15 100644
+--- a/fs/pidfs.c
++++ b/fs/pidfs.c
+@@ -282,12 +282,12 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
+ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
+ {
+       struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
++      struct task_struct *task __free(put_task) = NULL;
+       struct pid *pid = pidfd_pid(file);
+       size_t usize = _IOC_SIZE(cmd);
+       struct pidfd_info kinfo = {};
+       struct pidfs_exit_info *exit_info;
+       struct user_namespace *user_ns;
+-      struct task_struct *task;
+       struct pidfs_attr *attr;
+       const struct cred *c;
+       __u64 mask;
+-- 
+2.50.1
+
diff --git a/queue-6.16/pidfs-move-to-anonymous-struct.patch b/queue-6.16/pidfs-move-to-anonymous-struct.patch
new file mode 100644 (file)
index 0000000..7392ba8
--- /dev/null
@@ -0,0 +1,48 @@
+From 4c0f467c619d0226966270385e884d37fa8703d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jun 2025 22:53:38 +0200
+Subject: pidfs: move to anonymous struct
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 75215c972581d3934e76a57690cf838d7ceab399 ]
+
+Move the pidfs entries to an anonymous struct.
+
+Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-4-98f3456fd552@kernel.org
+Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 0b2d71a7c826 ("pidfs: Fix memory leak in pidfd_info()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/pid.h | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/pid.h b/include/linux/pid.h
+index 453ae6d8a68d..00646a692dd4 100644
+--- a/include/linux/pid.h
++++ b/include/linux/pid.h
+@@ -52,14 +52,15 @@ struct upid {
+       struct pid_namespace *ns;
+ };
+-struct pid
+-{
++struct pid {
+       refcount_t count;
+       unsigned int level;
+       spinlock_t lock;
+-      struct dentry *stashed;
+-      u64 ino;
+-      struct rb_node pidfs_node;
++      struct {
++              u64 ino;
++              struct rb_node pidfs_node;
++              struct dentry *stashed;
++      };
+       /* lists of tasks that use this pid */
+       struct hlist_head tasks[PIDTYPE_MAX];
+       struct hlist_head inodes;
+-- 
+2.50.1
+
diff --git a/queue-6.16/pidfs-persist-information.patch b/queue-6.16/pidfs-persist-information.patch
new file mode 100644 (file)
index 0000000..9aef5b9
--- /dev/null
@@ -0,0 +1,471 @@
+From a10eec941b82c9405218adbfc3d5f3d095fd2282 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jun 2025 22:53:39 +0200
+Subject: pidfs: persist information
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 8ec7c826d97b390879df2a03dfb035c70af86779 ]
+
+Persist exit and coredump information independent of whether anyone
+currently holds a pidfd for the struct pid.
+
+The current scheme allocated pidfs dentries on-demand repeatedly.
+This scheme is reaching its limits as it makes it impossible to pin
+information that needs to be available after the task has exited or
+coredumped and that should not be lost simply because the pidfd got
+closed temporarily. The next opener should still see the stashed
+information.
+
+This is also a prerequisite for supporting extended attributes on
+pidfds to allow attaching meta information to them.
+
+If someone opens a pidfd for a struct pid a pidfs dentry is allocated
+and stashed in pid->stashed. Once the last pidfd for the struct pid is
+closed the pidfs dentry is released and removed from pid->stashed.
+
+So if 10 callers create a pidfs dentry for the same struct pid
+sequentially, i.e., each closing the pidfd before the other creates a
+new one then a new pidfs dentry is allocated every time.
+
+Because multiple tasks acquiring and releasing a pidfd for the same
+struct pid can race with one another, a task may still find a valid
+pidfs entry from the previous task in pid->stashed and reuse it. Or it
+might find a dead dentry in there and fail to reuse it and so stashes a
+new pidfs dentry. Multiple tasks may race to stash a new pidfs dentry
+but only one will succeed, the other ones will put their dentry.
+
+The current scheme aims to ensure that a pidfs dentry for a struct pid
+can only be created if the task is still alive or if a pidfs dentry
+already existed before the task was reaped and so exit information has
+been stashed in the pidfs inode.
+
+That's great except that it's buggy. If a pidfs dentry is stashed in
+pid->stashed after pidfs_exit() but before __unhash_process() is called
+we will return a pidfd for a reaped task without exit information being
+available.
+
+The pidfs_pid_valid() check does not guard against this race as it
+doesn't sync at all with pidfs_exit(). The pid_has_task() check might be
+successful simply because we're before __unhash_process() but after
+pidfs_exit().
+
+Introduce a new scheme where the lifetime of information associated with
+a pidfs entry (coredump and exit information) isn't bound to the
+lifetime of the pidfs inode but the struct pid itself.
+
+The first time a pidfs dentry is allocated for a struct pid a struct
+pidfs_attr will be allocated which will be used to store exit and
+coredump information.
+
+If all pidfds for the pidfs dentry are closed the dentry and inode can be
+cleaned up but the struct pidfs_attr will stick until the struct pid
+itself is freed. This will ensure minimal memory usage while persisting
+relevant information.
+
+The new scheme has various advantages. First, it allows to close the
+race where we end up handing out a pidfd for a reaped task for which no
+exit information is available. Second, it minimizes memory usage.
+Third, it allows to remove complex lifetime tracking via dentries when
+registering a struct pid with pidfs. There's no need to get or put a
+reference. Instead, the lifetime of exit and coredump information
+associated with a struct pid is bound to the lifetime of struct pid
+itself.
+
+Link: https://lore.kernel.org/20250618-work-pidfs-persistent-v2-5-98f3456fd552@kernel.org
+Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Stable-dep-of: 0b2d71a7c826 ("pidfs: Fix memory leak in pidfd_info()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/pidfs.c            | 212 +++++++++++++++++++++++++++++-------------
+ include/linux/pid.h   |   3 +
+ include/linux/pidfs.h |   1 +
+ kernel/pid.c          |   2 +-
+ 4 files changed, 151 insertions(+), 67 deletions(-)
+
+diff --git a/fs/pidfs.c b/fs/pidfs.c
+index 4c551bfa8927..568574fed576 100644
+--- a/fs/pidfs.c
++++ b/fs/pidfs.c
+@@ -25,7 +25,10 @@
+ #include "internal.h"
+ #include "mount.h"
++#define PIDFS_PID_DEAD ERR_PTR(-ESRCH)
++
+ static struct kmem_cache *pidfs_cachep __ro_after_init;
++static struct kmem_cache *pidfs_attr_cachep __ro_after_init;
+ /*
+  * Stashes information that userspace needs to access even after the
+@@ -37,6 +40,11 @@ struct pidfs_exit_info {
+       __u32 coredump_mask;
+ };
++struct pidfs_attr {
++      struct pidfs_exit_info __pei;
++      struct pidfs_exit_info *exit_info;
++};
++
+ struct pidfs_inode {
+       struct pidfs_exit_info __pei;
+       struct pidfs_exit_info *exit_info;
+@@ -125,6 +133,7 @@ void pidfs_add_pid(struct pid *pid)
+       pid->ino = pidfs_ino_nr;
+       pid->stashed = NULL;
++      pid->attr = NULL;
+       pidfs_ino_nr++;
+       write_seqcount_begin(&pidmap_lock_seq);
+@@ -139,6 +148,18 @@ void pidfs_remove_pid(struct pid *pid)
+       write_seqcount_end(&pidmap_lock_seq);
+ }
++void pidfs_free_pid(struct pid *pid)
++{
++      /*
++       * Any dentry must've been wiped from the pid by now.
++       * Otherwise there's a reference count bug.
++       */
++      VFS_WARN_ON_ONCE(pid->stashed);
++
++      if (!IS_ERR(pid->attr))
++              kfree(pid->attr);
++}
++
+ #ifdef CONFIG_PROC_FS
+ /**
+  * pidfd_show_fdinfo - print information about a pidfd
+@@ -261,13 +282,13 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags)
+ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
+ {
+       struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
+-      struct inode *inode = file_inode(file);
+       struct pid *pid = pidfd_pid(file);
+       size_t usize = _IOC_SIZE(cmd);
+       struct pidfd_info kinfo = {};
+       struct pidfs_exit_info *exit_info;
+       struct user_namespace *user_ns;
+       struct task_struct *task;
++      struct pidfs_attr *attr;
+       const struct cred *c;
+       __u64 mask;
+@@ -286,8 +307,9 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
+       if (!pid_in_current_pidns(pid))
+               return -ESRCH;
++      attr = READ_ONCE(pid->attr);
+       if (mask & PIDFD_INFO_EXIT) {
+-              exit_info = READ_ONCE(pidfs_i(inode)->exit_info);
++              exit_info = READ_ONCE(attr->exit_info);
+               if (exit_info) {
+                       kinfo.mask |= PIDFD_INFO_EXIT;
+ #ifdef CONFIG_CGROUPS
+@@ -300,7 +322,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
+       if (mask & PIDFD_INFO_COREDUMP) {
+               kinfo.mask |= PIDFD_INFO_COREDUMP;
+-              kinfo.coredump_mask = READ_ONCE(pidfs_i(inode)->__pei.coredump_mask);
++              kinfo.coredump_mask = READ_ONCE(attr->__pei.coredump_mask);
+       }
+       task = get_pid_task(pid, PIDTYPE_PID);
+@@ -552,41 +574,61 @@ struct pid *pidfd_pid(const struct file *file)
+  * task has been reaped which cannot happen until we're out of
+  * release_task().
+  *
+- * If this struct pid is referred to by a pidfd then
+- * stashed_dentry_get() will return the dentry and inode for that struct
+- * pid. Since we've taken a reference on it there's now an additional
+- * reference from the exit path on it. Which is fine. We're going to put
+- * it again in a second and we know that the pid is kept alive anyway.
++ * If this struct pid has at least once been referred to by a pidfd then
++ * pid->attr will be allocated. If not we mark the struct pid as dead so
++ * anyone who is trying to register it with pidfs will fail to do so.
++ * Otherwise we would hand out pidfs for reaped tasks without having
++ * exit information available.
+  *
+- * Worst case is that we've filled in the info and immediately free the
+- * dentry and inode afterwards since the pidfd has been closed. Since
++ * Worst case is that we've filled in the info and the pid gets freed
++ * right away in free_pid() when no one holds a pidfd anymore. Since
+  * pidfs_exit() currently is placed after exit_task_work() we know that
+- * it cannot be us aka the exiting task holding a pidfd to ourselves.
++ * it cannot be us aka the exiting task holding a pidfd to itself.
+  */
+ void pidfs_exit(struct task_struct *tsk)
+ {
+-      struct dentry *dentry;
++      struct pid *pid = task_pid(tsk);
++      struct pidfs_attr *attr;
++      struct pidfs_exit_info *exit_info;
++#ifdef CONFIG_CGROUPS
++      struct cgroup *cgrp;
++#endif
+       might_sleep();
+-      dentry = stashed_dentry_get(&task_pid(tsk)->stashed);
+-      if (dentry) {
+-              struct inode *inode = d_inode(dentry);
+-              struct pidfs_exit_info *exit_info = &pidfs_i(inode)->__pei;
+-#ifdef CONFIG_CGROUPS
+-              struct cgroup *cgrp;
++      guard(spinlock_irq)(&pid->wait_pidfd.lock);
++      attr = pid->attr;
++      if (!attr) {
++              /*
++               * No one ever held a pidfd for this struct pid.
++               * Mark it as dead so no one can add a pidfs
++               * entry anymore. We're about to be reaped and
++               * so no exit information would be available.
++               */
++              pid->attr = PIDFS_PID_DEAD;
++              return;
++      }
+-              rcu_read_lock();
+-              cgrp = task_dfl_cgroup(tsk);
+-              exit_info->cgroupid = cgroup_id(cgrp);
+-              rcu_read_unlock();
++      /*
++       * If @pid->attr is set someone might still legitimately hold a
++       * pidfd to @pid or someone might concurrently still be getting
++       * a reference to an already stashed dentry from @pid->stashed.
++       * So defer cleaning @pid->attr until the last reference to @pid
++       * is put
++       */
++
++      exit_info = &attr->__pei;
++
++#ifdef CONFIG_CGROUPS
++      rcu_read_lock();
++      cgrp = task_dfl_cgroup(tsk);
++      exit_info->cgroupid = cgroup_id(cgrp);
++      rcu_read_unlock();
+ #endif
+-              exit_info->exit_code = tsk->exit_code;
++      exit_info->exit_code = tsk->exit_code;
+-              /* Ensure that PIDFD_GET_INFO sees either all or nothing. */
+-              smp_store_release(&pidfs_i(inode)->exit_info, &pidfs_i(inode)->__pei);
+-              dput(dentry);
+-      }
++      /* Ensure that PIDFD_GET_INFO sees either all or nothing. */
++      smp_store_release(&attr->exit_info, &attr->__pei);
+ }
+ #ifdef CONFIG_COREDUMP
+@@ -594,16 +636,15 @@ void pidfs_coredump(const struct coredump_params *cprm)
+ {
+       struct pid *pid = cprm->pid;
+       struct pidfs_exit_info *exit_info;
+-      struct dentry *dentry;
+-      struct inode *inode;
++      struct pidfs_attr *attr;
+       __u32 coredump_mask = 0;
+-      dentry = pid->stashed;
+-      if (WARN_ON_ONCE(!dentry))
+-              return;
++      attr = READ_ONCE(pid->attr);
+-      inode = d_inode(dentry);
+-      exit_info = &pidfs_i(inode)->__pei;
++      VFS_WARN_ON_ONCE(!attr);
++      VFS_WARN_ON_ONCE(attr == PIDFS_PID_DEAD);
++
++      exit_info = &attr->__pei;
+       /* Note how we were coredumped. */
+       coredump_mask = pidfs_coredump_mask(cprm->mm_flags);
+       /* Note that we actually did coredump. */
+@@ -663,7 +704,7 @@ static struct inode *pidfs_alloc_inode(struct super_block *sb)
+ static void pidfs_free_inode(struct inode *inode)
+ {
+-      kmem_cache_free(pidfs_cachep, pidfs_i(inode));
++      kfree(pidfs_i(inode));
+ }
+ static const struct super_operations pidfs_sops = {
+@@ -831,8 +872,13 @@ static inline bool pidfs_pid_valid(struct pid *pid, const struct path *path,
+        * recorded and published can be handled correctly.
+        */
+       if (unlikely(!pid_has_task(pid, type))) {
+-              struct inode *inode = d_inode(path->dentry);
+-              return !!READ_ONCE(pidfs_i(inode)->exit_info);
++              struct pidfs_attr *attr;
++
++              attr = READ_ONCE(pid->attr);
++              if (!attr)
++                      return false;
++              if (!READ_ONCE(attr->exit_info))
++                      return false;
+       }
+       return true;
+@@ -878,9 +924,67 @@ static void pidfs_put_data(void *data)
+       put_pid(pid);
+ }
++/**
++ * pidfs_register_pid - register a struct pid in pidfs
++ * @pid: pid to pin
++ *
++ * Register a struct pid in pidfs. Needs to be paired with
++ * pidfs_put_pid() to not risk leaking the pidfs dentry and inode.
++ *
++ * Return: On success zero, on error a negative error code is returned.
++ */
++int pidfs_register_pid(struct pid *pid)
++{
++      struct pidfs_attr *new_attr __free(kfree) = NULL;
++      struct pidfs_attr *attr;
++
++      might_sleep();
++
++      if (!pid)
++              return 0;
++
++      attr = READ_ONCE(pid->attr);
++      if (unlikely(attr == PIDFS_PID_DEAD))
++              return PTR_ERR(PIDFS_PID_DEAD);
++      if (attr)
++              return 0;
++
++      new_attr = kmem_cache_zalloc(pidfs_attr_cachep, GFP_KERNEL);
++      if (!new_attr)
++              return -ENOMEM;
++
++      /* Synchronize with pidfs_exit(). */
++      guard(spinlock_irq)(&pid->wait_pidfd.lock);
++
++      attr = pid->attr;
++      if (unlikely(attr == PIDFS_PID_DEAD))
++              return PTR_ERR(PIDFS_PID_DEAD);
++      if (unlikely(attr))
++              return 0;
++
++      pid->attr = no_free_ptr(new_attr);
++      return 0;
++}
++
++static struct dentry *pidfs_stash_dentry(struct dentry **stashed,
++                                       struct dentry *dentry)
++{
++      int ret;
++      struct pid *pid = d_inode(dentry)->i_private;
++
++      VFS_WARN_ON_ONCE(stashed != &pid->stashed);
++
++      ret = pidfs_register_pid(pid);
++      if (ret)
++              return ERR_PTR(ret);
++
++      return stash_dentry(stashed, dentry);
++}
++
+ static const struct stashed_operations pidfs_stashed_ops = {
+-      .init_inode = pidfs_init_inode,
+-      .put_data = pidfs_put_data,
++      .stash_dentry   = pidfs_stash_dentry,
++      .init_inode     = pidfs_init_inode,
++      .put_data       = pidfs_put_data,
+ };
+ static int pidfs_init_fs_context(struct fs_context *fc)
+@@ -936,33 +1040,6 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags)
+       return pidfd_file;
+ }
+-/**
+- * pidfs_register_pid - register a struct pid in pidfs
+- * @pid: pid to pin
+- *
+- * Register a struct pid in pidfs. Needs to be paired with
+- * pidfs_put_pid() to not risk leaking the pidfs dentry and inode.
+- *
+- * Return: On success zero, on error a negative error code is returned.
+- */
+-int pidfs_register_pid(struct pid *pid)
+-{
+-      struct path path __free(path_put) = {};
+-      int ret;
+-
+-      might_sleep();
+-
+-      if (!pid)
+-              return 0;
+-
+-      ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
+-      if (unlikely(ret))
+-              return ret;
+-      /* Keep the dentry and only put the reference to the mount. */
+-      path.dentry = NULL;
+-      return 0;
+-}
+-
+ /**
+  * pidfs_get_pid - pin a struct pid through pidfs
+  * @pid: pid to pin
+@@ -1008,6 +1085,9 @@ void __init pidfs_init(void)
+                                        (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
+                                         SLAB_ACCOUNT | SLAB_PANIC),
+                                        pidfs_inode_init_once);
++      pidfs_attr_cachep = kmem_cache_create("pidfs_attr_cache", sizeof(struct pidfs_attr), 0,
++                                       (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
++                                        SLAB_ACCOUNT | SLAB_PANIC), NULL);
+       pidfs_mnt = kern_mount(&pidfs_type);
+       if (IS_ERR(pidfs_mnt))
+               panic("Failed to mount pidfs pseudo filesystem");
+diff --git a/include/linux/pid.h b/include/linux/pid.h
+index 00646a692dd4..003a1027d219 100644
+--- a/include/linux/pid.h
++++ b/include/linux/pid.h
+@@ -47,6 +47,8 @@
+ #define RESERVED_PIDS 300
++struct pidfs_attr;
++
+ struct upid {
+       int nr;
+       struct pid_namespace *ns;
+@@ -60,6 +62,7 @@ struct pid {
+               u64 ino;
+               struct rb_node pidfs_node;
+               struct dentry *stashed;
++              struct pidfs_attr *attr;
+       };
+       /* lists of tasks that use this pid */
+       struct hlist_head tasks[PIDTYPE_MAX];
+diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
+index 77e7db194914..8f6ed59bb3fb 100644
+--- a/include/linux/pidfs.h
++++ b/include/linux/pidfs.h
+@@ -16,5 +16,6 @@ extern const struct dentry_operations pidfs_dentry_operations;
+ int pidfs_register_pid(struct pid *pid);
+ void pidfs_get_pid(struct pid *pid);
+ void pidfs_put_pid(struct pid *pid);
++void pidfs_free_pid(struct pid *pid);
+ #endif /* _LINUX_PID_FS_H */
+diff --git a/kernel/pid.c b/kernel/pid.c
+index 8317bcbc7cf7..07db7d8d066c 100644
+--- a/kernel/pid.c
++++ b/kernel/pid.c
+@@ -100,7 +100,7 @@ void put_pid(struct pid *pid)
+       ns = pid->numbers[pid->level].ns;
+       if (refcount_dec_and_test(&pid->count)) {
+-              WARN_ON_ONCE(pid->stashed);
++              pidfs_free_pid(pid);
+               kmem_cache_free(ns->pid_cachep, pid);
+               put_pid_ns(ns);
+       }
+-- 
+2.50.1
+
index aaf91f41564cbb056feee0520108c73762bb7c11..0333ff56f7440099f2c6746c7afe6e783c225309 100644 (file)
@@ -277,3 +277,14 @@ pci-rockchip-set-target-link-speed-to-5.0-gt-s-before-retraining.patch
 drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch
 soc-qcom-mdt_loader-fix-error-return-values-in-mdt_header_valid.patch
 xfs-fix-frozen-file-system-assert-in-xfs_trans_alloc.patch
+fs-fix-incorrect-lflags-value-in-the-move_mount-sysc.patch
+btrfs-zoned-fix-data-relocation-block-group-reservat.patch
+fhandle-do_handle_open-should-get-fd-with-user-flags.patch
+libfs-massage-path_from_stashed-to-allow-custom-stas.patch
+pidfs-move-to-anonymous-struct.patch
+pidfs-persist-information.patch
+pidfs-fix-memory-leak-in-pidfd_info.patch
+smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+signal-fix-memory-leak-for-pidfd_self-sentinels.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-6.16/signal-fix-memory-leak-for-pidfd_self-sentinels.patch b/queue-6.16/signal-fix-memory-leak-for-pidfd_self-sentinels.patch
new file mode 100644 (file)
index 0000000..93ef2a7
--- /dev/null
@@ -0,0 +1,71 @@
+From 211db3292e9f8d7343b7c751a1175e263149ed91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Aug 2025 21:43:10 +0800
+Subject: signal: Fix memory leak for PIDFD_SELF* sentinels
+
+From: Adrian Huang (Lenovo) <adrianhuang0701@gmail.com>
+
+[ Upstream commit a2c1f82618b0b65f1ef615aa9cfdac8122537d69 ]
+
+Commit f08d0c3a7111 ("pidfd: add PIDFD_SELF* sentinels to refer to own
+thread/process") introduced a leak by acquiring a pid reference through
+get_task_pid(), which increments pid->count but never drops it with
+put_pid().
+
+As a result, kmemleak reports unreferenced pid objects after running
+tools/testing/selftests/pidfd/pidfd_test, for example:
+
+  unreferenced object 0xff1100206757a940 (size 160):
+    comm "pidfd_test", pid 16965, jiffies 4294853028
+    hex dump (first 32 bytes):
+      01 00 00 00 00 00 00 00 00 00 00 00 fd 57 50 04  .............WP.
+      5e 44 00 00 00 00 00 00 18 de 34 17 01 00 11 ff  ^D........4.....
+    backtrace (crc cd8844d4):
+      kmem_cache_alloc_noprof+0x2f4/0x3f0
+      alloc_pid+0x54/0x3d0
+      copy_process+0xd58/0x1740
+      kernel_clone+0x99/0x3b0
+      __do_sys_clone3+0xbe/0x100
+      do_syscall_64+0x7b/0x2c0
+      entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Fix this by calling put_pid() after do_pidfd_send_signal() returns.
+
+Fixes: f08d0c3a7111 ("pidfd: add PIDFD_SELF* sentinels to refer to own thread/process")
+Signed-off-by: Adrian Huang (Lenovo) <adrianhuang0701@gmail.com>
+Link: https://lore.kernel.org/20250818134310.12273-1-adrianhuang0701@gmail.com
+Tested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/signal.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/signal.c b/kernel/signal.c
+index 148082db9a55..6b1493558a3d 100644
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -4067,6 +4067,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+ {
+       struct pid *pid;
+       enum pid_type type;
++      int ret;
+       /* Enforce flags be set to 0 until we add an extension. */
+       if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
+@@ -4108,7 +4109,10 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+       }
+       }
+-      return do_pidfd_send_signal(pid, sig, type, info, flags);
++      ret = do_pidfd_send_signal(pid, sig, type, info, flags);
++      put_pid(pid);
++
++      return ret;
+ }
+ static int
+-- 
+2.50.1
+
diff --git a/queue-6.16/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch b/queue-6.16/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
new file mode 100644 (file)
index 0000000..4b6da11
--- /dev/null
@@ -0,0 +1,93 @@
+From 4bfcec7d6efb0f837917abe51cedc54e3b5cf2c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Aug 2025 18:45:46 +0200
+Subject: smb: server: split ksmbd_rdma_stop_listening() out of
+ ksmbd_rdma_destroy()
+
+From: Stefan Metzmacher <metze@samba.org>
+
+[ Upstream commit bac7b996d42e458a94578f4227795a0d4deef6fa ]
+
+We can't call destroy_workqueue(smb_direct_wq); before stop_sessions()!
+
+Otherwise already existing connections try to use smb_direct_wq as
+a NULL pointer.
+
+Cc: Namjae Jeon <linkinjeon@kernel.org>
+Cc: Steve French <smfrench@gmail.com>
+Cc: Tom Talpey <tom@talpey.com>
+Cc: linux-cifs@vger.kernel.org
+Cc: samba-technical@lists.samba.org
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Signed-off-by: Stefan Metzmacher <metze@samba.org>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/connection.c     | 3 ++-
+ fs/smb/server/transport_rdma.c | 5 ++++-
+ fs/smb/server/transport_rdma.h | 4 +++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
+index 3f04a2977ba8..67c4f73398df 100644
+--- a/fs/smb/server/connection.c
++++ b/fs/smb/server/connection.c
+@@ -504,7 +504,8 @@ void ksmbd_conn_transport_destroy(void)
+ {
+       mutex_lock(&init_lock);
+       ksmbd_tcp_destroy();
+-      ksmbd_rdma_destroy();
++      ksmbd_rdma_stop_listening();
+       stop_sessions();
++      ksmbd_rdma_destroy();
+       mutex_unlock(&init_lock);
+ }
+diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
+index 8d366db5f605..5466aa8c39b1 100644
+--- a/fs/smb/server/transport_rdma.c
++++ b/fs/smb/server/transport_rdma.c
+@@ -2194,7 +2194,7 @@ int ksmbd_rdma_init(void)
+       return 0;
+ }
+-void ksmbd_rdma_destroy(void)
++void ksmbd_rdma_stop_listening(void)
+ {
+       if (!smb_direct_listener.cm_id)
+               return;
+@@ -2203,7 +2203,10 @@ void ksmbd_rdma_destroy(void)
+       rdma_destroy_id(smb_direct_listener.cm_id);
+       smb_direct_listener.cm_id = NULL;
++}
++void ksmbd_rdma_destroy(void)
++{
+       if (smb_direct_wq) {
+               destroy_workqueue(smb_direct_wq);
+               smb_direct_wq = NULL;
+diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h
+index 77aee4e5c9dc..a2291b77488a 100644
+--- a/fs/smb/server/transport_rdma.h
++++ b/fs/smb/server/transport_rdma.h
+@@ -54,13 +54,15 @@ struct smb_direct_data_transfer {
+ #ifdef CONFIG_SMB_SERVER_SMBDIRECT
+ int ksmbd_rdma_init(void);
++void ksmbd_rdma_stop_listening(void);
+ void ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+ void init_smbd_max_io_size(unsigned int sz);
+ unsigned int get_smbd_max_read_write_size(void);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+-static inline int ksmbd_rdma_destroy(void) { return 0; }
++static inline void ksmbd_rdma_stop_listening(void) { }
++static inline void ksmbd_rdma_destroy(void) { }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+ static inline void init_smbd_max_io_size(unsigned int sz) { }
+ static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
+-- 
+2.50.1
+
diff --git a/queue-6.16/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-6.16/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..3969b89
--- /dev/null
@@ -0,0 +1,103 @@
+From b26c76ec9fce85602e7825ed976fffab1aa89d51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow to mess with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 49d016711469..6b038bf74a3d 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2925,6 +2925,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, 0);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2961,10 +2974,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -3419,18 +3432,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+
diff --git a/queue-6.6/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch b/queue-6.6/fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
new file mode 100644 (file)
index 0000000..d532370
--- /dev/null
@@ -0,0 +1,96 @@
+From 8e64499c0e7e250049cea183fe9e4e9a06889a26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Aug 2025 22:18:30 +0800
+Subject: fs/buffer: fix use-after-free when call bh_read() helper
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 7375f22495e7cd1c5b3b5af9dcc4f6dffe34ce49 ]
+
+There's issue as follows:
+BUG: KASAN: stack-out-of-bounds in end_buffer_read_sync+0xe3/0x110
+Read of size 8 at addr ffffc9000168f7f8 by task swapper/3/0
+CPU: 3 UID: 0 PID: 0 Comm: swapper/3 Not tainted 6.16.0-862.14.0.6.x86_64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x2c/0x390
+ print_report+0xb4/0x270
+ kasan_report+0xb8/0xf0
+ end_buffer_read_sync+0xe3/0x110
+ end_bio_bh_io_sync+0x56/0x80
+ blk_update_request+0x30a/0x720
+ scsi_end_request+0x51/0x2b0
+ scsi_io_completion+0xe3/0x480
+ ? scsi_device_unbusy+0x11e/0x160
+ blk_complete_reqs+0x7b/0x90
+ handle_softirqs+0xef/0x370
+ irq_exit_rcu+0xa5/0xd0
+ sysvec_apic_timer_interrupt+0x6e/0x90
+ </IRQ>
+
+ The above issue happens when mounting an ntfs3 filesystem; it may happen
+ as follows:
+           mount                            IRQ
+ntfs_fill_super
+  read_cache_page
+    do_read_cache_folio
+      filemap_read_folio
+        mpage_read_folio
+        do_mpage_readpage
+         ntfs_get_block_vbo
+          bh_read
+            submit_bh
+            wait_on_buffer(bh);
+                                   blk_complete_reqs
+                                    scsi_io_completion
+                                     scsi_end_request
+                                      blk_update_request
+                                       end_bio_bh_io_sync
+                                        end_buffer_read_sync
+                                         __end_buffer_read_notouch
+                                          unlock_buffer
+
+            wait_on_buffer(bh);--> return will return to caller
+
+                                         put_bh
+                                           --> trigger stack-out-of-bounds
+In the mpage_read_folio() function, the stack variable 'map_bh' is
+passed to ntfs_get_block_vbo(). Once unlock_buffer() unlocks and
+wait_on_buffer() returns to continue processing, the stack variable
+is likely to be reclaimed. Consequently, during the end_buffer_read_sync()
+process, calling put_bh() may result in stack overrun.
+
+If the bh is not allocated on the stack, it belongs to a folio.  Freeing
+a buffer head which belongs to a folio is done by drop_buffers() which
+will fail to free buffers which are still locked.  So it is safe to call
+put_bh() before __end_buffer_read_notouch().
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/20250811141830.343774-1-yebin@huaweicloud.com
+Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/buffer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 4b86e971efd8..32df6163ffed 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
+  */
+ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+ {
+-      __end_buffer_read_notouch(bh, uptodate);
+       put_bh(bh);
++      __end_buffer_read_notouch(bh, uptodate);
+ }
+ EXPORT_SYMBOL(end_buffer_read_sync);
+-- 
+2.50.1
+
index ea891e491438505979043f4f58112eca7fe1d07f..efc6df1ec4a89b5b5c9d60dec10ef161cfd74514 100644 (file)
@@ -499,3 +499,6 @@ scsi-mpi3mr-drop-unnecessary-volatile-from-__iomem-pointers.patch
 scsi-mpi3mr-serialize-admin-queue-bar-writes-on-32-bit-systems.patch
 ext4-preserve-sb_i_version-on-remount.patch
 arm64-dts-ti-k3-am62-main-remove-emmc-high-speed-ddr-support.patch
+smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
+fs-buffer-fix-use-after-free-when-call-bh_read-helpe.patch
+use-uniform-permission-checks-for-all-mount-propagat.patch
diff --git a/queue-6.6/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch b/queue-6.6/smb-server-split-ksmbd_rdma_stop_listening-out-of-ks.patch
new file mode 100644 (file)
index 0000000..df8a644
--- /dev/null
@@ -0,0 +1,93 @@
+From 7b277e5e66e7664dc6d2c2cb01ed006f430d6a1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Aug 2025 18:45:46 +0200
+Subject: smb: server: split ksmbd_rdma_stop_listening() out of
+ ksmbd_rdma_destroy()
+
+From: Stefan Metzmacher <metze@samba.org>
+
+[ Upstream commit bac7b996d42e458a94578f4227795a0d4deef6fa ]
+
+We can't call destroy_workqueue(smb_direct_wq); before stop_sessions()!
+
+Otherwise, already existing connections may still try to use
+smb_direct_wq after it has been destroyed and set to NULL.
+
+Cc: Namjae Jeon <linkinjeon@kernel.org>
+Cc: Steve French <smfrench@gmail.com>
+Cc: Tom Talpey <tom@talpey.com>
+Cc: linux-cifs@vger.kernel.org
+Cc: samba-technical@lists.samba.org
+Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers")
+Signed-off-by: Stefan Metzmacher <metze@samba.org>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/server/connection.c     | 3 ++-
+ fs/smb/server/transport_rdma.c | 5 ++++-
+ fs/smb/server/transport_rdma.h | 4 +++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
+index 66b20c3d963e..f5ebc200dd73 100644
+--- a/fs/smb/server/connection.c
++++ b/fs/smb/server/connection.c
+@@ -503,7 +503,8 @@ void ksmbd_conn_transport_destroy(void)
+ {
+       mutex_lock(&init_lock);
+       ksmbd_tcp_destroy();
+-      ksmbd_rdma_destroy();
++      ksmbd_rdma_stop_listening();
+       stop_sessions();
++      ksmbd_rdma_destroy();
+       mutex_unlock(&init_lock);
+ }
+diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
+index 6c3a57bff147..a4ff1167c9a1 100644
+--- a/fs/smb/server/transport_rdma.c
++++ b/fs/smb/server/transport_rdma.c
+@@ -2193,7 +2193,7 @@ int ksmbd_rdma_init(void)
+       return 0;
+ }
+-void ksmbd_rdma_destroy(void)
++void ksmbd_rdma_stop_listening(void)
+ {
+       if (!smb_direct_listener.cm_id)
+               return;
+@@ -2202,7 +2202,10 @@ void ksmbd_rdma_destroy(void)
+       rdma_destroy_id(smb_direct_listener.cm_id);
+       smb_direct_listener.cm_id = NULL;
++}
++void ksmbd_rdma_destroy(void)
++{
+       if (smb_direct_wq) {
+               destroy_workqueue(smb_direct_wq);
+               smb_direct_wq = NULL;
+diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h
+index 77aee4e5c9dc..a2291b77488a 100644
+--- a/fs/smb/server/transport_rdma.h
++++ b/fs/smb/server/transport_rdma.h
+@@ -54,13 +54,15 @@ struct smb_direct_data_transfer {
+ #ifdef CONFIG_SMB_SERVER_SMBDIRECT
+ int ksmbd_rdma_init(void);
++void ksmbd_rdma_stop_listening(void);
+ void ksmbd_rdma_destroy(void);
+ bool ksmbd_rdma_capable_netdev(struct net_device *netdev);
+ void init_smbd_max_io_size(unsigned int sz);
+ unsigned int get_smbd_max_read_write_size(void);
+ #else
+ static inline int ksmbd_rdma_init(void) { return 0; }
+-static inline int ksmbd_rdma_destroy(void) { return 0; }
++static inline void ksmbd_rdma_stop_listening(void) { }
++static inline void ksmbd_rdma_destroy(void) { }
+ static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; }
+ static inline void init_smbd_max_io_size(unsigned int sz) { }
+ static inline unsigned int get_smbd_max_read_write_size(void) { return 0; }
+-- 
+2.50.1
+
diff --git a/queue-6.6/use-uniform-permission-checks-for-all-mount-propagat.patch b/queue-6.6/use-uniform-permission-checks-for-all-mount-propagat.patch
new file mode 100644 (file)
index 0000000..79fc931
--- /dev/null
@@ -0,0 +1,103 @@
+From bbf50df32c1759b99735d8fc42a7df9c6770d874 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Aug 2025 01:44:31 -0400
+Subject: use uniform permission checks for all mount propagation changes
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit cffd0441872e7f6b1fce5e78fb1c99187a291330 ]
+
+do_change_type() and do_set_group() are operating on different
+aspects of the same thing - the propagation graph.  The latter
+asks for mounts involved to be mounted in namespace(s) the caller
+has CAP_SYS_ADMIN for.  The former is a mess - originally it
+didn't even check that the mount *is* mounted.  That got fixed,
+but the resulting check turns out to be too strict for userland -
+in effect, we check that the mount is in our namespace, having already
+checked that we have CAP_SYS_ADMIN there.
+
+What we really need (in both cases) is
+       * only touch mounts that are mounted.  That's a must-have
+constraint - data corruption happens if it gets violated.
+       * don't allow messing with a namespace unless you already
+have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).
+
+That's an equivalent of what do_set_group() does; let's extract that
+into a helper (may_change_propagation()) and use it in both
+do_set_group() and do_change_type().
+
+Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
+Acked-by: Andrei Vagin <avagin@gmail.com>
+Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namespace.c | 34 ++++++++++++++++++++--------------
+ 1 file changed, 20 insertions(+), 14 deletions(-)
+
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 6a9c53c800c4..f79226472251 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2526,6 +2526,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
+       return attach_recursive_mnt(mnt, p, mp, 0);
+ }
++static int may_change_propagation(const struct mount *m)
++{
++        struct mnt_namespace *ns = m->mnt_ns;
++
++       // it must be mounted in some namespace
++       if (IS_ERR_OR_NULL(ns))         // is_mounted()
++               return -EINVAL;
++       // and the caller must be admin in userns of that namespace
++       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
++               return -EPERM;
++       return 0;
++}
++
+ /*
+  * Sanity check the flags to change_mnt_propagation.
+  */
+@@ -2562,10 +2575,10 @@ static int do_change_type(struct path *path, int ms_flags)
+               return -EINVAL;
+       namespace_lock();
+-      if (!check_mnt(mnt)) {
+-              err = -EINVAL;
++      err = may_change_propagation(mnt);
++      if (err)
+               goto out_unlock;
+-      }
++
+       if (type == MS_SHARED) {
+               err = invent_group_ids(mnt, recurse);
+               if (err)
+@@ -2960,18 +2973,11 @@ static int do_set_group(struct path *from_path, struct path *to_path)
+       namespace_lock();
+-      err = -EINVAL;
+-      /* To and From must be mounted */
+-      if (!is_mounted(&from->mnt))
+-              goto out;
+-      if (!is_mounted(&to->mnt))
+-              goto out;
+-
+-      err = -EPERM;
+-      /* We should be allowed to modify mount namespaces of both mounts */
+-      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(from);
++      if (err)
+               goto out;
+-      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
++      err = may_change_propagation(to);
++      if (err)
+               goto out;
+       err = -EINVAL;
+-- 
+2.50.1
+