]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author Sasha Levin <sashal@kernel.org>
Sat, 3 Aug 2024 14:48:14 +0000 (10:48 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 3 Aug 2024 14:48:14 +0000 (10:48 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
27 files changed:
queue-5.10/devres-fix-memory-leakage-caused-by-driver-api-devm_.patch [new file with mode: 0644]
queue-5.10/driver-core-cast-to-void-with-__force-for-__percpu-p.patch [new file with mode: 0644]
queue-5.10/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch [new file with mode: 0644]
queue-5.10/ext4-check-the-extent-status-again-before-inserting-.patch [new file with mode: 0644]
queue-5.10/ext4-factor-out-a-common-helper-to-query-extent-map.patch [new file with mode: 0644]
queue-5.10/fuse-name-fs_context-consistently.patch [new file with mode: 0644]
queue-5.10/fuse-verify-g-u-id-mount-options-correctly.patch [new file with mode: 0644]
queue-5.10/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch [new file with mode: 0644]
queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch [new file with mode: 0644]
queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch [new file with mode: 0644]
queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch [new file with mode: 0644]
queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch [new file with mode: 0644]
queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch [new file with mode: 0644]
queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch [new file with mode: 0644]
queue-5.10/irqchip-imx-irqsteer-add-runtime-pm-support.patch [new file with mode: 0644]
queue-5.10/irqchip-imx-irqsteer-constify-irq_chip-struct.patch [new file with mode: 0644]
queue-5.10/irqchip-imx-irqsteer-handle-runtime-power-management.patch [new file with mode: 0644]
queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch [new file with mode: 0644]
queue-5.10/remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch [new file with mode: 0644]
queue-5.10/remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch [new file with mode: 0644]
queue-5.10/remoteproc-imx_rproc-skip-over-memory-region-when-no.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch [new file with mode: 0644]
queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch [new file with mode: 0644]
queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch [new file with mode: 0644]
queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch [new file with mode: 0644]
queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch [new file with mode: 0644]

diff --git a/queue-5.10/devres-fix-memory-leakage-caused-by-driver-api-devm_.patch b/queue-5.10/devres-fix-memory-leakage-caused-by-driver-api-devm_.patch
new file mode 100644 (file)
index 0000000..8d67fbe
--- /dev/null
@@ -0,0 +1,43 @@
+From f719c758e33b8380fbbfc31ec202e5a74f80d568 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 22:51:51 +0800
+Subject: devres: Fix memory leakage caused by driver API devm_free_percpu()
+
+From: Zijun Hu <quic_zijuhu@quicinc.com>
+
+[ Upstream commit bd50a974097bb82d52a458bd3ee39fb723129a0c ]
+
+Memory is leaked when the driver API devm_free_percpu() is used to free
+memory allocated by devm_alloc_percpu(). Fix this by using
+devres_release() instead of devres_destroy() within devm_free_percpu().
+
+Fixes: ff86aae3b411 ("devres: add devm_alloc_percpu()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
+Link: https://lore.kernel.org/r/1719931914-19035-3-git-send-email-quic_zijuhu@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devres.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/base/devres.c b/drivers/base/devres.c
+index a1508eeb8ebd1..8a74008c13c44 100644
+--- a/drivers/base/devres.c
++++ b/drivers/base/devres.c
+@@ -1230,7 +1230,11 @@ EXPORT_SYMBOL_GPL(__devm_alloc_percpu);
+  */
+ void devm_free_percpu(struct device *dev, void __percpu *pdata)
+ {
+-      WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match,
++      /*
++       * Use devres_release() to prevent memory leakage as
++       * devm_free_pages() does.
++       */
++      WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match,
+                              (__force void *)pdata));
+ }
+ EXPORT_SYMBOL_GPL(devm_free_percpu);
+-- 
+2.43.0
+
diff --git a/queue-5.10/driver-core-cast-to-void-with-__force-for-__percpu-p.patch b/queue-5.10/driver-core-cast-to-void-with-__force-for-__percpu-p.patch
new file mode 100644 (file)
index 0000000..51ffee0
--- /dev/null
@@ -0,0 +1,39 @@
+From 1737e7bfd1ca2c6c9bbe297d01e3e24256226cf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Apr 2021 20:10:30 +0300
+Subject: driver core: Cast to (void *) with __force for __percpu pointer
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit d7aa44f5a1f86cb40659eef06035d8d92604b9d5 ]
+
+Sparse is not happy:
+
+  drivers/base/devres.c:1230:9: warning: cast removes address space '__percpu' of expression
+
+Use __force attribute to make it happy.
+
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20210401171030.60527-1-andriy.shevchenko@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: bd50a974097b ("devres: Fix memory leakage caused by driver API devm_free_percpu()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/devres.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/base/devres.c b/drivers/base/devres.c
+index 452541ce0dc82..a1508eeb8ebd1 100644
+--- a/drivers/base/devres.c
++++ b/drivers/base/devres.c
+@@ -1231,6 +1231,6 @@ EXPORT_SYMBOL_GPL(__devm_alloc_percpu);
+ void devm_free_percpu(struct device *dev, void __percpu *pdata)
+ {
+       WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match,
+-                             (void *)pdata));
++                             (__force void *)pdata));
+ }
+ EXPORT_SYMBOL_GPL(devm_free_percpu);
+-- 
+2.43.0
+
diff --git a/queue-5.10/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch b/queue-5.10/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch
new file mode 100644 (file)
index 0000000..7d82053
--- /dev/null
@@ -0,0 +1,53 @@
+From a9e63de4caf04291dc9359a64863b4dc1012eae1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 May 2024 04:23:45 -0700
+Subject: drivers: soc: xilinx: check return status of get_api_version()
+
+From: Jay Buddhabhatti <jay.buddhabhatti@amd.com>
+
+[ Upstream commit 9b003e14801cf85a8cebeddc87bc9fc77100fdce ]
+
+Currently the return status of get_api_version is not checked, and
+because of that we get the smatch error below on the x86 arch.
+
+    CC      drivers/soc/xilinx/zynqmp_power.o
+drivers/soc/xilinx/zynqmp_power.c: In function 'zynqmp_pm_probe':
+drivers/soc/xilinx/zynqmp_power.c:295:12: warning: 'pm_api_version' is
+used uninitialized [-Wuninitialized]
+    295 |         if (pm_api_version < ZYNQMP_PM_VERSION)
+        |            ^
+    CHECK   drivers/soc/xilinx/zynqmp_power.c
+drivers/soc/xilinx/zynqmp_power.c:295 zynqmp_pm_probe() error:
+uninitialized symbol 'pm_api_version'.
+
+So, check return status of pm_get_api_version and return error in case
+of failure to avoid checking uninitialized pm_api_version variable.
+
+Fixes: b9b3a8be28b3 ("firmware: xilinx: Remove eemi ops for get_api_version")
+Signed-off-by: Jay Buddhabhatti <jay.buddhabhatti@amd.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240515112345.24673-1-jay.buddhabhatti@amd.com
+Signed-off-by: Michal Simek <michal.simek@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/xilinx/zynqmp_power.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c
+index f8c301984d4f9..2653d29ba829b 100644
+--- a/drivers/soc/xilinx/zynqmp_power.c
++++ b/drivers/soc/xilinx/zynqmp_power.c
+@@ -178,7 +178,9 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
+       u32 pm_api_version;
+       struct mbox_client *client;
+-      zynqmp_pm_get_api_version(&pm_api_version);
++      ret = zynqmp_pm_get_api_version(&pm_api_version);
++      if (ret)
++              return ret;
+       /* Check PM API version number */
+       if (pm_api_version < ZYNQMP_PM_VERSION)
+-- 
+2.43.0
+
diff --git a/queue-5.10/ext4-check-the-extent-status-again-before-inserting-.patch b/queue-5.10/ext4-check-the-extent-status-again-before-inserting-.patch
new file mode 100644 (file)
index 0000000..341ba96
--- /dev/null
@@ -0,0 +1,100 @@
+From 4ed5c1a089d0f6408d657cf498c880172aa7d41b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 20:39:57 +0800
+Subject: ext4: check the extent status again before inserting delalloc block
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit 0ea6560abb3bac1ffcfa4bf6b2c4d344fdc27b3c ]
+
+ext4_da_map_blocks looks up any extent entry in the extent status
+tree (w/o i_data_sem) and then looks up any ondisk extent
+mapping (with i_data_sem in read mode).
+
+If it finds a hole in the extent status tree or if it couldn't find any
+entry at all, it then takes the i_data_sem in write mode to add a da
+entry into the extent status tree. This can actually race with page
+mkwrite & fallocate path.
+
+Note that this is ok between
+1. ext4 buffered-write path v/s ext4_page_mkwrite(), because of the
+   folio lock
+2. ext4 buffered write path v/s ext4 fallocate because of the inode
+   lock.
+
+But this can race between ext4_page_mkwrite() & ext4 fallocate path
+
+ext4_page_mkwrite()             ext4_fallocate()
+ block_page_mkwrite()
+  ext4_da_map_blocks()
+   //find hole in extent status tree
+                                 ext4_alloc_file_blocks()
+                                  ext4_map_blocks()
+                                   //allocate block and unwritten extent
+   ext4_insert_delayed_block()
+    ext4_da_reserve_space()
+     //reserve one more block
+    ext4_es_insert_delayed_block()
+     //drop unwritten extent and add delayed extent by mistake
+
+Then, the delalloc extent is wrong until writeback and the extra
+reserved block can't be released any more and it triggers below warning:
+
+ EXT4-fs (pmem2): Inode 13 (00000000bbbd4d23): i_reserved_data_blocks(1) not cleared!
+
+Fix the problem by looking up extent status tree again while the
+i_data_sem is held in write mode. If it still can't find any entry, then
+we insert a new da entry into the extent status tree.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20240517124005.347221-3-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index a252c84edac8c..6e9323a56d289 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1760,6 +1760,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+               if (ext4_es_is_hole(&es))
+                       goto add_delayed;
++found:
+               /*
+                * Delayed extent could be allocated by fallocate.
+                * So we need to check it.
+@@ -1804,6 +1805,26 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+ add_delayed:
+       down_write(&EXT4_I(inode)->i_data_sem);
++      /*
++       * Page fault path (ext4_page_mkwrite does not take i_rwsem)
++       * and fallocate path (no folio lock) can race. Make sure we
++       * lookup the extent status tree here again while i_data_sem
++       * is held in write mode, before inserting a new da entry in
++       * the extent status tree.
++       */
++      if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
++              if (!ext4_es_is_hole(&es)) {
++                      up_write(&EXT4_I(inode)->i_data_sem);
++                      goto found;
++              }
++      } else if (!ext4_has_inline_data(inode)) {
++              retval = ext4_map_query_blocks(NULL, inode, map);
++              if (retval) {
++                      up_write(&EXT4_I(inode)->i_data_sem);
++                      return retval;
++              }
++      }
++
+       retval = ext4_insert_delayed_block(inode, map->m_lblk);
+       up_write(&EXT4_I(inode)->i_data_sem);
+       if (retval)
+-- 
+2.43.0
+
diff --git a/queue-5.10/ext4-factor-out-a-common-helper-to-query-extent-map.patch b/queue-5.10/ext4-factor-out-a-common-helper-to-query-extent-map.patch
new file mode 100644 (file)
index 0000000..0a41c9f
--- /dev/null
@@ -0,0 +1,104 @@
+From 9c8b75fd11eacab9a71251b14b7353be405fe4be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 May 2024 20:39:56 +0800
+Subject: ext4: factor out a common helper to query extent map
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit 8e4e5cdf2fdeb99445a468b6b6436ad79b9ecb30 ]
+
+Factor out a new common helper ext4_map_query_blocks() from
+ext4_da_map_blocks(). It queries and returns the extent map status on the
+inode's extent path. No logic changes.
+
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://patch.msgid.link/20240517124005.347221-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 57 +++++++++++++++++++++++++++----------------------
+ 1 file changed, 32 insertions(+), 25 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 8b48ed351c4b9..a252c84edac8c 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -484,6 +484,35 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
+ }
+ #endif /* ES_AGGRESSIVE_TEST */
++static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
++                               struct ext4_map_blocks *map)
++{
++      unsigned int status;
++      int retval;
++
++      if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
++              retval = ext4_ext_map_blocks(handle, inode, map, 0);
++      else
++              retval = ext4_ind_map_blocks(handle, inode, map, 0);
++
++      if (retval <= 0)
++              return retval;
++
++      if (unlikely(retval != map->m_len)) {
++              ext4_warning(inode->i_sb,
++                           "ES len assertion failed for inode "
++                           "%lu: retval %d != map->m_len %d",
++                           inode->i_ino, retval, map->m_len);
++              WARN_ON(1);
++      }
++
++      status = map->m_flags & EXT4_MAP_UNWRITTEN ?
++                      EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
++      ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
++                            map->m_pblk, status);
++      return retval;
++}
++
+ /*
+  * The ext4_map_blocks() function tries to look up the requested blocks,
+  * and returns if the blocks are already mapped.
+@@ -1767,33 +1796,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
+       down_read(&EXT4_I(inode)->i_data_sem);
+       if (ext4_has_inline_data(inode))
+               retval = 0;
+-      else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+-              retval = ext4_ext_map_blocks(NULL, inode, map, 0);
+       else
+-              retval = ext4_ind_map_blocks(NULL, inode, map, 0);
+-      if (retval < 0) {
+-              up_read(&EXT4_I(inode)->i_data_sem);
+-              return retval;
+-      }
+-      if (retval > 0) {
+-              unsigned int status;
+-
+-              if (unlikely(retval != map->m_len)) {
+-                      ext4_warning(inode->i_sb,
+-                                   "ES len assertion failed for inode "
+-                                   "%lu: retval %d != map->m_len %d",
+-                                   inode->i_ino, retval, map->m_len);
+-                      WARN_ON(1);
+-              }
+-
+-              status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+-                              EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+-              ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+-                                    map->m_pblk, status);
+-              up_read(&EXT4_I(inode)->i_data_sem);
+-              return retval;
+-      }
++              retval = ext4_map_query_blocks(NULL, inode, map);
+       up_read(&EXT4_I(inode)->i_data_sem);
++      if (retval)
++              return retval;
+ add_delayed:
+       down_write(&EXT4_I(inode)->i_data_sem);
+-- 
+2.43.0
+
diff --git a/queue-5.10/fuse-name-fs_context-consistently.patch b/queue-5.10/fuse-name-fs_context-consistently.patch
new file mode 100644 (file)
index 0000000..069bd55
--- /dev/null
@@ -0,0 +1,280 @@
+From 7256357bb327af4770cfc2189f8d0312378fe513 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Aug 2021 13:22:58 +0200
+Subject: fuse: name fs_context consistently
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit 84c215075b5723ab946708a6c74c26bd3c51114c ]
+
+Naming convention under fs/fuse/:
+
+       struct fuse_conn *fc;
+       struct fs_context *fsc;
+
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Stable-dep-of: 525bd65aa759 ("fuse: verify {g,u}id mount options correctly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/control.c   | 10 ++++----
+ fs/fuse/inode.c     | 60 ++++++++++++++++++++++-----------------------
+ fs/fuse/virtio_fs.c | 12 ++++-----
+ 3 files changed, 41 insertions(+), 41 deletions(-)
+
+diff --git a/fs/fuse/control.c b/fs/fuse/control.c
+index 24b4d9db231db..79f01d09c78cb 100644
+--- a/fs/fuse/control.c
++++ b/fs/fuse/control.c
+@@ -328,7 +328,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
+       drop_nlink(d_inode(fuse_control_sb->s_root));
+ }
+-static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
++static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc)
+ {
+       static const struct tree_descr empty_descr = {""};
+       struct fuse_conn *fc;
+@@ -354,18 +354,18 @@ static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
+       return 0;
+ }
+-static int fuse_ctl_get_tree(struct fs_context *fc)
++static int fuse_ctl_get_tree(struct fs_context *fsc)
+ {
+-      return get_tree_single(fc, fuse_ctl_fill_super);
++      return get_tree_single(fsc, fuse_ctl_fill_super);
+ }
+ static const struct fs_context_operations fuse_ctl_context_ops = {
+       .get_tree       = fuse_ctl_get_tree,
+ };
+-static int fuse_ctl_init_fs_context(struct fs_context *fc)
++static int fuse_ctl_init_fs_context(struct fs_context *fsc)
+ {
+-      fc->ops = &fuse_ctl_context_ops;
++      fsc->ops = &fuse_ctl_context_ops;
+       return 0;
+ }
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 4a7ebccd359ee..5f9b2dc59135b 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -141,12 +141,12 @@ static void fuse_evict_inode(struct inode *inode)
+       }
+ }
+-static int fuse_reconfigure(struct fs_context *fc)
++static int fuse_reconfigure(struct fs_context *fsc)
+ {
+-      struct super_block *sb = fc->root->d_sb;
++      struct super_block *sb = fsc->root->d_sb;
+       sync_filesystem(sb);
+-      if (fc->sb_flags & SB_MANDLOCK)
++      if (fsc->sb_flags & SB_MANDLOCK)
+               return -EINVAL;
+       return 0;
+@@ -535,38 +535,38 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
+       {}
+ };
+-static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
++static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
+ {
+       struct fs_parse_result result;
+-      struct fuse_fs_context *ctx = fc->fs_private;
++      struct fuse_fs_context *ctx = fsc->fs_private;
+       int opt;
+-      if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
++      if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+               /*
+                * Ignore options coming from mount(MS_REMOUNT) for backward
+                * compatibility.
+                */
+-              if (fc->oldapi)
++              if (fsc->oldapi)
+                       return 0;
+-              return invalfc(fc, "No changes allowed in reconfigure");
++              return invalfc(fsc, "No changes allowed in reconfigure");
+       }
+-      opt = fs_parse(fc, fuse_fs_parameters, param, &result);
++      opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+       switch (opt) {
+       case OPT_SOURCE:
+-              if (fc->source)
+-                      return invalfc(fc, "Multiple sources specified");
+-              fc->source = param->string;
++              if (fsc->source)
++                      return invalfc(fsc, "Multiple sources specified");
++              fsc->source = param->string;
+               param->string = NULL;
+               break;
+       case OPT_SUBTYPE:
+               if (ctx->subtype)
+-                      return invalfc(fc, "Multiple subtypes specified");
++                      return invalfc(fsc, "Multiple subtypes specified");
+               ctx->subtype = param->string;
+               param->string = NULL;
+               return 0;
+@@ -578,22 +578,22 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
+       case OPT_ROOTMODE:
+               if (!fuse_valid_type(result.uint_32))
+-                      return invalfc(fc, "Invalid rootmode");
++                      return invalfc(fsc, "Invalid rootmode");
+               ctx->rootmode = result.uint_32;
+               ctx->rootmode_present = true;
+               break;
+       case OPT_USER_ID:
+-              ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
++              ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
+               if (!uid_valid(ctx->user_id))
+-                      return invalfc(fc, "Invalid user_id");
++                      return invalfc(fsc, "Invalid user_id");
+               ctx->user_id_present = true;
+               break;
+       case OPT_GROUP_ID:
+-              ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
++              ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
+               if (!gid_valid(ctx->group_id))
+-                      return invalfc(fc, "Invalid group_id");
++                      return invalfc(fsc, "Invalid group_id");
+               ctx->group_id_present = true;
+               break;
+@@ -611,7 +611,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
+       case OPT_BLKSIZE:
+               if (!ctx->is_bdev)
+-                      return invalfc(fc, "blksize only supported for fuseblk");
++                      return invalfc(fsc, "blksize only supported for fuseblk");
+               ctx->blksize = result.uint_32;
+               break;
+@@ -622,9 +622,9 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
+       return 0;
+ }
+-static void fuse_free_fc(struct fs_context *fc)
++static void fuse_free_fsc(struct fs_context *fsc)
+ {
+-      struct fuse_fs_context *ctx = fc->fs_private;
++      struct fuse_fs_context *ctx = fsc->fs_private;
+       if (ctx) {
+               kfree(ctx->subtype);
+@@ -1486,9 +1486,9 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
+       return err;
+ }
+-static int fuse_get_tree(struct fs_context *fc)
++static int fuse_get_tree(struct fs_context *fsc)
+ {
+-      struct fuse_fs_context *ctx = fc->fs_private;
++      struct fuse_fs_context *ctx = fsc->fs_private;
+       if (!ctx->fd_present || !ctx->rootmode_present ||
+           !ctx->user_id_present || !ctx->group_id_present)
+@@ -1496,14 +1496,14 @@ static int fuse_get_tree(struct fs_context *fc)
+ #ifdef CONFIG_BLOCK
+       if (ctx->is_bdev)
+-              return get_tree_bdev(fc, fuse_fill_super);
++              return get_tree_bdev(fsc, fuse_fill_super);
+ #endif
+-      return get_tree_nodev(fc, fuse_fill_super);
++      return get_tree_nodev(fsc, fuse_fill_super);
+ }
+ static const struct fs_context_operations fuse_context_ops = {
+-      .free           = fuse_free_fc,
++      .free           = fuse_free_fsc,
+       .parse_param    = fuse_parse_param,
+       .reconfigure    = fuse_reconfigure,
+       .get_tree       = fuse_get_tree,
+@@ -1512,7 +1512,7 @@ static const struct fs_context_operations fuse_context_ops = {
+ /*
+  * Set up the filesystem mount context.
+  */
+-static int fuse_init_fs_context(struct fs_context *fc)
++static int fuse_init_fs_context(struct fs_context *fsc)
+ {
+       struct fuse_fs_context *ctx;
+@@ -1525,14 +1525,14 @@ static int fuse_init_fs_context(struct fs_context *fc)
+       ctx->legacy_opts_show = true;
+ #ifdef CONFIG_BLOCK
+-      if (fc->fs_type == &fuseblk_fs_type) {
++      if (fsc->fs_type == &fuseblk_fs_type) {
+               ctx->is_bdev = true;
+               ctx->destroy = true;
+       }
+ #endif
+-      fc->fs_private = ctx;
+-      fc->ops = &fuse_context_ops;
++      fsc->fs_private = ctx;
++      fsc->ops = &fuse_context_ops;
+       return 0;
+ }
+diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
+index faadc80485e7f..7d4655022afc6 100644
+--- a/fs/fuse/virtio_fs.c
++++ b/fs/fuse/virtio_fs.c
+@@ -97,14 +97,14 @@ static const struct fs_parameter_spec virtio_fs_parameters[] = {
+       {}
+ };
+-static int virtio_fs_parse_param(struct fs_context *fc,
++static int virtio_fs_parse_param(struct fs_context *fsc,
+                                struct fs_parameter *param)
+ {
+       struct fs_parse_result result;
+-      struct fuse_fs_context *ctx = fc->fs_private;
++      struct fuse_fs_context *ctx = fsc->fs_private;
+       int opt;
+-      opt = fs_parse(fc, virtio_fs_parameters, param, &result);
++      opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
+       if (opt < 0)
+               return opt;
+@@ -119,9 +119,9 @@ static int virtio_fs_parse_param(struct fs_context *fc,
+       return 0;
+ }
+-static void virtio_fs_free_fc(struct fs_context *fc)
++static void virtio_fs_free_fsc(struct fs_context *fsc)
+ {
+-      struct fuse_fs_context *ctx = fc->fs_private;
++      struct fuse_fs_context *ctx = fsc->fs_private;
+       kfree(ctx);
+ }
+@@ -1500,7 +1500,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
+ }
+ static const struct fs_context_operations virtio_fs_context_ops = {
+-      .free           = virtio_fs_free_fc,
++      .free           = virtio_fs_free_fsc,
+       .parse_param    = virtio_fs_parse_param,
+       .get_tree       = virtio_fs_get_tree,
+ };
+-- 
+2.43.0
+
diff --git a/queue-5.10/fuse-verify-g-u-id-mount-options-correctly.patch b/queue-5.10/fuse-verify-g-u-id-mount-options-correctly.patch
new file mode 100644 (file)
index 0000000..359580a
--- /dev/null
@@ -0,0 +1,86 @@
+From 491c7ff888ab75bfbadad5ca1b9b3f764781fdc3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 17:22:41 -0500
+Subject: fuse: verify {g,u}id mount options correctly
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+[ Upstream commit 525bd65aa759ec320af1dc06e114ed69733e9e23 ]
+
+As was done in
+0200679fc795 ("tmpfs: verify {g,u}id mount options correctly")
+we need to validate that the requested uid and/or gid is representable in
+the filesystem's idmapping.
+
+Cribbing from the above commit log,
+
+The contract for {g,u}id mount options and {g,u}id values in general set
+from userspace has always been that they are translated according to the
+caller's idmapping. In so far, fuse has been doing the correct thing.
+But since fuse is mountable in unprivileged contexts it is also
+necessary to verify that the resulting k{g,u}id is representable in the
+namespace of the superblock.
+
+Fixes: c30da2e981a7 ("fuse: convert to use the new mount API")
+Cc: stable@vger.kernel.org # 5.4+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Link: https://lore.kernel.org/r/8f07d45d-c806-484d-a2e3-7a2199df1cd2@redhat.com
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/inode.c | 24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 5f9b2dc59135b..a5d1eb0bc5214 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -540,6 +540,8 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
+       struct fs_parse_result result;
+       struct fuse_fs_context *ctx = fsc->fs_private;
+       int opt;
++      kuid_t kuid;
++      kgid_t kgid;
+       if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+               /*
+@@ -584,16 +586,30 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
+               break;
+       case OPT_USER_ID:
+-              ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
+-              if (!uid_valid(ctx->user_id))
++              kuid =  make_kuid(fsc->user_ns, result.uint_32);
++              if (!uid_valid(kuid))
+                       return invalfc(fsc, "Invalid user_id");
++              /*
++               * The requested uid must be representable in the
++               * filesystem's idmapping.
++               */
++              if (!kuid_has_mapping(fsc->user_ns, kuid))
++                      return invalfc(fsc, "Invalid user_id");
++              ctx->user_id = kuid;
+               ctx->user_id_present = true;
+               break;
+       case OPT_GROUP_ID:
+-              ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
+-              if (!gid_valid(ctx->group_id))
++              kgid = make_kgid(fsc->user_ns, result.uint_32);;
++              if (!gid_valid(kgid))
++                      return invalfc(fsc, "Invalid group_id");
++              /*
++               * The requested gid must be representable in the
++               * filesystem's idmapping.
++               */
++              if (!kgid_has_mapping(fsc->user_ns, kgid))
+                       return invalfc(fsc, "Invalid group_id");
++              ctx->group_id = kgid;
+               ctx->group_id_present = true;
+               break;
+-- 
+2.43.0
+
diff --git a/queue-5.10/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch b/queue-5.10/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch
new file mode 100644 (file)
index 0000000..a857fe3
--- /dev/null
@@ -0,0 +1,122 @@
+From 6082826257da6d7c5217f69a4c34521feed6379a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Feb 2022 12:02:59 +0000
+Subject: genirq: Allow the PM device to originate from irq domain
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 1f8863bfb5ca500ea1c7669b16b1931ba27fce20 ]
+
+As a preparation to moving the reference to the device used for
+runtime power management, add a new 'dev' field to the irqdomain
+structure for that exact purpose.
+
+The irq_chip_pm_{get,put}() helpers are made aware of the dual
+location via a new private helper.
+
+No functional change intended.
+
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Tested-by: Tony Lindgren <tony@atomide.com>
+Acked-by: Bartosz Golaszewski <brgl@bgdev.pl>
+Link: https://lore.kernel.org/r/20220201120310.878267-2-maz@kernel.org
+Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/irqdomain.h | 10 ++++++++++
+ kernel/irq/chip.c         | 23 ++++++++++++++++++-----
+ 2 files changed, 28 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
+index 9b9743f7538c4..60f53eadfa422 100644
+--- a/include/linux/irqdomain.h
++++ b/include/linux/irqdomain.h
+@@ -149,6 +149,8 @@ struct irq_domain_chip_generic;
+  * @gc: Pointer to a list of generic chips. There is a helper function for
+  *      setting up one or more generic chips for interrupt controllers
+  *      drivers using the generic chip library which uses this pointer.
++ * @dev: Pointer to a device that the domain represents, and that will be
++ *       used for power management purposes.
+  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
+  * @debugfs_file: dentry for the domain debugfs file
+  *
+@@ -171,6 +173,7 @@ struct irq_domain {
+       struct fwnode_handle *fwnode;
+       enum irq_domain_bus_token bus_token;
+       struct irq_domain_chip_generic *gc;
++      struct device *dev;
+ #ifdef        CONFIG_IRQ_DOMAIN_HIERARCHY
+       struct irq_domain *parent;
+ #endif
+@@ -227,6 +230,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
+       return to_of_node(d->fwnode);
+ }
++static inline void irq_domain_set_pm_device(struct irq_domain *d,
++                                          struct device *dev)
++{
++      if (d)
++              d->dev = dev;
++}
++
+ #ifdef CONFIG_IRQ_DOMAIN
+ struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
+                                               const char *name, phys_addr_t *pa);
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index e7d284261d450..b8aa9e22105f9 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -1586,6 +1586,17 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+       return 0;
+ }
++static struct device *irq_get_parent_device(struct irq_data *data)
++{
++      if (data->chip->parent_device)
++              return data->chip->parent_device;
++
++      if (data->domain)
++              return data->domain->dev;
++
++      return NULL;
++}
++
+ /**
+  * irq_chip_pm_get - Enable power for an IRQ chip
+  * @data:     Pointer to interrupt specific data
+@@ -1595,12 +1606,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+  */
+ int irq_chip_pm_get(struct irq_data *data)
+ {
++      struct device *dev = irq_get_parent_device(data);
+       int retval;
+-      if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) {
+-              retval = pm_runtime_get_sync(data->chip->parent_device);
++      if (IS_ENABLED(CONFIG_PM) && dev) {
++              retval = pm_runtime_get_sync(dev);
+               if (retval < 0) {
+-                      pm_runtime_put_noidle(data->chip->parent_device);
++                      pm_runtime_put_noidle(dev);
+                       return retval;
+               }
+       }
+@@ -1618,10 +1630,11 @@ int irq_chip_pm_get(struct irq_data *data)
+  */
+ int irq_chip_pm_put(struct irq_data *data)
+ {
++      struct device *dev = irq_get_parent_device(data);
+       int retval = 0;
+-      if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device)
+-              retval = pm_runtime_put(data->chip->parent_device);
++      if (IS_ENABLED(CONFIG_PM) && dev)
++              retval = pm_runtime_put(dev);
+       return (retval < 0) ? retval : 0;
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch b/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch
new file mode 100644 (file)
index 0000000..f575504
--- /dev/null
@@ -0,0 +1,110 @@
+From 2b1bb86dc48a14692989153e5b91def24b213416 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Nov 2021 18:35:59 -0800
+Subject: ipc: check checkpoint_restore_ns_capable() to modify C/R proc files
+
+From: Michal Clapinski <mclapinski@google.com>
+
+[ Upstream commit 5563cabdde7ee53c34ec7e5e0283bfcc9a1bc893 ]
+
+This commit removes the requirement to be root to modify sem_next_id,
+msg_next_id and shm_next_id and checks checkpoint_restore_ns_capable
+instead.
+
+Since those files are specific to the IPC namespace, there is no reason
+they should require root privileges.  This is similar to ns_last_pid,
+which also only checks checkpoint_restore_ns_capable.
+
+[akpm@linux-foundation.org: ipc/ipc_sysctl.c needs capability.h for checkpoint_restore_ns_capable()]
+
+Link: https://lkml.kernel.org/r/20210916163717.3179496-1-mclapinski@google.com
+Signed-off-by: Michal Clapinski <mclapinski@google.com>
+Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
+Reviewed-by: Manfred Spraul <manfred@colorfullife.com>
+Cc: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ ipc/ipc_sysctl.c | 29 +++++++++++++++++++++++------
+ 1 file changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
+index 3f312bf2b1163..345e4d673e61e 100644
+--- a/ipc/ipc_sysctl.c
++++ b/ipc/ipc_sysctl.c
+@@ -10,6 +10,7 @@
+ #include <linux/nsproxy.h>
+ #include <linux/sysctl.h>
+ #include <linux/uaccess.h>
++#include <linux/capability.h>
+ #include <linux/ipc_namespace.h>
+ #include <linux/msg.h>
+ #include "util.h"
+@@ -104,6 +105,19 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
+       return ret;
+ }
++#ifdef CONFIG_CHECKPOINT_RESTORE
++static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
++              int write, void *buffer, size_t *lenp, loff_t *ppos)
++{
++      struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns;
++
++      if (write && !checkpoint_restore_ns_capable(user_ns))
++              return -EPERM;
++
++      return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
++}
++#endif
++
+ #else
+ #define proc_ipc_doulongvec_minmax NULL
+ #define proc_ipc_dointvec        NULL
+@@ -111,6 +125,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
+ #define proc_ipc_dointvec_minmax_orphans   NULL
+ #define proc_ipc_auto_msgmni     NULL
+ #define proc_ipc_sem_dointvec    NULL
++#ifdef CONFIG_CHECKPOINT_RESTORE
++#define proc_ipc_dointvec_minmax_checkpoint_restore   NULL
++#endif        /* CONFIG_CHECKPOINT_RESTORE */
+ #endif
+ int ipc_mni = IPCMNI;
+@@ -198,8 +215,8 @@ static struct ctl_table ipc_kern_table[] = {
+               .procname       = "sem_next_id",
+               .data           = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
+-              .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .mode           = 0666,
++              .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_INT_MAX,
+       },
+@@ -207,8 +224,8 @@ static struct ctl_table ipc_kern_table[] = {
+               .procname       = "msg_next_id",
+               .data           = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
+-              .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .mode           = 0666,
++              .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_INT_MAX,
+       },
+@@ -216,8 +233,8 @@ static struct ctl_table ipc_kern_table[] = {
+               .procname       = "shm_next_id",
+               .data           = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
+-              .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .mode           = 0666,
++              .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_INT_MAX,
+       },
+-- 
+2.43.0
+
diff --git a/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch b/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch
new file mode 100644 (file)
index 0000000..7828ef2
--- /dev/null
@@ -0,0 +1,137 @@
+From 58463ddf843a769113f5f44de099157c98150f50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 May 2022 15:39:56 +0200
+Subject: ipc: Check permissions for checkpoint_restart sysctls at open time
+
+From: Alexey Gladkov <legion@kernel.org>
+
+[ Upstream commit 0889f44e281034e180daa6daf3e2d57c012452d4 ]
+
+As Eric Biederman pointed out, it is possible not to use a custom
+proc_handler and check permissions for every write, but to use a
+.permission handler. That will allow the checkpoint_restart sysctls to
+perform all of their permission checks at open time, and not need any
+other special code.
+
+Link: https://lore.kernel.org/lkml/87czib9g38.fsf@email.froward.int.ebiederm.org/
+Fixes: 1f5c135ee509 ("ipc: Store ipc sysctls in the ipc namespace")
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Alexey Gladkov <legion@kernel.org>
+Link: https://lkml.kernel.org/r/65fa8459803830608da4610a39f33c76aa933eb9.1651584847.git.legion@kernel.org
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ ipc/ipc_sysctl.c | 57 ++++++++++++++++++++++++------------------------
+ 1 file changed, 29 insertions(+), 28 deletions(-)
+
+diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
+index 15210ac47e9e1..a2b871d006da7 100644
+--- a/ipc/ipc_sysctl.c
++++ b/ipc/ipc_sysctl.c
+@@ -78,25 +78,6 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
+       return ret;
+ }
+-#ifdef CONFIG_CHECKPOINT_RESTORE
+-static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
+-              int write, void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      struct ipc_namespace *ns = table->extra1;
+-      struct ctl_table ipc_table;
+-
+-      if (write && !checkpoint_restore_ns_capable(ns->user_ns))
+-              return -EPERM;
+-
+-      memcpy(&ipc_table, table, sizeof(ipc_table));
+-
+-      ipc_table.extra1 = SYSCTL_ZERO;
+-      ipc_table.extra2 = SYSCTL_INT_MAX;
+-
+-      return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+-}
+-#endif
+-
+ int ipc_mni = IPCMNI;
+ int ipc_mni_shift = IPCMNI_SHIFT;
+ int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
+@@ -180,22 +161,28 @@ static struct ctl_table ipc_sysctls[] = {
+               .procname       = "sem_next_id",
+               .data           = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
+-              .mode           = 0666,
+-              .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
++              .mode           = 0444,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = SYSCTL_ZERO,
++              .extra2         = SYSCTL_INT_MAX,
+       },
+       {
+               .procname       = "msg_next_id",
+               .data           = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
+-              .mode           = 0666,
+-              .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
++              .mode           = 0444,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = SYSCTL_ZERO,
++              .extra2         = SYSCTL_INT_MAX,
+       },
+       {
+               .procname       = "shm_next_id",
+               .data           = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
+-              .mode           = 0666,
+-              .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
++              .mode           = 0444,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = SYSCTL_ZERO,
++              .extra2         = SYSCTL_INT_MAX,
+       },
+ #endif
+       {}
+@@ -211,8 +198,25 @@ static int set_is_seen(struct ctl_table_set *set)
+       return &current->nsproxy->ipc_ns->ipc_set == set;
+ }
++static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
++{
++      int mode = table->mode;
++
++#ifdef CONFIG_CHECKPOINT_RESTORE
++      struct ipc_namespace *ns = current->nsproxy->ipc_ns;
++
++      if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
++           (table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
++           (table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
++          checkpoint_restore_ns_capable(ns->user_ns))
++              mode = 0666;
++#endif
++      return mode;
++}
++
+ static struct ctl_table_root set_root = {
+       .lookup = set_lookup,
++      .permissions = ipc_permissions,
+ };
+ bool setup_ipc_sysctls(struct ipc_namespace *ns)
+@@ -254,15 +258,12 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns)
+ #ifdef CONFIG_CHECKPOINT_RESTORE
+                       } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) {
+                               tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;
+-                              tbl[i].extra1 = ns;
+                       } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) {
+                               tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;
+-                              tbl[i].extra1 = ns;
+                       } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) {
+                               tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
+-                              tbl[i].extra1 = ns;
+ #endif
+                       } else {
+                               tbl[i].data = NULL;
+-- 
+2.43.0
+
diff --git a/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch b/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch
new file mode 100644 (file)
index 0000000..e00eca4
--- /dev/null
@@ -0,0 +1,69 @@
+From dc71da9fbefdbd0a29e74eda21a5d3a87dbd1729 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Nov 2021 18:36:02 -0800
+Subject: ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL
+
+From: Manfred Spraul <manfred@colorfullife.com>
+
+[ Upstream commit 0e9beb8a96f21a6df1579cb3a679e150e3269d80 ]
+
+Compilation of ipc/ipc_sysctl.c is controlled by
+obj-$(CONFIG_SYSVIPC_SYSCTL)
+[see ipc/Makefile]
+
+And CONFIG_SYSVIPC_SYSCTL depends on SYSCTL
+[see init/Kconfig]
+
+And SYSCTL is selected by PROC_SYSCTL.
+[see fs/proc/Kconfig]
+
+Thus: #ifndef CONFIG_PROC_SYSCTL in ipc/ipc_sysctl.c is impossible, the
+fallback can be removed.
+
+Link: https://lkml.kernel.org/r/20210918145337.3369-1-manfred@colorfullife.com
+Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
+Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Acked-by: Davidlohr Bueso <dbueso@suse.de>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ ipc/ipc_sysctl.c | 13 -------------
+ 1 file changed, 13 deletions(-)
+
+diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
+index 345e4d673e61e..f101c171753f6 100644
+--- a/ipc/ipc_sysctl.c
++++ b/ipc/ipc_sysctl.c
+@@ -23,7 +23,6 @@ static void *get_ipc(struct ctl_table *table)
+       return which;
+ }
+-#ifdef CONFIG_PROC_SYSCTL
+ static int proc_ipc_dointvec(struct ctl_table *table, int write,
+               void *buffer, size_t *lenp, loff_t *ppos)
+ {
+@@ -118,18 +117,6 @@ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
+ }
+ #endif
+-#else
+-#define proc_ipc_doulongvec_minmax NULL
+-#define proc_ipc_dointvec        NULL
+-#define proc_ipc_dointvec_minmax   NULL
+-#define proc_ipc_dointvec_minmax_orphans   NULL
+-#define proc_ipc_auto_msgmni     NULL
+-#define proc_ipc_sem_dointvec    NULL
+-#ifdef CONFIG_CHECKPOINT_RESTORE
+-#define proc_ipc_dointvec_minmax_checkpoint_restore   NULL
+-#endif        /* CONFIG_CHECKPOINT_RESTORE */
+-#endif
+-
+ int ipc_mni = IPCMNI;
+ int ipc_mni_shift = IPCMNI_SHIFT;
+ int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
+-- 
+2.43.0
+
diff --git a/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch b/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch
new file mode 100644 (file)
index 0000000..a273b95
--- /dev/null
@@ -0,0 +1,406 @@
+From d7b4862364b01676155853a01f0065e8caa36e1a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Feb 2022 19:18:15 +0100
+Subject: ipc: Store ipc sysctls in the ipc namespace
+
+From: Alexey Gladkov <legion@kernel.org>
+
+[ Upstream commit 1f5c135ee509e89e0cc274333a65f73c62cb16e5 ]
+
+The ipc sysctls are not available for modification inside the user
+namespace. Following the mqueue sysctls, we changed the implementation
+to be more userns friendly.
+
+So far, the changes do not provide additional access to files. This
+will be done in a future patch.
+
+Signed-off-by: Alexey Gladkov <legion@kernel.org>
+Link: https://lkml.kernel.org/r/be6f9d014276f4dddd0c3aa05a86052856c1c555.1644862280.git.legion@kernel.org
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/ipc_namespace.h |  21 ++++
+ ipc/ipc_sysctl.c              | 189 ++++++++++++++++++++++------------
+ ipc/namespace.c               |   4 +
+ 3 files changed, 147 insertions(+), 67 deletions(-)
+
+diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
+index 60cd84c1ba146..efcfa7f1d4551 100644
+--- a/include/linux/ipc_namespace.h
++++ b/include/linux/ipc_namespace.h
+@@ -68,6 +68,9 @@ struct ipc_namespace {
+       struct ctl_table_set    mq_set;
+       struct ctl_table_header *mq_sysctls;
++      struct ctl_table_set    ipc_set;
++      struct ctl_table_header *ipc_sysctls;
++
+       /* user_ns which owns the ipc ns */
+       struct user_namespace *user_ns;
+       struct ucounts *ucounts;
+@@ -189,4 +192,22 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
+ }
+ #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
++
++#ifdef CONFIG_SYSVIPC_SYSCTL
++
++bool setup_ipc_sysctls(struct ipc_namespace *ns);
++void retire_ipc_sysctls(struct ipc_namespace *ns);
++
++#else /* CONFIG_SYSVIPC_SYSCTL */
++
++static inline void retire_ipc_sysctls(struct ipc_namespace *ns)
++{
++}
++
++static inline bool setup_ipc_sysctls(struct ipc_namespace *ns)
++{
++      return true;
++}
++
++#endif /* CONFIG_SYSVIPC_SYSCTL */
+ #endif
+diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
+index f101c171753f6..15210ac47e9e1 100644
+--- a/ipc/ipc_sysctl.c
++++ b/ipc/ipc_sysctl.c
+@@ -13,43 +13,22 @@
+ #include <linux/capability.h>
+ #include <linux/ipc_namespace.h>
+ #include <linux/msg.h>
++#include <linux/slab.h>
+ #include "util.h"
+-static void *get_ipc(struct ctl_table *table)
+-{
+-      char *which = table->data;
+-      struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+-      which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
+-      return which;
+-}
+-
+-static int proc_ipc_dointvec(struct ctl_table *table, int write,
+-              void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      struct ctl_table ipc_table;
+-
+-      memcpy(&ipc_table, table, sizeof(ipc_table));
+-      ipc_table.data = get_ipc(table);
+-
+-      return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
+-}
+-
+-static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
++static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
+               void *buffer, size_t *lenp, loff_t *ppos)
+ {
++      struct ipc_namespace *ns = table->extra1;
+       struct ctl_table ipc_table;
++      int err;
+       memcpy(&ipc_table, table, sizeof(ipc_table));
+-      ipc_table.data = get_ipc(table);
+-      return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+-}
++      ipc_table.extra1 = SYSCTL_ZERO;
++      ipc_table.extra2 = SYSCTL_ONE;
+-static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
+-              void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+-      int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
++      err = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+       if (err < 0)
+               return err;
+@@ -58,17 +37,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
+       return err;
+ }
+-static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
+-              void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      struct ctl_table ipc_table;
+-      memcpy(&ipc_table, table, sizeof(ipc_table));
+-      ipc_table.data = get_ipc(table);
+-
+-      return proc_doulongvec_minmax(&ipc_table, write, buffer,
+-                                      lenp, ppos);
+-}
+-
+ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
+               void *buffer, size_t *lenp, loff_t *ppos)
+ {
+@@ -87,11 +55,17 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
+ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
+       void *buffer, size_t *lenp, loff_t *ppos)
+ {
++      struct ipc_namespace *ns = table->extra1;
++      struct ctl_table ipc_table;
+       int ret, semmni;
+-      struct ipc_namespace *ns = current->nsproxy->ipc_ns;
++
++      memcpy(&ipc_table, table, sizeof(ipc_table));
++
++      ipc_table.extra1 = NULL;
++      ipc_table.extra2 = NULL;
+       semmni = ns->sem_ctls[3];
+-      ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos);
++      ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (!ret)
+               ret = sem_check_semmni(current->nsproxy->ipc_ns);
+@@ -108,12 +82,18 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
+ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
+               int write, void *buffer, size_t *lenp, loff_t *ppos)
+ {
+-      struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns;
++      struct ipc_namespace *ns = table->extra1;
++      struct ctl_table ipc_table;
+-      if (write && !checkpoint_restore_ns_capable(user_ns))
++      if (write && !checkpoint_restore_ns_capable(ns->user_ns))
+               return -EPERM;
+-      return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
++      memcpy(&ipc_table, table, sizeof(ipc_table));
++
++      ipc_table.extra1 = SYSCTL_ZERO;
++      ipc_table.extra2 = SYSCTL_INT_MAX;
++
++      return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
+ }
+ #endif
+@@ -121,27 +101,27 @@ int ipc_mni = IPCMNI;
+ int ipc_mni_shift = IPCMNI_SHIFT;
+ int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
+-static struct ctl_table ipc_kern_table[] = {
++static struct ctl_table ipc_sysctls[] = {
+       {
+               .procname       = "shmmax",
+               .data           = &init_ipc_ns.shm_ctlmax,
+               .maxlen         = sizeof(init_ipc_ns.shm_ctlmax),
+               .mode           = 0644,
+-              .proc_handler   = proc_ipc_doulongvec_minmax,
++              .proc_handler   = proc_doulongvec_minmax,
+       },
+       {
+               .procname       = "shmall",
+               .data           = &init_ipc_ns.shm_ctlall,
+               .maxlen         = sizeof(init_ipc_ns.shm_ctlall),
+               .mode           = 0644,
+-              .proc_handler   = proc_ipc_doulongvec_minmax,
++              .proc_handler   = proc_doulongvec_minmax,
+       },
+       {
+               .procname       = "shmmni",
+               .data           = &init_ipc_ns.shm_ctlmni,
+               .maxlen         = sizeof(init_ipc_ns.shm_ctlmni),
+               .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = &ipc_mni,
+       },
+@@ -151,15 +131,13 @@ static struct ctl_table ipc_kern_table[] = {
+               .maxlen         = sizeof(init_ipc_ns.shm_rmid_forced),
+               .mode           = 0644,
+               .proc_handler   = proc_ipc_dointvec_minmax_orphans,
+-              .extra1         = SYSCTL_ZERO,
+-              .extra2         = SYSCTL_ONE,
+       },
+       {
+               .procname       = "msgmax",
+               .data           = &init_ipc_ns.msg_ctlmax,
+               .maxlen         = sizeof(init_ipc_ns.msg_ctlmax),
+               .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_INT_MAX,
+       },
+@@ -168,7 +146,7 @@ static struct ctl_table ipc_kern_table[] = {
+               .data           = &init_ipc_ns.msg_ctlmni,
+               .maxlen         = sizeof(init_ipc_ns.msg_ctlmni),
+               .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = &ipc_mni,
+       },
+@@ -186,7 +164,7 @@ static struct ctl_table ipc_kern_table[] = {
+               .data           = &init_ipc_ns.msg_ctlmnb,
+               .maxlen         = sizeof(init_ipc_ns.msg_ctlmnb),
+               .mode           = 0644,
+-              .proc_handler   = proc_ipc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_INT_MAX,
+       },
+@@ -204,8 +182,6 @@ static struct ctl_table ipc_kern_table[] = {
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
+               .mode           = 0666,
+               .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
+-              .extra1         = SYSCTL_ZERO,
+-              .extra2         = SYSCTL_INT_MAX,
+       },
+       {
+               .procname       = "msg_next_id",
+@@ -213,8 +189,6 @@ static struct ctl_table ipc_kern_table[] = {
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
+               .mode           = 0666,
+               .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
+-              .extra1         = SYSCTL_ZERO,
+-              .extra2         = SYSCTL_INT_MAX,
+       },
+       {
+               .procname       = "shm_next_id",
+@@ -222,25 +196,106 @@ static struct ctl_table ipc_kern_table[] = {
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
+               .mode           = 0666,
+               .proc_handler   = proc_ipc_dointvec_minmax_checkpoint_restore,
+-              .extra1         = SYSCTL_ZERO,
+-              .extra2         = SYSCTL_INT_MAX,
+       },
+ #endif
+       {}
+ };
+-static struct ctl_table ipc_root_table[] = {
+-      {
+-              .procname       = "kernel",
+-              .mode           = 0555,
+-              .child          = ipc_kern_table,
+-      },
+-      {}
++static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
++{
++      return &current->nsproxy->ipc_ns->ipc_set;
++}
++
++static int set_is_seen(struct ctl_table_set *set)
++{
++      return &current->nsproxy->ipc_ns->ipc_set == set;
++}
++
++static struct ctl_table_root set_root = {
++      .lookup = set_lookup,
+ };
++bool setup_ipc_sysctls(struct ipc_namespace *ns)
++{
++      struct ctl_table *tbl;
++
++      setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen);
++
++      tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL);
++      if (tbl) {
++              int i;
++
++              for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) {
++                      if (tbl[i].data == &init_ipc_ns.shm_ctlmax) {
++                              tbl[i].data = &ns->shm_ctlmax;
++
++                      } else if (tbl[i].data == &init_ipc_ns.shm_ctlall) {
++                              tbl[i].data = &ns->shm_ctlall;
++
++                      } else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) {
++                              tbl[i].data = &ns->shm_ctlmni;
++
++                      } else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) {
++                              tbl[i].data = &ns->shm_rmid_forced;
++                              tbl[i].extra1 = ns;
++
++                      } else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) {
++                              tbl[i].data = &ns->msg_ctlmax;
++
++                      } else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) {
++                              tbl[i].data = &ns->msg_ctlmni;
++
++                      } else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) {
++                              tbl[i].data = &ns->msg_ctlmnb;
++
++                      } else if (tbl[i].data == &init_ipc_ns.sem_ctls) {
++                              tbl[i].data = &ns->sem_ctls;
++                              tbl[i].extra1 = ns;
++#ifdef CONFIG_CHECKPOINT_RESTORE
++                      } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) {
++                              tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;
++                              tbl[i].extra1 = ns;
++
++                      } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) {
++                              tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;
++                              tbl[i].extra1 = ns;
++
++                      } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) {
++                              tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
++                              tbl[i].extra1 = ns;
++#endif
++                      } else {
++                              tbl[i].data = NULL;
++                      }
++              }
++
++              ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl);
++      }
++      if (!ns->ipc_sysctls) {
++              kfree(tbl);
++              retire_sysctl_set(&ns->ipc_set);
++              return false;
++      }
++
++      return true;
++}
++
++void retire_ipc_sysctls(struct ipc_namespace *ns)
++{
++      struct ctl_table *tbl;
++
++      tbl = ns->ipc_sysctls->ctl_table_arg;
++      unregister_sysctl_table(ns->ipc_sysctls);
++      retire_sysctl_set(&ns->ipc_set);
++      kfree(tbl);
++}
++
+ static int __init ipc_sysctl_init(void)
+ {
+-      register_sysctl_table(ipc_root_table);
++      if (!setup_ipc_sysctls(&init_ipc_ns)) {
++              pr_warn("ipc sysctl registration failed\n");
++              return -ENOMEM;
++      }
+       return 0;
+ }
+diff --git a/ipc/namespace.c b/ipc/namespace.c
+index 5d68e20f7d2bf..14bb40c9d0b85 100644
+--- a/ipc/namespace.c
++++ b/ipc/namespace.c
+@@ -63,6 +63,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
+       if (!setup_mq_sysctls(ns))
+               goto fail_put;
++      if (!setup_ipc_sysctls(ns))
++              goto fail_put;
++
+       sem_init_ns(ns);
+       msg_init_ns(ns);
+       shm_init_ns(ns);
+@@ -130,6 +133,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
+       shm_exit_ns(ns);
+       retire_mq_sysctls(ns);
++      retire_ipc_sysctls(ns);
+       dec_ipc_namespaces(ns->ucounts);
+       put_user_ns(ns->user_ns);
+-- 
+2.43.0
+
diff --git a/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch b/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch
new file mode 100644 (file)
index 0000000..dab916c
--- /dev/null
@@ -0,0 +1,323 @@
+From cb98de8a508d409d94c225f80e4ac33f3b6dfad5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Feb 2022 19:18:14 +0100
+Subject: ipc: Store mqueue sysctls in the ipc namespace
+
+From: Alexey Gladkov <legion@kernel.org>
+
+[ Upstream commit dc55e35f9e810f23dd69cfdc91a3d636023f57a2 ]
+
+Right now, the mqueue sysctls take ipc namespaces into account in a
+rather hacky way. This works in most cases, but does not respect the
+user namespace.
+
+Within the user namespace, the user cannot change the /proc/sys/fs/mqueue/*
+parameters. This poses a problem in the rootless containers.
+
+To solve this I changed the implementation of the mqueue sysctls just
+like some other sysctls.
+
+So far, the changes do not provide additional access to files. This will
+be done in a future patch.
+
+v3:
+* Don't implement set_permissions to keep the current behavior.
+
+v2:
+* Fixed compilation problem if CONFIG_POSIX_MQUEUE_SYSCTL is not
+  specified.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Alexey Gladkov <legion@kernel.org>
+Link: https://lkml.kernel.org/r/b0ccbb2489119f1f20c737cf1930c3a9c4e4243a.1644862280.git.legion@kernel.org
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/ipc_namespace.h |  16 +++--
+ ipc/mq_sysctl.c               | 121 ++++++++++++++++++----------------
+ ipc/mqueue.c                  |  10 ++-
+ ipc/namespace.c               |   6 ++
+ 4 files changed, 88 insertions(+), 65 deletions(-)
+
+diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
+index 08325105131a2..60cd84c1ba146 100644
+--- a/include/linux/ipc_namespace.h
++++ b/include/linux/ipc_namespace.h
+@@ -10,6 +10,7 @@
+ #include <linux/ns_common.h>
+ #include <linux/refcount.h>
+ #include <linux/rhashtable-types.h>
++#include <linux/sysctl.h>
+ struct user_namespace;
+@@ -64,6 +65,9 @@ struct ipc_namespace {
+       unsigned int    mq_msg_default;
+       unsigned int    mq_msgsize_default;
++      struct ctl_table_set    mq_set;
++      struct ctl_table_header *mq_sysctls;
++
+       /* user_ns which owns the ipc ns */
+       struct user_namespace *user_ns;
+       struct ucounts *ucounts;
+@@ -170,14 +174,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns)
+ #ifdef CONFIG_POSIX_MQUEUE_SYSCTL
+-struct ctl_table_header;
+-extern struct ctl_table_header *mq_register_sysctl_table(void);
++void retire_mq_sysctls(struct ipc_namespace *ns);
++bool setup_mq_sysctls(struct ipc_namespace *ns);
+ #else /* CONFIG_POSIX_MQUEUE_SYSCTL */
+-static inline struct ctl_table_header *mq_register_sysctl_table(void)
++static inline void retire_mq_sysctls(struct ipc_namespace *ns)
+ {
+-      return NULL;
++}
++
++static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
++{
++      return true;
+ }
+ #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
+diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
+index 72a92a08c848e..fbf6a8b93a265 100644
+--- a/ipc/mq_sysctl.c
++++ b/ipc/mq_sysctl.c
+@@ -9,39 +9,9 @@
+ #include <linux/ipc_namespace.h>
+ #include <linux/sysctl.h>
+-#ifdef CONFIG_PROC_SYSCTL
+-static void *get_mq(struct ctl_table *table)
+-{
+-      char *which = table->data;
+-      struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+-      which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns;
+-      return which;
+-}
+-
+-static int proc_mq_dointvec(struct ctl_table *table, int write,
+-                          void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      struct ctl_table mq_table;
+-      memcpy(&mq_table, table, sizeof(mq_table));
+-      mq_table.data = get_mq(table);
+-
+-      return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
+-}
+-
+-static int proc_mq_dointvec_minmax(struct ctl_table *table, int write,
+-              void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      struct ctl_table mq_table;
+-      memcpy(&mq_table, table, sizeof(mq_table));
+-      mq_table.data = get_mq(table);
+-
+-      return proc_dointvec_minmax(&mq_table, write, buffer,
+-                                      lenp, ppos);
+-}
+-#else
+-#define proc_mq_dointvec NULL
+-#define proc_mq_dointvec_minmax NULL
+-#endif
++#include <linux/stat.h>
++#include <linux/capability.h>
++#include <linux/slab.h>
+ static int msg_max_limit_min = MIN_MSGMAX;
+ static int msg_max_limit_max = HARD_MSGMAX;
+@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = {
+               .data           = &init_ipc_ns.mq_queues_max,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_mq_dointvec,
++              .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "msg_max",
+               .data           = &init_ipc_ns.mq_msg_max,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_mq_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &msg_max_limit_min,
+               .extra2         = &msg_max_limit_max,
+       },
+@@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = {
+               .data           = &init_ipc_ns.mq_msgsize_max,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_mq_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &msg_maxsize_limit_min,
+               .extra2         = &msg_maxsize_limit_max,
+       },
+@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = {
+               .data           = &init_ipc_ns.mq_msg_default,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_mq_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &msg_max_limit_min,
+               .extra2         = &msg_max_limit_max,
+       },
+@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = {
+               .data           = &init_ipc_ns.mq_msgsize_default,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_mq_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &msg_maxsize_limit_min,
+               .extra2         = &msg_maxsize_limit_max,
+       },
+       {}
+ };
+-static struct ctl_table mq_sysctl_dir[] = {
+-      {
+-              .procname       = "mqueue",
+-              .mode           = 0555,
+-              .child          = mq_sysctls,
+-      },
+-      {}
+-};
++static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
++{
++      return &current->nsproxy->ipc_ns->mq_set;
++}
+-static struct ctl_table mq_sysctl_root[] = {
+-      {
+-              .procname       = "fs",
+-              .mode           = 0555,
+-              .child          = mq_sysctl_dir,
+-      },
+-      {}
++static int set_is_seen(struct ctl_table_set *set)
++{
++      return &current->nsproxy->ipc_ns->mq_set == set;
++}
++
++static struct ctl_table_root set_root = {
++      .lookup = set_lookup,
+ };
+-struct ctl_table_header *mq_register_sysctl_table(void)
++bool setup_mq_sysctls(struct ipc_namespace *ns)
+ {
+-      return register_sysctl_table(mq_sysctl_root);
++      struct ctl_table *tbl;
++
++      setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen);
++
++      tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL);
++      if (tbl) {
++              int i;
++
++              for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) {
++                      if (tbl[i].data == &init_ipc_ns.mq_queues_max)
++                              tbl[i].data = &ns->mq_queues_max;
++
++                      else if (tbl[i].data == &init_ipc_ns.mq_msg_max)
++                              tbl[i].data = &ns->mq_msg_max;
++
++                      else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max)
++                              tbl[i].data = &ns->mq_msgsize_max;
++
++                      else if (tbl[i].data == &init_ipc_ns.mq_msg_default)
++                              tbl[i].data = &ns->mq_msg_default;
++
++                      else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default)
++                              tbl[i].data = &ns->mq_msgsize_default;
++                      else
++                              tbl[i].data = NULL;
++              }
++
++              ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl);
++      }
++      if (!ns->mq_sysctls) {
++              kfree(tbl);
++              retire_sysctl_set(&ns->mq_set);
++              return false;
++      }
++
++      return true;
++}
++
++void retire_mq_sysctls(struct ipc_namespace *ns)
++{
++      struct ctl_table *tbl;
++
++      tbl = ns->mq_sysctls->ctl_table_arg;
++      unregister_sysctl_table(ns->mq_sysctls);
++      retire_sysctl_set(&ns->mq_set);
++      kfree(tbl);
+ }
+diff --git a/ipc/mqueue.c b/ipc/mqueue.c
+index 86969de170843..b14ea1dcd50d4 100644
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info);
+ static struct kmem_cache *mqueue_inode_cachep;
+-static struct ctl_table_header *mq_sysctl_table;
+-
+ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
+ {
+       return container_of(inode, struct mqueue_inode_info, vfs_inode);
+@@ -1724,8 +1722,10 @@ static int __init init_mqueue_fs(void)
+       if (mqueue_inode_cachep == NULL)
+               return -ENOMEM;
+-      /* ignore failures - they are not fatal */
+-      mq_sysctl_table = mq_register_sysctl_table();
++      if (!setup_mq_sysctls(&init_ipc_ns)) {
++              pr_warn("sysctl registration failed\n");
++              return -ENOMEM;
++      }
+       error = register_filesystem(&mqueue_fs_type);
+       if (error)
+@@ -1742,8 +1742,6 @@ static int __init init_mqueue_fs(void)
+ out_filesystem:
+       unregister_filesystem(&mqueue_fs_type);
+ out_sysctl:
+-      if (mq_sysctl_table)
+-              unregister_sysctl_table(mq_sysctl_table);
+       kmem_cache_destroy(mqueue_inode_cachep);
+       return error;
+ }
+diff --git a/ipc/namespace.c b/ipc/namespace.c
+index 24e7b45320f72..5d68e20f7d2bf 100644
+--- a/ipc/namespace.c
++++ b/ipc/namespace.c
+@@ -59,6 +59,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
+       if (err)
+               goto fail_put;
++      err = -ENOMEM;
++      if (!setup_mq_sysctls(ns))
++              goto fail_put;
++
+       sem_init_ns(ns);
+       msg_init_ns(ns);
+       shm_init_ns(ns);
+@@ -125,6 +129,8 @@ static void free_ipc_ns(struct ipc_namespace *ns)
+       msg_exit_ns(ns);
+       shm_exit_ns(ns);
++      retire_mq_sysctls(ns);
++
+       dec_ipc_namespaces(ns->ucounts);
+       put_user_ns(ns->user_ns);
+       ns_free_inum(&ns->ns);
+-- 
+2.43.0
+
diff --git a/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch b/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch
new file mode 100644 (file)
index 0000000..e9fd9d5
--- /dev/null
@@ -0,0 +1,53 @@
+From ab586543de36f330ced813886c8321973345ff1a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2024 10:14:27 +0200
+Subject: ipv4: fix source address selection with route leak
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 6807352353561187a718e87204458999dbcbba1b ]
+
+By default, an address assigned to the output interface is selected when
+the source address is not specified. This is problematic when a route,
+configured in a vrf, uses an interface from another vrf (aka route leak).
+The original vrf does not own the selected source address.
+
+Let's add a check against the output interface and call the appropriate
+function to select the source address.
+
+CC: stable@vger.kernel.org
+Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF")
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_semantics.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index a308d3f0f845c..57883bd6b5597 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -2285,6 +2285,15 @@ void fib_select_path(struct net *net, struct fib_result *res,
+               fib_select_default(fl4, res);
+ check_saddr:
+-      if (!fl4->saddr)
+-              fl4->saddr = fib_result_prefsrc(net, res);
++      if (!fl4->saddr) {
++              struct net_device *l3mdev;
++
++              l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
++
++              if (!l3mdev ||
++                  l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
++                      fl4->saddr = fib_result_prefsrc(net, res);
++              else
++                      fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
++      }
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/irqchip-imx-irqsteer-add-runtime-pm-support.patch b/queue-5.10/irqchip-imx-irqsteer-add-runtime-pm-support.patch
new file mode 100644 (file)
index 0000000..f7dd0f9
--- /dev/null
@@ -0,0 +1,85 @@
+From c2293deb2fc707d228b9697bfe49452de3f511a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Apr 2022 18:37:01 +0200
+Subject: irqchip/imx-irqsteer: Add runtime PM support
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+[ Upstream commit 4730d2233311d86cad9dc510318d1b40e4b53cf2 ]
+
+There are now SoCs that integrate the irqsteer controller within
+a separate power domain. In order to allow this domain to be
+powered down when not needed, add runtime PM support to the driver.
+
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20220406163701.1277930-2-l.stach@pengutronix.de
+Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-imx-irqsteer.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
+index c9998b46414cc..b1ecdb08c618c 100644
+--- a/drivers/irqchip/irq-imx-irqsteer.c
++++ b/drivers/irqchip/irq-imx-irqsteer.c
+@@ -12,6 +12,7 @@
+ #include <linux/kernel.h>
+ #include <linux/of_irq.h>
+ #include <linux/of_platform.h>
++#include <linux/pm_runtime.h>
+ #include <linux/spinlock.h>
+ #define CTRL_STRIDE_OFF(_t, _r)       (_t * 4 * _r)
+@@ -178,7 +179,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
+       data->irq_count = DIV_ROUND_UP(irqs_num, 64);
+       data->reg_num = irqs_num / 32;
+-      if (IS_ENABLED(CONFIG_PM_SLEEP)) {
++      if (IS_ENABLED(CONFIG_PM)) {
+               data->saved_reg = devm_kzalloc(&pdev->dev,
+                                       sizeof(u32) * data->reg_num,
+                                       GFP_KERNEL);
+@@ -202,6 +203,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
+               ret = -ENOMEM;
+               goto out;
+       }
++      irq_domain_set_pm_device(data->domain, &pdev->dev);
+       if (!data->irq_count || data->irq_count > CHAN_MAX_OUTPUT_INT) {
+               ret = -EINVAL;
+@@ -222,6 +224,9 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
+       platform_set_drvdata(pdev, data);
++      pm_runtime_set_active(&pdev->dev);
++      pm_runtime_enable(&pdev->dev);
++
+       return 0;
+ out:
+       clk_disable_unprepare(data->ipg_clk);
+@@ -244,7 +249,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev)
+       return 0;
+ }
+-#ifdef CONFIG_PM_SLEEP
++#ifdef CONFIG_PM
+ static void imx_irqsteer_save_regs(struct irqsteer_data *data)
+ {
+       int i;
+@@ -291,7 +296,10 @@ static int imx_irqsteer_resume(struct device *dev)
+ #endif
+ static const struct dev_pm_ops imx_irqsteer_pm_ops = {
+-      SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_irqsteer_suspend, imx_irqsteer_resume)
++      SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
++                                    pm_runtime_force_resume)
++      SET_RUNTIME_PM_OPS(imx_irqsteer_suspend,
++                         imx_irqsteer_resume, NULL)
+ };
+ static const struct of_device_id imx_irqsteer_dt_ids[] = {
+-- 
+2.43.0
+
diff --git a/queue-5.10/irqchip-imx-irqsteer-constify-irq_chip-struct.patch b/queue-5.10/irqchip-imx-irqsteer-constify-irq_chip-struct.patch
new file mode 100644 (file)
index 0000000..05d2e58
--- /dev/null
@@ -0,0 +1,36 @@
+From 72e979f5103330213ab860f5c77ab54ce7be3edc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Apr 2022 18:37:00 +0200
+Subject: irqchip/imx-irqsteer: Constify irq_chip struct
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+[ Upstream commit e9a50f12e579a48e124ac5adb93dafc35f0a46b8 ]
+
+The imx_irqsteer_irq_chip struct is constant data.
+
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20220406163701.1277930-1-l.stach@pengutronix.de
+Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-imx-irqsteer.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
+index 1edf7692a790b..c9998b46414cc 100644
+--- a/drivers/irqchip/irq-imx-irqsteer.c
++++ b/drivers/irqchip/irq-imx-irqsteer.c
+@@ -70,7 +70,7 @@ static void imx_irqsteer_irq_mask(struct irq_data *d)
+       raw_spin_unlock_irqrestore(&data->lock, flags);
+ }
+-static struct irq_chip imx_irqsteer_irq_chip = {
++static const struct irq_chip imx_irqsteer_irq_chip = {
+       .name           = "irqsteer",
+       .irq_mask       = imx_irqsteer_irq_mask,
+       .irq_unmask     = imx_irqsteer_irq_unmask,
+-- 
+2.43.0
+
diff --git a/queue-5.10/irqchip-imx-irqsteer-handle-runtime-power-management.patch b/queue-5.10/irqchip-imx-irqsteer-handle-runtime-power-management.patch
new file mode 100644 (file)
index 0000000..d35f6e5
--- /dev/null
@@ -0,0 +1,107 @@
+From dfbf49aec91243e105dc7c26f3565f1448edaeb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jul 2024 11:32:50 -0500
+Subject: irqchip/imx-irqsteer: Handle runtime power management correctly
+
+From: Shenwei Wang <shenwei.wang@nxp.com>
+
+[ Upstream commit 33b1c47d1fc0b5f06a393bb915db85baacba18ea ]
+
+The power domain is automatically activated from clk_prepare(). However, on
+certain platforms like i.MX8QM and i.MX8QXP, the power-on handling invokes
+sleeping functions, which triggers the 'scheduling while atomic' bug in the
+context switch path during device probing:
+
+ BUG: scheduling while atomic: kworker/u13:1/48/0x00000002
+ Call trace:
+  __schedule_bug+0x54/0x6c
+  __schedule+0x7f0/0xa94
+  schedule+0x5c/0xc4
+  schedule_preempt_disabled+0x24/0x40
+  __mutex_lock.constprop.0+0x2c0/0x540
+  __mutex_lock_slowpath+0x14/0x20
+  mutex_lock+0x48/0x54
+  clk_prepare_lock+0x44/0xa0
+  clk_prepare+0x20/0x44
+  imx_irqsteer_resume+0x28/0xe0
+  pm_generic_runtime_resume+0x2c/0x44
+  __genpd_runtime_resume+0x30/0x80
+  genpd_runtime_resume+0xc8/0x2c0
+  __rpm_callback+0x48/0x1d8
+  rpm_callback+0x6c/0x78
+  rpm_resume+0x490/0x6b4
+  __pm_runtime_resume+0x50/0x94
+  irq_chip_pm_get+0x2c/0xa0
+  __irq_do_set_handler+0x178/0x24c
+  irq_set_chained_handler_and_data+0x60/0xa4
+  mxc_gpio_probe+0x160/0x4b0
+
+Cure this by implementing the irq_bus_lock/sync_unlock() interrupt chip
+callbacks and handle power management in them as they are invoked from
+non-atomic context.
+
+[ tglx: Rewrote change log, added Fixes tag ]
+
+Fixes: 0136afa08967 ("irqchip: Add driver for imx-irqsteer controller")
+Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240703163250.47887-1-shenwei.wang@nxp.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-imx-irqsteer.c | 24 +++++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
+index b1ecdb08c618c..4bdcefa44f11e 100644
+--- a/drivers/irqchip/irq-imx-irqsteer.c
++++ b/drivers/irqchip/irq-imx-irqsteer.c
+@@ -35,6 +35,7 @@ struct irqsteer_data {
+       int                     channel;
+       struct irq_domain       *domain;
+       u32                     *saved_reg;
++      struct device           *dev;
+ };
+ static int imx_irqsteer_get_reg_index(struct irqsteer_data *data,
+@@ -71,10 +72,26 @@ static void imx_irqsteer_irq_mask(struct irq_data *d)
+       raw_spin_unlock_irqrestore(&data->lock, flags);
+ }
++static void imx_irqsteer_irq_bus_lock(struct irq_data *d)
++{
++      struct irqsteer_data *data = d->chip_data;
++
++      pm_runtime_get_sync(data->dev);
++}
++
++static void imx_irqsteer_irq_bus_sync_unlock(struct irq_data *d)
++{
++      struct irqsteer_data *data = d->chip_data;
++
++      pm_runtime_put_autosuspend(data->dev);
++}
++
+ static const struct irq_chip imx_irqsteer_irq_chip = {
+-      .name           = "irqsteer",
+-      .irq_mask       = imx_irqsteer_irq_mask,
+-      .irq_unmask     = imx_irqsteer_irq_unmask,
++      .name                   = "irqsteer",
++      .irq_mask               = imx_irqsteer_irq_mask,
++      .irq_unmask             = imx_irqsteer_irq_unmask,
++      .irq_bus_lock           = imx_irqsteer_irq_bus_lock,
++      .irq_bus_sync_unlock    = imx_irqsteer_irq_bus_sync_unlock,
+ };
+ static int imx_irqsteer_irq_map(struct irq_domain *h, unsigned int irq,
+@@ -152,6 +169,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev)
+       if (!data)
+               return -ENOMEM;
++      data->dev = &pdev->dev;
+       data->regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(data->regs)) {
+               dev_err(&pdev->dev, "failed to initialize reg\n");
+-- 
+2.43.0
+
diff --git a/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch b/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch
new file mode 100644 (file)
index 0000000..dcb7494
--- /dev/null
@@ -0,0 +1,419 @@
+From f255c8425c2d31cac7c0aebceabe903c271db4d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Mar 2022 14:45:51 -0600
+Subject: net: Add l3mdev index to flow struct and avoid oif reset for port
+ devices
+
+From: David Ahern <dsahern@kernel.org>
+
+[ Upstream commit 40867d74c374b235e14d839f3a77f26684feefe5 ]
+
+The fundamental premise of VRF and l3mdev core code is binding a socket
+to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope.
+Legacy code resets flowi_oif to the l3mdev losing any original port
+device binding. Ben (among others) has demonstrated use cases where the
+original port device binding is important and needs to be retained.
+This patch handles that by adding a new entry to the common flow struct
+that can indicate the l3mdev index for later rule and table matching
+avoiding the need to reset flowi_oif.
+
+In addition to allowing more use cases that require port device binds,
+this patch brings a few datapath simplifications:
+
+1. l3mdev_fib_rule_match is only called when walking fib rules and
+   always after l3mdev_update_flow. That allows an optimization to bail
+   early for non-VRF type use cases when flowi_l3mdev is not set. Also,
+   only that index needs to be checked for the FIB table id.
+
+2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev
+   (e.g., VRF) device. By resetting flowi_oif only for this case the
+   FLOWI_FLAG_SKIP_NH_OIF flag is no longer needed and can be removed,
+   removing several checks in the datapath. The flowi_iif path can be
+   simplified to only be called if it is not loopback (loopback can
+   not be assigned to an L3 domain) and the l3mdev index is not already
+   set.
+
+3. Avoid another device lookup in the output path when the fib lookup
+   returns a reject failure.
+
+Note: 2 functional tests for local traffic with reject fib rules are
+updated to reflect the new direct failure at FIB lookup time for ping
+rather than the failure on packet path. The current code fails like this:
+
+    HINT: Fails since address on vrf device is out of device scope
+    COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1
+    ping: Warning: source address might be selected on device other than: eth1
+    PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data.
+
+    --- 172.16.3.1 ping statistics ---
+    1 packets transmitted, 0 received, 100% packet loss, time 0ms
+
+where the test now directly fails:
+
+    HINT: Fails since address on vrf device is out of device scope
+    COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1
+    ping: connect: No route to host
+
+Signed-off-by: David Ahern <dsahern@kernel.org>
+Tested-by: Ben Greear <greearb@candelatech.com>
+Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 680735235356 ("ipv4: fix source address selection with route leak")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vrf.c                         |  7 ++--
+ include/net/flow.h                        |  6 +++-
+ net/ipv4/fib_frontend.c                   |  7 ++--
+ net/ipv4/fib_semantics.c                  |  2 +-
+ net/ipv4/fib_trie.c                       |  7 ++--
+ net/ipv4/route.c                          |  4 +--
+ net/ipv4/xfrm4_policy.c                   |  4 +--
+ net/ipv6/ip6_output.c                     |  3 +-
+ net/ipv6/route.c                          | 12 -------
+ net/ipv6/xfrm6_policy.c                   |  3 +-
+ net/l3mdev/l3mdev.c                       | 43 +++++++++--------------
+ tools/testing/selftests/net/fcnal-test.sh |  2 +-
+ 12 files changed, 37 insertions(+), 63 deletions(-)
+
+diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
+index 8ab0b5a8dfeff..13ad434643b80 100644
+--- a/drivers/net/vrf.c
++++ b/drivers/net/vrf.c
+@@ -470,14 +470,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
+       memset(&fl6, 0, sizeof(fl6));
+       /* needed to match OIF rule */
+-      fl6.flowi6_oif = dev->ifindex;
++      fl6.flowi6_l3mdev = dev->ifindex;
+       fl6.flowi6_iif = LOOPBACK_IFINDEX;
+       fl6.daddr = iph->daddr;
+       fl6.saddr = iph->saddr;
+       fl6.flowlabel = ip6_flowinfo(iph);
+       fl6.flowi6_mark = skb->mark;
+       fl6.flowi6_proto = iph->nexthdr;
+-      fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+       dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL);
+       if (IS_ERR(dst) || dst == dst_null)
+@@ -550,10 +549,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
+       memset(&fl4, 0, sizeof(fl4));
+       /* needed to match OIF rule */
+-      fl4.flowi4_oif = vrf_dev->ifindex;
++      fl4.flowi4_l3mdev = vrf_dev->ifindex;
+       fl4.flowi4_iif = LOOPBACK_IFINDEX;
+       fl4.flowi4_tos = RT_TOS(ip4h->tos);
+-      fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
++      fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
+       fl4.flowi4_proto = ip4h->protocol;
+       fl4.daddr = ip4h->daddr;
+       fl4.saddr = ip4h->saddr;
+diff --git a/include/net/flow.h b/include/net/flow.h
+index 7ffa1fe1107cc..1c19af4f3b97e 100644
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -29,6 +29,7 @@ struct flowi_tunnel {
+ struct flowi_common {
+       int     flowic_oif;
+       int     flowic_iif;
++      int     flowic_l3mdev;
+       __u32   flowic_mark;
+       __u8    flowic_tos;
+       __u8    flowic_scope;
+@@ -36,7 +37,6 @@ struct flowi_common {
+       __u8    flowic_flags;
+ #define FLOWI_FLAG_ANYSRC             0x01
+ #define FLOWI_FLAG_KNOWN_NH           0x02
+-#define FLOWI_FLAG_SKIP_NH_OIF                0x04
+       __u32   flowic_secid;
+       kuid_t  flowic_uid;
+       __u32           flowic_multipath_hash;
+@@ -66,6 +66,7 @@ struct flowi4 {
+       struct flowi_common     __fl_common;
+ #define flowi4_oif            __fl_common.flowic_oif
+ #define flowi4_iif            __fl_common.flowic_iif
++#define flowi4_l3mdev         __fl_common.flowic_l3mdev
+ #define flowi4_mark           __fl_common.flowic_mark
+ #define flowi4_tos            __fl_common.flowic_tos
+ #define flowi4_scope          __fl_common.flowic_scope
+@@ -99,6 +100,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
+ {
+       fl4->flowi4_oif = oif;
+       fl4->flowi4_iif = LOOPBACK_IFINDEX;
++      fl4->flowi4_l3mdev = 0;
+       fl4->flowi4_mark = mark;
+       fl4->flowi4_tos = tos;
+       fl4->flowi4_scope = scope;
+@@ -129,6 +131,7 @@ struct flowi6 {
+       struct flowi_common     __fl_common;
+ #define flowi6_oif            __fl_common.flowic_oif
+ #define flowi6_iif            __fl_common.flowic_iif
++#define flowi6_l3mdev         __fl_common.flowic_l3mdev
+ #define flowi6_mark           __fl_common.flowic_mark
+ #define flowi6_scope          __fl_common.flowic_scope
+ #define flowi6_proto          __fl_common.flowic_proto
+@@ -159,6 +162,7 @@ struct flowi {
+       } u;
+ #define flowi_oif     u.__fl_common.flowic_oif
+ #define flowi_iif     u.__fl_common.flowic_iif
++#define flowi_l3mdev  u.__fl_common.flowic_l3mdev
+ #define flowi_mark    u.__fl_common.flowic_mark
+ #define flowi_tos     u.__fl_common.flowic_tos
+ #define flowi_scope   u.__fl_common.flowic_scope
+diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+index 41f890bf9d4c4..0a61b993d823f 100644
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
+               bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
+               struct flowi4 fl4 = {
+                       .flowi4_iif = LOOPBACK_IFINDEX,
+-                      .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
++                      .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
+                       .daddr = ip_hdr(skb)->saddr,
+                       .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
+                       .flowi4_scope = scope,
+@@ -352,9 +352,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+       bool dev_match;
+       fl4.flowi4_oif = 0;
+-      fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
+-      if (!fl4.flowi4_iif)
+-              fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
++      fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev);
++      fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+       fl4.daddr = src;
+       fl4.saddr = dst;
+       fl4.flowi4_tos = tos;
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index bb5255178d75c..a308d3f0f845c 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -2268,7 +2268,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
+ void fib_select_path(struct net *net, struct fib_result *res,
+                    struct flowi4 *fl4, const struct sk_buff *skb)
+ {
+-      if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
++      if (fl4->flowi4_oif)
+               goto check_saddr;
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index 3f4f6458d40e9..1bdcdc79d43f9 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1384,11 +1384,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
+           !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+               return false;
+-      if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
+-              if (flp->flowi4_oif &&
+-                  flp->flowi4_oif != nhc->nhc_oif)
+-                      return false;
+-      }
++      if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
++              return false;
+       return true;
+ }
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 1eb1e4316ed6d..c34386a9d99b4 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2200,6 +2200,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+       /*
+        *      Now we are ready to route packet.
+        */
++      fl4.flowi4_l3mdev = 0;
+       fl4.flowi4_oif = 0;
+       fl4.flowi4_iif = dev->ifindex;
+       fl4.flowi4_mark = skb->mark;
+@@ -2676,8 +2677,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
+               res->fi = NULL;
+               res->table = NULL;
+               if (fl4->flowi4_oif &&
+-                  (ipv4_is_multicast(fl4->daddr) ||
+-                  !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
++                  (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) {
+                       /* Apparently, routing tables are wrong. Assume,
+                          that the destination is on link.
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 9ebd54752e03b..4548a91acdc89 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
+       memset(fl4, 0, sizeof(*fl4));
+       fl4->daddr = daddr->a4;
+       fl4->flowi4_tos = tos;
+-      fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
++      fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
+       fl4->flowi4_mark = mark;
+       if (saddr)
+               fl4->saddr = saddr->a4;
+-      fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
+-
+       rt = __ip_route_output_key(net, fl4);
+       if (!IS_ERR(rt))
+               return &rt->dst;
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 32512b8ca5e72..ae00e2c7ee058 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1067,8 +1067,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
+ #ifdef CONFIG_IPV6_SUBTREES
+           ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
+ #endif
+-         (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+-            (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
++         (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+               dst_release(dst);
+               dst = NULL;
+       }
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 799779475c7de..37e05a77fe49e 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1207,9 +1207,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
+       struct fib6_node *fn;
+       struct rt6_info *rt;
+-      if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+-              flags &= ~RT6_LOOKUP_F_IFACE;
+-
+       rcu_read_lock();
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+ restart:
+@@ -2183,9 +2180,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
+       fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+       saved_fn = fn;
+-      if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+-              oif = 0;
+-
+ redo_rt6_select:
+       rt6_select(net, fn, oif, res, strict);
+       if (res->f6i == net->ipv6.fib6_null_entry) {
+@@ -2932,12 +2926,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
+       struct fib6_info *rt;
+       struct fib6_node *fn;
+-      /* l3mdev_update_flow overrides oif if the device is enslaved; in
+-       * this case we must match on the real ingress device, so reset it
+-       */
+-      if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+-              fl6->flowi6_oif = skb->dev->ifindex;
+-
+       /* Get the "current" route for this destination and
+        * check if the redirect has come from appropriate router.
+        *
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 7c903e0e446cb..492b9692c0dc0 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
+       int err;
+       memset(&fl6, 0, sizeof(fl6));
+-      fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
+-      fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
++      fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
+       fl6.flowi6_mark = mark;
+       memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
+       if (saddr)
+diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
+index f2c3a61ad134b..42794581762cb 100644
+--- a/net/l3mdev/l3mdev.c
++++ b/net/l3mdev/l3mdev.c
+@@ -249,25 +249,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
+       struct net_device *dev;
+       int rc = 0;
+-      rcu_read_lock();
++      /* update flow ensures flowi_l3mdev is set when relevant */
++      if (!fl->flowi_l3mdev)
++              return 0;
+-      dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+-      if (dev && netif_is_l3_master(dev) &&
+-          dev->l3mdev_ops->l3mdev_fib_table) {
+-              arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+-              rc = 1;
+-              goto out;
+-      }
++      rcu_read_lock();
+-      dev = dev_get_by_index_rcu(net, fl->flowi_iif);
++      dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev);
+       if (dev && netif_is_l3_master(dev) &&
+           dev->l3mdev_ops->l3mdev_fib_table) {
+               arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
+               rc = 1;
+-              goto out;
+       }
+-out:
+       rcu_read_unlock();
+       return rc;
+@@ -276,31 +270,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
+ void l3mdev_update_flow(struct net *net, struct flowi *fl)
+ {
+       struct net_device *dev;
+-      int ifindex;
+       rcu_read_lock();
+       if (fl->flowi_oif) {
+               dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+               if (dev) {
+-                      ifindex = l3mdev_master_ifindex_rcu(dev);
+-                      if (ifindex) {
+-                              fl->flowi_oif = ifindex;
+-                              fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+-                              goto out;
+-                      }
++                      if (!fl->flowi_l3mdev)
++                              fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
++
++                      /* oif set to L3mdev directs lookup to its table;
++                       * reset to avoid oif match in fib_lookup
++                       */
++                      if (netif_is_l3_master(dev))
++                              fl->flowi_oif = 0;
++                      goto out;
+               }
+       }
+-      if (fl->flowi_iif) {
++      if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) {
+               dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+-              if (dev) {
+-                      ifindex = l3mdev_master_ifindex_rcu(dev);
+-                      if (ifindex) {
+-                              fl->flowi_iif = ifindex;
+-                              fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+-                      }
+-              }
++              if (dev)
++                      fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev);
+       }
+ out:
+diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
+index e13b0fb63333f..acffe0029fdd1 100755
+--- a/tools/testing/selftests/net/fcnal-test.sh
++++ b/tools/testing/selftests/net/fcnal-test.sh
+@@ -741,7 +741,7 @@ ipv4_ping_vrf()
+               log_start
+               show_hint "Fails since address on vrf device is out of device scope"
+               run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
+-              log_test_addr ${a} $? 1 "ping local, device bind"
++              log_test_addr ${a} $? 2 "ping local, device bind"
+       done
+       #
+-- 
+2.43.0
+
diff --git a/queue-5.10/remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch b/queue-5.10/remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch
new file mode 100644 (file)
index 0000000..049735b
--- /dev/null
@@ -0,0 +1,44 @@
+From b044e39b28e9c4e72b3e49ef6df2cbdce3a9a0a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Sep 2021 17:06:19 +0800
+Subject: remoteproc: imx_rproc: Fix ignoring mapping vdev regions
+
+From: Dong Aisheng <aisheng.dong@nxp.com>
+
+[ Upstream commit afe670e23af91d8a74a8d7049f6e0984bbf6ea11 ]
+
+vdev regions are typically named vdev0buffer, vdev0ring0, vdev0ring1,
+etc. Change to strncmp to cover them all.
+
+Fixes: 8f2d8961640f ("remoteproc: imx_rproc: ignore mapping vdev regions")
+Reviewed-and-tested-by: Peng Fan <peng.fan@nxp.com>
+Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
+Signed-off-by: Peng Fan <peng.fan@nxp.com>
+Cc: stable <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20210910090621.3073540-5-peng.fan@oss.nxp.com
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Stable-dep-of: 2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/remoteproc/imx_rproc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
+index 6e233f6289200..517d1b5733288 100644
+--- a/drivers/remoteproc/imx_rproc.c
++++ b/drivers/remoteproc/imx_rproc.c
+@@ -287,8 +287,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
+               struct resource res;
+               node = of_parse_phandle(np, "memory-region", a);
+-              /* Not map vdev region */
+-              if (!strcmp(node->name, "vdev"))
++              /* Not map vdevbuffer, vdevring region */
++              if (!strncmp(node->name, "vdev", strlen("vdev")))
+                       continue;
+               err = of_address_to_resource(node, 0, &res);
+               if (err) {
+-- 
+2.43.0
+
diff --git a/queue-5.10/remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch b/queue-5.10/remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch
new file mode 100644 (file)
index 0000000..659f003
--- /dev/null
@@ -0,0 +1,40 @@
+From 736e96345bb3c37fd6ba1c879dff082224e8ee26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 6 Mar 2021 19:24:24 +0800
+Subject: remoteproc: imx_rproc: ignore mapping vdev regions
+
+From: Peng Fan <peng.fan@nxp.com>
+
+[ Upstream commit 8f2d8961640f0346cbe892273c3260a0d30c1931 ]
+
+vdev regions are vdev0vring0, vdev0vring1, vdevbuffer and similar.
+They are handled by remoteproc common code, no need to map in imx
+rproc driver.
+
+Signed-off-by: Peng Fan <peng.fan@nxp.com>
+Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Link: https://lore.kernel.org/r/1615029865-23312-10-git-send-email-peng.fan@oss.nxp.com
+Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
+Stable-dep-of: 2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/remoteproc/imx_rproc.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
+index 8957ed271d209..6e233f6289200 100644
+--- a/drivers/remoteproc/imx_rproc.c
++++ b/drivers/remoteproc/imx_rproc.c
+@@ -287,6 +287,9 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
+               struct resource res;
+               node = of_parse_phandle(np, "memory-region", a);
++              /* Not map vdev region */
++              if (!strcmp(node->name, "vdev"))
++                      continue;
+               err = of_address_to_resource(node, 0, &res);
+               if (err) {
+                       dev_err(dev, "unable to resolve memory region\n");
+-- 
+2.43.0
+
diff --git a/queue-5.10/remoteproc-imx_rproc-skip-over-memory-region-when-no.patch b/queue-5.10/remoteproc-imx_rproc-skip-over-memory-region-when-no.patch
new file mode 100644 (file)
index 0000000..7bcfb50
--- /dev/null
@@ -0,0 +1,45 @@
+From 0152ad810d1dff071647bdad9460b1b613c2951e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jun 2024 10:52:04 +0300
+Subject: remoteproc: imx_rproc: Skip over memory region when node value is
+ NULL
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit 2fa26ca8b786888673689ccc9da6094150939982 ]
+
+In imx_rproc_addr_init() "nph = of_count_phandle_with_args()" just counts
+number of phandles. But phandles may be empty. So of_parse_phandle() in
+the parsing loop (0 < a < nph) may return NULL which is later dereferenced.
+Adjust this issue by adding NULL-return check.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver")
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Reviewed-by: Peng Fan <peng.fan@nxp.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240606075204.12354-1-amishin@t-argos.ru
+[Fixed title to fit within the prescribed 70-75 characters]
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/remoteproc/imx_rproc.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
+index 517d1b5733288..373fce8b91064 100644
+--- a/drivers/remoteproc/imx_rproc.c
++++ b/drivers/remoteproc/imx_rproc.c
+@@ -287,6 +287,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
+               struct resource res;
+               node = of_parse_phandle(np, "memory-region", a);
++              if (!node)
++                      continue;
+               /* Not map vdevbuffer, vdevring region */
+               if (!strncmp(node->name, "vdev", strlen("vdev")))
+                       continue;
+-- 
+2.43.0
+
index a025b88c97a51577d93f80fcd5d771d2295d418e..e67dc8ee69e9b2fb562e797378fd15249cbde919 100644 (file)
@@ -215,3 +215,29 @@ nvme-split-command-copy-into-a-helper.patch
 nvme-pci-add-missing-condition-check-for-existence-o.patch
 fs-don-t-allow-non-init-s_user_ns-for-filesystems-wi.patch
 powerpc-configs-update-defconfig-with-now-user-visible-config_fsl_ifc.patch
+net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch
+ipv4-fix-source-address-selection-with-route-leak.patch
+fuse-name-fs_context-consistently.patch
+fuse-verify-g-u-id-mount-options-correctly.patch
+ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch
+ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch
+ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch
+ipc-store-ipc-sysctls-in-the-ipc-namespace.patch
+ipc-check-permissions-for-checkpoint_restart-sysctls.patch
+sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch
+sysctl-allow-to-change-limits-for-posix-messages-que.patch
+sysctl-treewide-drop-unused-argument-ctl_table_root-.patch
+sysctl-always-initialize-i_uid-i_gid.patch
+ext4-factor-out-a-common-helper-to-query-extent-map.patch
+ext4-check-the-extent-status-again-before-inserting-.patch
+soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch
+drivers-soc-xilinx-check-return-status-of-get_api_ve.patch
+driver-core-cast-to-void-with-__force-for-__percpu-p.patch
+devres-fix-memory-leakage-caused-by-driver-api-devm_.patch
+genirq-allow-the-pm-device-to-originate-from-irq-dom.patch
+irqchip-imx-irqsteer-constify-irq_chip-struct.patch
+irqchip-imx-irqsteer-add-runtime-pm-support.patch
+irqchip-imx-irqsteer-handle-runtime-power-management.patch
+remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch
+remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch
+remoteproc-imx_rproc-skip-over-memory-region-when-no.patch
diff --git a/queue-5.10/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch b/queue-5.10/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch
new file mode 100644 (file)
index 0000000..4952265
--- /dev/null
@@ -0,0 +1,92 @@
+From 79285036ed34e612265d038c29426a15a864b967 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Aug 2021 17:03:10 +0200
+Subject: soc: xilinx: move PM_INIT_FINALIZE to zynqmp_pm_domains driver
+
+From: Michael Tretter <m.tretter@pengutronix.de>
+
+[ Upstream commit 7fd890b89dea55eb5866640eb8befad26d558161 ]
+
+PM_INIT_FINALIZE tells the PMU FW that Linux is able to handle the power
+management nodes that are provided by the PMU FW. Nodes that are not
+requested are shut down after this call.
+
+Calling PM_INIT_FINALIZE from the zynqmp_power driver is wrong. The PM
+node request mechanism is implemented in the zynqmp_pm_domains driver,
+which must also call PM_INIT_FINALIZE.
+
+Due to the behavior of the PMU FW, all devices must be powered up before
+PM_INIT_FINALIZE is called, because otherwise the devices might
+misbehave. Calling PM_INIT_FINALIZE from the sync_state device callback
+ensures that all users probed successfully before the PMU FW is allowed
+to power off unused domains.
+
+Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
+Acked-by: Michal Simek <michal.simek@xilinx.com>
+Acked-by: Rajan Vaja <rajan.vaja@xilinx.com>
+Link: https://lore.kernel.org/r/20210825150313.4033156-2-m.tretter@pengutronix.de
+Signed-off-by: Michal Simek <michal.simek@xilinx.com>
+Stable-dep-of: 9b003e14801c ("drivers: soc: xilinx: check return status of get_api_version()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/xilinx/zynqmp_pm_domains.c | 16 ++++++++++++++++
+ drivers/soc/xilinx/zynqmp_power.c      |  1 -
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c
+index 226d343f0a6a5..81e8e10f10929 100644
+--- a/drivers/soc/xilinx/zynqmp_pm_domains.c
++++ b/drivers/soc/xilinx/zynqmp_pm_domains.c
+@@ -152,11 +152,17 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain)
+ static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain,
+                                struct device *dev)
+ {
++      struct device_link *link;
+       int ret;
+       struct zynqmp_pm_domain *pd;
+       pd = container_of(domain, struct zynqmp_pm_domain, gpd);
++      link = device_link_add(dev, &domain->dev, DL_FLAG_SYNC_STATE_ONLY);
++      if (!link)
++              dev_dbg(&domain->dev, "failed to create device link for %s\n",
++                      dev_name(dev));
++
+       /* If this is not the first device to attach there is nothing to do */
+       if (domain->device_count)
+               return 0;
+@@ -299,9 +305,19 @@ static int zynqmp_gpd_remove(struct platform_device *pdev)
+       return 0;
+ }
++static void zynqmp_gpd_sync_state(struct device *dev)
++{
++      int ret;
++
++      ret = zynqmp_pm_init_finalize();
++      if (ret)
++              dev_warn(dev, "failed to release power management to firmware\n");
++}
++
+ static struct platform_driver zynqmp_power_domain_driver = {
+       .driver = {
+               .name = "zynqmp_power_controller",
++              .sync_state = zynqmp_gpd_sync_state,
+       },
+       .probe = zynqmp_gpd_probe,
+       .remove = zynqmp_gpd_remove,
+diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c
+index c556623dae024..f8c301984d4f9 100644
+--- a/drivers/soc/xilinx/zynqmp_power.c
++++ b/drivers/soc/xilinx/zynqmp_power.c
+@@ -178,7 +178,6 @@ static int zynqmp_pm_probe(struct platform_device *pdev)
+       u32 pm_api_version;
+       struct mbox_client *client;
+-      zynqmp_pm_init_finalize();
+       zynqmp_pm_get_api_version(&pm_api_version);
+       /* Check PM API version number */
+-- 
+2.43.0
+
diff --git a/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch b/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch
new file mode 100644 (file)
index 0000000..a5744c7
--- /dev/null
@@ -0,0 +1,140 @@
+From 8443e2e5cb24ee6393549421a32d0798a334e92e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Jan 2024 15:46:41 +0000
+Subject: sysctl: allow change system v ipc sysctls inside ipc namespace
+
+From: Alexey Gladkov <legion@kernel.org>
+
+[ Upstream commit 50ec499b9a43e46200c9f7b7d723ab2e4af540b3 ]
+
+Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3.
+
+Right now ipc and mq limits count as per ipc namespace, but only real root
+can change them.  By default, the current values of these limits are such
+that they can only be reduced.  Since only root can change the values, it is
+impossible to reduce these limits in the rootless container.
+
+We can allow limit changes within ipc namespace because mq parameters are
+limited by RLIMIT_MSGQUEUE and ipc parameters are not limited to anything
+other than cgroups.
+
+This patch (of 3):
+
+Rootless containers are not allowed to modify kernel IPC parameters.
+
+All default limits are set to such high values that in fact there are no
+limits at all.  All limits are not inherited and are initialized to
+default values when a new ipc_namespace is created.
+
+For new ipc_namespace:
+
+size_t       ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24))
+size_t       ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24))
+int          ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15)
+int          ipc_ns.shm_rmid_forced = 0;
+unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192
+unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000
+unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384
+
+The shm_tot (total amount of shared pages) has also ceased to be global,
+it is located in ipc_namespace and is not inherited from anywhere.
+
+In such conditions, it cannot be said that these limits limit anything.
+The real limiter for them is cgroups.
+
+If we allow rootless containers to change these parameters, then they can
+only be reduced.
+
+Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org
+Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org
+Signed-off-by: Alexey Gladkov <legion@kernel.org>
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Joel Granados <joel.granados@gmail.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++--
+ 1 file changed, 35 insertions(+), 2 deletions(-)
+
+diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
+index a2b871d006da7..2864fd7fafaac 100644
+--- a/ipc/ipc_sysctl.c
++++ b/ipc/ipc_sysctl.c
+@@ -14,6 +14,7 @@
+ #include <linux/ipc_namespace.h>
+ #include <linux/msg.h>
+ #include <linux/slab.h>
++#include <linux/cred.h>
+ #include "util.h"
+ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
+@@ -198,25 +199,57 @@ static int set_is_seen(struct ctl_table_set *set)
+       return &current->nsproxy->ipc_ns->ipc_set == set;
+ }
++static void ipc_set_ownership(struct ctl_table_header *head,
++                            struct ctl_table *table,
++                            kuid_t *uid, kgid_t *gid)
++{
++      struct ipc_namespace *ns =
++              container_of(head->set, struct ipc_namespace, ipc_set);
++
++      kuid_t ns_root_uid = make_kuid(ns->user_ns, 0);
++      kgid_t ns_root_gid = make_kgid(ns->user_ns, 0);
++
++      *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID;
++      *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID;
++}
++
+ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
+ {
+       int mode = table->mode;
+ #ifdef CONFIG_CHECKPOINT_RESTORE
+-      struct ipc_namespace *ns = current->nsproxy->ipc_ns;
++      struct ipc_namespace *ns =
++              container_of(head->set, struct ipc_namespace, ipc_set);
+       if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
+            (table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
+            (table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
+           checkpoint_restore_ns_capable(ns->user_ns))
+               mode = 0666;
++      else
+ #endif
+-      return mode;
++      {
++              kuid_t ns_root_uid;
++              kgid_t ns_root_gid;
++
++              ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid);
++
++              if (uid_eq(current_euid(), ns_root_uid))
++                      mode >>= 6;
++
++              else if (in_egroup_p(ns_root_gid))
++                      mode >>= 3;
++      }
++
++      mode &= 7;
++
++      return (mode << 6) | (mode << 3) | mode;
+ }
+ static struct ctl_table_root set_root = {
+       .lookup = set_lookup,
+       .permissions = ipc_permissions,
++      .set_ownership = ipc_set_ownership,
+ };
+ bool setup_ipc_sysctls(struct ipc_namespace *ns)
+-- 
+2.43.0
+
diff --git a/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch b/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch
new file mode 100644 (file)
index 0000000..608fe54
--- /dev/null
@@ -0,0 +1,95 @@
+From 2c050d168b37b543fedfec9d518ccbc182113e09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Jan 2024 15:46:43 +0000
+Subject: sysctl: allow to change limits for posix messages queues
+
+From: Alexey Gladkov <legion@kernel.org>
+
+[ Upstream commit f9436a5d0497f759330d07e1189565edd4456be8 ]
+
+All parameters of posix messages queues (queues_max/msg_max/msgsize_max)
+end up being limited by RLIMIT_MSGQUEUE.  The code in mqueue_get_inode is
+where that limiting happens.
+
+The RLIMIT_MSGQUEUE is bound to the user namespace and is counted
+hierarchically.
+
+We can allow root in the user namespace to modify the posix messages
+queues parameters.
+
+Link: https://lkml.kernel.org/r/6ad67f23d1459a4f4339f74aa73bac0ecf3995e1.1705333426.git.legion@kernel.org
+Signed-off-by: Alexey Gladkov <legion@kernel.org>
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Link: https://lkml.kernel.org/r/7eb21211c8622e91d226e63416b1b93c079f60ee.1663756794.git.legion@kernel.org
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Davidlohr Bueso <dave@stgolabs.net>
+Cc: Joel Granados <joel.granados@gmail.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: Manfred Spraul <manfred@colorfullife.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ ipc/mq_sysctl.c | 36 ++++++++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
+index fbf6a8b93a265..ce03930aced55 100644
+--- a/ipc/mq_sysctl.c
++++ b/ipc/mq_sysctl.c
+@@ -12,6 +12,7 @@
+ #include <linux/stat.h>
+ #include <linux/capability.h>
+ #include <linux/slab.h>
++#include <linux/cred.h>
+ static int msg_max_limit_min = MIN_MSGMAX;
+ static int msg_max_limit_max = HARD_MSGMAX;
+@@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set)
+       return &current->nsproxy->ipc_ns->mq_set == set;
+ }
++static void mq_set_ownership(struct ctl_table_header *head,
++                           struct ctl_table *table,
++                           kuid_t *uid, kgid_t *gid)
++{
++      struct ipc_namespace *ns =
++              container_of(head->set, struct ipc_namespace, mq_set);
++
++      kuid_t ns_root_uid = make_kuid(ns->user_ns, 0);
++      kgid_t ns_root_gid = make_kgid(ns->user_ns, 0);
++
++      *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID;
++      *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID;
++}
++
++static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table)
++{
++      int mode = table->mode;
++      kuid_t ns_root_uid;
++      kgid_t ns_root_gid;
++
++      mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid);
++
++      if (uid_eq(current_euid(), ns_root_uid))
++              mode >>= 6;
++
++      else if (in_egroup_p(ns_root_gid))
++              mode >>= 3;
++
++      mode &= 7;
++
++      return (mode << 6) | (mode << 3) | mode;
++}
++
+ static struct ctl_table_root set_root = {
+       .lookup = set_lookup,
++      .permissions = mq_permissions,
++      .set_ownership = mq_set_ownership,
+ };
+ bool setup_mq_sysctls(struct ipc_namespace *ns)
+-- 
+2.43.0
+
diff --git a/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch b/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch
new file mode 100644 (file)
index 0000000..881e29c
--- /dev/null
@@ -0,0 +1,52 @@
+From 5d7defeab6026e3ce687533b8c44874295f0a96a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Apr 2024 23:10:34 +0200
+Subject: sysctl: always initialize i_uid/i_gid
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Weißschuh <linux@weissschuh.net>
+
+[ Upstream commit 98ca62ba9e2be5863c7d069f84f7166b45a5b2f4 ]
+
+Always initialize i_uid/i_gid inside the sysctl core so set_ownership()
+can safely skip setting them.
+
+Commit 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of
+i_uid/i_gid on /proc/sys inodes.") added defaults for i_uid/i_gid when
+set_ownership() was not implemented. It also missed adjusting
+net_ctl_set_ownership() to use the same default values in case the
+computation of a better value failed.
+
+Fixes: 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of i_uid/i_gid on /proc/sys inodes.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
+Signed-off-by: Joel Granados <j.granados@samsung.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/proc_sysctl.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
+index d61777c67ada8..d97e2d399fe6d 100644
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -471,12 +471,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
+                       make_empty_dir_inode(inode);
+       }
++      inode->i_uid = GLOBAL_ROOT_UID;
++      inode->i_gid = GLOBAL_ROOT_GID;
+       if (root->set_ownership)
+               root->set_ownership(head, &inode->i_uid, &inode->i_gid);
+-      else {
+-              inode->i_uid = GLOBAL_ROOT_UID;
+-              inode->i_gid = GLOBAL_ROOT_GID;
+-      }
+       return inode;
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch b/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch
new file mode 100644 (file)
index 0000000..746430a
--- /dev/null
@@ -0,0 +1,127 @@
+From 1b75a9102932eba02f4300ecd460eff59af6dbdd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Mar 2024 19:11:30 +0100
+Subject: sysctl: treewide: drop unused argument
+ ctl_table_root::set_ownership(table)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Weißschuh <linux@weissschuh.net>
+
+[ Upstream commit 520713a93d550406dae14d49cdb8778d70cecdfd ]
+
+Remove the 'table' argument from set_ownership as it is never used. This
+change is a step towards putting "struct ctl_table" into .rodata and
+eventually having sysctl core only use "const struct ctl_table".
+
+The patch was created with the following coccinelle script:
+
+  @@
+  identifier func, head, table, uid, gid;
+  @@
+
+  void func(
+    struct ctl_table_header *head,
+  - struct ctl_table *table,
+    kuid_t *uid, kgid_t *gid)
+  { ... }
+
+No additional occurrences of 'set_ownership' were found after doing a
+tree-wide search.
+
+Reviewed-by: Joel Granados <j.granados@samsung.com>
+Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
+Signed-off-by: Joel Granados <j.granados@samsung.com>
+Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/proc_sysctl.c  | 2 +-
+ include/linux/sysctl.h | 1 -
+ ipc/ipc_sysctl.c       | 3 +--
+ ipc/mq_sysctl.c        | 3 +--
+ net/sysctl_net.c       | 1 -
+ 5 files changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
+index aff9593feb73c..d61777c67ada8 100644
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -472,7 +472,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
+       }
+       if (root->set_ownership)
+-              root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
++              root->set_ownership(head, &inode->i_uid, &inode->i_gid);
+       else {
+               inode->i_uid = GLOBAL_ROOT_UID;
+               inode->i_gid = GLOBAL_ROOT_GID;
+diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
+index 47cf70c8eb93c..cde78b5a54295 100644
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -173,7 +173,6 @@ struct ctl_table_root {
+       struct ctl_table_set default_set;
+       struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
+       void (*set_ownership)(struct ctl_table_header *head,
+-                            struct ctl_table *table,
+                             kuid_t *uid, kgid_t *gid);
+       int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
+ };
+diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
+index 2864fd7fafaac..c118d8293d3b6 100644
+--- a/ipc/ipc_sysctl.c
++++ b/ipc/ipc_sysctl.c
+@@ -200,7 +200,6 @@ static int set_is_seen(struct ctl_table_set *set)
+ }
+ static void ipc_set_ownership(struct ctl_table_header *head,
+-                            struct ctl_table *table,
+                             kuid_t *uid, kgid_t *gid)
+ {
+       struct ipc_namespace *ns =
+@@ -232,7 +231,7 @@ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *tabl
+               kuid_t ns_root_uid;
+               kgid_t ns_root_gid;
+-              ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid);
++              ipc_set_ownership(head, &ns_root_uid, &ns_root_gid);
+               if (uid_eq(current_euid(), ns_root_uid))
+                       mode >>= 6;
+diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
+index ce03930aced55..c960691fc24d9 100644
+--- a/ipc/mq_sysctl.c
++++ b/ipc/mq_sysctl.c
+@@ -78,7 +78,6 @@ static int set_is_seen(struct ctl_table_set *set)
+ }
+ static void mq_set_ownership(struct ctl_table_header *head,
+-                           struct ctl_table *table,
+                            kuid_t *uid, kgid_t *gid)
+ {
+       struct ipc_namespace *ns =
+@@ -97,7 +96,7 @@ static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table
+       kuid_t ns_root_uid;
+       kgid_t ns_root_gid;
+-      mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid);
++      mq_set_ownership(head, &ns_root_uid, &ns_root_gid);
+       if (uid_eq(current_euid(), ns_root_uid))
+               mode >>= 6;
+diff --git a/net/sysctl_net.c b/net/sysctl_net.c
+index d14dab8b6774c..592f61eb1089b 100644
+--- a/net/sysctl_net.c
++++ b/net/sysctl_net.c
+@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head,
+ }
+ static void net_ctl_set_ownership(struct ctl_table_header *head,
+-                                struct ctl_table *table,
+                                 kuid_t *uid, kgid_t *gid)
+ {
+       struct net *net = container_of(head->set, struct net, sysctls);
+-- 
+2.43.0
+