From: Sasha Levin Date: Sat, 3 Aug 2024 14:48:14 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v6.1.104~30 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5ded06bb6cff17064809f029682627a5d6dac982;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/devres-fix-memory-leakage-caused-by-driver-api-devm_.patch b/queue-5.10/devres-fix-memory-leakage-caused-by-driver-api-devm_.patch new file mode 100644 index 00000000000..8d67fbe809a --- /dev/null +++ b/queue-5.10/devres-fix-memory-leakage-caused-by-driver-api-devm_.patch @@ -0,0 +1,43 @@ +From f719c758e33b8380fbbfc31ec202e5a74f80d568 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2024 22:51:51 +0800 +Subject: devres: Fix memory leakage caused by driver API devm_free_percpu() + +From: Zijun Hu + +[ Upstream commit bd50a974097bb82d52a458bd3ee39fb723129a0c ] + +It will cause memory leakage when use driver API devm_free_percpu() +to free memory allocated by devm_alloc_percpu(), fixed by using +devres_release() instead of devres_destroy() within devm_free_percpu(). + +Fixes: ff86aae3b411 ("devres: add devm_alloc_percpu()") +Cc: stable@vger.kernel.org +Signed-off-by: Zijun Hu +Link: https://lore.kernel.org/r/1719931914-19035-3-git-send-email-quic_zijuhu@quicinc.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/base/devres.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/base/devres.c b/drivers/base/devres.c +index a1508eeb8ebd1..8a74008c13c44 100644 +--- a/drivers/base/devres.c ++++ b/drivers/base/devres.c +@@ -1230,7 +1230,11 @@ EXPORT_SYMBOL_GPL(__devm_alloc_percpu); + */ + void devm_free_percpu(struct device *dev, void __percpu *pdata) + { +- WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match, ++ /* ++ * Use devres_release() to prevent memory leakage as ++ * devm_free_pages() does. 
++ */ ++ WARN_ON(devres_release(dev, devm_percpu_release, devm_percpu_match, + (__force void *)pdata)); + } + EXPORT_SYMBOL_GPL(devm_free_percpu); +-- +2.43.0 + diff --git a/queue-5.10/driver-core-cast-to-void-with-__force-for-__percpu-p.patch b/queue-5.10/driver-core-cast-to-void-with-__force-for-__percpu-p.patch new file mode 100644 index 00000000000..51ffee000d2 --- /dev/null +++ b/queue-5.10/driver-core-cast-to-void-with-__force-for-__percpu-p.patch @@ -0,0 +1,39 @@ +From 1737e7bfd1ca2c6c9bbe297d01e3e24256226cf4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Apr 2021 20:10:30 +0300 +Subject: driver core: Cast to (void *) with __force for __percpu pointer + +From: Andy Shevchenko + +[ Upstream commit d7aa44f5a1f86cb40659eef06035d8d92604b9d5 ] + +Sparse is not happy: + + drivers/base/devres.c:1230:9: warning: cast removes address space '__percpu' of expression + +Use __force attribute to make it happy. + +Signed-off-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20210401171030.60527-1-andriy.shevchenko@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: bd50a974097b ("devres: Fix memory leakage caused by driver API devm_free_percpu()") +Signed-off-by: Sasha Levin +--- + drivers/base/devres.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/base/devres.c b/drivers/base/devres.c +index 452541ce0dc82..a1508eeb8ebd1 100644 +--- a/drivers/base/devres.c ++++ b/drivers/base/devres.c +@@ -1231,6 +1231,6 @@ EXPORT_SYMBOL_GPL(__devm_alloc_percpu); + void devm_free_percpu(struct device *dev, void __percpu *pdata) + { + WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match, +- (void *)pdata)); ++ (__force void *)pdata)); + } + EXPORT_SYMBOL_GPL(devm_free_percpu); +-- +2.43.0 + diff --git a/queue-5.10/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch b/queue-5.10/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch new file mode 100644 index 00000000000..7d820531fc7 --- /dev/null +++ 
b/queue-5.10/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch @@ -0,0 +1,53 @@ +From a9e63de4caf04291dc9359a64863b4dc1012eae1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2024 04:23:45 -0700 +Subject: drivers: soc: xilinx: check return status of get_api_version() + +From: Jay Buddhabhatti + +[ Upstream commit 9b003e14801cf85a8cebeddc87bc9fc77100fdce ] + +Currently return status is not getting checked for get_api_version +and because of that for x86 arch we are getting below smatch error. + + CC drivers/soc/xilinx/zynqmp_power.o +drivers/soc/xilinx/zynqmp_power.c: In function 'zynqmp_pm_probe': +drivers/soc/xilinx/zynqmp_power.c:295:12: warning: 'pm_api_version' is +used uninitialized [-Wuninitialized] + 295 | if (pm_api_version < ZYNQMP_PM_VERSION) + | ^ + CHECK drivers/soc/xilinx/zynqmp_power.c +drivers/soc/xilinx/zynqmp_power.c:295 zynqmp_pm_probe() error: +uninitialized symbol 'pm_api_version'. + +So, check return status of pm_get_api_version and return error in case +of failure to avoid checking uninitialized pm_api_version variable. 
+ +Fixes: b9b3a8be28b3 ("firmware: xilinx: Remove eemi ops for get_api_version") +Signed-off-by: Jay Buddhabhatti +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240515112345.24673-1-jay.buddhabhatti@amd.com +Signed-off-by: Michal Simek +Signed-off-by: Sasha Levin +--- + drivers/soc/xilinx/zynqmp_power.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c +index f8c301984d4f9..2653d29ba829b 100644 +--- a/drivers/soc/xilinx/zynqmp_power.c ++++ b/drivers/soc/xilinx/zynqmp_power.c +@@ -178,7 +178,9 @@ static int zynqmp_pm_probe(struct platform_device *pdev) + u32 pm_api_version; + struct mbox_client *client; + +- zynqmp_pm_get_api_version(&pm_api_version); ++ ret = zynqmp_pm_get_api_version(&pm_api_version); ++ if (ret) ++ return ret; + + /* Check PM API version number */ + if (pm_api_version < ZYNQMP_PM_VERSION) +-- +2.43.0 + diff --git a/queue-5.10/ext4-check-the-extent-status-again-before-inserting-.patch b/queue-5.10/ext4-check-the-extent-status-again-before-inserting-.patch new file mode 100644 index 00000000000..341ba9664fe --- /dev/null +++ b/queue-5.10/ext4-check-the-extent-status-again-before-inserting-.patch @@ -0,0 +1,100 @@ +From 4ed5c1a089d0f6408d657cf498c880172aa7d41b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 20:39:57 +0800 +Subject: ext4: check the extent status again before inserting delalloc block + +From: Zhang Yi + +[ Upstream commit 0ea6560abb3bac1ffcfa4bf6b2c4d344fdc27b3c ] + +ext4_da_map_blocks looks up for any extent entry in the extent status +tree (w/o i_data_sem) and then looks up for any ondisk extent +mapping (with i_data_sem in read mode). + +If it finds a hole in the extent status tree or if it couldn't find any +entry at all, it then takes the i_data_sem in write mode to add a da +entry into the extent status tree. This can actually race with page +mkwrite & fallocate path. 
+ +Note that this is ok between +1. ext4 buffered-write path v/s ext4_page_mkwrite(), because of the + folio lock +2. ext4 buffered write path v/s ext4 fallocate because of the inode + lock. + +But this can race between ext4_page_mkwrite() & ext4 fallocate path + +ext4_page_mkwrite() ext4_fallocate() + block_page_mkwrite() + ext4_da_map_blocks() + //find hole in extent status tree + ext4_alloc_file_blocks() + ext4_map_blocks() + //allocate block and unwritten extent + ext4_insert_delayed_block() + ext4_da_reserve_space() + //reserve one more block + ext4_es_insert_delayed_block() + //drop unwritten extent and add delayed extent by mistake + +Then, the delalloc extent is wrong until writeback and the extra +reserved block can't be released any more and it triggers below warning: + + EXT4-fs (pmem2): Inode 13 (00000000bbbd4d23): i_reserved_data_blocks(1) not cleared! + +Fix the problem by looking up extent status tree again while the +i_data_sem is held in write mode. If it still can't find any entry, then +we insert a new da entry into the extent status tree. + +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://patch.msgid.link/20240517124005.347221-3-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index a252c84edac8c..6e9323a56d289 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1760,6 +1760,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + if (ext4_es_is_hole(&es)) + goto add_delayed; + ++found: + /* + * Delayed extent could be allocated by fallocate. + * So we need to check it. 
+@@ -1804,6 +1805,26 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + + add_delayed: + down_write(&EXT4_I(inode)->i_data_sem); ++ /* ++ * Page fault path (ext4_page_mkwrite does not take i_rwsem) ++ * and fallocate path (no folio lock) can race. Make sure we ++ * lookup the extent status tree here again while i_data_sem ++ * is held in write mode, before inserting a new da entry in ++ * the extent status tree. ++ */ ++ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { ++ if (!ext4_es_is_hole(&es)) { ++ up_write(&EXT4_I(inode)->i_data_sem); ++ goto found; ++ } ++ } else if (!ext4_has_inline_data(inode)) { ++ retval = ext4_map_query_blocks(NULL, inode, map); ++ if (retval) { ++ up_write(&EXT4_I(inode)->i_data_sem); ++ return retval; ++ } ++ } ++ + retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); + if (retval) +-- +2.43.0 + diff --git a/queue-5.10/ext4-factor-out-a-common-helper-to-query-extent-map.patch b/queue-5.10/ext4-factor-out-a-common-helper-to-query-extent-map.patch new file mode 100644 index 00000000000..0a41c9fe028 --- /dev/null +++ b/queue-5.10/ext4-factor-out-a-common-helper-to-query-extent-map.patch @@ -0,0 +1,104 @@ +From 9c8b75fd11eacab9a71251b14b7353be405fe4be Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 20:39:56 +0800 +Subject: ext4: factor out a common helper to query extent map + +From: Zhang Yi + +[ Upstream commit 8e4e5cdf2fdeb99445a468b6b6436ad79b9ecb30 ] + +Factor out a new common helper ext4_map_query_blocks() from +ext4_da_map_blocks(); it queries and returns the extent map status on the +inode's extent path, no logic changes. 
+ +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Reviewed-by: Ritesh Harjani (IBM) +Link: https://patch.msgid.link/20240517124005.347221-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 57 +++++++++++++++++++++++++++---------------------- + 1 file changed, 32 insertions(+), 25 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 8b48ed351c4b9..a252c84edac8c 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -484,6 +484,35 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, + } + #endif /* ES_AGGRESSIVE_TEST */ + ++static int ext4_map_query_blocks(handle_t *handle, struct inode *inode, ++ struct ext4_map_blocks *map) ++{ ++ unsigned int status; ++ int retval; ++ ++ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ++ retval = ext4_ext_map_blocks(handle, inode, map, 0); ++ else ++ retval = ext4_ind_map_blocks(handle, inode, map, 0); ++ ++ if (retval <= 0) ++ return retval; ++ ++ if (unlikely(retval != map->m_len)) { ++ ext4_warning(inode->i_sb, ++ "ES len assertion failed for inode " ++ "%lu: retval %d != map->m_len %d", ++ inode->i_ino, retval, map->m_len); ++ WARN_ON(1); ++ } ++ ++ status = map->m_flags & EXT4_MAP_UNWRITTEN ? ++ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); ++ return retval; ++} ++ + /* + * The ext4_map_blocks() function tries to look up the requested blocks, + * and returns if the blocks are already mapped. 
+@@ -1767,33 +1796,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_has_inline_data(inode)) + retval = 0; +- else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) +- retval = ext4_ext_map_blocks(NULL, inode, map, 0); + else +- retval = ext4_ind_map_blocks(NULL, inode, map, 0); +- if (retval < 0) { +- up_read(&EXT4_I(inode)->i_data_sem); +- return retval; +- } +- if (retval > 0) { +- unsigned int status; +- +- if (unlikely(retval != map->m_len)) { +- ext4_warning(inode->i_sb, +- "ES len assertion failed for inode " +- "%lu: retval %d != map->m_len %d", +- inode->i_ino, retval, map->m_len); +- WARN_ON(1); +- } +- +- status = map->m_flags & EXT4_MAP_UNWRITTEN ? +- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; +- ext4_es_insert_extent(inode, map->m_lblk, map->m_len, +- map->m_pblk, status); +- up_read(&EXT4_I(inode)->i_data_sem); +- return retval; +- } ++ retval = ext4_map_query_blocks(NULL, inode, map); + up_read(&EXT4_I(inode)->i_data_sem); ++ if (retval) ++ return retval; + + add_delayed: + down_write(&EXT4_I(inode)->i_data_sem); +-- +2.43.0 + diff --git a/queue-5.10/fuse-name-fs_context-consistently.patch b/queue-5.10/fuse-name-fs_context-consistently.patch new file mode 100644 index 00000000000..069bd55bf93 --- /dev/null +++ b/queue-5.10/fuse-name-fs_context-consistently.patch @@ -0,0 +1,280 @@ +From 7256357bb327af4770cfc2189f8d0312378fe513 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Aug 2021 13:22:58 +0200 +Subject: fuse: name fs_context consistently + +From: Miklos Szeredi + +[ Upstream commit 84c215075b5723ab946708a6c74c26bd3c51114c ] + +Naming convention under fs/fuse/: + + struct fuse_conn *fc; + struct fs_context *fsc; + +Signed-off-by: Miklos Szeredi +Stable-dep-of: 525bd65aa759 ("fuse: verify {g,u}id mount options correctly") +Signed-off-by: Sasha Levin +--- + fs/fuse/control.c | 10 ++++---- + fs/fuse/inode.c | 60 ++++++++++++++++++++++----------------------- 
+ fs/fuse/virtio_fs.c | 12 ++++----- + 3 files changed, 41 insertions(+), 41 deletions(-) + +diff --git a/fs/fuse/control.c b/fs/fuse/control.c +index 24b4d9db231db..79f01d09c78cb 100644 +--- a/fs/fuse/control.c ++++ b/fs/fuse/control.c +@@ -328,7 +328,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) + drop_nlink(d_inode(fuse_control_sb->s_root)); + } + +-static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx) ++static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc) + { + static const struct tree_descr empty_descr = {""}; + struct fuse_conn *fc; +@@ -354,18 +354,18 @@ static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx) + return 0; + } + +-static int fuse_ctl_get_tree(struct fs_context *fc) ++static int fuse_ctl_get_tree(struct fs_context *fsc) + { +- return get_tree_single(fc, fuse_ctl_fill_super); ++ return get_tree_single(fsc, fuse_ctl_fill_super); + } + + static const struct fs_context_operations fuse_ctl_context_ops = { + .get_tree = fuse_ctl_get_tree, + }; + +-static int fuse_ctl_init_fs_context(struct fs_context *fc) ++static int fuse_ctl_init_fs_context(struct fs_context *fsc) + { +- fc->ops = &fuse_ctl_context_ops; ++ fsc->ops = &fuse_ctl_context_ops; + return 0; + } + +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 4a7ebccd359ee..5f9b2dc59135b 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -141,12 +141,12 @@ static void fuse_evict_inode(struct inode *inode) + } + } + +-static int fuse_reconfigure(struct fs_context *fc) ++static int fuse_reconfigure(struct fs_context *fsc) + { +- struct super_block *sb = fc->root->d_sb; ++ struct super_block *sb = fsc->root->d_sb; + + sync_filesystem(sb); +- if (fc->sb_flags & SB_MANDLOCK) ++ if (fsc->sb_flags & SB_MANDLOCK) + return -EINVAL; + + return 0; +@@ -535,38 +535,38 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { + {} + }; + +-static int fuse_parse_param(struct fs_context *fc, struct fs_parameter 
*param) ++static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) + { + struct fs_parse_result result; +- struct fuse_fs_context *ctx = fc->fs_private; ++ struct fuse_fs_context *ctx = fsc->fs_private; + int opt; + +- if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { ++ if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + /* + * Ignore options coming from mount(MS_REMOUNT) for backward + * compatibility. + */ +- if (fc->oldapi) ++ if (fsc->oldapi) + return 0; + +- return invalfc(fc, "No changes allowed in reconfigure"); ++ return invalfc(fsc, "No changes allowed in reconfigure"); + } + +- opt = fs_parse(fc, fuse_fs_parameters, param, &result); ++ opt = fs_parse(fsc, fuse_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case OPT_SOURCE: +- if (fc->source) +- return invalfc(fc, "Multiple sources specified"); +- fc->source = param->string; ++ if (fsc->source) ++ return invalfc(fsc, "Multiple sources specified"); ++ fsc->source = param->string; + param->string = NULL; + break; + + case OPT_SUBTYPE: + if (ctx->subtype) +- return invalfc(fc, "Multiple subtypes specified"); ++ return invalfc(fsc, "Multiple subtypes specified"); + ctx->subtype = param->string; + param->string = NULL; + return 0; +@@ -578,22 +578,22 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) + + case OPT_ROOTMODE: + if (!fuse_valid_type(result.uint_32)) +- return invalfc(fc, "Invalid rootmode"); ++ return invalfc(fsc, "Invalid rootmode"); + ctx->rootmode = result.uint_32; + ctx->rootmode_present = true; + break; + + case OPT_USER_ID: +- ctx->user_id = make_kuid(fc->user_ns, result.uint_32); ++ ctx->user_id = make_kuid(fsc->user_ns, result.uint_32); + if (!uid_valid(ctx->user_id)) +- return invalfc(fc, "Invalid user_id"); ++ return invalfc(fsc, "Invalid user_id"); + ctx->user_id_present = true; + break; + + case OPT_GROUP_ID: +- ctx->group_id = make_kgid(fc->user_ns, result.uint_32); ++ ctx->group_id = 
make_kgid(fsc->user_ns, result.uint_32); + if (!gid_valid(ctx->group_id)) +- return invalfc(fc, "Invalid group_id"); ++ return invalfc(fsc, "Invalid group_id"); + ctx->group_id_present = true; + break; + +@@ -611,7 +611,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) + + case OPT_BLKSIZE: + if (!ctx->is_bdev) +- return invalfc(fc, "blksize only supported for fuseblk"); ++ return invalfc(fsc, "blksize only supported for fuseblk"); + ctx->blksize = result.uint_32; + break; + +@@ -622,9 +622,9 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) + return 0; + } + +-static void fuse_free_fc(struct fs_context *fc) ++static void fuse_free_fsc(struct fs_context *fsc) + { +- struct fuse_fs_context *ctx = fc->fs_private; ++ struct fuse_fs_context *ctx = fsc->fs_private; + + if (ctx) { + kfree(ctx->subtype); +@@ -1486,9 +1486,9 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) + return err; + } + +-static int fuse_get_tree(struct fs_context *fc) ++static int fuse_get_tree(struct fs_context *fsc) + { +- struct fuse_fs_context *ctx = fc->fs_private; ++ struct fuse_fs_context *ctx = fsc->fs_private; + + if (!ctx->fd_present || !ctx->rootmode_present || + !ctx->user_id_present || !ctx->group_id_present) +@@ -1496,14 +1496,14 @@ static int fuse_get_tree(struct fs_context *fc) + + #ifdef CONFIG_BLOCK + if (ctx->is_bdev) +- return get_tree_bdev(fc, fuse_fill_super); ++ return get_tree_bdev(fsc, fuse_fill_super); + #endif + +- return get_tree_nodev(fc, fuse_fill_super); ++ return get_tree_nodev(fsc, fuse_fill_super); + } + + static const struct fs_context_operations fuse_context_ops = { +- .free = fuse_free_fc, ++ .free = fuse_free_fsc, + .parse_param = fuse_parse_param, + .reconfigure = fuse_reconfigure, + .get_tree = fuse_get_tree, +@@ -1512,7 +1512,7 @@ static const struct fs_context_operations fuse_context_ops = { + /* + * Set up the filesystem mount context. 
+ */ +-static int fuse_init_fs_context(struct fs_context *fc) ++static int fuse_init_fs_context(struct fs_context *fsc) + { + struct fuse_fs_context *ctx; + +@@ -1525,14 +1525,14 @@ static int fuse_init_fs_context(struct fs_context *fc) + ctx->legacy_opts_show = true; + + #ifdef CONFIG_BLOCK +- if (fc->fs_type == &fuseblk_fs_type) { ++ if (fsc->fs_type == &fuseblk_fs_type) { + ctx->is_bdev = true; + ctx->destroy = true; + } + #endif + +- fc->fs_private = ctx; +- fc->ops = &fuse_context_ops; ++ fsc->fs_private = ctx; ++ fsc->ops = &fuse_context_ops; + return 0; + } + +diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c +index faadc80485e7f..7d4655022afc6 100644 +--- a/fs/fuse/virtio_fs.c ++++ b/fs/fuse/virtio_fs.c +@@ -97,14 +97,14 @@ static const struct fs_parameter_spec virtio_fs_parameters[] = { + {} + }; + +-static int virtio_fs_parse_param(struct fs_context *fc, ++static int virtio_fs_parse_param(struct fs_context *fsc, + struct fs_parameter *param) + { + struct fs_parse_result result; +- struct fuse_fs_context *ctx = fc->fs_private; ++ struct fuse_fs_context *ctx = fsc->fs_private; + int opt; + +- opt = fs_parse(fc, virtio_fs_parameters, param, &result); ++ opt = fs_parse(fsc, virtio_fs_parameters, param, &result); + if (opt < 0) + return opt; + +@@ -119,9 +119,9 @@ static int virtio_fs_parse_param(struct fs_context *fc, + return 0; + } + +-static void virtio_fs_free_fc(struct fs_context *fc) ++static void virtio_fs_free_fsc(struct fs_context *fsc) + { +- struct fuse_fs_context *ctx = fc->fs_private; ++ struct fuse_fs_context *ctx = fsc->fs_private; + + kfree(ctx); + } +@@ -1500,7 +1500,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc) + } + + static const struct fs_context_operations virtio_fs_context_ops = { +- .free = virtio_fs_free_fc, ++ .free = virtio_fs_free_fsc, + .parse_param = virtio_fs_parse_param, + .get_tree = virtio_fs_get_tree, + }; +-- +2.43.0 + diff --git a/queue-5.10/fuse-verify-g-u-id-mount-options-correctly.patch 
b/queue-5.10/fuse-verify-g-u-id-mount-options-correctly.patch new file mode 100644 index 00000000000..359580aa432 --- /dev/null +++ b/queue-5.10/fuse-verify-g-u-id-mount-options-correctly.patch @@ -0,0 +1,86 @@ +From 491c7ff888ab75bfbadad5ca1b9b3f764781fdc3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Jul 2024 17:22:41 -0500 +Subject: fuse: verify {g,u}id mount options correctly + +From: Eric Sandeen + +[ Upstream commit 525bd65aa759ec320af1dc06e114ed69733e9e23 ] + +As was done in +0200679fc795 ("tmpfs: verify {g,u}id mount options correctly") +we need to validate that the requested uid and/or gid is representable in +the filesystem's idmapping. + +Cribbing from the above commit log, + +The contract for {g,u}id mount options and {g,u}id values in general set +from userspace has always been that they are translated according to the +caller's idmapping. In so far, fuse has been doing the correct thing. +But since fuse is mountable in unprivileged contexts it is also +necessary to verify that the resulting {k,g}uid is representable in the +namespace of the superblock. 
+ +Fixes: c30da2e981a7 ("fuse: convert to use the new mount API") +Cc: stable@vger.kernel.org # 5.4+ +Signed-off-by: Eric Sandeen +Link: https://lore.kernel.org/r/8f07d45d-c806-484d-a2e3-7a2199df1cd2@redhat.com +Reviewed-by: Christian Brauner +Reviewed-by: Josef Bacik +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/fuse/inode.c | 24 ++++++++++++++++++++---- + 1 file changed, 20 insertions(+), 4 deletions(-) + +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 5f9b2dc59135b..a5d1eb0bc5214 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -540,6 +540,8 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) + struct fs_parse_result result; + struct fuse_fs_context *ctx = fsc->fs_private; + int opt; ++ kuid_t kuid; ++ kgid_t kgid; + + if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + /* +@@ -584,16 +586,30 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) + break; + + case OPT_USER_ID: +- ctx->user_id = make_kuid(fsc->user_ns, result.uint_32); +- if (!uid_valid(ctx->user_id)) ++ kuid = make_kuid(fsc->user_ns, result.uint_32); ++ if (!uid_valid(kuid)) + return invalfc(fsc, "Invalid user_id"); ++ /* ++ * The requested uid must be representable in the ++ * filesystem's idmapping. ++ */ ++ if (!kuid_has_mapping(fsc->user_ns, kuid)) ++ return invalfc(fsc, "Invalid user_id"); ++ ctx->user_id = kuid; + ctx->user_id_present = true; + break; + + case OPT_GROUP_ID: +- ctx->group_id = make_kgid(fsc->user_ns, result.uint_32); +- if (!gid_valid(ctx->group_id)) ++ kgid = make_kgid(fsc->user_ns, result.uint_32);; ++ if (!gid_valid(kgid)) ++ return invalfc(fsc, "Invalid group_id"); ++ /* ++ * The requested gid must be representable in the ++ * filesystem's idmapping. 
++ */ ++ if (!kgid_has_mapping(fsc->user_ns, kgid)) + return invalfc(fsc, "Invalid group_id"); ++ ctx->group_id = kgid; + ctx->group_id_present = true; + break; + +-- +2.43.0 + diff --git a/queue-5.10/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch b/queue-5.10/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch new file mode 100644 index 00000000000..a857fe3d342 --- /dev/null +++ b/queue-5.10/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch @@ -0,0 +1,122 @@ +From 6082826257da6d7c5217f69a4c34521feed6379a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Feb 2022 12:02:59 +0000 +Subject: genirq: Allow the PM device to originate from irq domain + +From: Marc Zyngier + +[ Upstream commit 1f8863bfb5ca500ea1c7669b16b1931ba27fce20 ] + +As a preparation to moving the reference to the device used for +runtime power management, add a new 'dev' field to the irqdomain +structure for that exact purpose. + +The irq_chip_pm_{get,put}() helpers are made aware of the dual +location via a new private helper. + +No functional change intended. + +Signed-off-by: Marc Zyngier +Reviewed-by: Geert Uytterhoeven +Tested-by: Geert Uytterhoeven +Tested-by: Tony Lindgren +Acked-by: Bartosz Golaszewski +Link: https://lore.kernel.org/r/20220201120310.878267-2-maz@kernel.org +Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly") +Signed-off-by: Sasha Levin +--- + include/linux/irqdomain.h | 10 ++++++++++ + kernel/irq/chip.c | 23 ++++++++++++++++++----- + 2 files changed, 28 insertions(+), 5 deletions(-) + +diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h +index 9b9743f7538c4..60f53eadfa422 100644 +--- a/include/linux/irqdomain.h ++++ b/include/linux/irqdomain.h +@@ -149,6 +149,8 @@ struct irq_domain_chip_generic; + * @gc: Pointer to a list of generic chips. 
There is a helper function for + * setting up one or more generic chips for interrupt controllers + * drivers using the generic chip library which uses this pointer. ++ * @dev: Pointer to a device that the domain represent, and that will be ++ * used for power management purposes. + * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * @debugfs_file: dentry for the domain debugfs file + * +@@ -171,6 +173,7 @@ struct irq_domain { + struct fwnode_handle *fwnode; + enum irq_domain_bus_token bus_token; + struct irq_domain_chip_generic *gc; ++ struct device *dev; + #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + struct irq_domain *parent; + #endif +@@ -227,6 +230,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) + return to_of_node(d->fwnode); + } + ++static inline void irq_domain_set_pm_device(struct irq_domain *d, ++ struct device *dev) ++{ ++ if (d) ++ d->dev = dev; ++} ++ + #ifdef CONFIG_IRQ_DOMAIN + struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, phys_addr_t *pa); +diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c +index e7d284261d450..b8aa9e22105f9 100644 +--- a/kernel/irq/chip.c ++++ b/kernel/irq/chip.c +@@ -1586,6 +1586,17 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + return 0; + } + ++static struct device *irq_get_parent_device(struct irq_data *data) ++{ ++ if (data->chip->parent_device) ++ return data->chip->parent_device; ++ ++ if (data->domain) ++ return data->domain->dev; ++ ++ return NULL; ++} ++ + /** + * irq_chip_pm_get - Enable power for an IRQ chip + * @data: Pointer to interrupt specific data +@@ -1595,12 +1606,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + */ + int irq_chip_pm_get(struct irq_data *data) + { ++ struct device *dev = irq_get_parent_device(data); + int retval; + +- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { +- retval = 
pm_runtime_get_sync(data->chip->parent_device); ++ if (IS_ENABLED(CONFIG_PM) && dev) { ++ retval = pm_runtime_get_sync(dev); + if (retval < 0) { +- pm_runtime_put_noidle(data->chip->parent_device); ++ pm_runtime_put_noidle(dev); + return retval; + } + } +@@ -1618,10 +1630,11 @@ int irq_chip_pm_get(struct irq_data *data) + */ + int irq_chip_pm_put(struct irq_data *data) + { ++ struct device *dev = irq_get_parent_device(data); + int retval = 0; + +- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) +- retval = pm_runtime_put(data->chip->parent_device); ++ if (IS_ENABLED(CONFIG_PM) && dev) ++ retval = pm_runtime_put(dev); + + return (retval < 0) ? retval : 0; + } +-- +2.43.0 + diff --git a/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch b/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch new file mode 100644 index 00000000000..f5755045956 --- /dev/null +++ b/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch @@ -0,0 +1,110 @@ +From 2b1bb86dc48a14692989153e5b91def24b213416 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Nov 2021 18:35:59 -0800 +Subject: ipc: check checkpoint_restore_ns_capable() to modify C/R proc files + +From: Michal Clapinski + +[ Upstream commit 5563cabdde7ee53c34ec7e5e0283bfcc9a1bc893 ] + +This commit removes the requirement to be root to modify sem_next_id, +msg_next_id and shm_next_id and checks checkpoint_restore_ns_capable +instead. + +Since those files are specific to the IPC namespace, there is no reason +they should require root privileges. This is similar to ns_last_pid, +which also only checks checkpoint_restore_ns_capable. + +[akpm@linux-foundation.org: ipc/ipc_sysctl.c needs capability.h for checkpoint_restore_ns_capable()] + +Link: https://lkml.kernel.org/r/20210916163717.3179496-1-mclapinski@google.com +Signed-off-by: Michal Clapinski +Reviewed-by: Davidlohr Bueso +Reviewed-by: Manfred Spraul +Cc: "Eric W. 
Biederman" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 29 +++++++++++++++++++++++------ + 1 file changed, 23 insertions(+), 6 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 3f312bf2b1163..345e4d673e61e 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include + #include "util.h" +@@ -104,6 +105,19 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + return ret; + } + ++#ifdef CONFIG_CHECKPOINT_RESTORE ++static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, ++ int write, void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; ++ ++ if (write && !checkpoint_restore_ns_capable(user_ns)) ++ return -EPERM; ++ ++ return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); ++} ++#endif ++ + #else + #define proc_ipc_doulongvec_minmax NULL + #define proc_ipc_dointvec NULL +@@ -111,6 +125,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + #define proc_ipc_dointvec_minmax_orphans NULL + #define proc_ipc_auto_msgmni NULL + #define proc_ipc_sem_dointvec NULL ++#ifdef CONFIG_CHECKPOINT_RESTORE ++#define proc_ipc_dointvec_minmax_checkpoint_restore NULL ++#endif /* CONFIG_CHECKPOINT_RESTORE */ + #endif + + int ipc_mni = IPCMNI; +@@ -198,8 +215,8 @@ static struct ctl_table ipc_kern_table[] = { + .procname = "sem_next_id", + .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), +- .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .mode = 0666, ++ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -207,8 +224,8 @@ static struct ctl_table ipc_kern_table[] = { + .procname 
= "msg_next_id", + .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), +- .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .mode = 0666, ++ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -216,8 +233,8 @@ static struct ctl_table ipc_kern_table[] = { + .procname = "shm_next_id", + .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), +- .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .mode = 0666, ++ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +-- +2.43.0 + diff --git a/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch b/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch new file mode 100644 index 00000000000..7828ef2ab1d --- /dev/null +++ b/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch @@ -0,0 +1,137 @@ +From 58463ddf843a769113f5f44de099157c98150f50 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 May 2022 15:39:56 +0200 +Subject: ipc: Check permissions for checkpoint_restart sysctls at open time + +From: Alexey Gladkov + +[ Upstream commit 0889f44e281034e180daa6daf3e2d57c012452d4 ] + +As Eric Biederman pointed out, it is possible not to use a custom +proc_handler and check permissions for every write, but to use a +.permission handler. That will allow the checkpoint_restart sysctls to +perform all of their permission checks at open time, and not need any +other special code. + +Link: https://lore.kernel.org/lkml/87czib9g38.fsf@email.froward.int.ebiederm.org/ +Fixes: 1f5c135ee509 ("ipc: Store ipc sysctls in the ipc namespace") +Signed-off-by: Eric W. Biederman +Signed-off-by: Alexey Gladkov +Link: https://lkml.kernel.org/r/65fa8459803830608da4610a39f33c76aa933eb9.1651584847.git.legion@kernel.org +Signed-off-by: Eric W. 
Biederman +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 57 ++++++++++++++++++++++++------------------------ + 1 file changed, 29 insertions(+), 28 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 15210ac47e9e1..a2b871d006da7 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -78,25 +78,6 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + return ret; + } + +-#ifdef CONFIG_CHECKPOINT_RESTORE +-static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, +- int write, void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ipc_namespace *ns = table->extra1; +- struct ctl_table ipc_table; +- +- if (write && !checkpoint_restore_ns_capable(ns->user_ns)) +- return -EPERM; +- +- memcpy(&ipc_table, table, sizeof(ipc_table)); +- +- ipc_table.extra1 = SYSCTL_ZERO; +- ipc_table.extra2 = SYSCTL_INT_MAX; +- +- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +-} +-#endif +- + int ipc_mni = IPCMNI; + int ipc_mni_shift = IPCMNI_SHIFT; + int ipc_min_cycle = RADIX_TREE_MAP_SIZE; +@@ -180,22 +161,28 @@ static struct ctl_table ipc_sysctls[] = { + .procname = "sem_next_id", + .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), +- .mode = 0666, +- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, ++ .mode = 0444, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "msg_next_id", + .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), +- .mode = 0666, +- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, ++ .mode = 0444, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "shm_next_id", + .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, + .maxlen = 
sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), +- .mode = 0666, +- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, ++ .mode = 0444, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_INT_MAX, + }, + #endif + {} +@@ -211,8 +198,25 @@ static int set_is_seen(struct ctl_table_set *set) + return &current->nsproxy->ipc_ns->ipc_set == set; + } + ++static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) ++{ ++ int mode = table->mode; ++ ++#ifdef CONFIG_CHECKPOINT_RESTORE ++ struct ipc_namespace *ns = current->nsproxy->ipc_ns; ++ ++ if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || ++ (table->data == &ns->ids[IPC_MSG_IDS].next_id) || ++ (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && ++ checkpoint_restore_ns_capable(ns->user_ns)) ++ mode = 0666; ++#endif ++ return mode; ++} ++ + static struct ctl_table_root set_root = { + .lookup = set_lookup, ++ .permissions = ipc_permissions, + }; + + bool setup_ipc_sysctls(struct ipc_namespace *ns) +@@ -254,15 +258,12 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns) + #ifdef CONFIG_CHECKPOINT_RESTORE + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; +- tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; +- tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; +- tbl[i].extra1 = ns; + #endif + } else { + tbl[i].data = NULL; +-- +2.43.0 + diff --git a/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch b/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch new file mode 100644 index 00000000000..e00eca47c44 --- /dev/null +++ b/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch @@ -0,0 +1,69 @@ +From dc71da9fbefdbd0a29e74eda21a5d3a87dbd1729 Mon Sep 17 00:00:00 2001 +From: Sasha
Levin +Date: Mon, 8 Nov 2021 18:36:02 -0800 +Subject: ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL + +From: Manfred Spraul + +[ Upstream commit 0e9beb8a96f21a6df1579cb3a679e150e3269d80 ] + +Compilation of ipc/ipc_sysctl.c is controlled by +obj-$(CONFIG_SYSVIPC_SYSCTL) +[see ipc/Makefile] + +And CONFIG_SYSVIPC_SYSCTL depends on SYSCTL +[see init/Kconfig] + +An SYSCTL is selected by PROC_SYSCTL. +[see fs/proc/Kconfig] + +Thus: #ifndef CONFIG_PROC_SYSCTL in ipc/ipc_sysctl.c is impossible, the +fallback can be removed. + +Link: https://lkml.kernel.org/r/20210918145337.3369-1-manfred@colorfullife.com +Signed-off-by: Manfred Spraul +Reviewed-by: "Eric W. Biederman" +Acked-by: Davidlohr Bueso +Cc: Manfred Spraul +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 345e4d673e61e..f101c171753f6 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -23,7 +23,6 @@ static void *get_ipc(struct ctl_table *table) + return which; + } + +-#ifdef CONFIG_PROC_SYSCTL + static int proc_ipc_dointvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { +@@ -118,18 +117,6 @@ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, + } + #endif + +-#else +-#define proc_ipc_doulongvec_minmax NULL +-#define proc_ipc_dointvec NULL +-#define proc_ipc_dointvec_minmax NULL +-#define proc_ipc_dointvec_minmax_orphans NULL +-#define proc_ipc_auto_msgmni NULL +-#define proc_ipc_sem_dointvec NULL +-#ifdef CONFIG_CHECKPOINT_RESTORE +-#define proc_ipc_dointvec_minmax_checkpoint_restore NULL +-#endif /* CONFIG_CHECKPOINT_RESTORE */ +-#endif +- + int ipc_mni = IPCMNI; + int ipc_mni_shift = IPCMNI_SHIFT; + int ipc_min_cycle = RADIX_TREE_MAP_SIZE; +-- +2.43.0 + diff --git 
a/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch b/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch new file mode 100644 index 00000000000..a273b9510ab --- /dev/null +++ b/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch @@ -0,0 +1,406 @@ +From d7b4862364b01676155853a01f0065e8caa36e1a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Feb 2022 19:18:15 +0100 +Subject: ipc: Store ipc sysctls in the ipc namespace + +From: Alexey Gladkov + +[ Upstream commit 1f5c135ee509e89e0cc274333a65f73c62cb16e5 ] + +The ipc sysctls are not available for modification inside the user +namespace. Following the mqueue sysctls, we changed the implementation +to be more userns friendly. + +So far, the changes do not provide additional access to files. This +will be done in a future patch. + +Signed-off-by: Alexey Gladkov +Link: https://lkml.kernel.org/r/be6f9d014276f4dddd0c3aa05a86052856c1c555.1644862280.git.legion@kernel.org +Signed-off-by: Eric W. Biederman +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + include/linux/ipc_namespace.h | 21 ++++ + ipc/ipc_sysctl.c | 189 ++++++++++++++++++++++------------ + ipc/namespace.c | 4 + + 3 files changed, 147 insertions(+), 67 deletions(-) + +diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h +index 60cd84c1ba146..efcfa7f1d4551 100644 +--- a/include/linux/ipc_namespace.h ++++ b/include/linux/ipc_namespace.h +@@ -68,6 +68,9 @@ struct ipc_namespace { + struct ctl_table_set mq_set; + struct ctl_table_header *mq_sysctls; + ++ struct ctl_table_set ipc_set; ++ struct ctl_table_header *ipc_sysctls; ++ + /* user_ns which owns the ipc ns */ + struct user_namespace *user_ns; + struct ucounts *ucounts; +@@ -189,4 +192,22 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns) + } + + #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ ++ ++#ifdef CONFIG_SYSVIPC_SYSCTL ++ ++bool setup_ipc_sysctls(struct ipc_namespace *ns); ++void 
retire_ipc_sysctls(struct ipc_namespace *ns); ++ ++#else /* CONFIG_SYSVIPC_SYSCTL */ ++ ++static inline void retire_ipc_sysctls(struct ipc_namespace *ns) ++{ ++} ++ ++static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) ++{ ++ return true; ++} ++ ++#endif /* CONFIG_SYSVIPC_SYSCTL */ + #endif +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index f101c171753f6..15210ac47e9e1 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -13,43 +13,22 @@ + #include + #include + #include ++#include + #include "util.h" + +-static void *get_ipc(struct ctl_table *table) +-{ +- char *which = table->data; +- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; +- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; +- return which; +-} +- +-static int proc_ipc_dointvec(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table ipc_table; +- +- memcpy(&ipc_table, table, sizeof(ipc_table)); +- ipc_table.data = get_ipc(table); +- +- return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); +-} +- +-static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, ++static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { ++ struct ipc_namespace *ns = table->extra1; + struct ctl_table ipc_table; ++ int err; + + memcpy(&ipc_table, table, sizeof(ipc_table)); +- ipc_table.data = get_ipc(table); + +- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +-} ++ ipc_table.extra1 = SYSCTL_ZERO; ++ ipc_table.extra2 = SYSCTL_ONE; + +-static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ipc_namespace *ns = current->nsproxy->ipc_ns; +- int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ err = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); + + if (err < 0) + return err; +@@ -58,17 +37,6 @@ static int 
proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, + return err; + } + +-static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table ipc_table; +- memcpy(&ipc_table, table, sizeof(ipc_table)); +- ipc_table.data = get_ipc(table); +- +- return proc_doulongvec_minmax(&ipc_table, write, buffer, +- lenp, ppos); +-} +- + static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { +@@ -87,11 +55,17 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, + static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { ++ struct ipc_namespace *ns = table->extra1; ++ struct ctl_table ipc_table; + int ret, semmni; +- struct ipc_namespace *ns = current->nsproxy->ipc_ns; ++ ++ memcpy(&ipc_table, table, sizeof(ipc_table)); ++ ++ ipc_table.extra1 = NULL; ++ ipc_table.extra2 = NULL; + + semmni = ns->sem_ctls[3]; +- ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos); ++ ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (!ret) + ret = sem_check_semmni(current->nsproxy->ipc_ns); +@@ -108,12 +82,18 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, + int write, void *buffer, size_t *lenp, loff_t *ppos) + { +- struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; ++ struct ipc_namespace *ns = table->extra1; ++ struct ctl_table ipc_table; + +- if (write && !checkpoint_restore_ns_capable(user_ns)) ++ if (write && !checkpoint_restore_ns_capable(ns->user_ns)) + return -EPERM; + +- return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ memcpy(&ipc_table, table, sizeof(ipc_table)); ++ ++ ipc_table.extra1 = SYSCTL_ZERO; ++ ipc_table.extra2 = SYSCTL_INT_MAX; ++ ++ return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, 
ppos); + } + #endif + +@@ -121,27 +101,27 @@ int ipc_mni = IPCMNI; + int ipc_mni_shift = IPCMNI_SHIFT; + int ipc_min_cycle = RADIX_TREE_MAP_SIZE; + +-static struct ctl_table ipc_kern_table[] = { ++static struct ctl_table ipc_sysctls[] = { + { + .procname = "shmmax", + .data = &init_ipc_ns.shm_ctlmax, + .maxlen = sizeof(init_ipc_ns.shm_ctlmax), + .mode = 0644, +- .proc_handler = proc_ipc_doulongvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "shmall", + .data = &init_ipc_ns.shm_ctlall, + .maxlen = sizeof(init_ipc_ns.shm_ctlall), + .mode = 0644, +- .proc_handler = proc_ipc_doulongvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "shmmni", + .data = &init_ipc_ns.shm_ctlmni, + .maxlen = sizeof(init_ipc_ns.shm_ctlmni), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ipc_mni, + }, +@@ -151,15 +131,13 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_ONE, + }, + { + .procname = "msgmax", + .data = &init_ipc_ns.msg_ctlmax, + .maxlen = sizeof(init_ipc_ns.msg_ctlmax), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -168,7 +146,7 @@ static struct ctl_table ipc_kern_table[] = { + .data = &init_ipc_ns.msg_ctlmni, + .maxlen = sizeof(init_ipc_ns.msg_ctlmni), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ipc_mni, + }, +@@ -186,7 +164,7 @@ static struct ctl_table ipc_kern_table[] = { + .data = &init_ipc_ns.msg_ctlmnb, + .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 
= SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -204,8 +182,6 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "msg_next_id", +@@ -213,8 +189,6 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "shm_next_id", +@@ -222,25 +196,106 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_INT_MAX, + }, + #endif + {} + }; + +-static struct ctl_table ipc_root_table[] = { +- { +- .procname = "kernel", +- .mode = 0555, +- .child = ipc_kern_table, +- }, +- {} ++static struct ctl_table_set *set_lookup(struct ctl_table_root *root) ++{ ++ return &current->nsproxy->ipc_ns->ipc_set; ++} ++ ++static int set_is_seen(struct ctl_table_set *set) ++{ ++ return &current->nsproxy->ipc_ns->ipc_set == set; ++} ++ ++static struct ctl_table_root set_root = { ++ .lookup = set_lookup, + }; + ++bool setup_ipc_sysctls(struct ipc_namespace *ns) ++{ ++ struct ctl_table *tbl; ++ ++ setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); ++ ++ tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); ++ if (tbl) { ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { ++ if (tbl[i].data == &init_ipc_ns.shm_ctlmax) { ++ tbl[i].data = &ns->shm_ctlmax; ++ ++ } else if (tbl[i].data == &init_ipc_ns.shm_ctlall) { ++ tbl[i].data = &ns->shm_ctlall; ++ ++ } else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) { ++ tbl[i].data = &ns->shm_ctlmni; ++ ++ } else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) { ++
tbl[i].data = &ns->shm_rmid_forced; ++ tbl[i].extra1 = ns; ++ ++ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) { ++ tbl[i].data = &ns->msg_ctlmax; ++ ++ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) { ++ tbl[i].data = &ns->msg_ctlmni; ++ ++ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) { ++ tbl[i].data = &ns->msg_ctlmnb; ++ ++ } else if (tbl[i].data == &init_ipc_ns.sem_ctls) { ++ tbl[i].data = &ns->sem_ctls; ++ tbl[i].extra1 = ns; ++#ifdef CONFIG_CHECKPOINT_RESTORE ++ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { ++ tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; ++ tbl[i].extra1 = ns; ++ ++ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { ++ tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; ++ tbl[i].extra1 = ns; ++ ++ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { ++ tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; ++ tbl[i].extra1 = ns; ++#endif ++ } else { ++ tbl[i].data = NULL; ++ } ++ } ++ ++ ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); ++ } ++ if (!ns->ipc_sysctls) { ++ kfree(tbl); ++ retire_sysctl_set(&ns->ipc_set); ++ return false; ++ } ++ ++ return true; ++} ++ ++void retire_ipc_sysctls(struct ipc_namespace *ns) ++{ ++ struct ctl_table *tbl; ++ ++ tbl = ns->ipc_sysctls->ctl_table_arg; ++ unregister_sysctl_table(ns->ipc_sysctls); ++ retire_sysctl_set(&ns->ipc_set); ++ kfree(tbl); ++} ++ + static int __init ipc_sysctl_init(void) + { +- register_sysctl_table(ipc_root_table); ++ if (!setup_ipc_sysctls(&init_ipc_ns)) { ++ pr_warn("ipc sysctl registration failed\n"); ++ return -ENOMEM; ++ } + return 0; + } + +diff --git a/ipc/namespace.c b/ipc/namespace.c +index 5d68e20f7d2bf..14bb40c9d0b85 100644 +--- a/ipc/namespace.c ++++ b/ipc/namespace.c +@@ -63,6 +63,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, + if (!setup_mq_sysctls(ns)) + goto fail_put; + ++ if (!setup_ipc_sysctls(ns)) ++ goto fail_put; ++ + sem_init_ns(ns); + msg_init_ns(ns); + 
shm_init_ns(ns); +@@ -130,6 +133,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) + shm_exit_ns(ns); + + retire_mq_sysctls(ns); ++ retire_ipc_sysctls(ns); + + dec_ipc_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); +-- +2.43.0 + diff --git a/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch b/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch new file mode 100644 index 00000000000..dab916ccd59 --- /dev/null +++ b/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch @@ -0,0 +1,323 @@ +From cb98de8a508d409d94c225f80e4ac33f3b6dfad5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Feb 2022 19:18:14 +0100 +Subject: ipc: Store mqueue sysctls in the ipc namespace + +From: Alexey Gladkov + +[ Upstream commit dc55e35f9e810f23dd69cfdc91a3d636023f57a2 ] + +Right now, the mqueue sysctls take ipc namespaces into account in a +rather hacky way. This works in most cases, but does not respect the +user namespace. + +Within the user namespace, the user cannot change the /proc/sys/fs/mqueue/* +parametres. This poses a problem in the rootless containers. + +To solve this I changed the implementation of the mqueue sysctls just +like some other sysctls. + +So far, the changes do not provide additional access to files. This will +be done in a future patch. + +v3: +* Don't implemenet set_permissions to keep the current behavior. + +v2: +* Fixed compilation problem if CONFIG_POSIX_MQUEUE_SYSCTL is not + specified. + +Reported-by: kernel test robot +Signed-off-by: Alexey Gladkov +Link: https://lkml.kernel.org/r/b0ccbb2489119f1f20c737cf1930c3a9c4e4243a.1644862280.git.legion@kernel.org +Signed-off-by: Eric W. 
Biederman +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + include/linux/ipc_namespace.h | 16 +++-- + ipc/mq_sysctl.c | 121 ++++++++++++++++++---------------- + ipc/mqueue.c | 10 ++- + ipc/namespace.c | 6 ++ + 4 files changed, 88 insertions(+), 65 deletions(-) + +diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h +index 08325105131a2..60cd84c1ba146 100644 +--- a/include/linux/ipc_namespace.h ++++ b/include/linux/ipc_namespace.h +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + struct user_namespace; + +@@ -64,6 +65,9 @@ struct ipc_namespace { + unsigned int mq_msg_default; + unsigned int mq_msgsize_default; + ++ struct ctl_table_set mq_set; ++ struct ctl_table_header *mq_sysctls; ++ + /* user_ns which owns the ipc ns */ + struct user_namespace *user_ns; + struct ucounts *ucounts; +@@ -170,14 +174,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) + + #ifdef CONFIG_POSIX_MQUEUE_SYSCTL + +-struct ctl_table_header; +-extern struct ctl_table_header *mq_register_sysctl_table(void); ++void retire_mq_sysctls(struct ipc_namespace *ns); ++bool setup_mq_sysctls(struct ipc_namespace *ns); + + #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ + +-static inline struct ctl_table_header *mq_register_sysctl_table(void) ++static inline void retire_mq_sysctls(struct ipc_namespace *ns) + { +- return NULL; ++} ++ ++static inline bool setup_mq_sysctls(struct ipc_namespace *ns) ++{ ++ return true; + } + + #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ +diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c +index 72a92a08c848e..fbf6a8b93a265 100644 +--- a/ipc/mq_sysctl.c ++++ b/ipc/mq_sysctl.c +@@ -9,39 +9,9 @@ + #include + #include + +-#ifdef CONFIG_PROC_SYSCTL +-static void *get_mq(struct ctl_table *table) +-{ +- char *which = table->data; +- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; +- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; +- return which; +-} +- +-static int 
proc_mq_dointvec(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table mq_table; +- memcpy(&mq_table, table, sizeof(mq_table)); +- mq_table.data = get_mq(table); +- +- return proc_dointvec(&mq_table, write, buffer, lenp, ppos); +-} +- +-static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table mq_table; +- memcpy(&mq_table, table, sizeof(mq_table)); +- mq_table.data = get_mq(table); +- +- return proc_dointvec_minmax(&mq_table, write, buffer, +- lenp, ppos); +-} +-#else +-#define proc_mq_dointvec NULL +-#define proc_mq_dointvec_minmax NULL +-#endif ++#include ++#include ++#include + + static int msg_max_limit_min = MIN_MSGMAX; + static int msg_max_limit_max = HARD_MSGMAX; +@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_queues_max, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec, ++ .proc_handler = proc_dointvec, + }, + { + .procname = "msg_max", + .data = &init_ipc_ns.mq_msg_max, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_max_limit_min, + .extra2 = &msg_max_limit_max, + }, +@@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_msgsize_max, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_maxsize_limit_min, + .extra2 = &msg_maxsize_limit_max, + }, +@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_msg_default, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_max_limit_min, + .extra2 = &msg_max_limit_max, + }, +@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_msgsize_default, + .maxlen 
= sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_maxsize_limit_min, + .extra2 = &msg_maxsize_limit_max, + }, + {} + }; + +-static struct ctl_table mq_sysctl_dir[] = { +- { +- .procname = "mqueue", +- .mode = 0555, +- .child = mq_sysctls, +- }, +- {} +-}; ++static struct ctl_table_set *set_lookup(struct ctl_table_root *root) ++{ ++ return &current->nsproxy->ipc_ns->mq_set; ++} + +-static struct ctl_table mq_sysctl_root[] = { +- { +- .procname = "fs", +- .mode = 0555, +- .child = mq_sysctl_dir, +- }, +- {} ++static int set_is_seen(struct ctl_table_set *set) ++{ ++ return &current->nsproxy->ipc_ns->mq_set == set; ++} ++ ++static struct ctl_table_root set_root = { ++ .lookup = set_lookup, + }; + +-struct ctl_table_header *mq_register_sysctl_table(void) ++bool setup_mq_sysctls(struct ipc_namespace *ns) + { +- return register_sysctl_table(mq_sysctl_root); ++ struct ctl_table *tbl; ++ ++ setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); ++ ++ tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); ++ if (tbl) { ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) { ++ if (tbl[i].data == &init_ipc_ns.mq_queues_max) ++ tbl[i].data = &ns->mq_queues_max; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msg_max) ++ tbl[i].data = &ns->mq_msg_max; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max) ++ tbl[i].data = &ns->mq_msgsize_max; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msg_default) ++ tbl[i].data = &ns->mq_msg_default; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default) ++ tbl[i].data = &ns->mq_msgsize_default; ++ else ++ tbl[i].data = NULL; ++ } ++ ++ ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); ++ } ++ if (!ns->mq_sysctls) { ++ kfree(tbl); ++ retire_sysctl_set(&ns->mq_set); ++ return false; ++ } ++ ++ return true; ++} ++ ++void retire_mq_sysctls(struct ipc_namespace *ns) ++{ ++ struct ctl_table *tbl; ++ ++ tbl =
ns->mq_sysctls->ctl_table_arg; ++ unregister_sysctl_table(ns->mq_sysctls); ++ retire_sysctl_set(&ns->mq_set); ++ kfree(tbl); + } +diff --git a/ipc/mqueue.c b/ipc/mqueue.c +index 86969de170843..b14ea1dcd50d4 100644 +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info); + + static struct kmem_cache *mqueue_inode_cachep; + +-static struct ctl_table_header *mq_sysctl_table; +- + static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) + { + return container_of(inode, struct mqueue_inode_info, vfs_inode); +@@ -1724,8 +1722,10 @@ static int __init init_mqueue_fs(void) + if (mqueue_inode_cachep == NULL) + return -ENOMEM; + +- /* ignore failures - they are not fatal */ +- mq_sysctl_table = mq_register_sysctl_table(); ++ if (!setup_mq_sysctls(&init_ipc_ns)) { ++ pr_warn("sysctl registration failed\n"); ++ return -ENOMEM; ++ } + + error = register_filesystem(&mqueue_fs_type); + if (error) +@@ -1742,8 +1742,6 @@ static int __init init_mqueue_fs(void) + out_filesystem: + unregister_filesystem(&mqueue_fs_type); + out_sysctl: +- if (mq_sysctl_table) +- unregister_sysctl_table(mq_sysctl_table); + kmem_cache_destroy(mqueue_inode_cachep); + return error; + } +diff --git a/ipc/namespace.c b/ipc/namespace.c +index 24e7b45320f72..5d68e20f7d2bf 100644 +--- a/ipc/namespace.c ++++ b/ipc/namespace.c +@@ -59,6 +59,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, + if (err) + goto fail_put; + ++ err = -ENOMEM; ++ if (!setup_mq_sysctls(ns)) ++ goto fail_put; ++ + sem_init_ns(ns); + msg_init_ns(ns); + shm_init_ns(ns); +@@ -125,6 +129,8 @@ static void free_ipc_ns(struct ipc_namespace *ns) + msg_exit_ns(ns); + shm_exit_ns(ns); + ++ retire_mq_sysctls(ns); ++ + dec_ipc_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + ns_free_inum(&ns->ns); +-- +2.43.0 + diff --git a/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch 
b/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch new file mode 100644 index 00000000000..e9fd9d5bb2a --- /dev/null +++ b/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch @@ -0,0 +1,53 @@ +From ab586543de36f330ced813886c8321973345ff1a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jul 2024 10:14:27 +0200 +Subject: ipv4: fix source address selection with route leak + +From: Nicolas Dichtel + +[ Upstream commit 6807352353561187a718e87204458999dbcbba1b ] + +By default, an address assigned to the output interface is selected when +the source address is not specified. This is problematic when a route, +configured in a vrf, uses an interface from another vrf (aka route leak). +The original vrf does not own the selected source address. + +Let's add a check against the output interface and call the appropriate +function to select the source address. + +CC: stable@vger.kernel.org +Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF") +Signed-off-by: Nicolas Dichtel +Reviewed-by: David Ahern +Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/fib_semantics.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index a308d3f0f845c..57883bd6b5597 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -2285,6 +2285,15 @@ void fib_select_path(struct net *net, struct fib_result *res, + fib_select_default(fl4, res); + + check_saddr: +- if (!fl4->saddr) +- fl4->saddr = fib_result_prefsrc(net, res); ++ if (!fl4->saddr) { ++ struct net_device *l3mdev; ++ ++ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); ++ ++ if (!l3mdev || ++ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) ++ fl4->saddr = fib_result_prefsrc(net, res); ++ else ++ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); ++ } + } +-- 
+2.43.0 + diff --git a/queue-5.10/irqchip-imx-irqsteer-add-runtime-pm-support.patch b/queue-5.10/irqchip-imx-irqsteer-add-runtime-pm-support.patch new file mode 100644 index 00000000000..f7dd0f93388 --- /dev/null +++ b/queue-5.10/irqchip-imx-irqsteer-add-runtime-pm-support.patch @@ -0,0 +1,85 @@ +From c2293deb2fc707d228b9697bfe49452de3f511a3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Apr 2022 18:37:01 +0200 +Subject: irqchip/imx-irqsteer: Add runtime PM support + +From: Lucas Stach + +[ Upstream commit 4730d2233311d86cad9dc510318d1b40e4b53cf2 ] + +There are now SoCs that integrate the irqsteer controller within +a separate power domain. In order to allow this domain to be +powered down when not needed, add runtime PM support to the driver. + +Signed-off-by: Lucas Stach +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20220406163701.1277930-2-l.stach@pengutronix.de +Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly") +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-imx-irqsteer.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c +index c9998b46414cc..b1ecdb08c618c 100644 +--- a/drivers/irqchip/irq-imx-irqsteer.c ++++ b/drivers/irqchip/irq-imx-irqsteer.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + + #define CTRL_STRIDE_OFF(_t, _r) (_t * 4 * _r) +@@ -178,7 +179,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + data->irq_count = DIV_ROUND_UP(irqs_num, 64); + data->reg_num = irqs_num / 32; + +- if (IS_ENABLED(CONFIG_PM_SLEEP)) { ++ if (IS_ENABLED(CONFIG_PM)) { + data->saved_reg = devm_kzalloc(&pdev->dev, + sizeof(u32) * data->reg_num, + GFP_KERNEL); +@@ -202,6 +203,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + ret = -ENOMEM; + goto out; + } ++ irq_domain_set_pm_device(data->domain, &pdev->dev); + + if 
(!data->irq_count || data->irq_count > CHAN_MAX_OUTPUT_INT) { + ret = -EINVAL; +@@ -222,6 +224,9 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + + platform_set_drvdata(pdev, data); + ++ pm_runtime_set_active(&pdev->dev); ++ pm_runtime_enable(&pdev->dev); ++ + return 0; + out: + clk_disable_unprepare(data->ipg_clk); +@@ -244,7 +249,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev) + return 0; + } + +-#ifdef CONFIG_PM_SLEEP ++#ifdef CONFIG_PM + static void imx_irqsteer_save_regs(struct irqsteer_data *data) + { + int i; +@@ -291,7 +296,10 @@ static int imx_irqsteer_resume(struct device *dev) + #endif + + static const struct dev_pm_ops imx_irqsteer_pm_ops = { +- SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_irqsteer_suspend, imx_irqsteer_resume) ++ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, ++ pm_runtime_force_resume) ++ SET_RUNTIME_PM_OPS(imx_irqsteer_suspend, ++ imx_irqsteer_resume, NULL) + }; + + static const struct of_device_id imx_irqsteer_dt_ids[] = { +-- +2.43.0 + diff --git a/queue-5.10/irqchip-imx-irqsteer-constify-irq_chip-struct.patch b/queue-5.10/irqchip-imx-irqsteer-constify-irq_chip-struct.patch new file mode 100644 index 00000000000..05d2e581954 --- /dev/null +++ b/queue-5.10/irqchip-imx-irqsteer-constify-irq_chip-struct.patch @@ -0,0 +1,36 @@ +From 72e979f5103330213ab860f5c77ab54ce7be3edc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Apr 2022 18:37:00 +0200 +Subject: irqchip/imx-irqsteer: Constify irq_chip struct + +From: Lucas Stach + +[ Upstream commit e9a50f12e579a48e124ac5adb93dafc35f0a46b8 ] + +The imx_irqsteer_irq_chip struct is constant data. 
+ +Signed-off-by: Lucas Stach +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20220406163701.1277930-1-l.stach@pengutronix.de +Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly") +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-imx-irqsteer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c +index 1edf7692a790b..c9998b46414cc 100644 +--- a/drivers/irqchip/irq-imx-irqsteer.c ++++ b/drivers/irqchip/irq-imx-irqsteer.c +@@ -70,7 +70,7 @@ static void imx_irqsteer_irq_mask(struct irq_data *d) + raw_spin_unlock_irqrestore(&data->lock, flags); + } + +-static struct irq_chip imx_irqsteer_irq_chip = { ++static const struct irq_chip imx_irqsteer_irq_chip = { + .name = "irqsteer", + .irq_mask = imx_irqsteer_irq_mask, + .irq_unmask = imx_irqsteer_irq_unmask, +-- +2.43.0 + diff --git a/queue-5.10/irqchip-imx-irqsteer-handle-runtime-power-management.patch b/queue-5.10/irqchip-imx-irqsteer-handle-runtime-power-management.patch new file mode 100644 index 00000000000..d35f6e5f2d5 --- /dev/null +++ b/queue-5.10/irqchip-imx-irqsteer-handle-runtime-power-management.patch @@ -0,0 +1,107 @@ +From dfbf49aec91243e105dc7c26f3565f1448edaeb0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Jul 2024 11:32:50 -0500 +Subject: irqchip/imx-irqsteer: Handle runtime power management correctly + +From: Shenwei Wang + +[ Upstream commit 33b1c47d1fc0b5f06a393bb915db85baacba18ea ] + +The power domain is automatically activated from clk_prepare(). 
However, on +certain platforms like i.MX8QM and i.MX8QXP, the power-on handling invokes +sleeping functions, which triggers the 'scheduling while atomic' bug in the +context switch path during device probing: + + BUG: scheduling while atomic: kworker/u13:1/48/0x00000002 + Call trace: + __schedule_bug+0x54/0x6c + __schedule+0x7f0/0xa94 + schedule+0x5c/0xc4 + schedule_preempt_disabled+0x24/0x40 + __mutex_lock.constprop.0+0x2c0/0x540 + __mutex_lock_slowpath+0x14/0x20 + mutex_lock+0x48/0x54 + clk_prepare_lock+0x44/0xa0 + clk_prepare+0x20/0x44 + imx_irqsteer_resume+0x28/0xe0 + pm_generic_runtime_resume+0x2c/0x44 + __genpd_runtime_resume+0x30/0x80 + genpd_runtime_resume+0xc8/0x2c0 + __rpm_callback+0x48/0x1d8 + rpm_callback+0x6c/0x78 + rpm_resume+0x490/0x6b4 + __pm_runtime_resume+0x50/0x94 + irq_chip_pm_get+0x2c/0xa0 + __irq_do_set_handler+0x178/0x24c + irq_set_chained_handler_and_data+0x60/0xa4 + mxc_gpio_probe+0x160/0x4b0 + +Cure this by implementing the irq_bus_lock/sync_unlock() interrupt chip +callbacks and handle power management in them as they are invoked from +non-atomic context. 
+ +[ tglx: Rewrote change log, added Fixes tag ] + +Fixes: 0136afa08967 ("irqchip: Add driver for imx-irqsteer controller") +Signed-off-by: Shenwei Wang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240703163250.47887-1-shenwei.wang@nxp.com +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-imx-irqsteer.c | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c +index b1ecdb08c618c..4bdcefa44f11e 100644 +--- a/drivers/irqchip/irq-imx-irqsteer.c ++++ b/drivers/irqchip/irq-imx-irqsteer.c +@@ -35,6 +35,7 @@ struct irqsteer_data { + int channel; + struct irq_domain *domain; + u32 *saved_reg; ++ struct device *dev; + }; + + static int imx_irqsteer_get_reg_index(struct irqsteer_data *data, +@@ -71,10 +72,26 @@ static void imx_irqsteer_irq_mask(struct irq_data *d) + raw_spin_unlock_irqrestore(&data->lock, flags); + } + ++static void imx_irqsteer_irq_bus_lock(struct irq_data *d) ++{ ++ struct irqsteer_data *data = d->chip_data; ++ ++ pm_runtime_get_sync(data->dev); ++} ++ ++static void imx_irqsteer_irq_bus_sync_unlock(struct irq_data *d) ++{ ++ struct irqsteer_data *data = d->chip_data; ++ ++ pm_runtime_put_autosuspend(data->dev); ++} ++ + static const struct irq_chip imx_irqsteer_irq_chip = { +- .name = "irqsteer", +- .irq_mask = imx_irqsteer_irq_mask, +- .irq_unmask = imx_irqsteer_irq_unmask, ++ .name = "irqsteer", ++ .irq_mask = imx_irqsteer_irq_mask, ++ .irq_unmask = imx_irqsteer_irq_unmask, ++ .irq_bus_lock = imx_irqsteer_irq_bus_lock, ++ .irq_bus_sync_unlock = imx_irqsteer_irq_bus_sync_unlock, + }; + + static int imx_irqsteer_irq_map(struct irq_domain *h, unsigned int irq, +@@ -152,6 +169,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + if (!data) + return -ENOMEM; + ++ data->dev = &pdev->dev; + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) { + 
dev_err(&pdev->dev, "failed to initialize reg\n"); +-- +2.43.0 + diff --git a/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch b/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch new file mode 100644 index 00000000000..dcb7494a9d1 --- /dev/null +++ b/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch @@ -0,0 +1,419 @@ +From f255c8425c2d31cac7c0aebceabe903c271db4d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Mar 2022 14:45:51 -0600 +Subject: net: Add l3mdev index to flow struct and avoid oif reset for port + devices + +From: David Ahern + +[ Upstream commit 40867d74c374b235e14d839f3a77f26684feefe5 ] + +The fundamental premise of VRF and l3mdev core code is binding a socket +to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. +Legacy code resets flowi_oif to the l3mdev losing any original port +device binding. Ben (among others) has demonstrated use cases where the +original port device binding is important and needs to be retained. +This patch handles that by adding a new entry to the common flow struct +that can indicate the l3mdev index for later rule and table matching +avoiding the need to reset flowi_oif. + +In addition to allowing more use cases that require port device binds, +this patch brings a few datapath simplications: + +1. l3mdev_fib_rule_match is only called when walking fib rules and + always after l3mdev_update_flow. That allows an optimization to bail + early for non-VRF type uses cases when flowi_l3mdev is not set. Also, + only that index needs to be checked for the FIB table id. + +2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev + (e.g., VRF) device. By resetting flowi_oif only for this case the + FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, + removing several checks in the datapath. 
The flowi_iif path can be + simplified to only be called if the it is not loopback (loopback can + not be assigned to an L3 domain) and the l3mdev index is not already + set. + +3. Avoid another device lookup in the output path when the fib lookup + returns a reject failure. + +Note: 2 functional tests for local traffic with reject fib rules are +updated to reflect the new direct failure at FIB lookup time for ping +rather than the failure on packet path. The current code fails like this: + + HINT: Fails since address on vrf device is out of device scope + COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 + ping: Warning: source address might be selected on device other than: eth1 + PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. + + --- 172.16.3.1 ping statistics --- + 1 packets transmitted, 0 received, 100% packet loss, time 0ms + +where the test now directly fails: + + HINT: Fails since address on vrf device is out of device scope + COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 + ping: connect: No route to host + +Signed-off-by: David Ahern +Tested-by: Ben Greear +Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 680735235356 ("ipv4: fix source address selection with route leak") +Signed-off-by: Sasha Levin +--- + drivers/net/vrf.c | 7 ++-- + include/net/flow.h | 6 +++- + net/ipv4/fib_frontend.c | 7 ++-- + net/ipv4/fib_semantics.c | 2 +- + net/ipv4/fib_trie.c | 7 ++-- + net/ipv4/route.c | 4 +-- + net/ipv4/xfrm4_policy.c | 4 +-- + net/ipv6/ip6_output.c | 3 +- + net/ipv6/route.c | 12 ------- + net/ipv6/xfrm6_policy.c | 3 +- + net/l3mdev/l3mdev.c | 43 +++++++++-------------- + tools/testing/selftests/net/fcnal-test.sh | 2 +- + 12 files changed, 37 insertions(+), 63 deletions(-) + +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index 8ab0b5a8dfeff..13ad434643b80 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -470,14 +470,13 @@ 
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, + + memset(&fl6, 0, sizeof(fl6)); + /* needed to match OIF rule */ +- fl6.flowi6_oif = dev->ifindex; ++ fl6.flowi6_l3mdev = dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = iph->nexthdr; +- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + + dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst) || dst == dst_null) +@@ -550,10 +549,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, + + memset(&fl4, 0, sizeof(fl4)); + /* needed to match OIF rule */ +- fl4.flowi4_oif = vrf_dev->ifindex; ++ fl4.flowi4_l3mdev = vrf_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; + fl4.flowi4_tos = RT_TOS(ip4h->tos); +- fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; ++ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; + fl4.flowi4_proto = ip4h->protocol; + fl4.daddr = ip4h->daddr; + fl4.saddr = ip4h->saddr; +diff --git a/include/net/flow.h b/include/net/flow.h +index 7ffa1fe1107cc..1c19af4f3b97e 100644 +--- a/include/net/flow.h ++++ b/include/net/flow.h +@@ -29,6 +29,7 @@ struct flowi_tunnel { + struct flowi_common { + int flowic_oif; + int flowic_iif; ++ int flowic_l3mdev; + __u32 flowic_mark; + __u8 flowic_tos; + __u8 flowic_scope; +@@ -36,7 +37,6 @@ struct flowi_common { + __u8 flowic_flags; + #define FLOWI_FLAG_ANYSRC 0x01 + #define FLOWI_FLAG_KNOWN_NH 0x02 +-#define FLOWI_FLAG_SKIP_NH_OIF 0x04 + __u32 flowic_secid; + kuid_t flowic_uid; + __u32 flowic_multipath_hash; +@@ -66,6 +66,7 @@ struct flowi4 { + struct flowi_common __fl_common; + #define flowi4_oif __fl_common.flowic_oif + #define flowi4_iif __fl_common.flowic_iif ++#define flowi4_l3mdev __fl_common.flowic_l3mdev + #define flowi4_mark __fl_common.flowic_mark + #define flowi4_tos __fl_common.flowic_tos + #define flowi4_scope __fl_common.flowic_scope +@@ -99,6 +100,7 @@ static inline void 
flowi4_init_output(struct flowi4 *fl4, int oif, + { + fl4->flowi4_oif = oif; + fl4->flowi4_iif = LOOPBACK_IFINDEX; ++ fl4->flowi4_l3mdev = 0; + fl4->flowi4_mark = mark; + fl4->flowi4_tos = tos; + fl4->flowi4_scope = scope; +@@ -129,6 +131,7 @@ struct flowi6 { + struct flowi_common __fl_common; + #define flowi6_oif __fl_common.flowic_oif + #define flowi6_iif __fl_common.flowic_iif ++#define flowi6_l3mdev __fl_common.flowic_l3mdev + #define flowi6_mark __fl_common.flowic_mark + #define flowi6_scope __fl_common.flowic_scope + #define flowi6_proto __fl_common.flowic_proto +@@ -159,6 +162,7 @@ struct flowi { + } u; + #define flowi_oif u.__fl_common.flowic_oif + #define flowi_iif u.__fl_common.flowic_iif ++#define flowi_l3mdev u.__fl_common.flowic_l3mdev + #define flowi_mark u.__fl_common.flowic_mark + #define flowi_tos u.__fl_common.flowic_tos + #define flowi_scope u.__fl_common.flowic_scope +diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c +index 41f890bf9d4c4..0a61b993d823f 100644 +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) + bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, +- .flowi4_oif = l3mdev_master_ifindex_rcu(dev), ++ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, + .flowi4_scope = scope, +@@ -352,9 +352,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + bool dev_match; + + fl4.flowi4_oif = 0; +- fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); +- if (!fl4.flowi4_iif) +- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; ++ fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); ++ fl4.flowi4_iif = oif ? 
: LOOPBACK_IFINDEX; + fl4.daddr = src; + fl4.saddr = dst; + fl4.flowi4_tos = tos; +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index bb5255178d75c..a308d3f0f845c 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -2268,7 +2268,7 @@ void fib_select_multipath(struct fib_result *res, int hash) + void fib_select_path(struct net *net, struct fib_result *res, + struct flowi4 *fl4, const struct sk_buff *skb) + { +- if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) ++ if (fl4->flowi4_oif) + goto check_saddr; + + #ifdef CONFIG_IP_ROUTE_MULTIPATH +diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c +index 3f4f6458d40e9..1bdcdc79d43f9 100644 +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1384,11 +1384,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + return false; + +- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { +- if (flp->flowi4_oif && +- flp->flowi4_oif != nhc->nhc_oif) +- return false; +- } ++ if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) ++ return false; + + return true; + } +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 1eb1e4316ed6d..c34386a9d99b4 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2200,6 +2200,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, + /* + * Now we are ready to route packet. + */ ++ fl4.flowi4_l3mdev = 0; + fl4.flowi4_oif = 0; + fl4.flowi4_iif = dev->ifindex; + fl4.flowi4_mark = skb->mark; +@@ -2676,8 +2677,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, + res->fi = NULL; + res->table = NULL; + if (fl4->flowi4_oif && +- (ipv4_is_multicast(fl4->daddr) || +- !netif_index_is_l3_master(net, fl4->flowi4_oif))) { ++ (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { + /* Apparently, routing tables are wrong. Assume, + that the destination is on link. 
+ +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 9ebd54752e03b..4548a91acdc89 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = daddr->a4; + fl4->flowi4_tos = tos; +- fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); ++ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_mark = mark; + if (saddr) + fl4->saddr = saddr->a4; + +- fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; +- + rt = __ip_route_output_key(net, fl4); + if (!IS_ERR(rt)) + return &rt->dst; +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index 32512b8ca5e72..ae00e2c7ee058 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1067,8 +1067,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + #ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || + #endif +- (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && +- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { ++ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; + } +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 799779475c7de..37e05a77fe49e 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1207,9 +1207,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_node *fn; + struct rt6_info *rt; + +- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) +- flags &= ~RT6_LOOKUP_F_IFACE; +- + rcu_read_lock(); + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + restart: +@@ -2183,9 +2180,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + saved_fn = fn; + +- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) +- oif = 0; +- + redo_rt6_select: + 
rt6_select(net, fn, oif, res, strict); + if (res->f6i == net->ipv6.fib6_null_entry) { +@@ -2932,12 +2926,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, + struct fib6_info *rt; + struct fib6_node *fn; + +- /* l3mdev_update_flow overrides oif if the device is enslaved; in +- * this case we must match on the real ingress device, so reset it +- */ +- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) +- fl6->flowi6_oif = skb->dev->ifindex; +- + /* Get the "current" route for this destination and + * check if the redirect has come from appropriate router. + * +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index 7c903e0e446cb..492b9692c0dc0 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, + int err; + + memset(&fl6, 0, sizeof(fl6)); +- fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); +- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; ++ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); + fl6.flowi6_mark = mark; + memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); + if (saddr) +diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c +index f2c3a61ad134b..42794581762cb 100644 +--- a/net/l3mdev/l3mdev.c ++++ b/net/l3mdev/l3mdev.c +@@ -249,25 +249,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct net_device *dev; + int rc = 0; + +- rcu_read_lock(); ++ /* update flow ensures flowi_l3mdev is set when relevant */ ++ if (!fl->flowi_l3mdev) ++ return 0; + +- dev = dev_get_by_index_rcu(net, fl->flowi_oif); +- if (dev && netif_is_l3_master(dev) && +- dev->l3mdev_ops->l3mdev_fib_table) { +- arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); +- rc = 1; +- goto out; +- } ++ rcu_read_lock(); + +- dev = dev_get_by_index_rcu(net, fl->flowi_iif); ++ dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_fib_table) { + 
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); + rc = 1; +- goto out; + } + +-out: + rcu_read_unlock(); + + return rc; +@@ -276,31 +270,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + void l3mdev_update_flow(struct net *net, struct flowi *fl) + { + struct net_device *dev; +- int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { +- ifindex = l3mdev_master_ifindex_rcu(dev); +- if (ifindex) { +- fl->flowi_oif = ifindex; +- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; +- goto out; +- } ++ if (!fl->flowi_l3mdev) ++ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); ++ ++ /* oif set to L3mdev directs lookup to its table; ++ * reset to avoid oif match in fib_lookup ++ */ ++ if (netif_is_l3_master(dev)) ++ fl->flowi_oif = 0; ++ goto out; + } + } + +- if (fl->flowi_iif) { ++ if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); +- if (dev) { +- ifindex = l3mdev_master_ifindex_rcu(dev); +- if (ifindex) { +- fl->flowi_iif = ifindex; +- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; +- } +- } ++ if (dev) ++ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + } + + out: +diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh +index e13b0fb63333f..acffe0029fdd1 100755 +--- a/tools/testing/selftests/net/fcnal-test.sh ++++ b/tools/testing/selftests/net/fcnal-test.sh +@@ -741,7 +741,7 @@ ipv4_ping_vrf() + log_start + show_hint "Fails since address on vrf device is out of device scope" + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} +- log_test_addr ${a} $? 1 "ping local, device bind" ++ log_test_addr ${a} $? 
2 "ping local, device bind" + done + + # +-- +2.43.0 + diff --git a/queue-5.10/remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch b/queue-5.10/remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch new file mode 100644 index 00000000000..049735bb98d --- /dev/null +++ b/queue-5.10/remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch @@ -0,0 +1,44 @@ +From b044e39b28e9c4e72b3e49ef6df2cbdce3a9a0a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Sep 2021 17:06:19 +0800 +Subject: remoteproc: imx_rproc: Fix ignoring mapping vdev regions + +From: Dong Aisheng + +[ Upstream commit afe670e23af91d8a74a8d7049f6e0984bbf6ea11 ] + +vdev regions are typically named vdev0buffer, vdev0ring0, vdev0ring1 and +etc. Change to strncmp to cover them all. + +Fixes: 8f2d8961640f ("remoteproc: imx_rproc: ignore mapping vdev regions") +Reviewed-and-tested-by: Peng Fan +Signed-off-by: Dong Aisheng +Signed-off-by: Peng Fan +Cc: stable +Link: https://lore.kernel.org/r/20210910090621.3073540-5-peng.fan@oss.nxp.com +Signed-off-by: Mathieu Poirier +Signed-off-by: Bjorn Andersson +Stable-dep-of: 2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL") +Signed-off-by: Sasha Levin +--- + drivers/remoteproc/imx_rproc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c +index 6e233f6289200..517d1b5733288 100644 +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -287,8 +287,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, + struct resource res; + + node = of_parse_phandle(np, "memory-region", a); +- /* Not map vdev region */ +- if (!strcmp(node->name, "vdev")) ++ /* Not map vdevbuffer, vdevring region */ ++ if (!strncmp(node->name, "vdev", strlen("vdev"))) + continue; + err = of_address_to_resource(node, 0, &res); + if (err) { +-- +2.43.0 + diff --git a/queue-5.10/remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch 
b/queue-5.10/remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch new file mode 100644 index 00000000000..659f0034b30 --- /dev/null +++ b/queue-5.10/remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch @@ -0,0 +1,40 @@ +From 736e96345bb3c37fd6ba1c879dff082224e8ee26 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Mar 2021 19:24:24 +0800 +Subject: remoteproc: imx_rproc: ignore mapping vdev regions + +From: Peng Fan + +[ Upstream commit 8f2d8961640f0346cbe892273c3260a0d30c1931 ] + +vdev regions are vdev0vring0, vdev0vring1, vdevbuffer and similar. +They are handled by remoteproc common code, no need to map in imx +rproc driver. + +Signed-off-by: Peng Fan +Reviewed-by: Mathieu Poirier +Link: https://lore.kernel.org/r/1615029865-23312-10-git-send-email-peng.fan@oss.nxp.com +Signed-off-by: Bjorn Andersson +Stable-dep-of: 2fa26ca8b786 ("remoteproc: imx_rproc: Skip over memory region when node value is NULL") +Signed-off-by: Sasha Levin +--- + drivers/remoteproc/imx_rproc.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c +index 8957ed271d209..6e233f6289200 100644 +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -287,6 +287,9 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, + struct resource res; + + node = of_parse_phandle(np, "memory-region", a); ++ /* Not map vdev region */ ++ if (!strcmp(node->name, "vdev")) ++ continue; + err = of_address_to_resource(node, 0, &res); + if (err) { + dev_err(dev, "unable to resolve memory region\n"); +-- +2.43.0 + diff --git a/queue-5.10/remoteproc-imx_rproc-skip-over-memory-region-when-no.patch b/queue-5.10/remoteproc-imx_rproc-skip-over-memory-region-when-no.patch new file mode 100644 index 00000000000..7bcfb504b11 --- /dev/null +++ b/queue-5.10/remoteproc-imx_rproc-skip-over-memory-region-when-no.patch @@ -0,0 +1,45 @@ +From 0152ad810d1dff071647bdad9460b1b613c2951e Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Thu, 6 Jun 2024 10:52:04 +0300 +Subject: remoteproc: imx_rproc: Skip over memory region when node value is + NULL + +From: Aleksandr Mishin + +[ Upstream commit 2fa26ca8b786888673689ccc9da6094150939982 ] + +In imx_rproc_addr_init() "nph = of_count_phandle_with_args()" just counts +number of phandles. But phandles may be empty. So of_parse_phandle() in +the parsing loop (0 < a < nph) may return NULL which is later dereferenced. +Adjust this issue by adding NULL-return check. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver") +Signed-off-by: Aleksandr Mishin +Reviewed-by: Peng Fan +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240606075204.12354-1-amishin@t-argos.ru +[Fixed title to fit within the prescribed 70-75 charcters] +Signed-off-by: Mathieu Poirier +Signed-off-by: Sasha Levin +--- + drivers/remoteproc/imx_rproc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c +index 517d1b5733288..373fce8b91064 100644 +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -287,6 +287,8 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, + struct resource res; + + node = of_parse_phandle(np, "memory-region", a); ++ if (!node) ++ continue; + /* Not map vdevbuffer, vdevring region */ + if (!strncmp(node->name, "vdev", strlen("vdev"))) + continue; +-- +2.43.0 + diff --git a/queue-5.10/series b/queue-5.10/series index a025b88c97a..e67dc8ee69e 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -215,3 +215,29 @@ nvme-split-command-copy-into-a-helper.patch nvme-pci-add-missing-condition-check-for-existence-o.patch fs-don-t-allow-non-init-s_user_ns-for-filesystems-wi.patch powerpc-configs-update-defconfig-with-now-user-visible-config_fsl_ifc.patch +net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch 
+ipv4-fix-source-address-selection-with-route-leak.patch +fuse-name-fs_context-consistently.patch +fuse-verify-g-u-id-mount-options-correctly.patch +ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch +ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch +ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch +ipc-store-ipc-sysctls-in-the-ipc-namespace.patch +ipc-check-permissions-for-checkpoint_restart-sysctls.patch +sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch +sysctl-allow-to-change-limits-for-posix-messages-que.patch +sysctl-treewide-drop-unused-argument-ctl_table_root-.patch +sysctl-always-initialize-i_uid-i_gid.patch +ext4-factor-out-a-common-helper-to-query-extent-map.patch +ext4-check-the-extent-status-again-before-inserting-.patch +soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch +drivers-soc-xilinx-check-return-status-of-get_api_ve.patch +driver-core-cast-to-void-with-__force-for-__percpu-p.patch +devres-fix-memory-leakage-caused-by-driver-api-devm_.patch +genirq-allow-the-pm-device-to-originate-from-irq-dom.patch +irqchip-imx-irqsteer-constify-irq_chip-struct.patch +irqchip-imx-irqsteer-add-runtime-pm-support.patch +irqchip-imx-irqsteer-handle-runtime-power-management.patch +remoteproc-imx_rproc-ignore-mapping-vdev-regions.patch +remoteproc-imx_rproc-fix-ignoring-mapping-vdev-regio.patch +remoteproc-imx_rproc-skip-over-memory-region-when-no.patch diff --git a/queue-5.10/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch b/queue-5.10/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch new file mode 100644 index 00000000000..495226569bf --- /dev/null +++ b/queue-5.10/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch @@ -0,0 +1,92 @@ +From 79285036ed34e612265d038c29426a15a864b967 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Aug 2021 17:03:10 +0200 +Subject: soc: xilinx: move PM_INIT_FINALIZE to zynqmp_pm_domains driver + +From: Michael Tretter + +[ Upstream commit 
7fd890b89dea55eb5866640eb8befad26d558161 ] + +PM_INIT_FINALIZE tells the PMU FW that Linux is able to handle the power +management nodes that are provided by the PMU FW. Nodes that are not +requested are shut down after this call. + +Calling PM_INIT_FINALIZE from the zynqmp_power driver is wrong. The PM +node request mechanism is implemented in the zynqmp_pm_domains driver, +which must also call PM_INIT_FINALIZE. + +Due to the behavior of the PMU FW, all devices must be powered up before +PM_INIT_FINALIZE is called, because otherwise the devices might +misbehave. Calling PM_INIT_FINALIZE from the sync_state device callback +ensures that all users probed successfully before the PMU FW is allowed +to power off unused domains. + +Signed-off-by: Michael Tretter +Acked-by: Michal Simek +Acked-by: Rajan Vaja +Link: https://lore.kernel.org/r/20210825150313.4033156-2-m.tretter@pengutronix.de +Signed-off-by: Michal Simek +Stable-dep-of: 9b003e14801c ("drivers: soc: xilinx: check return status of get_api_version()") +Signed-off-by: Sasha Levin +--- + drivers/soc/xilinx/zynqmp_pm_domains.c | 16 ++++++++++++++++ + drivers/soc/xilinx/zynqmp_power.c | 1 - + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c +index 226d343f0a6a5..81e8e10f10929 100644 +--- a/drivers/soc/xilinx/zynqmp_pm_domains.c ++++ b/drivers/soc/xilinx/zynqmp_pm_domains.c +@@ -152,11 +152,17 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) + static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, + struct device *dev) + { ++ struct device_link *link; + int ret; + struct zynqmp_pm_domain *pd; + + pd = container_of(domain, struct zynqmp_pm_domain, gpd); + ++ link = device_link_add(dev, &domain->dev, DL_FLAG_SYNC_STATE_ONLY); ++ if (!link) ++ dev_dbg(&domain->dev, "failed to create device link for %s\n", ++ dev_name(dev)); ++ + /* If this is not the first device to attach there is nothing to 
do */ + if (domain->device_count) + return 0; +@@ -299,9 +305,19 @@ static int zynqmp_gpd_remove(struct platform_device *pdev) + return 0; + } + ++static void zynqmp_gpd_sync_state(struct device *dev) ++{ ++ int ret; ++ ++ ret = zynqmp_pm_init_finalize(); ++ if (ret) ++ dev_warn(dev, "failed to release power management to firmware\n"); ++} ++ + static struct platform_driver zynqmp_power_domain_driver = { + .driver = { + .name = "zynqmp_power_controller", ++ .sync_state = zynqmp_gpd_sync_state, + }, + .probe = zynqmp_gpd_probe, + .remove = zynqmp_gpd_remove, +diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c +index c556623dae024..f8c301984d4f9 100644 +--- a/drivers/soc/xilinx/zynqmp_power.c ++++ b/drivers/soc/xilinx/zynqmp_power.c +@@ -178,7 +178,6 @@ static int zynqmp_pm_probe(struct platform_device *pdev) + u32 pm_api_version; + struct mbox_client *client; + +- zynqmp_pm_init_finalize(); + zynqmp_pm_get_api_version(&pm_api_version); + + /* Check PM API version number */ +-- +2.43.0 + diff --git a/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch b/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch new file mode 100644 index 00000000000..a5744c7bdc3 --- /dev/null +++ b/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch @@ -0,0 +1,140 @@ +From 8443e2e5cb24ee6393549421a32d0798a334e92e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 Jan 2024 15:46:41 +0000 +Subject: sysctl: allow change system v ipc sysctls inside ipc namespace + +From: Alexey Gladkov + +[ Upstream commit 50ec499b9a43e46200c9f7b7d723ab2e4af540b3 ] + +Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3. + +Right now ipc and mq limits count as per ipc namespace, but only real root +can change them. By default, the current values of these limits are such +that it can only be reduced. 
Since only root can change the values, it is +impossible to reduce these limits in the rootless container. + +We can allow limit changes within ipc namespace because mq parameters are +limited by RLIMIT_MSGQUEUE and ipc parameters are not limited to anything +other than cgroups. + +This patch (of 3): + +Rootless containers are not allowed to modify kernel IPC parameters. + +All default limits are set to such high values that in fact there are no +limits at all. All limits are not inherited and are initialized to +default values when a new ipc_namespace is created. + +For new ipc_namespace: + +size_t ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24)) +size_t ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24)) +int ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15) +int ipc_ns.shm_rmid_forced = 0; +unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192 +unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000 +unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384 + +The shm_tot (total amount of shared pages) has also ceased to be global, +it is located in ipc_namespace and is not inherited from anywhere. + +In such conditions, it cannot be said that these limits limit anything. +The real limiter for them is cgroups. + +If we allow rootless containers to change these parameters, then it can +only be reduced. + +Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org +Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org +Signed-off-by: Alexey Gladkov +Signed-off-by: Eric W. 
Biederman +Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org +Cc: Christian Brauner +Cc: Joel Granados +Cc: Kees Cook +Cc: Luis Chamberlain +Cc: Manfred Spraul +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 35 insertions(+), 2 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index a2b871d006da7..2864fd7fafaac 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include "util.h" + + static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, +@@ -198,25 +199,57 @@ static int set_is_seen(struct ctl_table_set *set) + return &current->nsproxy->ipc_ns->ipc_set == set; + } + ++static void ipc_set_ownership(struct ctl_table_header *head, ++ struct ctl_table *table, ++ kuid_t *uid, kgid_t *gid) ++{ ++ struct ipc_namespace *ns = ++ container_of(head->set, struct ipc_namespace, ipc_set); ++ ++ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); ++ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); ++ ++ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; ++ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; ++} ++ + static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) + { + int mode = table->mode; + + #ifdef CONFIG_CHECKPOINT_RESTORE +- struct ipc_namespace *ns = current->nsproxy->ipc_ns; ++ struct ipc_namespace *ns = ++ container_of(head->set, struct ipc_namespace, ipc_set); + + if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || + (table->data == &ns->ids[IPC_MSG_IDS].next_id) || + (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && + checkpoint_restore_ns_capable(ns->user_ns)) + mode = 0666; ++ else + #endif +- return mode; ++ { ++ kuid_t ns_root_uid; ++ kgid_t ns_root_gid; ++ ++ ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ ++ if (uid_eq(current_euid(), ns_root_uid)) ++ mode >>= 6; ++ ++ else if (in_egroup_p(ns_root_gid)) ++ mode >>= 3; ++ } ++ ++ mode &= 7; ++ ++ return (mode << 6) | (mode << 3) | mode; + } + + static struct ctl_table_root set_root = { + .lookup = set_lookup, + .permissions = ipc_permissions, ++ .set_ownership = ipc_set_ownership, + }; + + bool setup_ipc_sysctls(struct ipc_namespace *ns) +-- +2.43.0 + diff --git a/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch b/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch new file mode 100644 index 00000000000..608fe54340f --- /dev/null +++ b/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch @@ -0,0 +1,95 @@ +From 2c050d168b37b543fedfec9d518ccbc182113e09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 Jan 2024 15:46:43 +0000 +Subject: sysctl: allow to change limits for posix messages queues + +From: Alexey Gladkov + +[ Upstream commit f9436a5d0497f759330d07e1189565edd4456be8 ] + +All parameters of posix messages queues (queues_max/msg_max/msgsize_max) +end up being limited by RLIMIT_MSGQUEUE. The code in mqueue_get_inode is +where that limiting happens. + +The RLIMIT_MSGQUEUE is bound to the user namespace and is counted +hierarchically. 
+ +We can allow root in the user namespace to modify the posix messages +queues parameters. + +Link: https://lkml.kernel.org/r/6ad67f23d1459a4f4339f74aa73bac0ecf3995e1.1705333426.git.legion@kernel.org +Signed-off-by: Alexey Gladkov +Signed-off-by: Eric W. Biederman +Link: https://lkml.kernel.org/r/7eb21211c8622e91d226e63416b1b93c079f60ee.1663756794.git.legion@kernel.org +Cc: Christian Brauner +Cc: Davidlohr Bueso +Cc: Joel Granados +Cc: Kees Cook +Cc: Luis Chamberlain +Cc: Manfred Spraul +Signed-off-by: Andrew Morton +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/mq_sysctl.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c +index fbf6a8b93a265..ce03930aced55 100644 +--- a/ipc/mq_sysctl.c ++++ b/ipc/mq_sysctl.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + static int msg_max_limit_min = MIN_MSGMAX; + static int msg_max_limit_max = HARD_MSGMAX; +@@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set) + return &current->nsproxy->ipc_ns->mq_set == set; + } + ++static void mq_set_ownership(struct ctl_table_header *head, ++ struct ctl_table *table, ++ kuid_t *uid, kgid_t *gid) ++{ ++ struct ipc_namespace *ns = ++ container_of(head->set, struct ipc_namespace, mq_set); ++ ++ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); ++ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); ++ ++ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; ++ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; ++} ++ ++static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table) ++{ ++ int mode = table->mode; ++ kuid_t ns_root_uid; ++ kgid_t ns_root_gid; ++ ++ mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ ++ if (uid_eq(current_euid(), ns_root_uid)) ++ mode >>= 6; ++ ++ else if (in_egroup_p(ns_root_gid)) ++ mode >>= 3; ++ ++ mode &= 7; ++ ++ return (mode << 6) | (mode << 3) | mode; ++} ++ + static struct ctl_table_root set_root = { + .lookup = set_lookup, ++ .permissions = mq_permissions, ++ .set_ownership = mq_set_ownership, + }; + + bool setup_mq_sysctls(struct ipc_namespace *ns) +-- +2.43.0 + diff --git a/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch b/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch new file mode 100644 index 00000000000..881e29cfd7d --- /dev/null +++ b/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch @@ -0,0 +1,52 @@ +From 5d7defeab6026e3ce687533b8c44874295f0a96a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Apr 2024 23:10:34 +0200 +Subject: sysctl: always initialize i_uid/i_gid +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit 98ca62ba9e2be5863c7d069f84f7166b45a5b2f4 ] + +Always initialize i_uid/i_gid inside the sysfs core so set_ownership() +can safely skip setting them. + +Commit 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of +i_uid/i_gid on /proc/sys inodes.") added defaults for i_uid/i_gid when +set_ownership() was not implemented. It also missed adjusting +net_ctl_set_ownership() to use the same default values in case the +computation of a better value failed. 
+ +Fixes: 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of i_uid/i_gid on /proc/sys inodes.") +Cc: stable@vger.kernel.org +Signed-off-by: Thomas Weißschuh +Signed-off-by: Joel Granados +Signed-off-by: Sasha Levin +--- + fs/proc/proc_sysctl.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index d61777c67ada8..d97e2d399fe6d 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -471,12 +471,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, + make_empty_dir_inode(inode); + } + ++ inode->i_uid = GLOBAL_ROOT_UID; ++ inode->i_gid = GLOBAL_ROOT_GID; + if (root->set_ownership) + root->set_ownership(head, &inode->i_uid, &inode->i_gid); +- else { +- inode->i_uid = GLOBAL_ROOT_UID; +- inode->i_gid = GLOBAL_ROOT_GID; +- } + + return inode; + } +-- +2.43.0 + diff --git a/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch b/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch new file mode 100644 index 00000000000..746430ade9c --- /dev/null +++ b/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch @@ -0,0 +1,127 @@ +From 1b75a9102932eba02f4300ecd460eff59af6dbdd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Mar 2024 19:11:30 +0100 +Subject: sysctl: treewide: drop unused argument + ctl_table_root::set_ownership(table) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit 520713a93d550406dae14d49cdb8778d70cecdfd ] + +Remove the 'table' argument from set_ownership as it is never used. This +change is a step towards putting "struct ctl_table" into .rodata and +eventually having sysctl core only use "const struct ctl_table". 
+ +The patch was created with the following coccinelle script: + + @@ + identifier func, head, table, uid, gid; + @@ + + void func( + struct ctl_table_header *head, + - struct ctl_table *table, + kuid_t *uid, kgid_t *gid) + { ... } + +No additional occurrences of 'set_ownership' were found after doing a +tree-wide search. + +Reviewed-by: Joel Granados +Signed-off-by: Thomas Weißschuh +Signed-off-by: Joel Granados +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + fs/proc/proc_sysctl.c | 2 +- + include/linux/sysctl.h | 1 - + ipc/ipc_sysctl.c | 3 +-- + ipc/mq_sysctl.c | 3 +-- + net/sysctl_net.c | 1 - + 5 files changed, 3 insertions(+), 7 deletions(-) + +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index aff9593feb73c..d61777c67ada8 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -472,7 +472,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, + } + + if (root->set_ownership) +- root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); ++ root->set_ownership(head, &inode->i_uid, &inode->i_gid); + else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; +diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h +index 47cf70c8eb93c..cde78b5a54295 100644 +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -173,7 +173,6 @@ struct ctl_table_root { + struct ctl_table_set default_set; + struct ctl_table_set *(*lookup)(struct ctl_table_root *root); + void (*set_ownership)(struct ctl_table_header *head, +- struct ctl_table *table, + kuid_t *uid, kgid_t *gid); + int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); + }; +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 2864fd7fafaac..c118d8293d3b6 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -200,7 +200,6 @@ static int set_is_seen(struct ctl_table_set *set) + } + + static void ipc_set_ownership(struct ctl_table_header *head, +- struct 
ctl_table *table, + kuid_t *uid, kgid_t *gid) + { + struct ipc_namespace *ns = +@@ -232,7 +231,7 @@ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *tabl + kuid_t ns_root_uid; + kgid_t ns_root_gid; + +- ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ ipc_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; +diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c +index ce03930aced55..c960691fc24d9 100644 +--- a/ipc/mq_sysctl.c ++++ b/ipc/mq_sysctl.c +@@ -78,7 +78,6 @@ static int set_is_seen(struct ctl_table_set *set) + } + + static void mq_set_ownership(struct ctl_table_header *head, +- struct ctl_table *table, + kuid_t *uid, kgid_t *gid) + { + struct ipc_namespace *ns = +@@ -97,7 +96,7 @@ static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table + kuid_t ns_root_uid; + kgid_t ns_root_gid; + +- mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ mq_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; +diff --git a/net/sysctl_net.c b/net/sysctl_net.c +index d14dab8b6774c..592f61eb1089b 100644 +--- a/net/sysctl_net.c ++++ b/net/sysctl_net.c +@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head, + } + + static void net_ctl_set_ownership(struct ctl_table_header *head, +- struct ctl_table *table, + kuid_t *uid, kgid_t *gid) + { + struct net *net = container_of(head->set, struct net, sysctls); +-- +2.43.0 +