From: Greg Kroah-Hartman Date: Tue, 13 Aug 2024 13:00:28 +0000 (+0200) Subject: clean up some unneeded and broken 5.10 and 5.15 commits X-Git-Tag: v6.1.105~15 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e1ca46fe9115cb33c2760bf7febfb20cbaa3b1b0;p=thirdparty%2Fkernel%2Fstable-queue.git clean up some unneeded and broken 5.10 and 5.15 commits --- diff --git a/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch b/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch deleted file mode 100644 index f5755045956..00000000000 --- a/queue-5.10/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 2b1bb86dc48a14692989153e5b91def24b213416 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 8 Nov 2021 18:35:59 -0800 -Subject: ipc: check checkpoint_restore_ns_capable() to modify C/R proc files - -From: Michal Clapinski - -[ Upstream commit 5563cabdde7ee53c34ec7e5e0283bfcc9a1bc893 ] - -This commit removes the requirement to be root to modify sem_next_id, -msg_next_id and shm_next_id and checks checkpoint_restore_ns_capable -instead. - -Since those files are specific to the IPC namespace, there is no reason -they should require root privileges. This is similar to ns_last_pid, -which also only checks checkpoint_restore_ns_capable. - -[akpm@linux-foundation.org: ipc/ipc_sysctl.c needs capability.h for checkpoint_restore_ns_capable()] - -Link: https://lkml.kernel.org/r/20210916163717.3179496-1-mclapinski@google.com -Signed-off-by: Michal Clapinski -Reviewed-by: Davidlohr Bueso -Reviewed-by: Manfred Spraul -Cc: "Eric W. 
Biederman" -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 29 +++++++++++++++++++++++------ - 1 file changed, 23 insertions(+), 6 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 3f312bf2b1163..345e4d673e61e 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - #include - #include - #include "util.h" -@@ -104,6 +105,19 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - return ret; - } - -+#ifdef CONFIG_CHECKPOINT_RESTORE -+static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, -+ int write, void *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; -+ -+ if (write && !checkpoint_restore_ns_capable(user_ns)) -+ return -EPERM; -+ -+ return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -+} -+#endif -+ - #else - #define proc_ipc_doulongvec_minmax NULL - #define proc_ipc_dointvec NULL -@@ -111,6 +125,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - #define proc_ipc_dointvec_minmax_orphans NULL - #define proc_ipc_auto_msgmni NULL - #define proc_ipc_sem_dointvec NULL -+#ifdef CONFIG_CHECKPOINT_RESTORE -+#define proc_ipc_dointvec_minmax_checkpoint_restore NULL -+#endif /* CONFIG_CHECKPOINT_RESTORE */ - #endif - - int ipc_mni = IPCMNI; -@@ -198,8 +215,8 @@ static struct ctl_table ipc_kern_table[] = { - .procname = "sem_next_id", - .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), -- .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .mode = 0666, -+ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -207,8 +224,8 @@ static struct ctl_table ipc_kern_table[] = { - .procname 
= "msg_next_id", - .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), -- .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .mode = 0666, -+ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -216,8 +233,8 @@ static struct ctl_table ipc_kern_table[] = { - .procname = "shm_next_id", - .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), -- .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .mode = 0666, -+ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, --- -2.43.0 - diff --git a/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch b/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch deleted file mode 100644 index 7828ef2ab1d..00000000000 --- a/queue-5.10/ipc-check-permissions-for-checkpoint_restart-sysctls.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 58463ddf843a769113f5f44de099157c98150f50 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 3 May 2022 15:39:56 +0200 -Subject: ipc: Check permissions for checkpoint_restart sysctls at open time - -From: Alexey Gladkov - -[ Upstream commit 0889f44e281034e180daa6daf3e2d57c012452d4 ] - -As Eric Biederman pointed out, it is possible not to use a custom -proc_handler and check permissions for every write, but to use a -.permission handler. That will allow the checkpoint_restart sysctls to -perform all of their permission checks at open time, and not need any -other special code. - -Link: https://lore.kernel.org/lkml/87czib9g38.fsf@email.froward.int.ebiederm.org/ -Fixes: 1f5c135ee509 ("ipc: Store ipc sysctls in the ipc namespace") -Signed-off-by: Eric W. 
Biederman -Signed-off-by: Alexey Gladkov -Link: https://lkml.kernel.org/r/65fa8459803830608da4610a39f33c76aa933eb9.1651584847.git.legion@kernel.org -Signed-off-by: Eric W. Biederman -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 57 ++++++++++++++++++++++++------------------------ - 1 file changed, 29 insertions(+), 28 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 15210ac47e9e1..a2b871d006da7 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -78,25 +78,6 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - return ret; - } - --#ifdef CONFIG_CHECKPOINT_RESTORE --static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, -- int write, void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ipc_namespace *ns = table->extra1; -- struct ctl_table ipc_table; -- -- if (write && !checkpoint_restore_ns_capable(ns->user_ns)) -- return -EPERM; -- -- memcpy(&ipc_table, table, sizeof(ipc_table)); -- -- ipc_table.extra1 = SYSCTL_ZERO; -- ipc_table.extra2 = SYSCTL_INT_MAX; -- -- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); --} --#endif -- - int ipc_mni = IPCMNI; - int ipc_mni_shift = IPCMNI_SHIFT; - int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -@@ -180,22 +161,28 @@ static struct ctl_table ipc_sysctls[] = { - .procname = "sem_next_id", - .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), -- .mode = 0666, -- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -+ .mode = 0444, -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "msg_next_id", - .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), -- .mode = 0666, -- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -+ .mode = 0444, -+ .proc_handler = proc_dointvec_minmax, 
-+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "shm_next_id", - .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), -- .mode = 0666, -- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -+ .mode = 0444, -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_INT_MAX, - }, - #endif - {} -@@ -211,8 +198,25 @@ static int set_is_seen(struct ctl_table_set *set) - return &current->nsproxy->ipc_ns->ipc_set == set; - } - -+static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) -+{ -+ int mode = table->mode; -+ -+#ifdef CONFIG_CHECKPOINT_RESTORE -+ struct ipc_namespace *ns = current->nsproxy->ipc_ns; -+ -+ if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || -+ (table->data == &ns->ids[IPC_MSG_IDS].next_id) || -+ (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && -+ checkpoint_restore_ns_capable(ns->user_ns)) -+ mode = 0666; -+#endif -+ return mode; -+} -+ - static struct ctl_table_root set_root = { - .lookup = set_lookup, -+ .permissions = ipc_permissions, - }; - - bool setup_ipc_sysctls(struct ipc_namespace *ns) -@@ -254,15 +258,12 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns) - #ifdef CONFIG_CHECKPOINT_RESTORE - } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { - tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; -- tbl[i].extra1 = ns; - - } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { - tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; -- tbl[i].extra1 = ns; - - } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { - tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; -- tbl[i].extra1 = ns; - #endif - } else { - tbl[i].data = NULL; --- -2.43.0 - diff --git a/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch b/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch deleted file mode 100644 index e00eca47c44..00000000000 --- 
a/queue-5.10/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch +++ /dev/null @@ -1,69 +0,0 @@ -From dc71da9fbefdbd0a29e74eda21a5d3a87dbd1729 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 8 Nov 2021 18:36:02 -0800 -Subject: ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL - -From: Manfred Spraul - -[ Upstream commit 0e9beb8a96f21a6df1579cb3a679e150e3269d80 ] - -Compilation of ipc/ipc_sysctl.c is controlled by -obj-$(CONFIG_SYSVIPC_SYSCTL) -[see ipc/Makefile] - -And CONFIG_SYSVIPC_SYSCTL depends on SYSCTL -[see init/Kconfig] - -And SYSCTL is selected by PROC_SYSCTL. -[see fs/proc/Kconfig] - -Thus: #ifndef CONFIG_PROC_SYSCTL in ipc/ipc_sysctl.c is impossible, the -fallback can be removed. - -Link: https://lkml.kernel.org/r/20210918145337.3369-1-manfred@colorfullife.com -Signed-off-by: Manfred Spraul -Reviewed-by: "Eric W. Biederman" -Acked-by: Davidlohr Bueso -Cc: Manfred Spraul -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 13 ------------- - 1 file changed, 13 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 345e4d673e61e..f101c171753f6 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -23,7 +23,6 @@ static void *get_ipc(struct ctl_table *table) - return which; - } - --#ifdef CONFIG_PROC_SYSCTL - static int proc_ipc_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -@@ -118,18 +117,6 @@ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, - } - #endif - --#else --#define proc_ipc_doulongvec_minmax NULL --#define proc_ipc_dointvec NULL --#define proc_ipc_dointvec_minmax NULL --#define proc_ipc_dointvec_minmax_orphans NULL --#define proc_ipc_auto_msgmni NULL --#define proc_ipc_sem_dointvec NULL --#ifdef CONFIG_CHECKPOINT_RESTORE --#define proc_ipc_dointvec_minmax_checkpoint_restore NULL --#endif /* 
CONFIG_CHECKPOINT_RESTORE */ --#endif -- - int ipc_mni = IPCMNI; - int ipc_mni_shift = IPCMNI_SHIFT; - int ipc_min_cycle = RADIX_TREE_MAP_SIZE; --- -2.43.0 - diff --git a/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch b/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch deleted file mode 100644 index a273b9510ab..00000000000 --- a/queue-5.10/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch +++ /dev/null @@ -1,406 +0,0 @@ -From d7b4862364b01676155853a01f0065e8caa36e1a Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 14 Feb 2022 19:18:15 +0100 -Subject: ipc: Store ipc sysctls in the ipc namespace - -From: Alexey Gladkov - -[ Upstream commit 1f5c135ee509e89e0cc274333a65f73c62cb16e5 ] - -The ipc sysctls are not available for modification inside the user -namespace. Following the mqueue sysctls, we changed the implementation -to be more userns friendly. - -So far, the changes do not provide additional access to files. This -will be done in a future patch. - -Signed-off-by: Alexey Gladkov -Link: https://lkml.kernel.org/r/be6f9d014276f4dddd0c3aa05a86052856c1c555.1644862280.git.legion@kernel.org -Signed-off-by: Eric W. 
Biederman -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - include/linux/ipc_namespace.h | 21 ++++ - ipc/ipc_sysctl.c | 189 ++++++++++++++++++++++------------ - ipc/namespace.c | 4 + - 3 files changed, 147 insertions(+), 67 deletions(-) - -diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h -index 60cd84c1ba146..efcfa7f1d4551 100644 ---- a/include/linux/ipc_namespace.h -+++ b/include/linux/ipc_namespace.h -@@ -68,6 +68,9 @@ struct ipc_namespace { - struct ctl_table_set mq_set; - struct ctl_table_header *mq_sysctls; - -+ struct ctl_table_set ipc_set; -+ struct ctl_table_header *ipc_sysctls; -+ - /* user_ns which owns the ipc ns */ - struct user_namespace *user_ns; - struct ucounts *ucounts; -@@ -189,4 +192,22 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns) - } - - #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ -+ -+#ifdef CONFIG_SYSVIPC_SYSCTL -+ -+bool setup_ipc_sysctls(struct ipc_namespace *ns); -+void retire_ipc_sysctls(struct ipc_namespace *ns); -+ -+#else /* CONFIG_SYSVIPC_SYSCTL */ -+ -+static inline void retire_ipc_sysctls(struct ipc_namespace *ns) -+{ -+} -+ -+static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) -+{ -+ return true; -+} -+ -+#endif /* CONFIG_SYSVIPC_SYSCTL */ - #endif -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index f101c171753f6..15210ac47e9e1 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -13,43 +13,22 @@ - #include - #include - #include -+#include - #include "util.h" - --static void *get_ipc(struct ctl_table *table) --{ -- char *which = table->data; -- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; -- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; -- return which; --} -- --static int proc_ipc_dointvec(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table ipc_table; -- -- memcpy(&ipc_table, table, sizeof(ipc_table)); -- ipc_table.data = get_ipc(table); 
-- -- return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); --} -- --static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, -+static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -+ struct ipc_namespace *ns = table->extra1; - struct ctl_table ipc_table; -+ int err; - - memcpy(&ipc_table, table, sizeof(ipc_table)); -- ipc_table.data = get_ipc(table); - -- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); --} -+ ipc_table.extra1 = SYSCTL_ZERO; -+ ipc_table.extra2 = SYSCTL_ONE; - --static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ipc_namespace *ns = current->nsproxy->ipc_ns; -- int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -+ err = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); - - if (err < 0) - return err; -@@ -58,17 +37,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, - return err; - } - --static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table ipc_table; -- memcpy(&ipc_table, table, sizeof(ipc_table)); -- ipc_table.data = get_ipc(table); -- -- return proc_doulongvec_minmax(&ipc_table, write, buffer, -- lenp, ppos); --} -- - static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -@@ -87,11 +55,17 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, - static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -+ struct ipc_namespace *ns = table->extra1; -+ struct ctl_table ipc_table; - int ret, semmni; -- struct ipc_namespace *ns = current->nsproxy->ipc_ns; -+ -+ memcpy(&ipc_table, table, sizeof(ipc_table)); -+ -+ ipc_table.extra1 = NULL; -+ ipc_table.extra2 = NULL; - - 
semmni = ns->sem_ctls[3]; -- ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos); -+ ret = proc_dointvec(table, write, buffer, lenp, ppos); - - if (!ret) - ret = sem_check_semmni(current->nsproxy->ipc_ns); -@@ -108,12 +82,18 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, - int write, void *buffer, size_t *lenp, loff_t *ppos) - { -- struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; -+ struct ipc_namespace *ns = table->extra1; -+ struct ctl_table ipc_table; - -- if (write && !checkpoint_restore_ns_capable(user_ns)) -+ if (write && !checkpoint_restore_ns_capable(ns->user_ns)) - return -EPERM; - -- return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -+ memcpy(&ipc_table, table, sizeof(ipc_table)); -+ -+ ipc_table.extra1 = SYSCTL_ZERO; -+ ipc_table.extra2 = SYSCTL_INT_MAX; -+ -+ return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); - } - #endif - -@@ -121,27 +101,27 @@ int ipc_mni = IPCMNI; - int ipc_mni_shift = IPCMNI_SHIFT; - int ipc_min_cycle = RADIX_TREE_MAP_SIZE; - --static struct ctl_table ipc_kern_table[] = { -+static struct ctl_table ipc_sysctls[] = { - { - .procname = "shmmax", - .data = &init_ipc_ns.shm_ctlmax, - .maxlen = sizeof(init_ipc_ns.shm_ctlmax), - .mode = 0644, -- .proc_handler = proc_ipc_doulongvec_minmax, -+ .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "shmall", - .data = &init_ipc_ns.shm_ctlall, - .maxlen = sizeof(init_ipc_ns.shm_ctlall), - .mode = 0644, -- .proc_handler = proc_ipc_doulongvec_minmax, -+ .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "shmmni", - .data = &init_ipc_ns.shm_ctlmni, - .maxlen = sizeof(init_ipc_ns.shm_ctlmni), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ipc_mni, - }, -@@ -151,15 +131,13 @@ static struct ctl_table ipc_kern_table[] = { - 
.maxlen = sizeof(init_ipc_ns.shm_rmid_forced), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax_orphans, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_ONE, - }, - { - .procname = "msgmax", - .data = &init_ipc_ns.msg_ctlmax, - .maxlen = sizeof(init_ipc_ns.msg_ctlmax), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -168,7 +146,7 @@ static struct ctl_table ipc_kern_table[] = { - .data = &init_ipc_ns.msg_ctlmni, - .maxlen = sizeof(init_ipc_ns.msg_ctlmni), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ipc_mni, - }, -@@ -186,7 +164,7 @@ static struct ctl_table ipc_kern_table[] = { - .data = &init_ipc_ns.msg_ctlmnb, - .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -204,8 +182,6 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "msg_next_id", -@@ -213,8 +189,6 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "shm_next_id", -@@ -222,25 +196,106 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_INT_MAX, - }, - #endif - {} - }; - --static struct ctl_table ipc_root_table[] = { -- { -- .procname = "kernel", 
-- .mode = 0555, -- .child = ipc_kern_table, -- }, -- {} -+static struct ctl_table_set *set_lookup(struct ctl_table_root *root) -+{ -+ return &current->nsproxy->ipc_ns->ipc_set; -+} -+ -+static int set_is_seen(struct ctl_table_set *set) -+{ -+ return &current->nsproxy->ipc_ns->ipc_set == set; -+} -+ -+static struct ctl_table_root set_root = { -+ .lookup = set_lookup, - }; - -+bool setup_ipc_sysctls(struct ipc_namespace *ns) -+{ -+ struct ctl_table *tbl; -+ -+ setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); -+ -+ tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); -+ if (tbl) { -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { -+ if (tbl[i].data == &init_ipc_ns.shm_ctlmax) { -+ tbl[i].data = &ns->shm_ctlmax; -+ -+ } else if (tbl[i].data == &init_ipc_ns.shm_ctlall) { -+ tbl[i].data = &ns->shm_ctlall; -+ -+ } else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) { -+ tbl[i].data = &ns->shm_ctlmni; -+ -+ } else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) { -+ tbl[i].data = &ns->shm_rmid_forced; -+ tbl[i].extra1 = ns; -+ -+ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) { -+ tbl[i].data = &ns->msg_ctlmax; -+ -+ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) { -+ tbl[i].data = &ns->msg_ctlmni; -+ -+ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) { -+ tbl[i].data = &ns->msg_ctlmnb; -+ -+ } else if (tbl[i].data == &init_ipc_ns.sem_ctls) { -+ tbl[i].data = &ns->sem_ctls; -+ tbl[i].extra1 = ns; -+#ifdef CONFIG_CHECKPOINT_RESTORE -+ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { -+ tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; -+ tbl[i].extra1 = ns; -+ -+ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { -+ tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; -+ tbl[i].extra1 = ns; -+ -+ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { -+ tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; -+ tbl[i].extra1 = ns; -+#endif -+ } else { -+ tbl[i].data = NULL; -+ } -+ } -+ -+ ns->ipc_sysctls = 
__register_sysctl_table(&ns->ipc_set, "kernel", tbl); -+ } -+ if (!ns->ipc_sysctls) { -+ kfree(tbl); -+ retire_sysctl_set(&ns->ipc_set); -+ return false; -+ } -+ -+ return true; -+} -+ -+void retire_ipc_sysctls(struct ipc_namespace *ns) -+{ -+ struct ctl_table *tbl; -+ -+ tbl = ns->ipc_sysctls->ctl_table_arg; -+ unregister_sysctl_table(ns->ipc_sysctls); -+ retire_sysctl_set(&ns->ipc_set); -+ kfree(tbl); -+} -+ - static int __init ipc_sysctl_init(void) - { -- register_sysctl_table(ipc_root_table); -+ if (!setup_ipc_sysctls(&init_ipc_ns)) { -+ pr_warn("ipc sysctl registration failed\n"); -+ return -ENOMEM; -+ } - return 0; - } - -diff --git a/ipc/namespace.c b/ipc/namespace.c -index 5d68e20f7d2bf..14bb40c9d0b85 100644 ---- a/ipc/namespace.c -+++ b/ipc/namespace.c -@@ -63,6 +63,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, - if (!setup_mq_sysctls(ns)) - goto fail_put; - -+ if (!setup_ipc_sysctls(ns)) -+ goto fail_put; -+ - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); -@@ -130,6 +133,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) - shm_exit_ns(ns); - - retire_mq_sysctls(ns); -+ retire_ipc_sysctls(ns); - - dec_ipc_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); --- -2.43.0 - diff --git a/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch b/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch deleted file mode 100644 index dab916ccd59..00000000000 --- a/queue-5.10/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch +++ /dev/null @@ -1,323 +0,0 @@ -From cb98de8a508d409d94c225f80e4ac33f3b6dfad5 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 14 Feb 2022 19:18:14 +0100 -Subject: ipc: Store mqueue sysctls in the ipc namespace - -From: Alexey Gladkov - -[ Upstream commit dc55e35f9e810f23dd69cfdc91a3d636023f57a2 ] - -Right now, the mqueue sysctls take ipc namespaces into account in a -rather hacky way. This works in most cases, but does not respect the -user namespace. 
- -Within the user namespace, the user cannot change the /proc/sys/fs/mqueue/* -parameters. This poses a problem in the rootless containers. - -To solve this I changed the implementation of the mqueue sysctls just -like some other sysctls. - -So far, the changes do not provide additional access to files. This will -be done in a future patch. - -v3: -* Don't implement set_permissions to keep the current behavior. - -v2: -* Fixed compilation problem if CONFIG_POSIX_MQUEUE_SYSCTL is not - specified. - -Reported-by: kernel test robot -Signed-off-by: Alexey Gladkov -Link: https://lkml.kernel.org/r/b0ccbb2489119f1f20c737cf1930c3a9c4e4243a.1644862280.git.legion@kernel.org -Signed-off-by: Eric W. Biederman -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - include/linux/ipc_namespace.h | 16 +++-- - ipc/mq_sysctl.c | 121 ++++++++++++++++++---------------- - ipc/mqueue.c | 10 ++- - ipc/namespace.c | 6 ++ - 4 files changed, 88 insertions(+), 65 deletions(-) - -diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h -index 08325105131a2..60cd84c1ba146 100644 ---- a/include/linux/ipc_namespace.h -+++ b/include/linux/ipc_namespace.h -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - - struct user_namespace; - -@@ -64,6 +65,9 @@ struct ipc_namespace { - unsigned int mq_msg_default; - unsigned int mq_msgsize_default; - -+ struct ctl_table_set mq_set; -+ struct ctl_table_header *mq_sysctls; -+ - /* user_ns which owns the ipc ns */ - struct user_namespace *user_ns; - struct ucounts *ucounts; -@@ -170,14 +174,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) - - #ifdef CONFIG_POSIX_MQUEUE_SYSCTL - --struct ctl_table_header; --extern struct ctl_table_header *mq_register_sysctl_table(void); -+void retire_mq_sysctls(struct ipc_namespace *ns); -+bool setup_mq_sysctls(struct ipc_namespace *ns); - - #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ - --static inline struct ctl_table_header 
*mq_register_sysctl_table(void) -+static inline void retire_mq_sysctls(struct ipc_namespace *ns) - { -- return NULL; -+} -+ -+static inline bool setup_mq_sysctls(struct ipc_namespace *ns) -+{ -+ return true; - } - - #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ -diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c -index 72a92a08c848e..fbf6a8b93a265 100644 ---- a/ipc/mq_sysctl.c -+++ b/ipc/mq_sysctl.c -@@ -9,39 +9,9 @@ - #include - #include - --#ifdef CONFIG_PROC_SYSCTL --static void *get_mq(struct ctl_table *table) --{ -- char *which = table->data; -- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; -- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; -- return which; --} -- --static int proc_mq_dointvec(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table mq_table; -- memcpy(&mq_table, table, sizeof(mq_table)); -- mq_table.data = get_mq(table); -- -- return proc_dointvec(&mq_table, write, buffer, lenp, ppos); --} -- --static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table mq_table; -- memcpy(&mq_table, table, sizeof(mq_table)); -- mq_table.data = get_mq(table); -- -- return proc_dointvec_minmax(&mq_table, write, buffer, -- lenp, ppos); --} --#else --#define proc_mq_dointvec NULL --#define proc_mq_dointvec_minmax NULL --#endif -+#include -+#include -+#include - - static int msg_max_limit_min = MIN_MSGMAX; - static int msg_max_limit_max = HARD_MSGMAX; -@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_queues_max, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec, -+ .proc_handler = proc_dointvec, - }, - { - .procname = "msg_max", - .data = &init_ipc_ns.mq_msg_max, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_max_limit_min, - .extra2 = &msg_max_limit_max, - }, -@@ -71,7 
+41,7 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_msgsize_max, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_maxsize_limit_min, - .extra2 = &msg_maxsize_limit_max, - }, -@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_msg_default, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_max_limit_min, - .extra2 = &msg_max_limit_max, - }, -@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_msgsize_default, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_maxsize_limit_min, - .extra2 = &msg_maxsize_limit_max, - }, - {} - }; - --static struct ctl_table mq_sysctl_dir[] = { -- { -- .procname = "mqueue", -- .mode = 0555, -- .child = mq_sysctls, -- }, -- {} --}; -+static struct ctl_table_set *set_lookup(struct ctl_table_root *root) -+{ -+ return &current->nsproxy->ipc_ns->mq_set; -+} - --static struct ctl_table mq_sysctl_root[] = { -- { -- .procname = "fs", -- .mode = 0555, -- .child = mq_sysctl_dir, -- }, -- {} -+static int set_is_seen(struct ctl_table_set *set) -+{ -+ return &current->nsproxy->ipc_ns->mq_set == set; -+} -+ -+static struct ctl_table_root set_root = { -+ .lookup = set_lookup, - }; - --struct ctl_table_header *mq_register_sysctl_table(void) -+bool setup_mq_sysctls(struct ipc_namespace *ns) - { -- return register_sysctl_table(mq_sysctl_root); -+ struct ctl_table *tbl; -+ -+ setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); -+ -+ tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); -+ if (tbl) { -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) { -+ if (tbl[i].data == &init_ipc_ns.mq_queues_max) -+ tbl[i].data = &ns->mq_queues_max; -+ -+ else if (tbl[i].data == 
&init_ipc_ns.mq_msg_max) -+ tbl[i].data = &ns->mq_msg_max; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max) -+ tbl[i].data = &ns->mq_msgsize_max; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msg_default) -+ tbl[i].data = &ns->mq_msg_default; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default) -+ tbl[i].data = &ns->mq_msgsize_default; -+ else -+ tbl[i].data = NULL; -+ } -+ -+ ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); -+ } -+ if (!ns->mq_sysctls) { -+ kfree(tbl); -+ retire_sysctl_set(&ns->mq_set); -+ return false; -+ } -+ -+ return true; -+} -+ -+void retire_mq_sysctls(struct ipc_namespace *ns) -+{ -+ struct ctl_table *tbl; -+ -+ tbl = ns->mq_sysctls->ctl_table_arg; -+ unregister_sysctl_table(ns->mq_sysctls); -+ retire_sysctl_set(&ns->mq_set); -+ kfree(tbl); - } -diff --git a/ipc/mqueue.c b/ipc/mqueue.c -index 86969de170843..b14ea1dcd50d4 100644 ---- a/ipc/mqueue.c -+++ b/ipc/mqueue.c -@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info); - - static struct kmem_cache *mqueue_inode_cachep; - --static struct ctl_table_header *mq_sysctl_table; -- - static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) - { - return container_of(inode, struct mqueue_inode_info, vfs_inode); -@@ -1724,8 +1722,10 @@ static int __init init_mqueue_fs(void) - if (mqueue_inode_cachep == NULL) - return -ENOMEM; - -- /* ignore failures - they are not fatal */ -- mq_sysctl_table = mq_register_sysctl_table(); -+ if (!setup_mq_sysctls(&init_ipc_ns)) { -+ pr_warn("sysctl registration failed\n"); -+ return -ENOMEM; -+ } - - error = register_filesystem(&mqueue_fs_type); - if (error) -@@ -1742,8 +1742,6 @@ static int __init init_mqueue_fs(void) - out_filesystem: - unregister_filesystem(&mqueue_fs_type); - out_sysctl: -- if (mq_sysctl_table) -- unregister_sysctl_table(mq_sysctl_table); - kmem_cache_destroy(mqueue_inode_cachep); - return error; - } -diff --git a/ipc/namespace.c b/ipc/namespace.c -index 
24e7b45320f72..5d68e20f7d2bf 100644 ---- a/ipc/namespace.c -+++ b/ipc/namespace.c -@@ -59,6 +59,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, - if (err) - goto fail_put; - -+ err = -ENOMEM; -+ if (!setup_mq_sysctls(ns)) -+ goto fail_put; -+ - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); -@@ -125,6 +129,8 @@ static void free_ipc_ns(struct ipc_namespace *ns) - msg_exit_ns(ns); - shm_exit_ns(ns); - -+ retire_mq_sysctls(ns); -+ - dec_ipc_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); --- -2.43.0 - diff --git a/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch b/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch deleted file mode 100644 index e9fd9d5bb2a..00000000000 --- a/queue-5.10/ipv4-fix-source-address-selection-with-route-leak.patch +++ /dev/null @@ -1,53 +0,0 @@ -From ab586543de36f330ced813886c8321973345ff1a Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 10 Jul 2024 10:14:27 +0200 -Subject: ipv4: fix source address selection with route leak - -From: Nicolas Dichtel - -[ Upstream commit 6807352353561187a718e87204458999dbcbba1b ] - -By default, an address assigned to the output interface is selected when -the source address is not specified. This is problematic when a route, -configured in a vrf, uses an interface from another vrf (aka route leak). -The original vrf does not own the selected source address. - -Let's add a check against the output interface and call the appropriate -function to select the source address. 
- -CC: stable@vger.kernel.org -Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF") -Signed-off-by: Nicolas Dichtel -Reviewed-by: David Ahern -Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com -Signed-off-by: Jakub Kicinski -Signed-off-by: Sasha Levin ---- - net/ipv4/fib_semantics.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c -index a308d3f0f845c..57883bd6b5597 100644 ---- a/net/ipv4/fib_semantics.c -+++ b/net/ipv4/fib_semantics.c -@@ -2285,6 +2285,15 @@ void fib_select_path(struct net *net, struct fib_result *res, - fib_select_default(fl4, res); - - check_saddr: -- if (!fl4->saddr) -- fl4->saddr = fib_result_prefsrc(net, res); -+ if (!fl4->saddr) { -+ struct net_device *l3mdev; -+ -+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); -+ -+ if (!l3mdev || -+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) -+ fl4->saddr = fib_result_prefsrc(net, res); -+ else -+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); -+ } - } --- -2.43.0 - diff --git a/queue-5.10/ipv6-fix-source-address-selection-with-route-leak.patch b/queue-5.10/ipv6-fix-source-address-selection-with-route-leak.patch deleted file mode 100644 index 67fd534ecaf..00000000000 --- a/queue-5.10/ipv6-fix-source-address-selection-with-route-leak.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 -From: Nicolas Dichtel -Date: Wed, 10 Jul 2024 10:14:28 +0200 -Subject: ipv6: fix source address selection with route leak - -From: Nicolas Dichtel - -commit 252442f2ae317d109ef0b4b39ce0608c09563042 upstream. - -By default, an address assigned to the output interface is selected when -the source address is not specified. This is problematic when a route, -configured in a vrf, uses an interface from another vrf (aka route leak). -The original vrf does not own the selected source address. 
- -Let's add a check against the output interface and call the appropriate -function to select the source address. - -CC: stable@vger.kernel.org -Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") -Signed-off-by: Nicolas Dichtel -Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com -Signed-off-by: Jakub Kicinski -Signed-off-by: Greg Kroah-Hartman ---- - include/net/ip6_route.h | 20 ++++++++++++++------ - net/ipv6/ip6_output.c | 1 + - net/ipv6/route.c | 2 +- - 3 files changed, 16 insertions(+), 7 deletions(-) - ---- a/include/net/ip6_route.h -+++ b/include/net/ip6_route.h -@@ -132,18 +132,26 @@ void rt6_age_exceptions(struct fib6_info - - static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, - const struct in6_addr *daddr, -- unsigned int prefs, -+ unsigned int prefs, int l3mdev_index, - struct in6_addr *saddr) - { -+ struct net_device *l3mdev; -+ struct net_device *dev; -+ bool same_vrf; - int err = 0; - -- if (f6i && f6i->fib6_prefsrc.plen) { -+ rcu_read_lock(); -+ -+ l3mdev = dev_get_by_index_rcu(net, l3mdev_index); -+ if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) -+ dev = f6i ? fib6_info_nh_dev(f6i) : NULL; -+ same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; -+ if (f6i && f6i->fib6_prefsrc.plen && same_vrf) - *saddr = f6i->fib6_prefsrc.addr; -- } else { -- struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; -+ else -+ err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); - -- err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); -- } -+ rcu_read_unlock(); - - return err; - } ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -1108,6 +1108,7 @@ static int ip6_dst_lookup_tail(struct ne - from = rt ? rcu_dereference(rt->from) : NULL; - err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? 
inet6_sk(sk)->srcprefs : 0, -+ fl6->flowi6_l3mdev, - &fl6->saddr); - rcu_read_unlock(); - ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -5569,7 +5569,7 @@ static int rt6_fill_node(struct net *net - goto nla_put_failure; - } else if (dest) { - struct in6_addr saddr_buf; -- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && -+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && - nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) - goto nla_put_failure; - } diff --git a/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch b/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch deleted file mode 100644 index dcb7494a9d1..00000000000 --- a/queue-5.10/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch +++ /dev/null @@ -1,419 +0,0 @@ -From f255c8425c2d31cac7c0aebceabe903c271db4d2 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 14 Mar 2022 14:45:51 -0600 -Subject: net: Add l3mdev index to flow struct and avoid oif reset for port - devices - -From: David Ahern - -[ Upstream commit 40867d74c374b235e14d839f3a77f26684feefe5 ] - -The fundamental premise of VRF and l3mdev core code is binding a socket -to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. -Legacy code resets flowi_oif to the l3mdev losing any original port -device binding. Ben (among others) has demonstrated use cases where the -original port device binding is important and needs to be retained. -This patch handles that by adding a new entry to the common flow struct -that can indicate the l3mdev index for later rule and table matching -avoiding the need to reset flowi_oif. - -In addition to allowing more use cases that require port device binds, -this patch brings a few datapath simplications: - -1. l3mdev_fib_rule_match is only called when walking fib rules and - always after l3mdev_update_flow. That allows an optimization to bail - early for non-VRF type uses cases when flowi_l3mdev is not set. 
Also, - only that index needs to be checked for the FIB table id. - -2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev - (e.g., VRF) device. By resetting flowi_oif only for this case the - FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, - removing several checks in the datapath. The flowi_iif path can be - simplified to only be called if the it is not loopback (loopback can - not be assigned to an L3 domain) and the l3mdev index is not already - set. - -3. Avoid another device lookup in the output path when the fib lookup - returns a reject failure. - -Note: 2 functional tests for local traffic with reject fib rules are -updated to reflect the new direct failure at FIB lookup time for ping -rather than the failure on packet path. The current code fails like this: - - HINT: Fails since address on vrf device is out of device scope - COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 - ping: Warning: source address might be selected on device other than: eth1 - PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. 
- - --- 172.16.3.1 ping statistics --- - 1 packets transmitted, 0 received, 100% packet loss, time 0ms - -where the test now directly fails: - - HINT: Fails since address on vrf device is out of device scope - COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 - ping: connect: No route to host - -Signed-off-by: David Ahern -Tested-by: Ben Greear -Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org -Signed-off-by: Jakub Kicinski -Stable-dep-of: 680735235356 ("ipv4: fix source address selection with route leak") -Signed-off-by: Sasha Levin ---- - drivers/net/vrf.c | 7 ++-- - include/net/flow.h | 6 +++- - net/ipv4/fib_frontend.c | 7 ++-- - net/ipv4/fib_semantics.c | 2 +- - net/ipv4/fib_trie.c | 7 ++-- - net/ipv4/route.c | 4 +-- - net/ipv4/xfrm4_policy.c | 4 +-- - net/ipv6/ip6_output.c | 3 +- - net/ipv6/route.c | 12 ------- - net/ipv6/xfrm6_policy.c | 3 +- - net/l3mdev/l3mdev.c | 43 +++++++++-------------- - tools/testing/selftests/net/fcnal-test.sh | 2 +- - 12 files changed, 37 insertions(+), 63 deletions(-) - -diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c -index 8ab0b5a8dfeff..13ad434643b80 100644 ---- a/drivers/net/vrf.c -+++ b/drivers/net/vrf.c -@@ -470,14 +470,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, - - memset(&fl6, 0, sizeof(fl6)); - /* needed to match OIF rule */ -- fl6.flowi6_oif = dev->ifindex; -+ fl6.flowi6_l3mdev = dev->ifindex; - fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = iph->nexthdr; -- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; - - dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); - if (IS_ERR(dst) || dst == dst_null) -@@ -550,10 +549,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, - - memset(&fl4, 0, sizeof(fl4)); - /* needed to match OIF rule */ -- fl4.flowi4_oif = vrf_dev->ifindex; -+ fl4.flowi4_l3mdev = vrf_dev->ifindex; - 
fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.flowi4_tos = RT_TOS(ip4h->tos); -- fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; -+ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; - fl4.flowi4_proto = ip4h->protocol; - fl4.daddr = ip4h->daddr; - fl4.saddr = ip4h->saddr; -diff --git a/include/net/flow.h b/include/net/flow.h -index 7ffa1fe1107cc..1c19af4f3b97e 100644 ---- a/include/net/flow.h -+++ b/include/net/flow.h -@@ -29,6 +29,7 @@ struct flowi_tunnel { - struct flowi_common { - int flowic_oif; - int flowic_iif; -+ int flowic_l3mdev; - __u32 flowic_mark; - __u8 flowic_tos; - __u8 flowic_scope; -@@ -36,7 +37,6 @@ struct flowi_common { - __u8 flowic_flags; - #define FLOWI_FLAG_ANYSRC 0x01 - #define FLOWI_FLAG_KNOWN_NH 0x02 --#define FLOWI_FLAG_SKIP_NH_OIF 0x04 - __u32 flowic_secid; - kuid_t flowic_uid; - __u32 flowic_multipath_hash; -@@ -66,6 +66,7 @@ struct flowi4 { - struct flowi_common __fl_common; - #define flowi4_oif __fl_common.flowic_oif - #define flowi4_iif __fl_common.flowic_iif -+#define flowi4_l3mdev __fl_common.flowic_l3mdev - #define flowi4_mark __fl_common.flowic_mark - #define flowi4_tos __fl_common.flowic_tos - #define flowi4_scope __fl_common.flowic_scope -@@ -99,6 +100,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, - { - fl4->flowi4_oif = oif; - fl4->flowi4_iif = LOOPBACK_IFINDEX; -+ fl4->flowi4_l3mdev = 0; - fl4->flowi4_mark = mark; - fl4->flowi4_tos = tos; - fl4->flowi4_scope = scope; -@@ -129,6 +131,7 @@ struct flowi6 { - struct flowi_common __fl_common; - #define flowi6_oif __fl_common.flowic_oif - #define flowi6_iif __fl_common.flowic_iif -+#define flowi6_l3mdev __fl_common.flowic_l3mdev - #define flowi6_mark __fl_common.flowic_mark - #define flowi6_scope __fl_common.flowic_scope - #define flowi6_proto __fl_common.flowic_proto -@@ -159,6 +162,7 @@ struct flowi { - } u; - #define flowi_oif u.__fl_common.flowic_oif - #define flowi_iif u.__fl_common.flowic_iif -+#define flowi_l3mdev u.__fl_common.flowic_l3mdev - #define 
flowi_mark u.__fl_common.flowic_mark - #define flowi_tos u.__fl_common.flowic_tos - #define flowi_scope u.__fl_common.flowic_scope -diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c -index 41f890bf9d4c4..0a61b993d823f 100644 ---- a/net/ipv4/fib_frontend.c -+++ b/net/ipv4/fib_frontend.c -@@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) - bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); - struct flowi4 fl4 = { - .flowi4_iif = LOOPBACK_IFINDEX, -- .flowi4_oif = l3mdev_master_ifindex_rcu(dev), -+ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), - .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, - .flowi4_scope = scope, -@@ -352,9 +352,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - bool dev_match; - - fl4.flowi4_oif = 0; -- fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); -- if (!fl4.flowi4_iif) -- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; -+ fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); -+ fl4.flowi4_iif = oif ? 
: LOOPBACK_IFINDEX; - fl4.daddr = src; - fl4.saddr = dst; - fl4.flowi4_tos = tos; -diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c -index bb5255178d75c..a308d3f0f845c 100644 ---- a/net/ipv4/fib_semantics.c -+++ b/net/ipv4/fib_semantics.c -@@ -2268,7 +2268,7 @@ void fib_select_multipath(struct fib_result *res, int hash) - void fib_select_path(struct net *net, struct fib_result *res, - struct flowi4 *fl4, const struct sk_buff *skb) - { -- if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) -+ if (fl4->flowi4_oif) - goto check_saddr; - - #ifdef CONFIG_IP_ROUTE_MULTIPATH -diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c -index 3f4f6458d40e9..1bdcdc79d43f9 100644 ---- a/net/ipv4/fib_trie.c -+++ b/net/ipv4/fib_trie.c -@@ -1384,11 +1384,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, - !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) - return false; - -- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { -- if (flp->flowi4_oif && -- flp->flowi4_oif != nhc->nhc_oif) -- return false; -- } -+ if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) -+ return false; - - return true; - } -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index 1eb1e4316ed6d..c34386a9d99b4 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -2200,6 +2200,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - /* - * Now we are ready to route packet. - */ -+ fl4.flowi4_l3mdev = 0; - fl4.flowi4_oif = 0; - fl4.flowi4_iif = dev->ifindex; - fl4.flowi4_mark = skb->mark; -@@ -2676,8 +2677,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, - res->fi = NULL; - res->table = NULL; - if (fl4->flowi4_oif && -- (ipv4_is_multicast(fl4->daddr) || -- !netif_index_is_l3_master(net, fl4->flowi4_oif))) { -+ (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { - /* Apparently, routing tables are wrong. Assume, - that the destination is on link. 
- -diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c -index 9ebd54752e03b..4548a91acdc89 100644 ---- a/net/ipv4/xfrm4_policy.c -+++ b/net/ipv4/xfrm4_policy.c -@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = daddr->a4; - fl4->flowi4_tos = tos; -- fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); -+ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl4->flowi4_mark = mark; - if (saddr) - fl4->saddr = saddr->a4; - -- fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; -- - rt = __ip_route_output_key(net, fl4); - if (!IS_ERR(rt)) - return &rt->dst; -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c -index 32512b8ca5e72..ae00e2c7ee058 100644 ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -1067,8 +1067,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, - #ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || - #endif -- (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && -- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { -+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { - dst_release(dst); - dst = NULL; - } -diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index 799779475c7de..37e05a77fe49e 100644 ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -1207,9 +1207,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, - struct fib6_node *fn; - struct rt6_info *rt; - -- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) -- flags &= ~RT6_LOOKUP_F_IFACE; -- - rcu_read_lock(); - fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); - restart: -@@ -2183,9 +2180,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, - fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); - saved_fn = fn; - -- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) -- oif = 0; -- - redo_rt6_select: - 
rt6_select(net, fn, oif, res, strict); - if (res->f6i == net->ipv6.fib6_null_entry) { -@@ -2932,12 +2926,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, - struct fib6_info *rt; - struct fib6_node *fn; - -- /* l3mdev_update_flow overrides oif if the device is enslaved; in -- * this case we must match on the real ingress device, so reset it -- */ -- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) -- fl6->flowi6_oif = skb->dev->ifindex; -- - /* Get the "current" route for this destination and - * check if the redirect has come from appropriate router. - * -diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c -index 7c903e0e446cb..492b9692c0dc0 100644 ---- a/net/ipv6/xfrm6_policy.c -+++ b/net/ipv6/xfrm6_policy.c -@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, - int err; - - memset(&fl6, 0, sizeof(fl6)); -- fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); -- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; -+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl6.flowi6_mark = mark; - memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); - if (saddr) -diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c -index f2c3a61ad134b..42794581762cb 100644 ---- a/net/l3mdev/l3mdev.c -+++ b/net/l3mdev/l3mdev.c -@@ -249,25 +249,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, - struct net_device *dev; - int rc = 0; - -- rcu_read_lock(); -+ /* update flow ensures flowi_l3mdev is set when relevant */ -+ if (!fl->flowi_l3mdev) -+ return 0; - -- dev = dev_get_by_index_rcu(net, fl->flowi_oif); -- if (dev && netif_is_l3_master(dev) && -- dev->l3mdev_ops->l3mdev_fib_table) { -- arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); -- rc = 1; -- goto out; -- } -+ rcu_read_lock(); - -- dev = dev_get_by_index_rcu(net, fl->flowi_iif); -+ dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_fib_table) { - 
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); - rc = 1; -- goto out; - } - --out: - rcu_read_unlock(); - - return rc; -@@ -276,31 +270,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, - void l3mdev_update_flow(struct net *net, struct flowi *fl) - { - struct net_device *dev; -- int ifindex; - - rcu_read_lock(); - - if (fl->flowi_oif) { - dev = dev_get_by_index_rcu(net, fl->flowi_oif); - if (dev) { -- ifindex = l3mdev_master_ifindex_rcu(dev); -- if (ifindex) { -- fl->flowi_oif = ifindex; -- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; -- goto out; -- } -+ if (!fl->flowi_l3mdev) -+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); -+ -+ /* oif set to L3mdev directs lookup to its table; -+ * reset to avoid oif match in fib_lookup -+ */ -+ if (netif_is_l3_master(dev)) -+ fl->flowi_oif = 0; -+ goto out; - } - } - -- if (fl->flowi_iif) { -+ if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { - dev = dev_get_by_index_rcu(net, fl->flowi_iif); -- if (dev) { -- ifindex = l3mdev_master_ifindex_rcu(dev); -- if (ifindex) { -- fl->flowi_iif = ifindex; -- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; -- } -- } -+ if (dev) -+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); - } - - out: -diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh -index e13b0fb63333f..acffe0029fdd1 100755 ---- a/tools/testing/selftests/net/fcnal-test.sh -+++ b/tools/testing/selftests/net/fcnal-test.sh -@@ -741,7 +741,7 @@ ipv4_ping_vrf() - log_start - show_hint "Fails since address on vrf device is out of device scope" - run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} -- log_test_addr ${a} $? 1 "ping local, device bind" -+ log_test_addr ${a} $? 
2 "ping local, device bind" - done - - # --- -2.43.0 - diff --git a/queue-5.10/series b/queue-5.10/series index 79bac552005..49be6ea3c28 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -214,18 +214,8 @@ nvme-split-command-copy-into-a-helper.patch nvme-pci-add-missing-condition-check-for-existence-o.patch fs-don-t-allow-non-init-s_user_ns-for-filesystems-wi.patch powerpc-configs-update-defconfig-with-now-user-visible-config_fsl_ifc.patch -net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch -ipv4-fix-source-address-selection-with-route-leak.patch fuse-name-fs_context-consistently.patch fuse-verify-g-u-id-mount-options-correctly.patch -ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch -ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch -ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch -ipc-store-ipc-sysctls-in-the-ipc-namespace.patch -ipc-check-permissions-for-checkpoint_restart-sysctls.patch -sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch -sysctl-allow-to-change-limits-for-posix-messages-que.patch -sysctl-treewide-drop-unused-argument-ctl_table_root-.patch sysctl-always-initialize-i_uid-i_gid.patch ext4-factor-out-a-common-helper-to-query-extent-map.patch ext4-check-the-extent-status-again-before-inserting-.patch @@ -341,7 +331,6 @@ mptcp-fix-nl-pm-announced-address-accounting.patch mptcp-mib-count-mpj-with-backup-flag.patch mptcp-export-local_address.patch mptcp-pm-fix-backup-support-in-signal-endpoints.patch -ipv6-fix-source-address-selection-with-route-leak.patch samples-add-fs-error-monitoring-example.patch samples-make-fs-monitor-depend-on-libc-and-headers.patch add-gitignore-file-for-samples-fanotify-subdirectory.patch diff --git a/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch b/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch deleted file mode 100644 index a5744c7bdc3..00000000000 --- a/queue-5.10/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch +++ 
/dev/null @@ -1,140 +0,0 @@ -From 8443e2e5cb24ee6393549421a32d0798a334e92e Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 15 Jan 2024 15:46:41 +0000 -Subject: sysctl: allow change system v ipc sysctls inside ipc namespace - -From: Alexey Gladkov - -[ Upstream commit 50ec499b9a43e46200c9f7b7d723ab2e4af540b3 ] - -Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3. - -Right now ipc and mq limits count as per ipc namespace, but only real root -can change them. By default, the current values of these limits are such -that it can only be reduced. Since only root can change the values, it is -impossible to reduce these limits in the rootless container. - -We can allow limit changes within ipc namespace because mq parameters are -limited by RLIMIT_MSGQUEUE and ipc parameters are not limited to anything -other than cgroups. - -This patch (of 3): - -Rootless containers are not allowed to modify kernel IPC parameters. - -All default limits are set to such high values that in fact there are no -limits at all. All limits are not inherited and are initialized to -default values when a new ipc_namespace is created. - -For new ipc_namespace: - -size_t ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24)) -size_t ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24)) -int ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15) -int ipc_ns.shm_rmid_forced = 0; -unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192 -unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000 -unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384 - -The shm_tot (total amount of shared pages) has also ceased to be global, -it is located in ipc_namespace and is not inherited from anywhere. - -In such conditions, it cannot be said that these limits limit anything. -The real limiter for them is cgroups. - -If we allow rootless containers to change these parameters, then it can -only be reduced. 
- -Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org -Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org -Signed-off-by: Alexey Gladkov -Signed-off-by: Eric W. Biederman -Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org -Cc: Christian Brauner -Cc: Joel Granados -Cc: Kees Cook -Cc: Luis Chamberlain -Cc: Manfred Spraul -Cc: Davidlohr Bueso -Signed-off-by: Andrew Morton -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- - 1 file changed, 35 insertions(+), 2 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index a2b871d006da7..2864fd7fafaac 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include "util.h" - - static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, -@@ -198,25 +199,57 @@ static int set_is_seen(struct ctl_table_set *set) - return &current->nsproxy->ipc_ns->ipc_set == set; - } - -+static void ipc_set_ownership(struct ctl_table_header *head, -+ struct ctl_table *table, -+ kuid_t *uid, kgid_t *gid) -+{ -+ struct ipc_namespace *ns = -+ container_of(head->set, struct ipc_namespace, ipc_set); -+ -+ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); -+ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); -+ -+ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; -+ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; -+} -+ - static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) - { - int mode = table->mode; - - #ifdef CONFIG_CHECKPOINT_RESTORE -- struct ipc_namespace *ns = current->nsproxy->ipc_ns; -+ struct ipc_namespace *ns = -+ container_of(head->set, struct ipc_namespace, ipc_set); - - if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || - (table->data == &ns->ids[IPC_MSG_IDS].next_id) || - (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && - checkpoint_restore_ns_capable(ns->user_ns)) - mode = 0666; -+ else - #endif -- return mode; -+ { -+ kuid_t ns_root_uid; -+ kgid_t ns_root_gid; -+ -+ ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ -+ if (uid_eq(current_euid(), ns_root_uid)) -+ mode >>= 6; -+ -+ else if (in_egroup_p(ns_root_gid)) -+ mode >>= 3; -+ } -+ -+ mode &= 7; -+ -+ return (mode << 6) | (mode << 3) | mode; - } - - static struct ctl_table_root set_root = { - .lookup = set_lookup, - .permissions = ipc_permissions, -+ .set_ownership = ipc_set_ownership, - }; - - bool setup_ipc_sysctls(struct ipc_namespace *ns) --- -2.43.0 - diff --git a/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch b/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch deleted file mode 100644 index 608fe54340f..00000000000 --- a/queue-5.10/sysctl-allow-to-change-limits-for-posix-messages-que.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 2c050d168b37b543fedfec9d518ccbc182113e09 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 15 Jan 2024 15:46:43 +0000 -Subject: sysctl: allow to change limits for posix messages queues - -From: Alexey Gladkov - -[ Upstream commit f9436a5d0497f759330d07e1189565edd4456be8 ] - -All parameters of posix messages queues (queues_max/msg_max/msgsize_max) -end up being limited by RLIMIT_MSGQUEUE. The code in mqueue_get_inode is -where that limiting happens. - -The RLIMIT_MSGQUEUE is bound to the user namespace and is counted -hierarchically. 
- -We can allow root in the user namespace to modify the posix messages -queues parameters. - -Link: https://lkml.kernel.org/r/6ad67f23d1459a4f4339f74aa73bac0ecf3995e1.1705333426.git.legion@kernel.org -Signed-off-by: Alexey Gladkov -Signed-off-by: Eric W. Biederman -Link: https://lkml.kernel.org/r/7eb21211c8622e91d226e63416b1b93c079f60ee.1663756794.git.legion@kernel.org -Cc: Christian Brauner -Cc: Davidlohr Bueso -Cc: Joel Granados -Cc: Kees Cook -Cc: Luis Chamberlain -Cc: Manfred Spraul -Signed-off-by: Andrew Morton -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/mq_sysctl.c | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c -index fbf6a8b93a265..ce03930aced55 100644 ---- a/ipc/mq_sysctl.c -+++ b/ipc/mq_sysctl.c -@@ -12,6 +12,7 @@ - #include - #include - #include -+#include - - static int msg_max_limit_min = MIN_MSGMAX; - static int msg_max_limit_max = HARD_MSGMAX; -@@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set) - return &current->nsproxy->ipc_ns->mq_set == set; - } - -+static void mq_set_ownership(struct ctl_table_header *head, -+ struct ctl_table *table, -+ kuid_t *uid, kgid_t *gid) -+{ -+ struct ipc_namespace *ns = -+ container_of(head->set, struct ipc_namespace, mq_set); -+ -+ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); -+ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); -+ -+ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; -+ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; -+} -+ -+static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table) -+{ -+ int mode = table->mode; -+ kuid_t ns_root_uid; -+ kgid_t ns_root_gid; -+ -+ mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ -+ if (uid_eq(current_euid(), ns_root_uid)) -+ mode >>= 6; -+ -+ else if (in_egroup_p(ns_root_gid)) -+ mode >>= 3; -+ -+ mode &= 7; -+ -+ return (mode << 6) | (mode << 3) | mode; -+} -+ - static struct ctl_table_root set_root = { - .lookup = set_lookup, -+ .permissions = mq_permissions, -+ .set_ownership = mq_set_ownership, - }; - - bool setup_mq_sysctls(struct ipc_namespace *ns) --- -2.43.0 - diff --git a/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch b/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch index 881e29cfd7d..9aa3669e603 100644 --- a/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch +++ b/queue-5.10/sysctl-always-initialize-i_uid-i_gid.patch @@ -1,4 +1,4 @@ -From 5d7defeab6026e3ce687533b8c44874295f0a96a Mon Sep 17 00:00:00 2001 +From 692f0a1c18b96128339a77efdc7c9f533740bcd1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 2 Apr 2024 23:10:34 +0200 Subject: sysctl: always initialize i_uid/i_gid @@ -25,21 +25,19 @@ Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados Signed-off-by: Sasha Levin --- - fs/proc/proc_sysctl.c | 6 ++---- + fs/proc/proc_sysctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) -diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index d61777c67ada8..d97e2d399fe6d 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -471,12 +471,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, +@@ -471,12 +471,10 @@ static struct inode *proc_sys_make_inode make_empty_dir_inode(inode); } + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; if (root->set_ownership) - root->set_ownership(head, &inode->i_uid, &inode->i_gid); + root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); - 
else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; @@ -47,6 +45,3 @@ index d61777c67ada8..d97e2d399fe6d 100644 return inode; } --- -2.43.0 - diff --git a/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch b/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch deleted file mode 100644 index 746430ade9c..00000000000 --- a/queue-5.10/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 1b75a9102932eba02f4300ecd460eff59af6dbdd Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 15 Mar 2024 19:11:30 +0100 -Subject: sysctl: treewide: drop unused argument - ctl_table_root::set_ownership(table) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -From: Thomas Weißschuh - -[ Upstream commit 520713a93d550406dae14d49cdb8778d70cecdfd ] - -Remove the 'table' argument from set_ownership as it is never used. This -change is a step towards putting "struct ctl_table" into .rodata and -eventually having sysctl core only use "const struct ctl_table". - -The patch was created with the following coccinelle script: - - @@ - identifier func, head, table, uid, gid; - @@ - - void func( - struct ctl_table_header *head, - - struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { ... } - -No additional occurrences of 'set_ownership' were found after doing a -tree-wide search. 
- -Reviewed-by: Joel Granados -Signed-off-by: Thomas Weißschuh -Signed-off-by: Joel Granados -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - fs/proc/proc_sysctl.c | 2 +- - include/linux/sysctl.h | 1 - - ipc/ipc_sysctl.c | 3 +-- - ipc/mq_sysctl.c | 3 +-- - net/sysctl_net.c | 1 - - 5 files changed, 3 insertions(+), 7 deletions(-) - -diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index aff9593feb73c..d61777c67ada8 100644 ---- a/fs/proc/proc_sysctl.c -+++ b/fs/proc/proc_sysctl.c -@@ -472,7 +472,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, - } - - if (root->set_ownership) -- root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); -+ root->set_ownership(head, &inode->i_uid, &inode->i_gid); - else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; -diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h -index 47cf70c8eb93c..cde78b5a54295 100644 ---- a/include/linux/sysctl.h -+++ b/include/linux/sysctl.h -@@ -173,7 +173,6 @@ struct ctl_table_root { - struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root); - void (*set_ownership)(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid); - int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); - }; -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 2864fd7fafaac..c118d8293d3b6 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -200,7 +200,6 @@ static int set_is_seen(struct ctl_table_set *set) - } - - static void ipc_set_ownership(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { - struct ipc_namespace *ns = -@@ -232,7 +231,7 @@ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *tabl - kuid_t ns_root_uid; - kgid_t ns_root_gid; - -- ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ ipc_set_ownership(head, 
&ns_root_uid, &ns_root_gid); - - if (uid_eq(current_euid(), ns_root_uid)) - mode >>= 6; -diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c -index ce03930aced55..c960691fc24d9 100644 ---- a/ipc/mq_sysctl.c -+++ b/ipc/mq_sysctl.c -@@ -78,7 +78,6 @@ static int set_is_seen(struct ctl_table_set *set) - } - - static void mq_set_ownership(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { - struct ipc_namespace *ns = -@@ -97,7 +96,7 @@ static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table - kuid_t ns_root_uid; - kgid_t ns_root_gid; - -- mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ mq_set_ownership(head, &ns_root_uid, &ns_root_gid); - - if (uid_eq(current_euid(), ns_root_uid)) - mode >>= 6; -diff --git a/net/sysctl_net.c b/net/sysctl_net.c -index d14dab8b6774c..592f61eb1089b 100644 ---- a/net/sysctl_net.c -+++ b/net/sysctl_net.c -@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head, - } - - static void net_ctl_set_ownership(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { - struct net *net = container_of(head->set, struct net, sysctls); --- -2.43.0 - diff --git a/queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch b/queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch deleted file mode 100644 index f61d4ff709f..00000000000 --- a/queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch +++ /dev/null @@ -1,110 +0,0 @@ -From efc647f45293868fee5381817dc674cf2d997844 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 8 Nov 2021 18:35:59 -0800 -Subject: ipc: check checkpoint_restore_ns_capable() to modify C/R proc files - -From: Michal Clapinski - -[ Upstream commit 5563cabdde7ee53c34ec7e5e0283bfcc9a1bc893 ] - -This commit removes the requirement to be root to modify sem_next_id, -msg_next_id and shm_next_id and checks checkpoint_restore_ns_capable -instead. 
- -Since those files are specific to the IPC namespace, there is no reason -they should require root privileges. This is similar to ns_last_pid, -which also only checks checkpoint_restore_ns_capable. - -[akpm@linux-foundation.org: ipc/ipc_sysctl.c needs capability.h for checkpoint_restore_ns_capable()] - -Link: https://lkml.kernel.org/r/20210916163717.3179496-1-mclapinski@google.com -Signed-off-by: Michal Clapinski -Reviewed-by: Davidlohr Bueso -Reviewed-by: Manfred Spraul -Cc: "Eric W. Biederman" -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 29 +++++++++++++++++++++++------ - 1 file changed, 23 insertions(+), 6 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 3f312bf2b1163..345e4d673e61e 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - #include - #include - #include "util.h" -@@ -104,6 +105,19 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - return ret; - } - -+#ifdef CONFIG_CHECKPOINT_RESTORE -+static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, -+ int write, void *buffer, size_t *lenp, loff_t *ppos) -+{ -+ struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; -+ -+ if (write && !checkpoint_restore_ns_capable(user_ns)) -+ return -EPERM; -+ -+ return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -+} -+#endif -+ - #else - #define proc_ipc_doulongvec_minmax NULL - #define proc_ipc_dointvec NULL -@@ -111,6 +125,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - #define proc_ipc_dointvec_minmax_orphans NULL - #define proc_ipc_auto_msgmni NULL - #define proc_ipc_sem_dointvec NULL -+#ifdef CONFIG_CHECKPOINT_RESTORE -+#define proc_ipc_dointvec_minmax_checkpoint_restore NULL -+#endif /* CONFIG_CHECKPOINT_RESTORE */ - #endif - - int 
ipc_mni = IPCMNI; -@@ -198,8 +215,8 @@ static struct ctl_table ipc_kern_table[] = { - .procname = "sem_next_id", - .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), -- .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .mode = 0666, -+ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -207,8 +224,8 @@ static struct ctl_table ipc_kern_table[] = { - .procname = "msg_next_id", - .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), -- .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .mode = 0666, -+ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -216,8 +233,8 @@ static struct ctl_table ipc_kern_table[] = { - .procname = "shm_next_id", - .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), -- .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .mode = 0666, -+ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, --- -2.43.0 - diff --git a/queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch b/queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch deleted file mode 100644 index 4a5838425a6..00000000000 --- a/queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 6bf48e86990d981eec1125c59ee6a81a3ff9bf05 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Tue, 3 May 2022 15:39:56 +0200 -Subject: ipc: Check permissions for checkpoint_restart sysctls at open time - -From: Alexey Gladkov - -[ Upstream commit 0889f44e281034e180daa6daf3e2d57c012452d4 ] - -As Eric Biederman pointed out, it is possible not to use a custom -proc_handler and check permissions for every write, but to use a 
-.permission handler. That will allow the checkpoint_restart sysctls to -perform all of their permission checks at open time, and not need any -other special code. - -Link: https://lore.kernel.org/lkml/87czib9g38.fsf@email.froward.int.ebiederm.org/ -Fixes: 1f5c135ee509 ("ipc: Store ipc sysctls in the ipc namespace") -Signed-off-by: Eric W. Biederman -Signed-off-by: Alexey Gladkov -Link: https://lkml.kernel.org/r/65fa8459803830608da4610a39f33c76aa933eb9.1651584847.git.legion@kernel.org -Signed-off-by: Eric W. Biederman -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 57 ++++++++++++++++++++++++------------------------ - 1 file changed, 29 insertions(+), 28 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 15210ac47e9e1..a2b871d006da7 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -78,25 +78,6 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - return ret; - } - --#ifdef CONFIG_CHECKPOINT_RESTORE --static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, -- int write, void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ipc_namespace *ns = table->extra1; -- struct ctl_table ipc_table; -- -- if (write && !checkpoint_restore_ns_capable(ns->user_ns)) -- return -EPERM; -- -- memcpy(&ipc_table, table, sizeof(ipc_table)); -- -- ipc_table.extra1 = SYSCTL_ZERO; -- ipc_table.extra2 = SYSCTL_INT_MAX; -- -- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); --} --#endif -- - int ipc_mni = IPCMNI; - int ipc_mni_shift = IPCMNI_SHIFT; - int ipc_min_cycle = RADIX_TREE_MAP_SIZE; -@@ -180,22 +161,28 @@ static struct ctl_table ipc_sysctls[] = { - .procname = "sem_next_id", - .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), -- .mode = 0666, -- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -+ .mode = 0444, -+ .proc_handler = 
proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "msg_next_id", - .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), -- .mode = 0666, -- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -+ .mode = 0444, -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "shm_next_id", - .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, - .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), -- .mode = 0666, -- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -+ .mode = 0444, -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_INT_MAX, - }, - #endif - {} -@@ -211,8 +198,25 @@ static int set_is_seen(struct ctl_table_set *set) - return &current->nsproxy->ipc_ns->ipc_set == set; - } - -+static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) -+{ -+ int mode = table->mode; -+ -+#ifdef CONFIG_CHECKPOINT_RESTORE -+ struct ipc_namespace *ns = current->nsproxy->ipc_ns; -+ -+ if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || -+ (table->data == &ns->ids[IPC_MSG_IDS].next_id) || -+ (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && -+ checkpoint_restore_ns_capable(ns->user_ns)) -+ mode = 0666; -+#endif -+ return mode; -+} -+ - static struct ctl_table_root set_root = { - .lookup = set_lookup, -+ .permissions = ipc_permissions, - }; - - bool setup_ipc_sysctls(struct ipc_namespace *ns) -@@ -254,15 +258,12 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns) - #ifdef CONFIG_CHECKPOINT_RESTORE - } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { - tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; -- tbl[i].extra1 = ns; - - } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { - tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; -- tbl[i].extra1 = ns; - - } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { -
tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; -- tbl[i].extra1 = ns; - #endif - } else { - tbl[i].data = NULL; --- -2.43.0 - diff --git a/queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch b/queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch deleted file mode 100644 index c1140b64f19..00000000000 --- a/queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 25f6606b9e49210b3beb55303a36275e7c923b29 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 8 Nov 2021 18:36:02 -0800 -Subject: ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL - -From: Manfred Spraul - -[ Upstream commit 0e9beb8a96f21a6df1579cb3a679e150e3269d80 ] - -Compilation of ipc/ipc_sysctl.c is controlled by -obj-$(CONFIG_SYSVIPC_SYSCTL) -[see ipc/Makefile] - -And CONFIG_SYSVIPC_SYSCTL depends on SYSCTL -[see init/Kconfig] - -An SYSCTL is selected by PROC_SYSCTL. -[see fs/proc/Kconfig] - -Thus: #ifndef CONFIG_PROC_SYSCTL in ipc/ipc_sysctl.c is impossible, the -fallback can be removed. - -Link: https://lkml.kernel.org/r/20210918145337.3369-1-manfred@colorfullife.com -Signed-off-by: Manfred Spraul -Reviewed-by: "Eric W. 
Biederman" -Acked-by: Davidlohr Bueso -Cc: Manfred Spraul -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 13 ------------- - 1 file changed, 13 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 345e4d673e61e..f101c171753f6 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -23,7 +23,6 @@ static void *get_ipc(struct ctl_table *table) - return which; - } - --#ifdef CONFIG_PROC_SYSCTL - static int proc_ipc_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -@@ -118,18 +117,6 @@ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, - } - #endif - --#else --#define proc_ipc_doulongvec_minmax NULL --#define proc_ipc_dointvec NULL --#define proc_ipc_dointvec_minmax NULL --#define proc_ipc_dointvec_minmax_orphans NULL --#define proc_ipc_auto_msgmni NULL --#define proc_ipc_sem_dointvec NULL --#ifdef CONFIG_CHECKPOINT_RESTORE --#define proc_ipc_dointvec_minmax_checkpoint_restore NULL --#endif /* CONFIG_CHECKPOINT_RESTORE */ --#endif -- - int ipc_mni = IPCMNI; - int ipc_mni_shift = IPCMNI_SHIFT; - int ipc_min_cycle = RADIX_TREE_MAP_SIZE; --- -2.43.0 - diff --git a/queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch b/queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch deleted file mode 100644 index f94d17b32f5..00000000000 --- a/queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch +++ /dev/null @@ -1,406 +0,0 @@ -From 9b69bb3c9c4743bd5ae28e9a658635b06408cd0a Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 14 Feb 2022 19:18:15 +0100 -Subject: ipc: Store ipc sysctls in the ipc namespace - -From: Alexey Gladkov - -[ Upstream commit 1f5c135ee509e89e0cc274333a65f73c62cb16e5 ] - -The ipc sysctls are not available for modification inside the user -namespace. 
Following the mqueue sysctls, we changed the implementation -to be more userns friendly. - -So far, the changes do not provide additional access to files. This -will be done in a future patch. - -Signed-off-by: Alexey Gladkov -Link: https://lkml.kernel.org/r/be6f9d014276f4dddd0c3aa05a86052856c1c555.1644862280.git.legion@kernel.org -Signed-off-by: Eric W. Biederman -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - include/linux/ipc_namespace.h | 21 ++++ - ipc/ipc_sysctl.c | 189 ++++++++++++++++++++++------------ - ipc/namespace.c | 4 + - 3 files changed, 147 insertions(+), 67 deletions(-) - -diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h -index fa787d97d60a9..e3e8c8662b490 100644 ---- a/include/linux/ipc_namespace.h -+++ b/include/linux/ipc_namespace.h -@@ -67,6 +67,9 @@ struct ipc_namespace { - struct ctl_table_set mq_set; - struct ctl_table_header *mq_sysctls; - -+ struct ctl_table_set ipc_set; -+ struct ctl_table_header *ipc_sysctls; -+ - /* user_ns which owns the ipc ns */ - struct user_namespace *user_ns; - struct ucounts *ucounts; -@@ -188,4 +191,22 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns) - } - - #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ -+ -+#ifdef CONFIG_SYSVIPC_SYSCTL -+ -+bool setup_ipc_sysctls(struct ipc_namespace *ns); -+void retire_ipc_sysctls(struct ipc_namespace *ns); -+ -+#else /* CONFIG_SYSVIPC_SYSCTL */ -+ -+static inline void retire_ipc_sysctls(struct ipc_namespace *ns) -+{ -+} -+ -+static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) -+{ -+ return true; -+} -+ -+#endif /* CONFIG_SYSVIPC_SYSCTL */ - #endif -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index f101c171753f6..15210ac47e9e1 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -13,43 +13,22 @@ - #include - #include - #include -+#include - #include "util.h" - --static void *get_ipc(struct ctl_table *table) --{ -- char *which = table->data; -- struct 
ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; -- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; -- return which; --} -- --static int proc_ipc_dointvec(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table ipc_table; -- -- memcpy(&ipc_table, table, sizeof(ipc_table)); -- ipc_table.data = get_ipc(table); -- -- return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); --} -- --static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, -+static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -+ struct ipc_namespace *ns = table->extra1; - struct ctl_table ipc_table; -+ int err; - - memcpy(&ipc_table, table, sizeof(ipc_table)); -- ipc_table.data = get_ipc(table); - -- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); --} -+ ipc_table.extra1 = SYSCTL_ZERO; -+ ipc_table.extra2 = SYSCTL_ONE; - --static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ipc_namespace *ns = current->nsproxy->ipc_ns; -- int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -+ err = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); - - if (err < 0) - return err; -@@ -58,17 +37,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, - return err; - } - --static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table ipc_table; -- memcpy(&ipc_table, table, sizeof(ipc_table)); -- ipc_table.data = get_ipc(table); -- -- return proc_doulongvec_minmax(&ipc_table, write, buffer, -- lenp, ppos); --} -- - static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -@@ -87,11 +55,17 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, - static int 
proc_ipc_sem_dointvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) - { -+ struct ipc_namespace *ns = table->extra1; -+ struct ctl_table ipc_table; - int ret, semmni; -- struct ipc_namespace *ns = current->nsproxy->ipc_ns; -+ -+ memcpy(&ipc_table, table, sizeof(ipc_table)); -+ -+ ipc_table.extra1 = NULL; -+ ipc_table.extra2 = NULL; - - semmni = ns->sem_ctls[3]; -- ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos); -+ ret = proc_dointvec(table, write, buffer, lenp, ppos); - - if (!ret) - ret = sem_check_semmni(current->nsproxy->ipc_ns); -@@ -108,12 +82,18 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, - static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, - int write, void *buffer, size_t *lenp, loff_t *ppos) - { -- struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; -+ struct ipc_namespace *ns = table->extra1; -+ struct ctl_table ipc_table; - -- if (write && !checkpoint_restore_ns_capable(user_ns)) -+ if (write && !checkpoint_restore_ns_capable(ns->user_ns)) - return -EPERM; - -- return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); -+ memcpy(&ipc_table, table, sizeof(ipc_table)); -+ -+ ipc_table.extra1 = SYSCTL_ZERO; -+ ipc_table.extra2 = SYSCTL_INT_MAX; -+ -+ return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); - } - #endif - -@@ -121,27 +101,27 @@ int ipc_mni = IPCMNI; - int ipc_mni_shift = IPCMNI_SHIFT; - int ipc_min_cycle = RADIX_TREE_MAP_SIZE; - --static struct ctl_table ipc_kern_table[] = { -+static struct ctl_table ipc_sysctls[] = { - { - .procname = "shmmax", - .data = &init_ipc_ns.shm_ctlmax, - .maxlen = sizeof(init_ipc_ns.shm_ctlmax), - .mode = 0644, -- .proc_handler = proc_ipc_doulongvec_minmax, -+ .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "shmall", - .data = &init_ipc_ns.shm_ctlall, - .maxlen = sizeof(init_ipc_ns.shm_ctlall), - .mode = 0644, -- .proc_handler = proc_ipc_doulongvec_minmax, -+ 
.proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "shmmni", - .data = &init_ipc_ns.shm_ctlmni, - .maxlen = sizeof(init_ipc_ns.shm_ctlmni), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ipc_mni, - }, -@@ -151,15 +131,13 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax_orphans, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_ONE, - }, - { - .procname = "msgmax", - .data = &init_ipc_ns.msg_ctlmax, - .maxlen = sizeof(init_ipc_ns.msg_ctlmax), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -168,7 +146,7 @@ static struct ctl_table ipc_kern_table[] = { - .data = &init_ipc_ns.msg_ctlmni, - .maxlen = sizeof(init_ipc_ns.msg_ctlmni), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ipc_mni, - }, -@@ -186,7 +164,7 @@ static struct ctl_table ipc_kern_table[] = { - .data = &init_ipc_ns.msg_ctlmnb, - .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), - .mode = 0644, -- .proc_handler = proc_ipc_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_INT_MAX, - }, -@@ -204,8 +182,6 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = "msg_next_id", -@@ -213,8 +189,6 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_INT_MAX, - }, - { - .procname = 
"shm_next_id", -@@ -222,25 +196,106 @@ static struct ctl_table ipc_kern_table[] = { - .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode = 0666, - .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, -- .extra1 = SYSCTL_ZERO, -- .extra2 = SYSCTL_INT_MAX, - }, - #endif - {} - }; - --static struct ctl_table ipc_root_table[] = { -- { -- .procname = "kernel", -- .mode = 0555, -- .child = ipc_kern_table, -- }, -- {} -+static struct ctl_table_set *set_lookup(struct ctl_table_root *root) -+{ -+ return &current->nsproxy->ipc_ns->ipc_set; -+} -+ -+static int set_is_seen(struct ctl_table_set *set) -+{ -+ return &current->nsproxy->ipc_ns->ipc_set == set; -+} -+ -+static struct ctl_table_root set_root = { -+ .lookup = set_lookup, - }; - -+bool setup_ipc_sysctls(struct ipc_namespace *ns) -+{ -+ struct ctl_table *tbl; -+ -+ setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); -+ -+ tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); -+ if (tbl) { -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { -+ if (tbl[i].data == &init_ipc_ns.shm_ctlmax) { -+ tbl[i].data = &ns->shm_ctlmax; -+ -+ } else if (tbl[i].data == &init_ipc_ns.shm_ctlall) { -+ tbl[i].data = &ns->shm_ctlall; -+ -+ } else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) { -+ tbl[i].data = &ns->shm_ctlmni; -+ -+ } else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) { -+ tbl[i].data = &ns->shm_rmid_forced; -+ tbl[i].extra1 = ns; -+ -+ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) { -+ tbl[i].data = &ns->msg_ctlmax; -+ -+ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) { -+ tbl[i].data = &ns->msg_ctlmni; -+ -+ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) { -+ tbl[i].data = &ns->msg_ctlmnb; -+ -+ } else if (tbl[i].data == &init_ipc_ns.sem_ctls) { -+ tbl[i].data = &ns->sem_ctls; -+ tbl[i].extra1 = ns; -+#ifdef CONFIG_CHECKPOINT_RESTORE -+ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { -+ tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; -+ tbl[i].extra1 = ns; -+ -+
} else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { -+ tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; -+ tbl[i].extra1 = ns; -+ -+ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { -+ tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; -+ tbl[i].extra1 = ns; -+#endif -+ } else { -+ tbl[i].data = NULL; -+ } -+ } -+ -+ ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); -+ } -+ if (!ns->ipc_sysctls) { -+ kfree(tbl); -+ retire_sysctl_set(&ns->ipc_set); -+ return false; -+ } -+ -+ return true; -+} -+ -+void retire_ipc_sysctls(struct ipc_namespace *ns) -+{ -+ struct ctl_table *tbl; -+ -+ tbl = ns->ipc_sysctls->ctl_table_arg; -+ unregister_sysctl_table(ns->ipc_sysctls); -+ retire_sysctl_set(&ns->ipc_set); -+ kfree(tbl); -+} -+ - static int __init ipc_sysctl_init(void) - { -- register_sysctl_table(ipc_root_table); -+ if (!setup_ipc_sysctls(&init_ipc_ns)) { -+ pr_warn("ipc sysctl registration failed\n"); -+ return -ENOMEM; -+ } - return 0; - } - -diff --git a/ipc/namespace.c b/ipc/namespace.c -index f760243ca685c..754f3237194aa 100644 ---- a/ipc/namespace.c -+++ b/ipc/namespace.c -@@ -63,6 +63,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, - if (!setup_mq_sysctls(ns)) - goto fail_put; - -+ if (!setup_ipc_sysctls(ns)) -+ goto fail_put; -+ - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); -@@ -130,6 +133,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) - shm_exit_ns(ns); - - retire_mq_sysctls(ns); -+ retire_ipc_sysctls(ns); - - dec_ipc_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); --- -2.43.0 - diff --git a/queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch b/queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch deleted file mode 100644 index 58a55cbcf92..00000000000 --- a/queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch +++ /dev/null @@ -1,323 +0,0 @@ -From 7ec01f48cee5af0c38b69108ec374e095a80b742 Mon Sep 17 00:00:00 2001 -From: Sasha Levin 
-Date: Mon, 14 Feb 2022 19:18:14 +0100 -Subject: ipc: Store mqueue sysctls in the ipc namespace - -From: Alexey Gladkov - -[ Upstream commit dc55e35f9e810f23dd69cfdc91a3d636023f57a2 ] - -Right now, the mqueue sysctls take ipc namespaces into account in a -rather hacky way. This works in most cases, but does not respect the -user namespace. - -Within the user namespace, the user cannot change the /proc/sys/fs/mqueue/* -parametres. This poses a problem in the rootless containers. - -To solve this I changed the implementation of the mqueue sysctls just -like some other sysctls. - -So far, the changes do not provide additional access to files. This will -be done in a future patch. - -v3: -* Don't implemenet set_permissions to keep the current behavior. - -v2: -* Fixed compilation problem if CONFIG_POSIX_MQUEUE_SYSCTL is not - specified. - -Reported-by: kernel test robot -Signed-off-by: Alexey Gladkov -Link: https://lkml.kernel.org/r/b0ccbb2489119f1f20c737cf1930c3a9c4e4243a.1644862280.git.legion@kernel.org -Signed-off-by: Eric W. 
Biederman -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - include/linux/ipc_namespace.h | 16 +++-- - ipc/mq_sysctl.c | 121 ++++++++++++++++++---------------- - ipc/mqueue.c | 10 ++- - ipc/namespace.c | 6 ++ - 4 files changed, 88 insertions(+), 65 deletions(-) - -diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h -index b75395ec8d521..fa787d97d60a9 100644 ---- a/include/linux/ipc_namespace.h -+++ b/include/linux/ipc_namespace.h -@@ -10,6 +10,7 @@ - #include - #include - #include -+#include - - struct user_namespace; - -@@ -63,6 +64,9 @@ struct ipc_namespace { - unsigned int mq_msg_default; - unsigned int mq_msgsize_default; - -+ struct ctl_table_set mq_set; -+ struct ctl_table_header *mq_sysctls; -+ - /* user_ns which owns the ipc ns */ - struct user_namespace *user_ns; - struct ucounts *ucounts; -@@ -169,14 +173,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) - - #ifdef CONFIG_POSIX_MQUEUE_SYSCTL - --struct ctl_table_header; --extern struct ctl_table_header *mq_register_sysctl_table(void); -+void retire_mq_sysctls(struct ipc_namespace *ns); -+bool setup_mq_sysctls(struct ipc_namespace *ns); - - #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ - --static inline struct ctl_table_header *mq_register_sysctl_table(void) -+static inline void retire_mq_sysctls(struct ipc_namespace *ns) - { -- return NULL; -+} -+ -+static inline bool setup_mq_sysctls(struct ipc_namespace *ns) -+{ -+ return true; - } - - #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ -diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c -index 72a92a08c848e..fbf6a8b93a265 100644 ---- a/ipc/mq_sysctl.c -+++ b/ipc/mq_sysctl.c -@@ -9,39 +9,9 @@ - #include - #include - --#ifdef CONFIG_PROC_SYSCTL --static void *get_mq(struct ctl_table *table) --{ -- char *which = table->data; -- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; -- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; -- return which; --} -- --static int 
proc_mq_dointvec(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table mq_table; -- memcpy(&mq_table, table, sizeof(mq_table)); -- mq_table.data = get_mq(table); -- -- return proc_dointvec(&mq_table, write, buffer, lenp, ppos); --} -- --static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, -- void *buffer, size_t *lenp, loff_t *ppos) --{ -- struct ctl_table mq_table; -- memcpy(&mq_table, table, sizeof(mq_table)); -- mq_table.data = get_mq(table); -- -- return proc_dointvec_minmax(&mq_table, write, buffer, -- lenp, ppos); --} --#else --#define proc_mq_dointvec NULL --#define proc_mq_dointvec_minmax NULL --#endif -+#include -+#include -+#include - - static int msg_max_limit_min = MIN_MSGMAX; - static int msg_max_limit_max = HARD_MSGMAX; -@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_queues_max, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec, -+ .proc_handler = proc_dointvec, - }, - { - .procname = "msg_max", - .data = &init_ipc_ns.mq_msg_max, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_max_limit_min, - .extra2 = &msg_max_limit_max, - }, -@@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_msgsize_max, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_maxsize_limit_min, - .extra2 = &msg_maxsize_limit_max, - }, -@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_msg_default, - .maxlen = sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_max_limit_min, - .extra2 = &msg_max_limit_max, - }, -@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = { - .data = &init_ipc_ns.mq_msgsize_default, - .maxlen 
= sizeof(int), - .mode = 0644, -- .proc_handler = proc_mq_dointvec_minmax, -+ .proc_handler = proc_dointvec_minmax, - .extra1 = &msg_maxsize_limit_min, - .extra2 = &msg_maxsize_limit_max, - }, - {} - }; - --static struct ctl_table mq_sysctl_dir[] = { -- { -- .procname = "mqueue", -- .mode = 0555, -- .child = mq_sysctls, -- }, -- {} --}; -+static struct ctl_table_set *set_lookup(struct ctl_table_root *root) -+{ -+ return &current->nsproxy->ipc_ns->mq_set; -+} - --static struct ctl_table mq_sysctl_root[] = { -- { -- .procname = "fs", -- .mode = 0555, -- .child = mq_sysctl_dir, -- }, -- {} -+static int set_is_seen(struct ctl_table_set *set) -+{ -+ return &current->nsproxy->ipc_ns->mq_set == set; -+} -+ -+static struct ctl_table_root set_root = { -+ .lookup = set_lookup, - }; - --struct ctl_table_header *mq_register_sysctl_table(void) -+bool setup_mq_sysctls(struct ipc_namespace *ns) - { -- return register_sysctl_table(mq_sysctl_root); -+ struct ctl_table *tbl; -+ -+ setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); -+ -+ tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); -+ if (tbl) { -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) { -+ if (tbl[i].data == &init_ipc_ns.mq_queues_max) -+ tbl[i].data = &ns->mq_queues_max; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msg_max) -+ tbl[i].data = &ns->mq_msg_max; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max) -+ tbl[i].data = &ns->mq_msgsize_max; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msg_default) -+ tbl[i].data = &ns->mq_msg_default; -+ -+ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default) -+ tbl[i].data = &ns->mq_msgsize_default; -+ else -+ tbl[i].data = NULL; -+ } -+ -+ ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); -+ } -+ if (!ns->mq_sysctls) { -+ kfree(tbl); -+ retire_sysctl_set(&ns->mq_set); -+ return false; -+ } -+ -+ return true; -+} -+ -+void retire_mq_sysctls(struct ipc_namespace *ns) -+{ -+ struct ctl_table *tbl; -+ -+ tbl = 
ns->mq_sysctls->ctl_table_arg; -+ unregister_sysctl_table(ns->mq_sysctls); -+ retire_sysctl_set(&ns->mq_set); -+ kfree(tbl); - } -diff --git a/ipc/mqueue.c b/ipc/mqueue.c -index 089c34d0732cf..79b0079ee1acb 100644 ---- a/ipc/mqueue.c -+++ b/ipc/mqueue.c -@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info); - - static struct kmem_cache *mqueue_inode_cachep; - --static struct ctl_table_header *mq_sysctl_table; -- - static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) - { - return container_of(inode, struct mqueue_inode_info, vfs_inode); -@@ -1727,8 +1725,10 @@ static int __init init_mqueue_fs(void) - if (mqueue_inode_cachep == NULL) - return -ENOMEM; - -- /* ignore failures - they are not fatal */ -- mq_sysctl_table = mq_register_sysctl_table(); -+ if (!setup_mq_sysctls(&init_ipc_ns)) { -+ pr_warn("sysctl registration failed\n"); -+ return -ENOMEM; -+ } - - error = register_filesystem(&mqueue_fs_type); - if (error) -@@ -1745,8 +1745,6 @@ static int __init init_mqueue_fs(void) - out_filesystem: - unregister_filesystem(&mqueue_fs_type); - out_sysctl: -- if (mq_sysctl_table) -- unregister_sysctl_table(mq_sysctl_table); - kmem_cache_destroy(mqueue_inode_cachep); - return error; - } -diff --git a/ipc/namespace.c b/ipc/namespace.c -index ae83f0f2651b7..f760243ca685c 100644 ---- a/ipc/namespace.c -+++ b/ipc/namespace.c -@@ -59,6 +59,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, - if (err) - goto fail_put; - -+ err = -ENOMEM; -+ if (!setup_mq_sysctls(ns)) -+ goto fail_put; -+ - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); -@@ -125,6 +129,8 @@ static void free_ipc_ns(struct ipc_namespace *ns) - msg_exit_ns(ns); - shm_exit_ns(ns); - -+ retire_mq_sysctls(ns); -+ - dec_ipc_namespaces(ns->ucounts); - put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); --- -2.43.0 - diff --git a/queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch 
b/queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch deleted file mode 100644 index cee7af756bc..00000000000 --- a/queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch +++ /dev/null @@ -1,53 +0,0 @@ -From dfd009372d960dc1ccf694e7369d58e63cd133c4 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Wed, 10 Jul 2024 10:14:27 +0200 -Subject: ipv4: fix source address selection with route leak - -From: Nicolas Dichtel - -[ Upstream commit 6807352353561187a718e87204458999dbcbba1b ] - -By default, an address assigned to the output interface is selected when -the source address is not specified. This is problematic when a route, -configured in a vrf, uses an interface from another vrf (aka route leak). -The original vrf does not own the selected source address. - -Let's add a check against the output interface and call the appropriate -function to select the source address. - -CC: stable@vger.kernel.org -Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF") -Signed-off-by: Nicolas Dichtel -Reviewed-by: David Ahern -Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com -Signed-off-by: Jakub Kicinski -Signed-off-by: Sasha Levin ---- - net/ipv4/fib_semantics.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c -index 3d00253afbb8d..4f1236458c214 100644 ---- a/net/ipv4/fib_semantics.c -+++ b/net/ipv4/fib_semantics.c -@@ -2286,6 +2286,15 @@ void fib_select_path(struct net *net, struct fib_result *res, - fib_select_default(fl4, res); - - check_saddr: -- if (!fl4->saddr) -- fl4->saddr = fib_result_prefsrc(net, res); -+ if (!fl4->saddr) { -+ struct net_device *l3mdev; -+ -+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); -+ -+ if (!l3mdev || -+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) -+ fl4->saddr = fib_result_prefsrc(net, res); -+ else -+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); -+ } - } --- 
-2.43.0 - diff --git a/queue-5.15/ipv6-fix-source-address-selection-with-route-leak.patch b/queue-5.15/ipv6-fix-source-address-selection-with-route-leak.patch deleted file mode 100644 index 52323d52d55..00000000000 --- a/queue-5.15/ipv6-fix-source-address-selection-with-route-leak.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 -From: Nicolas Dichtel -Date: Wed, 10 Jul 2024 10:14:28 +0200 -Subject: ipv6: fix source address selection with route leak - -From: Nicolas Dichtel - -commit 252442f2ae317d109ef0b4b39ce0608c09563042 upstream. - -By default, an address assigned to the output interface is selected when -the source address is not specified. This is problematic when a route, -configured in a vrf, uses an interface from another vrf (aka route leak). -The original vrf does not own the selected source address. - -Let's add a check against the output interface and call the appropriate -function to select the source address. - -CC: stable@vger.kernel.org -Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") -Signed-off-by: Nicolas Dichtel -Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com -Signed-off-by: Jakub Kicinski -Signed-off-by: Greg Kroah-Hartman ---- - include/net/ip6_route.h | 20 ++++++++++++++------ - net/ipv6/ip6_output.c | 1 + - net/ipv6/route.c | 2 +- - 3 files changed, 16 insertions(+), 7 deletions(-) - ---- a/include/net/ip6_route.h -+++ b/include/net/ip6_route.h -@@ -132,18 +132,26 @@ void rt6_age_exceptions(struct fib6_info - - static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, - const struct in6_addr *daddr, -- unsigned int prefs, -+ unsigned int prefs, int l3mdev_index, - struct in6_addr *saddr) - { -+ struct net_device *l3mdev; -+ struct net_device *dev; -+ bool same_vrf; - int err = 0; - -- if (f6i && f6i->fib6_prefsrc.plen) { -+ rcu_read_lock(); -+ -+ l3mdev = dev_get_by_index_rcu(net, l3mdev_index); -+ if (!f6i || 
!f6i->fib6_prefsrc.plen || l3mdev) -+ dev = f6i ? fib6_info_nh_dev(f6i) : NULL; -+ same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; -+ if (f6i && f6i->fib6_prefsrc.plen && same_vrf) - *saddr = f6i->fib6_prefsrc.addr; -- } else { -- struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; -+ else -+ err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); - -- err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); -- } -+ rcu_read_unlock(); - - return err; - } ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -1097,6 +1097,7 @@ static int ip6_dst_lookup_tail(struct ne - from = rt ? rcu_dereference(rt->from) : NULL; - err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, -+ fl6->flowi6_l3mdev, - &fl6->saddr); - rcu_read_unlock(); - ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -5680,7 +5680,7 @@ static int rt6_fill_node(struct net *net - goto nla_put_failure; - } else if (dest) { - struct in6_addr saddr_buf; -- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && -+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && - nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) - goto nla_put_failure; - } diff --git a/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch b/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch deleted file mode 100644 index b2687491f2a..00000000000 --- a/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch +++ /dev/null @@ -1,419 +0,0 @@ -From 16e9d306e5b7b1f72a5ad15fef79d17b4b72c9fd Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 14 Mar 2022 14:45:51 -0600 -Subject: net: Add l3mdev index to flow struct and avoid oif reset for port - devices - -From: David Ahern - -[ Upstream commit 40867d74c374b235e14d839f3a77f26684feefe5 ] - -The fundamental premise of VRF and l3mdev core code is binding a socket -to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. 
-Legacy code resets flowi_oif to the l3mdev losing any original port -device binding. Ben (among others) has demonstrated use cases where the -original port device binding is important and needs to be retained. -This patch handles that by adding a new entry to the common flow struct -that can indicate the l3mdev index for later rule and table matching -avoiding the need to reset flowi_oif. - -In addition to allowing more use cases that require port device binds, -this patch brings a few datapath simplications: - -1. l3mdev_fib_rule_match is only called when walking fib rules and - always after l3mdev_update_flow. That allows an optimization to bail - early for non-VRF type uses cases when flowi_l3mdev is not set. Also, - only that index needs to be checked for the FIB table id. - -2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev - (e.g., VRF) device. By resetting flowi_oif only for this case the - FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, - removing several checks in the datapath. The flowi_iif path can be - simplified to only be called if the it is not loopback (loopback can - not be assigned to an L3 domain) and the l3mdev index is not already - set. - -3. Avoid another device lookup in the output path when the fib lookup - returns a reject failure. - -Note: 2 functional tests for local traffic with reject fib rules are -updated to reflect the new direct failure at FIB lookup time for ping -rather than the failure on packet path. The current code fails like this: - - HINT: Fails since address on vrf device is out of device scope - COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 - ping: Warning: source address might be selected on device other than: eth1 - PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. 
- - --- 172.16.3.1 ping statistics --- - 1 packets transmitted, 0 received, 100% packet loss, time 0ms - -where the test now directly fails: - - HINT: Fails since address on vrf device is out of device scope - COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 - ping: connect: No route to host - -Signed-off-by: David Ahern -Tested-by: Ben Greear -Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org -Signed-off-by: Jakub Kicinski -Stable-dep-of: 680735235356 ("ipv4: fix source address selection with route leak") -Signed-off-by: Sasha Levin ---- - drivers/net/vrf.c | 7 ++-- - include/net/flow.h | 6 +++- - net/ipv4/fib_frontend.c | 7 ++-- - net/ipv4/fib_semantics.c | 2 +- - net/ipv4/fib_trie.c | 7 ++-- - net/ipv4/route.c | 4 +-- - net/ipv4/xfrm4_policy.c | 4 +-- - net/ipv6/ip6_output.c | 3 +- - net/ipv6/route.c | 12 ------- - net/ipv6/xfrm6_policy.c | 3 +- - net/l3mdev/l3mdev.c | 43 +++++++++-------------- - tools/testing/selftests/net/fcnal-test.sh | 2 +- - 12 files changed, 37 insertions(+), 63 deletions(-) - -diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c -index 091dd7caf10cc..85f5d78ff9ac0 100644 ---- a/drivers/net/vrf.c -+++ b/drivers/net/vrf.c -@@ -471,14 +471,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, - - memset(&fl6, 0, sizeof(fl6)); - /* needed to match OIF rule */ -- fl6.flowi6_oif = dev->ifindex; -+ fl6.flowi6_l3mdev = dev->ifindex; - fl6.flowi6_iif = LOOPBACK_IFINDEX; - fl6.daddr = iph->daddr; - fl6.saddr = iph->saddr; - fl6.flowlabel = ip6_flowinfo(iph); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = iph->nexthdr; -- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; - - dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); - if (IS_ERR(dst) || dst == dst_null) -@@ -550,10 +549,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, - - memset(&fl4, 0, sizeof(fl4)); - /* needed to match OIF rule */ -- fl4.flowi4_oif = vrf_dev->ifindex; -+ fl4.flowi4_l3mdev = vrf_dev->ifindex; - 
fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.flowi4_tos = RT_TOS(ip4h->tos); -- fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; -+ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; - fl4.flowi4_proto = ip4h->protocol; - fl4.daddr = ip4h->daddr; - fl4.saddr = ip4h->saddr; -diff --git a/include/net/flow.h b/include/net/flow.h -index 776bacc96242a..079cc493fe67d 100644 ---- a/include/net/flow.h -+++ b/include/net/flow.h -@@ -29,6 +29,7 @@ struct flowi_tunnel { - struct flowi_common { - int flowic_oif; - int flowic_iif; -+ int flowic_l3mdev; - __u32 flowic_mark; - __u8 flowic_tos; - __u8 flowic_scope; -@@ -36,7 +37,6 @@ struct flowi_common { - __u8 flowic_flags; - #define FLOWI_FLAG_ANYSRC 0x01 - #define FLOWI_FLAG_KNOWN_NH 0x02 --#define FLOWI_FLAG_SKIP_NH_OIF 0x04 - __u32 flowic_secid; - kuid_t flowic_uid; - __u32 flowic_multipath_hash; -@@ -65,6 +65,7 @@ struct flowi4 { - struct flowi_common __fl_common; - #define flowi4_oif __fl_common.flowic_oif - #define flowi4_iif __fl_common.flowic_iif -+#define flowi4_l3mdev __fl_common.flowic_l3mdev - #define flowi4_mark __fl_common.flowic_mark - #define flowi4_tos __fl_common.flowic_tos - #define flowi4_scope __fl_common.flowic_scope -@@ -97,6 +98,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, - { - fl4->flowi4_oif = oif; - fl4->flowi4_iif = LOOPBACK_IFINDEX; -+ fl4->flowi4_l3mdev = 0; - fl4->flowi4_mark = mark; - fl4->flowi4_tos = tos; - fl4->flowi4_scope = scope; -@@ -127,6 +129,7 @@ struct flowi6 { - struct flowi_common __fl_common; - #define flowi6_oif __fl_common.flowic_oif - #define flowi6_iif __fl_common.flowic_iif -+#define flowi6_l3mdev __fl_common.flowic_l3mdev - #define flowi6_mark __fl_common.flowic_mark - #define flowi6_scope __fl_common.flowic_scope - #define flowi6_proto __fl_common.flowic_proto -@@ -156,6 +159,7 @@ struct flowi { - } u; - #define flowi_oif u.__fl_common.flowic_oif - #define flowi_iif u.__fl_common.flowic_iif -+#define flowi_l3mdev u.__fl_common.flowic_l3mdev - #define 
flowi_mark u.__fl_common.flowic_mark - #define flowi_tos u.__fl_common.flowic_tos - #define flowi_scope u.__fl_common.flowic_scope -diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c -index c21d57f02c651..5a3af86ee417a 100644 ---- a/net/ipv4/fib_frontend.c -+++ b/net/ipv4/fib_frontend.c -@@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) - bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); - struct flowi4 fl4 = { - .flowi4_iif = LOOPBACK_IFINDEX, -- .flowi4_oif = l3mdev_master_ifindex_rcu(dev), -+ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), - .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, - .flowi4_scope = scope, -@@ -352,9 +352,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - bool dev_match; - - fl4.flowi4_oif = 0; -- fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); -- if (!fl4.flowi4_iif) -- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; -+ fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); -+ fl4.flowi4_iif = oif ? 
: LOOPBACK_IFINDEX; - fl4.daddr = src; - fl4.saddr = dst; - fl4.flowi4_tos = tos; -diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c -index 735901b8c9f69..3d00253afbb8d 100644 ---- a/net/ipv4/fib_semantics.c -+++ b/net/ipv4/fib_semantics.c -@@ -2269,7 +2269,7 @@ void fib_select_multipath(struct fib_result *res, int hash) - void fib_select_path(struct net *net, struct fib_result *res, - struct flowi4 *fl4, const struct sk_buff *skb) - { -- if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) -+ if (fl4->flowi4_oif) - goto check_saddr; - - #ifdef CONFIG_IP_ROUTE_MULTIPATH -diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c -index 0b74debeecbb1..ec0113ecf3949 100644 ---- a/net/ipv4/fib_trie.c -+++ b/net/ipv4/fib_trie.c -@@ -1428,11 +1428,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, - !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) - return false; - -- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { -- if (flp->flowi4_oif && -- flp->flowi4_oif != nhc->nhc_oif) -- return false; -- } -+ if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) -+ return false; - - return true; - } -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index 60fc35defdf8b..3522801885787 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -2285,6 +2285,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - /* - * Now we are ready to route packet. - */ -+ fl4.flowi4_l3mdev = 0; - fl4.flowi4_oif = 0; - fl4.flowi4_iif = dev->ifindex; - fl4.flowi4_mark = skb->mark; -@@ -2761,8 +2762,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, - res->fi = NULL; - res->table = NULL; - if (fl4->flowi4_oif && -- (ipv4_is_multicast(fl4->daddr) || -- !netif_index_is_l3_master(net, fl4->flowi4_oif))) { -+ (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { - /* Apparently, routing tables are wrong. Assume, - * that the destination is on link. 
- * -diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c -index 9ebd54752e03b..4548a91acdc89 100644 ---- a/net/ipv4/xfrm4_policy.c -+++ b/net/ipv4/xfrm4_policy.c -@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = daddr->a4; - fl4->flowi4_tos = tos; -- fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); -+ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl4->flowi4_mark = mark; - if (saddr) - fl4->saddr = saddr->a4; - -- fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; -- - rt = __ip_route_output_key(net, fl4); - if (!IS_ERR(rt)) - return &rt->dst; -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c -index ce37c83455796..afcc3c44d87cf 100644 ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -1058,8 +1058,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, - #ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || - #endif -- (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && -- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { -+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { - dst_release(dst); - dst = NULL; - } -diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index d937ee942a4fc..35d3f02ddf163 100644 ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -1209,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, - struct fib6_node *fn; - struct rt6_info *rt; - -- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) -- flags &= ~RT6_LOOKUP_F_IFACE; -- - rcu_read_lock(); - fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); - restart: -@@ -2182,9 +2179,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, - fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); - saved_fn = fn; - -- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) -- oif = 0; -- - redo_rt6_select: - 
rt6_select(net, fn, oif, res, strict); - if (res->f6i == net->ipv6.fib6_null_entry) { -@@ -3060,12 +3054,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, - struct fib6_info *rt; - struct fib6_node *fn; - -- /* l3mdev_update_flow overrides oif if the device is enslaved; in -- * this case we must match on the real ingress device, so reset it -- */ -- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) -- fl6->flowi6_oif = skb->dev->ifindex; -- - /* Get the "current" route for this destination and - * check if the redirect has come from appropriate router. - * -diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c -index 7c903e0e446cb..492b9692c0dc0 100644 ---- a/net/ipv6/xfrm6_policy.c -+++ b/net/ipv6/xfrm6_policy.c -@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, - int err; - - memset(&fl6, 0, sizeof(fl6)); -- fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); -- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; -+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl6.flowi6_mark = mark; - memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); - if (saddr) -diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c -index 8b14a24f10404..ca10916340b09 100644 ---- a/net/l3mdev/l3mdev.c -+++ b/net/l3mdev/l3mdev.c -@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, - struct net_device *dev; - int rc = 0; - -- rcu_read_lock(); -+ /* update flow ensures flowi_l3mdev is set when relevant */ -+ if (!fl->flowi_l3mdev) -+ return 0; - -- dev = dev_get_by_index_rcu(net, fl->flowi_oif); -- if (dev && netif_is_l3_master(dev) && -- dev->l3mdev_ops->l3mdev_fib_table) { -- arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); -- rc = 1; -- goto out; -- } -+ rcu_read_lock(); - -- dev = dev_get_by_index_rcu(net, fl->flowi_iif); -+ dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_fib_table) { - 
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); - rc = 1; -- goto out; - } - --out: - rcu_read_unlock(); - - return rc; -@@ -277,31 +271,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, - void l3mdev_update_flow(struct net *net, struct flowi *fl) - { - struct net_device *dev; -- int ifindex; - - rcu_read_lock(); - - if (fl->flowi_oif) { - dev = dev_get_by_index_rcu(net, fl->flowi_oif); - if (dev) { -- ifindex = l3mdev_master_ifindex_rcu(dev); -- if (ifindex) { -- fl->flowi_oif = ifindex; -- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; -- goto out; -- } -+ if (!fl->flowi_l3mdev) -+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); -+ -+ /* oif set to L3mdev directs lookup to its table; -+ * reset to avoid oif match in fib_lookup -+ */ -+ if (netif_is_l3_master(dev)) -+ fl->flowi_oif = 0; -+ goto out; - } - } - -- if (fl->flowi_iif) { -+ if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { - dev = dev_get_by_index_rcu(net, fl->flowi_iif); -- if (dev) { -- ifindex = l3mdev_master_ifindex_rcu(dev); -- if (ifindex) { -- fl->flowi_iif = ifindex; -- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; -- } -- } -+ if (dev) -+ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); - } - - out: -diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh -index 6ecdbbe1b54fb..bed85001da735 100755 ---- a/tools/testing/selftests/net/fcnal-test.sh -+++ b/tools/testing/selftests/net/fcnal-test.sh -@@ -750,7 +750,7 @@ ipv4_ping_vrf() - log_start - show_hint "Fails since address on vrf device is out of device scope" - run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} -- log_test_addr ${a} $? 1 "ping local, device bind" -+ log_test_addr ${a} $? 
2 "ping local, device bind" - done - - # --- -2.43.0 - diff --git a/queue-5.15/series b/queue-5.15/series index e5c7e30bed2..42989e2317f 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -307,16 +307,6 @@ arm64-dts-qcom-msm8998-drop-usb-phy-clock-index.patch arm64-dts-qcom-msm8998-switch-usb-qmp-phy-to-new-sty.patch arm64-dts-qcom-msm8998-disable-ss-instance-in-parkmo.patch arm64-dts-qcom-ipq8074-disable-ss-instance-in-parkmo.patch -net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch -ipv4-fix-source-address-selection-with-route-leak.patch -ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch -ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch -ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch -ipc-store-ipc-sysctls-in-the-ipc-namespace.patch -ipc-check-permissions-for-checkpoint_restart-sysctls.patch -sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch -sysctl-allow-to-change-limits-for-posix-messages-que.patch -sysctl-treewide-drop-unused-argument-ctl_table_root-.patch sysctl-always-initialize-i_uid-i_gid.patch ext4-make-ext4_es_insert_extent-return-void.patch ext4-refactor-ext4_da_map_blocks.patch @@ -482,7 +472,6 @@ mptcp-pm-fix-backup-support-in-signal-endpoints.patch selftests-mptcp-join-validate-backup-in-mpj.patch selftests-mptcp-join-check-backup-support-in-signal-endp.patch btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch -ipv6-fix-source-address-selection-with-route-leak.patch xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch btrfs-fix-double-inode-unlock-for-direct-io-sync-writes.patch pci-dpc-fix-use-after-free-on-concurrent-dpc-and-hot-removal.patch diff --git a/queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch b/queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch deleted file mode 100644 index d92f228b1c4..00000000000 --- a/queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch +++ /dev/null @@ -1,140 +0,0 @@ 
-From 696cacd9d0e086e88f59a5d41fbe5a7e64e2d281 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 15 Jan 2024 15:46:41 +0000 -Subject: sysctl: allow change system v ipc sysctls inside ipc namespace - -From: Alexey Gladkov - -[ Upstream commit 50ec499b9a43e46200c9f7b7d723ab2e4af540b3 ] - -Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3. - -Right now ipc and mq limits count as per ipc namespace, but only real root -can change them. By default, the current values of these limits are such -that it can only be reduced. Since only root can change the values, it is -impossible to reduce these limits in the rootless container. - -We can allow limit changes within ipc namespace because mq parameters are -limited by RLIMIT_MSGQUEUE and ipc parameters are not limited to anything -other than cgroups. - -This patch (of 3): - -Rootless containers are not allowed to modify kernel IPC parameters. - -All default limits are set to such high values that in fact there are no -limits at all. All limits are not inherited and are initialized to -default values when a new ipc_namespace is created. - -For new ipc_namespace: - -size_t ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24)) -size_t ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24)) -int ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15) -int ipc_ns.shm_rmid_forced = 0; -unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192 -unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000 -unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384 - -The shm_tot (total amount of shared pages) has also ceased to be global, -it is located in ipc_namespace and is not inherited from anywhere. - -In such conditions, it cannot be said that these limits limit anything. -The real limiter for them is cgroups. - -If we allow rootless containers to change these parameters, then it can -only be reduced. 
- -Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org -Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org -Signed-off-by: Alexey Gladkov -Signed-off-by: Eric W. Biederman -Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org -Cc: Christian Brauner -Cc: Joel Granados -Cc: Kees Cook -Cc: Luis Chamberlain -Cc: Manfred Spraul -Cc: Davidlohr Bueso -Signed-off-by: Andrew Morton -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- - 1 file changed, 35 insertions(+), 2 deletions(-) - -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index a2b871d006da7..2864fd7fafaac 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -14,6 +14,7 @@ - #include - #include - #include -+#include - #include "util.h" - - static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, -@@ -198,25 +199,57 @@ static int set_is_seen(struct ctl_table_set *set) - return &current->nsproxy->ipc_ns->ipc_set == set; - } - -+static void ipc_set_ownership(struct ctl_table_header *head, -+ struct ctl_table *table, -+ kuid_t *uid, kgid_t *gid) -+{ -+ struct ipc_namespace *ns = -+ container_of(head->set, struct ipc_namespace, ipc_set); -+ -+ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); -+ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); -+ -+ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; -+ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; -+} -+ - static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) - { - int mode = table->mode; - - #ifdef CONFIG_CHECKPOINT_RESTORE -- struct ipc_namespace *ns = current->nsproxy->ipc_ns; -+ struct ipc_namespace *ns = -+ container_of(head->set, struct ipc_namespace, ipc_set); - - if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || - (table->data == &ns->ids[IPC_MSG_IDS].next_id) || - (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && - checkpoint_restore_ns_capable(ns->user_ns)) - mode = 0666; -+ else - #endif -- return mode; -+ { -+ kuid_t ns_root_uid; -+ kgid_t ns_root_gid; -+ -+ ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ -+ if (uid_eq(current_euid(), ns_root_uid)) -+ mode >>= 6; -+ -+ else if (in_egroup_p(ns_root_gid)) -+ mode >>= 3; -+ } -+ -+ mode &= 7; -+ -+ return (mode << 6) | (mode << 3) | mode; - } - - static struct ctl_table_root set_root = { - .lookup = set_lookup, - .permissions = ipc_permissions, -+ .set_ownership = ipc_set_ownership, - }; - - bool setup_ipc_sysctls(struct ipc_namespace *ns) --- -2.43.0 - diff --git a/queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch b/queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch deleted file mode 100644 index af54c4d2b02..00000000000 --- a/queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch +++ /dev/null @@ -1,95 +0,0 @@ -From bf1288be3757c0a1ba8305c9c39990c649695786 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 15 Jan 2024 15:46:43 +0000 -Subject: sysctl: allow to change limits for posix messages queues - -From: Alexey Gladkov - -[ Upstream commit f9436a5d0497f759330d07e1189565edd4456be8 ] - -All parameters of posix messages queues (queues_max/msg_max/msgsize_max) -end up being limited by RLIMIT_MSGQUEUE. The code in mqueue_get_inode is -where that limiting happens. - -The RLIMIT_MSGQUEUE is bound to the user namespace and is counted -hierarchically. 
- -We can allow root in the user namespace to modify the posix messages -queues parameters. - -Link: https://lkml.kernel.org/r/6ad67f23d1459a4f4339f74aa73bac0ecf3995e1.1705333426.git.legion@kernel.org -Signed-off-by: Alexey Gladkov -Signed-off-by: Eric W. Biederman -Link: https://lkml.kernel.org/r/7eb21211c8622e91d226e63416b1b93c079f60ee.1663756794.git.legion@kernel.org -Cc: Christian Brauner -Cc: Davidlohr Bueso -Cc: Joel Granados -Cc: Kees Cook -Cc: Luis Chamberlain -Cc: Manfred Spraul -Signed-off-by: Andrew Morton -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - ipc/mq_sysctl.c | 36 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 36 insertions(+) - -diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c -index fbf6a8b93a265..ce03930aced55 100644 ---- a/ipc/mq_sysctl.c -+++ b/ipc/mq_sysctl.c -@@ -12,6 +12,7 @@ - #include - #include - #include -+#include - - static int msg_max_limit_min = MIN_MSGMAX; - static int msg_max_limit_max = HARD_MSGMAX; -@@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set) - return &current->nsproxy->ipc_ns->mq_set == set; - } - -+static void mq_set_ownership(struct ctl_table_header *head, -+ struct ctl_table *table, -+ kuid_t *uid, kgid_t *gid) -+{ -+ struct ipc_namespace *ns = -+ container_of(head->set, struct ipc_namespace, mq_set); -+ -+ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); -+ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); -+ -+ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; -+ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; -+} -+ -+static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table) -+{ -+ int mode = table->mode; -+ kuid_t ns_root_uid; -+ kgid_t ns_root_gid; -+ -+ mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ -+ if (uid_eq(current_euid(), ns_root_uid)) -+ mode >>= 6; -+ -+ else if (in_egroup_p(ns_root_gid)) -+ mode >>= 3; -+ -+ mode &= 7; -+ -+ return (mode << 6) | (mode << 3) | mode; -+} -+ - static struct ctl_table_root set_root = { - .lookup = set_lookup, -+ .permissions = mq_permissions, -+ .set_ownership = mq_set_ownership, - }; - - bool setup_mq_sysctls(struct ipc_namespace *ns) --- -2.43.0 - diff --git a/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch b/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch index f9aead22f23..07208b2d38b 100644 --- a/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch +++ b/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch @@ -25,21 +25,19 @@ Signed-off-by: Thomas Weißschuh Signed-off-by: Joel Granados Signed-off-by: Sasha Levin --- - fs/proc/proc_sysctl.c | 6 ++---- + fs/proc/proc_sysctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) -diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index 4288fa4614eb2..6dd7efd8562e2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -466,12 +466,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, +@@ -466,12 +466,10 @@ static struct inode *proc_sys_make_inode make_empty_dir_inode(inode); } + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; if (root->set_ownership) - root->set_ownership(head, &inode->i_uid, &inode->i_gid); + root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); - else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; @@ -47,6 +45,3 @@ index 4288fa4614eb2..6dd7efd8562e2 100644 return inode; } --- -2.43.0 - diff --git a/queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch 
b/queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch deleted file mode 100644 index fed43d6fac9..00000000000 --- a/queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch +++ /dev/null @@ -1,127 +0,0 @@ -From d734e3ca18c278370bbab4db7f20821f5fbba65b Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 15 Mar 2024 19:11:30 +0100 -Subject: sysctl: treewide: drop unused argument - ctl_table_root::set_ownership(table) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -From: Thomas Weißschuh - -[ Upstream commit 520713a93d550406dae14d49cdb8778d70cecdfd ] - -Remove the 'table' argument from set_ownership as it is never used. This -change is a step towards putting "struct ctl_table" into .rodata and -eventually having sysctl core only use "const struct ctl_table". - -The patch was created with the following coccinelle script: - - @@ - identifier func, head, table, uid, gid; - @@ - - void func( - struct ctl_table_header *head, - - struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { ... } - -No additional occurrences of 'set_ownership' were found after doing a -tree-wide search. 
- -Reviewed-by: Joel Granados -Signed-off-by: Thomas Weißschuh -Signed-off-by: Joel Granados -Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") -Signed-off-by: Sasha Levin ---- - fs/proc/proc_sysctl.c | 2 +- - include/linux/sysctl.h | 1 - - ipc/ipc_sysctl.c | 3 +-- - ipc/mq_sysctl.c | 3 +-- - net/sysctl_net.c | 1 - - 5 files changed, 3 insertions(+), 7 deletions(-) - -diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index 4192fe6ec3da2..4288fa4614eb2 100644 ---- a/fs/proc/proc_sysctl.c -+++ b/fs/proc/proc_sysctl.c -@@ -467,7 +467,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, - } - - if (root->set_ownership) -- root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); -+ root->set_ownership(head, &inode->i_uid, &inode->i_gid); - else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; -diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h -index 32d79ef906e51..153755e07017f 100644 ---- a/include/linux/sysctl.h -+++ b/include/linux/sysctl.h -@@ -173,7 +173,6 @@ struct ctl_table_root { - struct ctl_table_set default_set; - struct ctl_table_set *(*lookup)(struct ctl_table_root *root); - void (*set_ownership)(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid); - int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); - }; -diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c -index 2864fd7fafaac..c118d8293d3b6 100644 ---- a/ipc/ipc_sysctl.c -+++ b/ipc/ipc_sysctl.c -@@ -200,7 +200,6 @@ static int set_is_seen(struct ctl_table_set *set) - } - - static void ipc_set_ownership(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { - struct ipc_namespace *ns = -@@ -232,7 +231,7 @@ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *tabl - kuid_t ns_root_uid; - kgid_t ns_root_gid; - -- ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ ipc_set_ownership(head, 
&ns_root_uid, &ns_root_gid); - - if (uid_eq(current_euid(), ns_root_uid)) - mode >>= 6; -diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c -index ce03930aced55..c960691fc24d9 100644 ---- a/ipc/mq_sysctl.c -+++ b/ipc/mq_sysctl.c -@@ -78,7 +78,6 @@ static int set_is_seen(struct ctl_table_set *set) - } - - static void mq_set_ownership(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { - struct ipc_namespace *ns = -@@ -97,7 +96,7 @@ static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table - kuid_t ns_root_uid; - kgid_t ns_root_gid; - -- mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); -+ mq_set_ownership(head, &ns_root_uid, &ns_root_gid); - - if (uid_eq(current_euid(), ns_root_uid)) - mode >>= 6; -diff --git a/net/sysctl_net.c b/net/sysctl_net.c -index f6cb0d4d114cd..95e9f40aeff0b 100644 ---- a/net/sysctl_net.c -+++ b/net/sysctl_net.c -@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head, - } - - static void net_ctl_set_ownership(struct ctl_table_header *head, -- struct ctl_table *table, - kuid_t *uid, kgid_t *gid) - { - struct net *net = container_of(head->set, struct net, sysctls); --- -2.43.0 -