From: Greg Kroah-Hartman Date: Mon, 2 Jun 2025 12:50:26 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v5.4.294~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c0f4bd18bfa11cd4fcad1387c85ea51b6b028d75;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: coredump-fix-error-handling-for-replace_fd.patch coredump-hand-a-pidfd-to-the-usermode-coredump-helper.patch fork-use-pidfd_prepare.patch pid-add-pidfd_prepare.patch pidfd-check-pid-has-attached-task-in-fdinfo.patch --- diff --git a/queue-5.4/coredump-fix-error-handling-for-replace_fd.patch b/queue-5.4/coredump-fix-error-handling-for-replace_fd.patch new file mode 100644 index 0000000000..2f49cf6f07 --- /dev/null +++ b/queue-5.4/coredump-fix-error-handling-for-replace_fd.patch @@ -0,0 +1,52 @@ +From 20e69cedec655211e7d1ddf00a8e3375df56ae36 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 14 Apr 2025 15:55:06 +0200 +Subject: coredump: fix error handling for replace_fd() + +From: Christian Brauner + +commit 95c5f43181fe9c1b5e5a4bd3281c857a5259991f upstream. + +The replace_fd() helper returns the file descriptor number on success +and a negative error code on failure. The current error handling in +umh_pipe_setup() only works because the file descriptor that is replaced +is zero but that's pretty volatile. Explicitly check for a negative +error code. 
+ +Link: https://lore.kernel.org/20250414-work-coredump-v2-2-685bf231f828@kernel.org +Tested-by: Luca Boccassi +Reviewed-by: Oleg Nesterov +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/coredump.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/fs/coredump.c ++++ b/fs/coredump.c +@@ -551,7 +551,9 @@ static int umh_pipe_setup(struct subproc + { + struct file *files[2]; + struct coredump_params *cp = (struct coredump_params *)info->data; +- int err = create_pipe_files(files, 0); ++ int err; ++ ++ err = create_pipe_files(files, 0); + if (err) + return err; + +@@ -559,10 +561,13 @@ static int umh_pipe_setup(struct subproc + + err = replace_fd(0, files[0], 0); + fput(files[0]); ++ if (err < 0) ++ return err; ++ + /* and disallow core files too */ + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; + +- return err; ++ return 0; + } + + void do_coredump(const kernel_siginfo_t *siginfo) diff --git a/queue-5.4/coredump-hand-a-pidfd-to-the-usermode-coredump-helper.patch b/queue-5.4/coredump-hand-a-pidfd-to-the-usermode-coredump-helper.patch new file mode 100644 index 0000000000..d0f1248382 --- /dev/null +++ b/queue-5.4/coredump-hand-a-pidfd-to-the-usermode-coredump-helper.patch @@ -0,0 +1,194 @@ +From b8e412e55db1729d182a471fb83273bbcbd18325 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 2 Jun 2025 13:16:07 +0200 +Subject: coredump: hand a pidfd to the usermode coredump helper + +From: Christian Brauner + +commit b5325b2a270fcaf7b2a9a0f23d422ca8a5a8bdea upstream. + +Give userspace a way to instruct the kernel to install a pidfd into the +usermode helper process. This makes coredump handling a lot more +reliable for userspace. In parallel with this commit we already have +systemd adding support for this in [1]. + +We create a pidfs file for the coredumping process when we process the +corename pattern. 
When the usermode helper process is forked we then +install the pidfs file as file descriptor three into the usermode +helpers file descriptor table so it's available to the exec'd program. + +Since usermode helpers are either children of the system_unbound_wq +workqueue or kthreadd we know that the file descriptor table is empty +and can thus always use three as the file descriptor number. + +Note, that we'll install a pidfd for the thread-group leader even if a +subthread is calling do_coredump(). We know that task linkage hasn't +been removed due to delay_group_leader() and even if this @current isn't +the actual thread-group leader we know that the thread-group leader +cannot be reaped until @current has exited. + +[brauner: This is a backport for the v5.4 series. Upstream has +significantly changed and backporting all that infra is a non-starter. +So simply backport the pidfd_prepare() helper and waste the file +descriptor we allocated. Then we minimally massage the umh coredump +setup code.] + +Link: https://github.com/systemd/systemd/pull/37125 [1] +Link: https://lore.kernel.org/20250414-work-coredump-v2-3-685bf231f828@kernel.org +Tested-by: Luca Boccassi +Reviewed-by: Oleg Nesterov +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/coredump.c | 77 +++++++++++++++++++++++++++++++++++++++++++----- + include/linux/binfmts.h | 1 + 2 files changed, 71 insertions(+), 7 deletions(-) + +--- a/fs/coredump.c ++++ b/fs/coredump.c +@@ -52,6 +52,13 @@ + + #include + ++/* ++ * File descriptor number for the pidfd for the thread-group leader of ++ * the coredumping task installed into the usermode helper's file ++ * descriptor table. 
++ */ ++#define COREDUMP_PIDFD_NUMBER 3 ++ + int core_uses_pid; + unsigned int core_pipe_limit; + char core_pattern[CORENAME_MAX_SIZE] = "core"; +@@ -314,6 +321,27 @@ static int format_corename(struct core_n + err = cn_printf(cn, "%lu", + rlimit(RLIMIT_CORE)); + break; ++ /* pidfd number */ ++ case 'F': { ++ /* ++ * Installing a pidfd only makes sense if ++ * we actually spawn a usermode helper. ++ */ ++ if (!ispipe) ++ break; ++ ++ /* ++ * Note that we'll install a pidfd for the ++ * thread-group leader. We know that task ++ * linkage hasn't been removed yet and even if ++ * this @current isn't the actual thread-group ++ * leader we know that the thread-group leader ++ * cannot be reaped until @current has exited. ++ */ ++ cprm->pid = task_tgid(current); ++ err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); ++ break; ++ } + default: + break; + } +@@ -537,7 +565,7 @@ static void wait_for_dump_helpers(struct + } + + /* +- * umh_pipe_setup ++ * umh_coredump_setup + * helper function to customize the process used + * to collect the core in userspace. Specifically + * it sets up a pipe and installs it as fd 0 (stdin) +@@ -547,27 +575,62 @@ static void wait_for_dump_helpers(struct + * is a special value that we use to trap recursive + * core dumps + */ +-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) ++static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) + { + struct file *files[2]; ++ struct file *pidfs_file = NULL; + struct coredump_params *cp = (struct coredump_params *)info->data; + int err; + ++ if (cp->pid) { ++ int fd; ++ ++ fd = pidfd_prepare(cp->pid, 0, &pidfs_file); ++ if (fd < 0) ++ return fd; ++ ++ /* ++ * We don't care about the fd. We also cannot simply ++ * replace it below because dup2() will refuse to close ++ * this file descriptor if it's in a larval state. So ++ * close it! ++ */ ++ put_unused_fd(fd); ++ ++ /* ++ * Usermode helpers are children of either ++ * system_unbound_wq or of kthreadd. 
So we know that ++ * we're starting off with a clean file descriptor ++ * table. So we should always be able to use ++ * COREDUMP_PIDFD_NUMBER as our file descriptor value. ++ */ ++ err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); ++ if (err < 0) ++ goto out_fail; ++ ++ pidfs_file = NULL; ++ } ++ + err = create_pipe_files(files, 0); + if (err) +- return err; ++ goto out_fail; + + cp->file = files[1]; + + err = replace_fd(0, files[0], 0); + fput(files[0]); + if (err < 0) +- return err; ++ goto out_fail; + + /* and disallow core files too */ + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; + +- return 0; ++ err = 0; ++ ++out_fail: ++ if (pidfs_file) ++ fput(pidfs_file); ++ return err; + } + + void do_coredump(const kernel_siginfo_t *siginfo) +@@ -643,7 +706,7 @@ void do_coredump(const kernel_siginfo_t + } + + if (cprm.limit == 1) { +- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. ++ /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. + * + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use +@@ -688,7 +751,7 @@ void do_coredump(const kernel_siginfo_t + retval = -ENOMEM; + sub_info = call_usermodehelper_setup(helper_argv[0], + helper_argv, NULL, GFP_KERNEL, +- umh_pipe_setup, NULL, &cprm); ++ umh_coredump_setup, NULL, &cprm); + if (sub_info) + retval = call_usermodehelper_exec(sub_info, + UMH_WAIT_EXEC); +--- a/include/linux/binfmts.h ++++ b/include/linux/binfmts.h +@@ -93,6 +93,7 @@ struct coredump_params { + unsigned long mm_flags; + loff_t written; + loff_t pos; ++ struct pid *pid; + }; + + /* diff --git a/queue-5.4/fork-use-pidfd_prepare.patch b/queue-5.4/fork-use-pidfd_prepare.patch new file mode 100644 index 0000000000..dbc99d0e40 --- /dev/null +++ b/queue-5.4/fork-use-pidfd_prepare.patch @@ -0,0 +1,46 @@ +From bae079933aa8f0f9ae4eb0711b7b537caf828e0b Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 27 Mar 2023 20:22:52 +0200 +Subject: fork: use pidfd_prepare() + 
+From: Christian Brauner + +commit ca7707f5430ad6b1c9cb7cee0a7f67d69328bb2d upstream. + +Stop open-coding get_unused_fd_flags() and anon_inode_getfile(). That's +brittle just for keeping the flags between both calls in sync. Use the +dedicated helper. + +Message-Id: <20230327-pidfd-file-api-v1-2-5c0e9a3158e4@kernel.org> +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + kernel/fork.c | 13 ++----------- + 1 file changed, 2 insertions(+), 11 deletions(-) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -2155,21 +2155,12 @@ static __latent_entropy struct task_stru + * if the fd table isn't shared). + */ + if (clone_flags & CLONE_PIDFD) { +- retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); ++ /* Note that no task has been attached to @pid yet. */ ++ retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile); + if (retval < 0) + goto bad_fork_free_pid; +- + pidfd = retval; + +- pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, +- O_RDWR | O_CLOEXEC); +- if (IS_ERR(pidfile)) { +- put_unused_fd(pidfd); +- retval = PTR_ERR(pidfile); +- goto bad_fork_free_pid; +- } +- get_pid(pid); /* held by pidfile now */ +- + retval = put_user(pidfd, args->pidfd); + if (retval) + goto bad_fork_put_pidfd; diff --git a/queue-5.4/pid-add-pidfd_prepare.patch b/queue-5.4/pid-add-pidfd_prepare.patch new file mode 100644 index 0000000000..6bc52b3867 --- /dev/null +++ b/queue-5.4/pid-add-pidfd_prepare.patch @@ -0,0 +1,128 @@ +From ae66170f2448e655d4280fd4b595bf0b446fb251 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Mon, 27 Mar 2023 20:22:51 +0200 +Subject: pid: add pidfd_prepare() + +From: Christian Brauner + +commit 6ae930d9dbf2d093157be33428538c91966d8a9f upstream. + +Add a new helper that allows to reserve a pidfd and allocates a new +pidfd file that stashes the provided struct pid. 
This will allow us to +remove places that either open code this function or that call +pidfd_create() but then have to call close_fd() because there are still +failure points after pidfd_create() has been called. + +Reviewed-by: Jan Kara +Message-Id: <20230327-pidfd-file-api-v1-1-5c0e9a3158e4@kernel.org> +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/pid.h | 1 + kernel/fork.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 86 insertions(+) + +--- a/include/linux/pid.h ++++ b/include/linux/pid.h +@@ -75,6 +75,7 @@ extern const struct file_operations pidf + struct file; + + extern struct pid *pidfd_pid(const struct file *file); ++int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); + + static inline struct pid *get_pid(struct pid *pid) + { +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1750,6 +1750,91 @@ const struct file_operations pidfd_fops + #endif + }; + ++/** ++ * __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd ++ * @pid: the struct pid for which to create a pidfd ++ * @flags: flags of the new @pidfd ++ * @pidfd: the pidfd to return ++ * ++ * Allocate a new file that stashes @pid and reserve a new pidfd number in the ++ * caller's file descriptor table. The pidfd is reserved but not installed yet. ++ ++ * The helper doesn't perform checks on @pid which makes it useful for pidfds ++ * created via CLONE_PIDFD where @pid has no task attached when the pidfd and ++ * pidfd file are prepared. ++ * ++ * If this function returns successfully the caller is responsible to either ++ * call fd_install() passing the returned pidfd and pidfd file as arguments in ++ * order to install the pidfd into its file descriptor table or they must use ++ * put_unused_fd() and fput() on the returned pidfd and pidfd file ++ * respectively. 
++ * ++ * This function is useful when a pidfd must already be reserved but there ++ * might still be points of failure afterwards and the caller wants to ensure ++ * that no pidfd is leaked into its file descriptor table. ++ * ++ * Return: On success, a reserved pidfd is returned from the function and a new ++ * pidfd file is returned in the last argument to the function. On ++ * error, a negative error code is returned from the function and the ++ * last argument remains unchanged. ++ */ ++static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) ++{ ++ int pidfd; ++ struct file *pidfd_file; ++ ++ if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC)) ++ return -EINVAL; ++ ++ pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); ++ if (pidfd < 0) ++ return pidfd; ++ ++ pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, ++ flags | O_RDWR | O_CLOEXEC); ++ if (IS_ERR(pidfd_file)) { ++ put_unused_fd(pidfd); ++ return PTR_ERR(pidfd_file); ++ } ++ get_pid(pid); /* held by pidfd_file now */ ++ *ret = pidfd_file; ++ return pidfd; ++} ++ ++/** ++ * pidfd_prepare - allocate a new pidfd_file and reserve a pidfd ++ * @pid: the struct pid for which to create a pidfd ++ * @flags: flags of the new @pidfd ++ * @pidfd: the pidfd to return ++ * ++ * Allocate a new file that stashes @pid and reserve a new pidfd number in the ++ * caller's file descriptor table. The pidfd is reserved but not installed yet. ++ * ++ * The helper verifies that @pid is used as a thread group leader. ++ * ++ * If this function returns successfully the caller is responsible to either ++ * call fd_install() passing the returned pidfd and pidfd file as arguments in ++ * order to install the pidfd into its file descriptor table or they must use ++ * put_unused_fd() and fput() on the returned pidfd and pidfd file ++ * respectively. 
++ * ++ * This function is useful when a pidfd must already be reserved but there ++ * might still be points of failure afterwards and the caller wants to ensure ++ * that no pidfd is leaked into its file descriptor table. ++ * ++ * Return: On success, a reserved pidfd is returned from the function and a new ++ * pidfd file is returned in the last argument to the function. On ++ * error, a negative error code is returned from the function and the ++ * last argument remains unchanged. ++ */ ++int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) ++{ ++ if (!pid || !pid_has_task(pid, PIDTYPE_TGID)) ++ return -EINVAL; ++ ++ return __pidfd_prepare(pid, flags, ret); ++} ++ + static void __delayed_free_task(struct rcu_head *rhp) + { + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); diff --git a/queue-5.4/pidfd-check-pid-has-attached-task-in-fdinfo.patch b/queue-5.4/pidfd-check-pid-has-attached-task-in-fdinfo.patch new file mode 100644 index 0000000000..2e05b66675 --- /dev/null +++ b/queue-5.4/pidfd-check-pid-has-attached-task-in-fdinfo.patch @@ -0,0 +1,76 @@ +From 8ff8b16bd17ff1dd55591dd81098673d81d86317 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Thu, 17 Oct 2019 12:18:28 +0200 +Subject: pidfd: check pid has attached task in fdinfo + +From: Christian Brauner + +commit 3d6d8da48d0b214d65ea0227d47228abc75d7c88 upstream. + +Currently, when a task is dead we still print the pid it used to use in +the fdinfo files of its pidfds. This doesn't make much sense since the +pid may have already been reused. So verify that the task is still alive +by introducing the pid_has_task() helper which will be used by other +callers in follow-up patches. +If the task is not alive anymore, we will print -1. This allows us to +differentiate between a task not being present in a given pid namespace +- in which case we already print 0 - and a task having been reaped. + +Note that this uses PIDTYPE_PID for the check. 
Technically, we could've +checked PIDTYPE_TGID since pidfds currently only refer to thread-group +leaders but if they won't anymore in the future then this check becomes +problematic without it being immediately obvious to non-experts imho. If +a thread is created via clone(CLONE_THREAD) then struct pid has a single +non-empty list pid->tasks[PIDTYPE_PID] and this pid can't be used as a +PIDTYPE_TGID meaning pid->tasks[PIDTYPE_TGID] will return NULL even +though the thread-group leader might still be very much alive. So +checking PIDTYPE_PID is fine and is easier to maintain should we ever +allow pidfds to refer to threads. + +Cc: Jann Horn +Cc: Christian Kellner +Cc: linux-api@vger.kernel.org +Signed-off-by: Christian Brauner +Reviewed-by: Oleg Nesterov +Link: https://lore.kernel.org/r/20191017101832.5985-1-christian.brauner@ubuntu.com +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/pid.h | 4 ++++ + kernel/fork.c | 10 ++++++++-- + 2 files changed, 12 insertions(+), 2 deletions(-) + +--- a/include/linux/pid.h ++++ b/include/linux/pid.h +@@ -85,6 +85,10 @@ static inline struct pid *get_pid(struct + + extern void put_pid(struct pid *pid); + extern struct task_struct *pid_task(struct pid *pid, enum pid_type); ++static inline bool pid_has_task(struct pid *pid, enum pid_type type) ++{ ++ return !hlist_empty(&pid->tasks[type]); ++} + extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); + + extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1703,10 +1703,16 @@ static int pidfd_release(struct inode *i + #ifdef CONFIG_PROC_FS + static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) + { +- struct pid_namespace *ns = proc_pid_ns(file_inode(m->file)); + struct pid *pid = f->private_data; ++ struct pid_namespace *ns; ++ pid_t nr = -1; + +- seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns)); ++ if (likely(pid_has_task(pid, 
PIDTYPE_PID))) { ++ ns = proc_pid_ns(file_inode(m->file)); ++ nr = pid_nr_ns(pid, ns); ++ } ++ ++ seq_put_decimal_ll(m, "Pid:\t", nr); + seq_putc(m, '\n'); + } + #endif diff --git a/queue-5.4/series b/queue-5.4/series index eeb5536eba..dff2e33213 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -189,3 +189,8 @@ drm-i915-gvt-fix-unterminated-string-initialization-warning.patch smb-client-fix-use-after-free-in-cifs_fill_dirent.patch smb-client-reset-all-search-buffer-pointers-when-releasing-buffer.patch net_sched-hfsc-address-reentrant-enqueue-adding-class-to-eltree-twice.patch +coredump-fix-error-handling-for-replace_fd.patch +pidfd-check-pid-has-attached-task-in-fdinfo.patch +pid-add-pidfd_prepare.patch +fork-use-pidfd_prepare.patch +coredump-hand-a-pidfd-to-the-usermode-coredump-helper.patch