From: Christian Brauner Date: Wed, 5 Mar 2025 10:08:15 +0000 (+0100) Subject: pidfs: record exit code and cgroupid at exit X-Git-Tag: v6.15-rc1~250^2~2^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4513522984a0af9c170af991c37fbb483cca654b;p=thirdparty%2Fkernel%2Fstable.git pidfs: record exit code and cgroupid at exit Record the exit code and cgroupid in release_task() and stash in struct pidfs_exit_info so it can be retrieved even after the task has been reaped. Link: https://lore.kernel.org/r/20250305-work-pidfs-kill_on_last_close-v3-5-c8c3d8361705@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner --- diff --git a/fs/internal.h b/fs/internal.h index e7f02ae1e0981..c1e6d8b294cb2 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -325,6 +325,7 @@ struct stashed_operations { int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, struct path *path); void stashed_dentry_prune(struct dentry *dentry); +struct dentry *stashed_dentry_get(struct dentry **stashed); /** * path_mounted - check whether path is mounted * @path: path to check diff --git a/fs/libfs.c b/fs/libfs.c index 8444f5cc40641..cf5a267aafe43 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -2113,7 +2113,7 @@ struct timespec64 simple_inode_init_ts(struct inode *inode) } EXPORT_SYMBOL(simple_inode_init_ts); -static inline struct dentry *get_stashed_dentry(struct dentry **stashed) +struct dentry *stashed_dentry_get(struct dentry **stashed) { struct dentry *dentry; @@ -2215,7 +2215,7 @@ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info; /* See if dentry can be reused. */ - path->dentry = get_stashed_dentry(stashed); + path->dentry = stashed_dentry_get(stashed); if (path->dentry) { sops->put_data(data); goto out_path; diff --git a/fs/pidfs.c b/fs/pidfs.c index 282511a36fd96..c4e6527013e77 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -458,6 +458,47 @@ struct pid *pidfd_pid(const struct file *file) return file_inode(file)->i_private; } +/* + * We're called from release_task(). We know there's at least one + * reference to struct pid being held that won't be released until the + * task has been reaped which cannot happen until we're out of + * release_task(). + * + * If this struct pid is referred to by a pidfd then + * stashed_dentry_get() will return the dentry and inode for that struct + * pid. Since we've taken a reference on it there's now an additional + * reference from the exit path on it. Which is fine. We're going to put + * it again in a second and we know that the pid is kept alive anyway. + * + * Worst case is that we've filled in the info and immediately free the + * dentry and inode afterwards since the pidfd has been closed. Since + * pidfs_exit() currently is placed after exit_task_work() we know that + * it cannot be us aka the exiting task holding a pidfd to ourselves. + */ +void pidfs_exit(struct task_struct *tsk) +{ + struct dentry *dentry; + + might_sleep(); + + dentry = stashed_dentry_get(&task_pid(tsk)->stashed); + if (dentry) { + struct inode *inode = d_inode(dentry); + struct pidfs_exit_info *exit_info = &pidfs_i(inode)->exit_info; +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(tsk); + exit_info->cgroupid = cgroup_id(cgrp); + rcu_read_unlock(); +#endif + exit_info->exit_code = tsk->exit_code; + + dput(dentry); + } +} + static struct vfsmount *pidfs_mnt __ro_after_init; /* diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 7c830d0dec9a7..05e6f8f4a0264 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -6,6 +6,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); void pidfs_add_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid); +void pidfs_exit(struct task_struct *tsk); extern const struct dentry_operations pidfs_dentry_operations; #endif /* _LINUX_PID_FS_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 3485e5fc499e4..9916305e34d34 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -69,6 +69,7 @@ #include #include #include +#include #include @@ -249,6 +250,7 @@ repeat: dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); rcu_read_unlock(); + pidfs_exit(p); cgroup_release(p); write_lock_irq(&tasklist_lock);