]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
exec: introduce struct task_exec_state
author: Christian Brauner (Amutable) <brauner@kernel.org>
Wed, 20 May 2026 21:48:53 +0000 (23:48 +0200)
committer: Christian Brauner <brauner@kernel.org>
Tue, 26 May 2026 09:02:01 +0000 (11:02 +0200)
Introduce struct task_exec_state, a per-task RCU-protected structure
that holds the dumpable mode and the user namespace and stays attached
to the task for its full lifetime.

task_exec_state_rcu() is the canonical reader: asserts RCU or
task_lock is held, WARNs on a NULL state, returns the
rcu_dereference()'d pointer.

Reviewed-by: Jann Horn <jannh@google.com>
Link: https://patch.msgid.link/20260520-work-task_exec_state-v3-2-69f895bc1385@kernel.org
Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
include/linux/sched.h
include/linux/sched/exec_state.h [new file with mode: 0644]
kernel/Makefile
kernel/exec_state.c [new file with mode: 0644]

index ee06cba5c6f538aac677b2c0847f93e44c5ab18a..6674dbf960b5d4ec46adf97e0e894e73e6d72229 100644 (file)
@@ -962,6 +962,8 @@ struct task_struct {
        struct mm_struct                *mm;
        struct mm_struct                *active_mm;
 
+       struct task_exec_state __rcu    *exec_state;
+
        int                             exit_state;
        int                             exit_code;
        int                             exit_signal;
diff --git a/include/linux/sched/exec_state.h b/include/linux/sched/exec_state.h
new file mode 100644 (file)
index 0000000..dc5a795
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+#ifndef _LINUX_SCHED_EXEC_STATE_H
+#define _LINUX_SCHED_EXEC_STATE_H
+
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/sched/coredump.h>
+#include <linux/user_namespace.h>
+
+/*
+ * Per-task exec state, reference counted and freed after an RCU grace
+ * period.
+ *
+ * @count:    reference count; set to 1 by alloc_task_exec_state(), the
+ *            final put_task_exec_state() queues the object for freeing.
+ * @dumpable: TASK_DUMPABLE_* mode; the accessors in kernel/exec_state.c
+ *            use READ_ONCE()/WRITE_ONCE() on it.
+ * @user_ns:  user namespace; a reference is taken on allocation and
+ *            dropped when the object is freed.
+ * @rcu:      RCU head passed to call_rcu() on the final put.
+ */
+struct task_exec_state {
+       refcount_t              count;
+       enum task_dumpable      dumpable;
+       struct user_namespace   *user_ns;
+       struct rcu_head         rcu;
+};
+
+/* Allocate a state holding one reference; returns NULL on failure. */
+struct task_exec_state *alloc_task_exec_state(struct user_namespace *user_ns);
+/* Drop one reference; a NULL @exec_state is ignored. */
+void put_task_exec_state(struct task_exec_state *exec_state);
+/* Reader; lockdep expects an RCU read-side section or tsk->alloc_lock. */
+struct task_exec_state *task_exec_state_rcu(const struct task_struct *tsk);
+/* Install @exec_state on @tsk and return the previously installed one. */
+struct task_exec_state *task_exec_state_replace(struct task_struct *tsk,
+                                               struct task_exec_state *exec_state);
+/* Dumpable accessors: the setter acts on current, the getter on @task. */
+void task_exec_state_set_dumpable(enum task_dumpable value);
+enum task_dumpable task_exec_state_get_dumpable(struct task_struct *task);
+/* Give @tsk a fresh copy of current's state; 0 or -ENOMEM. */
+int task_exec_state_copy(struct task_struct *tsk);
+/* Boot-time setup of the slab cache backing the allocations. */
+void __init exec_state_init(void);
+
+/* Scope-based cleanup helper: __free(put_task_exec_state). */
+DEFINE_FREE(put_task_exec_state, struct task_exec_state *, put_task_exec_state(_T))
+
+#endif /* _LINUX_SCHED_EXEC_STATE_H */
index 6785982013dced54d7731dd9404bb4bf5899b635..1e1a31673577d6ab58a0ca11013ff52531b421a5 100644 (file)
@@ -3,7 +3,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = fork.o exec_domain.o panic.o \
+obj-y     = fork.o exec_domain.o exec_state.o panic.o \
            cpu.o exit.o softirq.o resource.o \
            sysctl.o capability.o ptrace.o user.o \
            signal.o sys.o umh.o workqueue.o pid.o task_work.o \
diff --git a/kernel/exec_state.c b/kernel/exec_state.c
new file mode 100644 (file)
index 0000000..1e0b59f
--- /dev/null
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/sched.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/exec_state.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+
+/* Slab cache for struct task_exec_state; created in exec_state_init(). */
+static struct kmem_cache *task_exec_state_cachep;
+
+/*
+ * RCU callback queued by put_task_exec_state(): drop the user namespace
+ * reference held by the state and return the object to its slab cache.
+ */
+static void __free_task_exec_state(struct rcu_head *rcu)
+{
+       struct task_exec_state *state;
+
+       state = container_of(rcu, struct task_exec_state, rcu);
+       put_user_ns(state->user_ns);
+       kmem_cache_free(task_exec_state_cachep, state);
+}
+
+/*
+ * Drop one reference on @exec_state.  NULL is accepted and ignored.
+ * When the last reference goes away the object is freed through
+ * call_rcu(), i.e. only after a grace period has elapsed.
+ */
+void put_task_exec_state(struct task_exec_state *exec_state)
+{
+       if (!exec_state)
+               return;
+       if (!refcount_dec_and_test(&exec_state->count))
+               return;
+
+       call_rcu(&exec_state->rcu, __free_task_exec_state);
+}
+
+/*
+ * Allocate a new exec state carrying a single reference.  The dumpable
+ * mode starts out as TASK_DUMPABLE_OFF and a reference on @user_ns is
+ * taken for the lifetime of the object.
+ *
+ * Returns the new state, or NULL if the allocation failed.
+ */
+struct task_exec_state *alloc_task_exec_state(struct user_namespace *user_ns)
+{
+       struct task_exec_state *state;
+
+       state = kmem_cache_alloc(task_exec_state_cachep, GFP_KERNEL);
+       if (state) {
+               refcount_set(&state->count, 1);
+               state->dumpable = TASK_DUMPABLE_OFF;
+               state->user_ns = get_user_ns(user_ns);
+       }
+       return state;
+}
+
+/*
+ * Return @tsk's exec_state for a reader.
+ *
+ * The caller must either be inside an RCU read-side critical section or
+ * hold tsk->alloc_lock; rcu_dereference_check() lets lockdep verify this.
+ * A NULL state triggers a one-time warning and is still returned to the
+ * caller as NULL.
+ */
+struct task_exec_state *task_exec_state_rcu(const struct task_struct *tsk)
+{
+       struct task_exec_state *exec_state;
+
+       exec_state = rcu_dereference_check(tsk->exec_state,
+                                          lockdep_is_held(&tsk->alloc_lock));
+       WARN_ON_ONCE(!exec_state);
+       return exec_state;
+}
+
+/*
+ * Install @exec_state as @tsk's exec state and return the one that was
+ * installed before.  No reference is dropped here; the returned pointer
+ * is handed back to the caller.
+ */
+struct task_exec_state *task_exec_state_replace(struct task_struct *tsk,
+                                               struct task_exec_state *exec_state)
+{
+       /*
+        * Updates must hold both locks so callers needing a consistent
+        * snapshot of mm + dumpability are covered.
+        */
+       lockdep_assert_held(&tsk->alloc_lock);
+       lockdep_assert_held_write(&tsk->signal->exec_update_lock);
+
+       /* Lock requirements are asserted above, hence the constant "true". */
+       return rcu_replace_pointer(tsk->exec_state, exec_state, true);
+}
+
+/*
+ * Used on the clone path without CLONE_VM: give @tsk its own, freshly
+ * allocated exec_state that inherits current's dumpable mode and takes
+ * a new reference on current's user_ns.  CLONE_VM siblings share the
+ * state by refcount via copy_exec_state() in fork.c instead; besides
+ * this function only execve() allocates a state.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation failed.
+ */
+int task_exec_state_copy(struct task_struct *tsk)
+{
+       struct task_exec_state *parent, *child;
+
+       parent = rcu_dereference_protected(current->exec_state, true);
+
+       child = alloc_task_exec_state(parent->user_ns);
+       if (!child)
+               return -ENOMEM;
+
+       child->dumpable = READ_ONCE(parent->dumpable);
+       /* Publish only after the copy has been fully initialised. */
+       rcu_assign_pointer(tsk->exec_state, child);
+       return 0;
+}
+
+/*
+ * Set the TASK_DUMPABLE_* mode in current->exec_state.  Every caller
+ * (commit_creds, begin_new_exec, prctl(PR_SET_DUMPABLE)) operates on
+ * the running task, so ->exec_state is allocated and cannot be swapped
+ * out underneath us.
+ *
+ * A @value above TASK_DUMPABLE_ROOT warns once and is stored as
+ * TASK_DUMPABLE_OFF instead.
+ */
+void task_exec_state_set_dumpable(enum task_dumpable value)
+{
+       struct task_exec_state *state =
+               rcu_dereference_protected(current->exec_state, true);
+
+       if (WARN_ON_ONCE(value > TASK_DUMPABLE_ROOT))
+               value = TASK_DUMPABLE_OFF;
+
+       WRITE_ONCE(state->dumpable, value);
+}
+
+/*
+ * Read the TASK_DUMPABLE_* mode of @task.  The exec_state pointer is
+ * only dereferenced inside an RCU read-side critical section taken
+ * here, so the caller needs no locking of its own for the lookup.
+ *
+ * NOTE(review): unlike task_exec_state_rcu() there is no NULL check;
+ * this relies on @task always having an exec_state - confirm for all
+ * callers.
+ */
+enum task_dumpable task_exec_state_get_dumpable(struct task_struct *task)
+{
+       enum task_dumpable mode;
+
+       rcu_read_lock();
+       mode = READ_ONCE(rcu_dereference(task->exec_state)->dumpable);
+       rcu_read_unlock();
+
+       return mode;
+}
+
+/*
+ * Create the slab cache used for struct task_exec_state allocations.
+ * SLAB_PANIC is passed, so the result is not checked for NULL here.
+ */
+void __init exec_state_init(void)
+{
+       const slab_flags_t flags = SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT;
+
+       task_exec_state_cachep = kmem_cache_create("task_exec_state",
+                                                  sizeof(struct task_exec_state),
+                                                  0, flags, NULL);
+}