]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
unwind_user/deferred: Add unwind_user_faultable()
authorSteven Rostedt <rostedt@goodmis.org>
Tue, 29 Jul 2025 18:23:06 +0000 (14:23 -0400)
committerSteven Rostedt (Google) <rostedt@goodmis.org>
Tue, 29 Jul 2025 18:46:07 +0000 (14:46 -0400)
Add a new API called unwind_user_faultable() to retrieve a user space
callstack. The difference between this user space stack tracer and the
current user space stack tracer is that this one must be called from
faultable context, as it may use routines to access user space data that
needs to be faulted in.

It can be safely called when entering or exiting a system call, as the
code can still be faulted in there.

This code is based on Josh Poimboeuf's deferred unwinding work:

Link: https://lore.kernel.org/all/6052e8487746603bdb29b65f4033e739092d9925.1737511963.git.jpoimboe@kernel.org/
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Indu Bhagat <indu.bhagat@oracle.com>
Cc: "Jose E. Marchesi" <jemarch@gnu.org>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Sam James <sam@gentoo.org>
Link: https://lore.kernel.org/20250729182405.147896868@kernel.org
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
include/linux/sched.h
include/linux/unwind_deferred.h [new file with mode: 0644]
include/linux/unwind_deferred_types.h [new file with mode: 0644]
kernel/fork.c
kernel/unwind/Makefile
kernel/unwind/deferred.c [new file with mode: 0644]

index 4f78a64beb52c425e46c66427b2f608ba30b3d99..59fdf7d9bb1eaf86d3afd7069d86a2551b6ab8be 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/rv.h>
 #include <linux/uidgid_types.h>
 #include <linux/tracepoint-defs.h>
+#include <linux/unwind_deferred_types.h>
 #include <asm/kmap_size.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
@@ -1654,6 +1655,10 @@ struct task_struct {
        struct user_event_mm            *user_event_mm;
 #endif
 
+#ifdef CONFIG_UNWIND_USER
+       struct unwind_task_info         unwind_info;
+#endif
+
        /* CPU-specific state of this task: */
        struct thread_struct            thread;
 
diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h
new file mode 100644 (file)
index 0000000..a5f6e8f
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_DEFERRED_H
+#define _LINUX_UNWIND_USER_DEFERRED_H
+
+#include <linux/unwind_user.h>
+#include <linux/unwind_deferred_types.h>
+
+#ifdef CONFIG_UNWIND_USER
+
+void unwind_task_init(struct task_struct *task);       /* zeroes task->unwind_info */
+void unwind_task_free(struct task_struct *task);       /* frees the entries buffer */
+
+int unwind_user_faultable(struct unwind_stacktrace *trace);
+
+#else /* !CONFIG_UNWIND_USER: no-op stubs, unwinding reports -ENOSYS */
+
+static inline void unwind_task_init(struct task_struct *task) {}
+static inline void unwind_task_free(struct task_struct *task) {}
+
+static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; }
+
+#endif /* !CONFIG_UNWIND_USER */
+
+#endif /* _LINUX_UNWIND_USER_DEFERRED_H */
diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
new file mode 100644 (file)
index 0000000..aa32db5
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
+
+struct unwind_task_info {
+       unsigned long           *entries;       /* unwind buffer, NULL until allocated */
+};
+
+#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
index 1ee8eb11f38bae1d2eb6de9494aea94b7a19e6c3..3341d50c61f2227246ab765037fa07f3d6465b5f 100644 (file)
 #include <uapi/linux/pidfd.h>
 #include <linux/pidfs.h>
 #include <linux/tick.h>
+#include <linux/unwind_deferred.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -732,6 +733,7 @@ void __put_task_struct(struct task_struct *tsk)
        WARN_ON(refcount_read(&tsk->usage));
        WARN_ON(tsk == current);
 
+       unwind_task_free(tsk);
        sched_ext_free(tsk);
        io_uring_free(tsk);
        cgroup_free(tsk);
@@ -2135,6 +2137,8 @@ __latent_entropy struct task_struct *copy_process(
        p->bpf_ctx = NULL;
 #endif
 
+       unwind_task_init(p);
+
        /* Perform scheduler related setup. Assign this task to a CPU. */
        retval = sched_fork(clone_flags, p);
        if (retval)
index 349ce36775268115a1925d27bd65887210db2b86..eae37bea54fdbe37df7cd22ad0076d0dd4e7b668 100644 (file)
@@ -1 +1 @@
- obj-$(CONFIG_UNWIND_USER) += user.o
+ obj-$(CONFIG_UNWIND_USER)     += user.o deferred.o
diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
new file mode 100644 (file)
index 0000000..a0badbe
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Deferred user space unwinding
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/unwind_deferred.h>
+
+#define UNWIND_MAX_ENTRIES 512 /* slots in the per-task entries buffer */
+
+/**
+ * unwind_user_faultable - Produce a user stacktrace in faultable context
+ * @trace: The descriptor that will store the user stacktrace
+ *
+ * Must be called from a known faultable context (usually when entering or
+ * exiting user space). @trace->entries is pointed at a per-task buffer,
+ * allocated on first use, which unwind_user() is then asked to fill.
+ *
+ * Return: 0 once unwind_user() has been called (its result is not checked),
+ *         -EINVAL if the task is exiting, -ENOMEM if allocation fails
+ */
+int unwind_user_faultable(struct unwind_stacktrace *trace)
+{
+       unsigned long *buf = current->unwind_info.entries;
+
+       /* This API is only valid from a context that can take page faults */
+       might_fault();
+
+       if (current->flags & PF_EXITING)
+               return -EINVAL;
+
+       if (!buf) {
+               buf = kmalloc_array(UNWIND_MAX_ENTRIES, sizeof(*buf),
+                                   GFP_KERNEL);
+               if (!buf)
+                       return -ENOMEM;
+               current->unwind_info.entries = buf;
+       }
+
+       trace->nr = 0;
+       trace->entries = buf;
+       unwind_user(trace, UNWIND_MAX_ENTRIES);
+
+       return 0;
+}
+
+/* Give @task an empty unwind state; copy_process() calls this for new tasks */
+void unwind_task_init(struct task_struct *task)
+{
+       /* A NULL entries pointer makes unwind_user_faultable() allocate it */
+       memset(&task->unwind_info, 0, sizeof(task->unwind_info));
+}
+
+/* Counterpart of unwind_task_init(), called from __put_task_struct() */
+void unwind_task_free(struct task_struct *task)
+{
+       /* kfree() accepts NULL, so a task that never unwound needs no check */
+       kfree(task->unwind_info.entries);
+}