git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bpf: Add sleepable support for raw tracepoint programs
author: Mykyta Yatsenko <yatsenko@meta.com>
Wed, 22 Apr 2026 19:41:06 +0000 (12:41 -0700)
committer: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Wed, 22 Apr 2026 20:44:24 +0000 (22:44 +0200)
Rework __bpf_trace_run() to support sleepable BPF programs by using
explicit RCU flavor selection, following the uprobe_prog_run() pattern.

For sleepable programs, use rcu_read_lock_tasks_trace() for lifetime
protection with migrate_disable(). For non-sleepable programs, use the
regular rcu_read_lock_dont_migrate().

Remove the preempt_disable_notrace/preempt_enable_notrace pair from
the faultable tracepoint BPF probe wrapper in bpf_probe.h, since
migration protection and RCU locking are now handled per-program
inside __bpf_trace_run().

Adapt bpf_prog_test_run_raw_tp() for sleepable programs: reject
BPF_F_TEST_RUN_ON_CPU since sleepable programs cannot run in hardirq
or preempt-disabled context, and call __bpf_prog_test_run_raw_tp()
directly instead of via smp_call_function_single(). Rework
__bpf_prog_test_run_raw_tp() to select the RCU flavor per program and
add a per-program recursion context guard for private stack safety.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/20260422-sleepable_tracepoints-v13-1-99005dff21ef@meta.com
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
include/trace/bpf_probe.h
kernel/trace/bpf_trace.c
net/bpf/test_run.c

index 9391d54d3f124ab0d56ec57445cfc79baeffc28c..d1de8f9aa07fb76e9ee8037ce43099efb95b05d5 100644 (file)
@@ -58,9 +58,7 @@ static notrace void                                                   \
 __bpf_trace_##call(void *__data, proto)                                        \
 {                                                                      \
        might_fault();                                                  \
-       preempt_disable_notrace();                                      \
        CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args));        \
-       preempt_enable_notrace();                                       \
 }
 
 #undef DECLARE_EVENT_SYSCALL_CLASS
index e916f0ccbed96156f03e530ad20efe0a23cb5f9d..7276c72c1d31e7ce7ef3397f680c7e1673e04b55 100644 (file)
@@ -2072,11 +2072,19 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
 static __always_inline
 void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
 {
+       struct srcu_ctr __percpu *scp = NULL;
        struct bpf_prog *prog = link->link.prog;
+       bool sleepable = prog->sleepable;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_trace_run_ctx run_ctx;
 
-       rcu_read_lock_dont_migrate();
+       if (sleepable) {
+               scp = rcu_read_lock_tasks_trace();
+               migrate_disable();
+       } else {
+               rcu_read_lock_dont_migrate();
+       }
+
        if (unlikely(!bpf_prog_get_recursion_context(prog))) {
                bpf_prog_inc_misses_counter(prog);
                goto out;
@@ -2085,12 +2093,18 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
        run_ctx.bpf_cookie = link->cookie;
        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 
-       (void) bpf_prog_run(prog, args);
+       (void)bpf_prog_run(prog, args);
 
        bpf_reset_run_ctx(old_run_ctx);
 out:
        bpf_prog_put_recursion_context(prog);
-       rcu_read_unlock_migrate();
+
+       if (sleepable) {
+               migrate_enable();
+               rcu_read_unlock_tasks_trace(scp);
+       } else {
+               rcu_read_unlock_migrate();
+       }
 }
 
 #define UNPACK(...)                    __VA_ARGS__
index 2bc04feadfabedd4584431905c513af5e4882803..c9aea7052ba7290da925b2f8473dbc2c0d53a955 100644 (file)
@@ -748,14 +748,35 @@ static void
 __bpf_prog_test_run_raw_tp(void *data)
 {
        struct bpf_raw_tp_test_run_info *info = data;
+       struct srcu_ctr __percpu *scp = NULL;
        struct bpf_trace_run_ctx run_ctx = {};
        struct bpf_run_ctx *old_run_ctx;
 
        old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 
-       rcu_read_lock();
+       if (info->prog->sleepable) {
+               scp = rcu_read_lock_tasks_trace();
+               migrate_disable();
+       } else {
+               rcu_read_lock();
+       }
+
+       if (unlikely(!bpf_prog_get_recursion_context(info->prog))) {
+               bpf_prog_inc_misses_counter(info->prog);
+               goto out;
+       }
+
        info->retval = bpf_prog_run(info->prog, info->ctx);
-       rcu_read_unlock();
+
+out:
+       bpf_prog_put_recursion_context(info->prog);
+
+       if (info->prog->sleepable) {
+               migrate_enable();
+               rcu_read_unlock_tasks_trace(scp);
+       } else {
+               rcu_read_unlock();
+       }
 
        bpf_reset_run_ctx(old_run_ctx);
 }
@@ -783,6 +804,13 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
        if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
                return -EINVAL;
 
+       /*
+        * Sleepable programs cannot run with preemption disabled or in
+        * hardirq context (smp_call_function_single), reject the flag.
+        */
+       if (prog->sleepable && (kattr->test.flags & BPF_F_TEST_RUN_ON_CPU))
+               return -EINVAL;
+
        if (ctx_size_in) {
                info.ctx = memdup_user(ctx_in, ctx_size_in);
                if (IS_ERR(info.ctx))
@@ -791,24 +819,31 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
                info.ctx = NULL;
        }
 
+       info.retval = 0;
        info.prog = prog;
 
-       current_cpu = get_cpu();
-       if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
-           cpu == current_cpu) {
+       if (prog->sleepable) {
                __bpf_prog_test_run_raw_tp(&info);
-       } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
-               /* smp_call_function_single() also checks cpu_online()
-                * after csd_lock(). However, since cpu is from user
-                * space, let's do an extra quick check to filter out
-                * invalid value before smp_call_function_single().
-                */
-               err = -ENXIO;
        } else {
-               err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
-                                              &info, 1);
+               current_cpu = get_cpu();
+               if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
+                   cpu == current_cpu) {
+                       __bpf_prog_test_run_raw_tp(&info);
+               } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+                       /*
+                        * smp_call_function_single() also checks cpu_online()
+                        * after csd_lock(). However, since cpu is from user
+                        * space, let's do an extra quick check to filter out
+                        * invalid value before smp_call_function_single().
+                        */
+                       err = -ENXIO;
+               } else {
+                       err = smp_call_function_single(cpu,
+                                                      __bpf_prog_test_run_raw_tp,
+                                                      &info, 1);
+               }
+               put_cpu();
        }
-       put_cpu();
 
        if (!err &&
            copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))