git.ipfire.org Git - thirdparty/linux.git/commitdiff
bpf: Always allow sleepable programs on syscalls
author Viktor Malik <vmalik@redhat.com>
Mon, 9 Mar 2026 11:23:56 +0000 (12:23 +0100)
committer Alexei Starovoitov <ast@kernel.org>
Mon, 9 Mar 2026 16:28:42 +0000 (09:28 -0700)
Sleepable BPF programs can only be attached to selected functions. For
convenience, the error injection list was originally used, which
contains syscalls and several other functions.

When error injection is disabled (CONFIG_FUNCTION_ERROR_INJECTION=n),
that list is empty and sleepable tracing programs are effectively
unavailable. In such a case, at least enable sleepable programs on
syscalls. For a discussion of why syscalls were chosen, see [1].

To detect that a function is a syscall handler, we check for
arch-specific prefixes for the most common architectures. Unfortunately,
the prefixes are hard-coded in arch syscall code so we need to hard-code
them, too.

[1] https://lore.kernel.org/bpf/CAADnVQK6qP8izg+k9yV0vdcT-+=axtFQ2fKw7D-2Ei-V6WS5Dw@mail.gmail.com/

Signed-off-by: Viktor Malik <vmalik@redhat.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Leon Hwang <leon.hwang@linux.dev>
Link: https://lore.kernel.org/r/2704a8512746655037e3c02b471b31bd0d76c8db.1773055375.git.vmalik@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/verifier.c

index 7aa06f534cb2f3fb736a5f70ec349f45360298f9..a52e57f3eb80f4d57b3e134bc6988c6cca302460 100644 (file)
@@ -24961,6 +24961,8 @@ static int check_attach_modify_return(unsigned long addr, const char *func_name)
        return -EINVAL;
 }
 
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+
 /* list of non-sleepable functions that are otherwise on
  * ALLOW_ERROR_INJECTION list
  */
@@ -24982,6 +24984,57 @@ static int check_non_sleepable_error_inject(u32 btf_id)
        return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
 }
 
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+       /* fentry/fexit/fmod_ret progs can be sleepable if they are
+        * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+        *
+        * This is the CONFIG_FUNCTION_ERROR_INJECTION variant: it returns 0
+        * when check_non_sleepable_error_inject(btf_id) is false and
+        * within_error_injection_list(addr) is true, and -EINVAL otherwise.
+        * func_name is not consulted here.
+        */
+       if (!check_non_sleepable_error_inject(btf_id) &&
+           within_error_injection_list(addr))
+               return 0;
+
+       return -EINVAL;
+}
+
+#else
+
+/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
+ * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
+ * but that just compares two concrete function names.
+ *
+ * Returns true when func_name starts with the syscall handler prefix chosen
+ * for the architecture being compiled for (selected via compiler-predefined
+ * macros), and false otherwise. Architectures not listed below always get
+ * false. The length passed to strncmp() is the length of the prefix literal,
+ * so only the leading characters of func_name are compared.
+ */
+static bool has_arch_syscall_prefix(const char *func_name)
+{
+#if defined(__x86_64__)
+       return !strncmp(func_name, "__x64_", 6);
+#elif defined(__i386__)
+       return !strncmp(func_name, "__ia32_", 7);
+#elif defined(__s390x__)
+       return !strncmp(func_name, "__s390x_", 8);
+#elif defined(__aarch64__)
+       return !strncmp(func_name, "__arm64_", 8);
+#elif defined(__riscv)
+       return !strncmp(func_name, "__riscv_", 8);
+#elif defined(__powerpc__) || defined(__powerpc64__)
+       return !strncmp(func_name, "sys_", 4);
+#elif defined(__loongarch__)
+       return !strncmp(func_name, "sys_", 4);
+#else
+       return false;
+#endif
+}
+
+/* Without error injection, allow sleepable progs on syscalls.
+ *
+ * Returns 0 when has_arch_syscall_prefix(func_name) is true and -EINVAL
+ * otherwise. btf_id and addr are not consulted in this variant.
+ */
+
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+       if (has_arch_syscall_prefix(func_name))
+               return 0;
+
+       return -EINVAL;
+}
+
+#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
+
 int bpf_check_attach_target(struct bpf_verifier_log *log,
                            const struct bpf_prog *prog,
                            const struct bpf_prog *tgt_prog,
@@ -25261,12 +25314,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
                        ret = -EINVAL;
                        switch (prog->type) {
                        case BPF_PROG_TYPE_TRACING:
-
-                               /* fentry/fexit/fmod_ret progs can be sleepable if they are
-                                * attached to ALLOW_ERROR_INJECTION and are not in denylist.
-                                */
-                               if (!check_non_sleepable_error_inject(btf_id) &&
-                                   within_error_injection_list(addr))
+                               if (!check_attach_sleepable(btf_id, addr, tname))
                                        ret = 0;
                                /* fentry/fexit/fmod_ret progs can also be sleepable if they are
                                 * in the fmodret id set with the KF_SLEEPABLE flag.