From: Viktor Malik
Date: Mon, 9 Mar 2026 11:23:56 +0000 (+0100)
Subject: bpf: Always allow sleepable programs on syscalls
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=16d9c5660692d6f0e6aba367274de2b6dfd4343c;p=thirdparty%2Flinux.git

bpf: Always allow sleepable programs on syscalls

Sleepable BPF programs can only be attached to selected functions. For
convenience, the error injection list was originally used, which
contains syscalls and several other functions.

When error injection is disabled (CONFIG_FUNCTION_ERROR_INJECTION=n),
that list is empty and sleepable tracing programs are effectively
unavailable. In such a case, at least enable sleepable programs on
syscalls. For discussion why syscalls were chosen, see [1].

To detect that a function is a syscall handler, we check for
arch-specific prefixes for the most common architectures. Unfortunately,
the prefixes are hard-coded in arch syscall code so we need to hard-code
them, too.

[1] https://lore.kernel.org/bpf/CAADnVQK6qP8izg+k9yV0vdcT-+=axtFQ2fKw7D-2Ei-V6WS5Dw@mail.gmail.com/

Signed-off-by: Viktor Malik
Acked-by: Kumar Kartikeya Dwivedi
Acked-by: Leon Hwang
Link: https://lore.kernel.org/r/2704a8512746655037e3c02b471b31bd0d76c8db.1773055375.git.vmalik@redhat.com
Signed-off-by: Alexei Starovoitov
---

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7aa06f534cb2f..a52e57f3eb80f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -24961,6 +24961,8 @@ static int check_attach_modify_return(unsigned long addr, const char *func_name)
 	return -EINVAL;
 }
 
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+
 /* list of non-sleepable functions that are otherwise on
  * ALLOW_ERROR_INJECTION list
  */
@@ -24982,6 +24984,57 @@ static int check_non_sleepable_error_inject(u32 btf_id)
 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
 }
 
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+	/* fentry/fexit/fmod_ret progs can be sleepable if they are
+	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+	 */
+	if (!check_non_sleepable_error_inject(btf_id) &&
+	    within_error_injection_list(addr))
+		return 0;
+
+	return -EINVAL;
+}
+
+#else
+
+/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
+ * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
+ * but that just compares two concrete function names.
+ */
+static bool has_arch_syscall_prefix(const char *func_name)
+{
+#if defined(__x86_64__)
+	return !strncmp(func_name, "__x64_", 6);
+#elif defined(__i386__)
+	return !strncmp(func_name, "__ia32_", 7);
+#elif defined(__s390x__)
+	return !strncmp(func_name, "__s390x_", 8);
+#elif defined(__aarch64__)
+	return !strncmp(func_name, "__arm64_", 8);
+#elif defined(__riscv)
+	return !strncmp(func_name, "__riscv_", 8);
+#elif defined(__powerpc__) || defined(__powerpc64__)
+	return !strncmp(func_name, "sys_", 4);
+#elif defined(__loongarch__)
+	return !strncmp(func_name, "sys_", 4);
+#else
+	return false;
+#endif
+}
+
+/* Without error injection, allow sleepable progs on syscalls. */
+
+static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
+{
+	if (has_arch_syscall_prefix(func_name))
+		return 0;
+
+	return -EINVAL;
+}
+
+#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
+
 int bpf_check_attach_target(struct bpf_verifier_log *log,
 			    const struct bpf_prog *prog,
 			    const struct bpf_prog *tgt_prog,
@@ -25261,12 +25314,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			ret = -EINVAL;
 			switch (prog->type) {
 			case BPF_PROG_TYPE_TRACING:
-
-				/* fentry/fexit/fmod_ret progs can be sleepable if they are
-				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
-				 */
-				if (!check_non_sleepable_error_inject(btf_id) &&
-				    within_error_injection_list(addr))
+				if (!check_attach_sleepable(btf_id, addr, tname))
 					ret = 0;
 				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
 				 * in the fmodret id set with the KF_SLEEPABLE flag.