From: Cheng-Yang Chou Date: Mon, 20 Apr 2026 09:28:47 +0000 (+0800) Subject: sched_ext: Deny SCX kfuncs to non-SCX struct_ops programs X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=2d2b026c3ea792a0c91d4acf4430d8b65bedf271;p=thirdparty%2Fkernel%2Fstable.git sched_ext: Deny SCX kfuncs to non-SCX struct_ops programs scx_kfunc_context_filter() currently allows non-SCX struct_ops programs (e.g. tcp_congestion_ops) to call SCX unlocked kfuncs. This is wrong for two reasons: - It is semantically incorrect: a TCP congestion control program has no business calling SCX kfuncs such as scx_bpf_kick_cpu(). - With CONFIG_EXT_SUB_SCHED=y, kfuncs like scx_bpf_kick_cpu() call scx_prog_sched(aux), which invokes bpf_prog_get_assoc_struct_ops(aux) and casts the result to struct sched_ext_ops * before reading ops->priv. For a non-SCX struct_ops program the returned pointer is the kdata of that struct_ops type, which is far smaller than sched_ext_ops, making the read an out-of-bounds access (confirmed with KASAN). Extend the filter to cover scx_kfunc_set_any and scx_kfunc_set_idle as well, and deny all SCX kfuncs for any struct_ops program that is not the SCX struct_ops. This addresses both issues: the semantic contract is enforced at the verifier level, and the runtime out-of-bounds access becomes unreachable. Fixes: d1d3c1c6ae36 ("sched_ext: Add verifier-time kfunc context filter") Suggested-by: Tejun Heo Signed-off-by: Cheng-Yang Chou Signed-off-by: Tejun Heo --- diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7edd46f3ac43..d66fea57ee69 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -9480,6 +9480,7 @@ BTF_KFUNCS_END(scx_kfunc_ids_any) static const struct btf_kfunc_id_set scx_kfunc_set_any = { .owner = THIS_MODULE, .set = &scx_kfunc_ids_any, + .filter = scx_kfunc_context_filter, }; /* @@ -9527,13 +9528,12 @@ static const u32 scx_kf_allow_flags[] = { }; /* - * Verifier-time filter for context-sensitive SCX kfuncs. Registered via the - * .filter field on each per-group btf_kfunc_id_set. The BPF core invokes this - * for every kfunc call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or + * Verifier-time filter for SCX kfuncs. Registered via the .filter field on + * each per-group btf_kfunc_id_set. The BPF core invokes this for every kfunc + * call in the registered hook (BPF_PROG_TYPE_STRUCT_OPS or * BPF_PROG_TYPE_SYSCALL), regardless of which set originally introduced the - * kfunc - so the filter must short-circuit on kfuncs it doesn't govern (e.g. - * scx_kfunc_ids_any) by falling through to "allow" when none of the - * context-sensitive sets contain the kfunc. + * kfunc - so the filter must short-circuit on kfuncs it doesn't govern by + * falling through to "allow" when none of the SCX sets contain the kfunc. */ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) { @@ -9542,18 +9542,21 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) bool in_enqueue = btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id); bool in_dispatch = btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id); bool in_cpu_release = btf_id_set8_contains(&scx_kfunc_ids_cpu_release, kfunc_id); + bool in_idle = btf_id_set8_contains(&scx_kfunc_ids_idle, kfunc_id); + bool in_any = btf_id_set8_contains(&scx_kfunc_ids_any, kfunc_id); u32 moff, flags; - /* Not a context-sensitive kfunc (e.g. from scx_kfunc_ids_any) - allow. */ - if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || in_cpu_release)) + /* Not an SCX kfunc - allow. */ + if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch || + in_cpu_release || in_idle || in_any)) return 0; /* SYSCALL progs (e.g. BPF test_run()) may call unlocked and select_cpu kfuncs. */ if (prog->type == BPF_PROG_TYPE_SYSCALL) - return (in_unlocked || in_select_cpu) ? 0 : -EACCES; + return (in_unlocked || in_select_cpu || in_idle || in_any) ? 0 : -EACCES; if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) - return -EACCES; + return (in_any || in_idle) ? 0 : -EACCES; /* * add_subprog_and_kfunc() collects all kfunc calls, including dead code @@ -9566,14 +9569,15 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id) return 0; /* - * Non-SCX struct_ops: only unlocked kfuncs are safe. The other - * context-sensitive kfuncs assume the rq lock is held by the SCX - * dispatch path, which doesn't apply to other struct_ops users. + * Non-SCX struct_ops: SCX kfuncs are not permitted. */ if (prog->aux->st_ops != &bpf_sched_ext_ops) - return in_unlocked ? 0 : -EACCES; + return -EACCES; /* SCX struct_ops: check the per-op allow list. */ + if (in_any || in_idle) + return 0; + moff = prog->aux->attach_st_ops_member_off; flags = scx_kf_allow_flags[SCX_MOFF_IDX(moff)]; diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index 443d12a3df67..c43d62d90e40 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -1467,6 +1467,7 @@ BTF_KFUNCS_END(scx_kfunc_ids_idle) static const struct btf_kfunc_id_set scx_kfunc_set_idle = { .owner = THIS_MODULE, .set = &scx_kfunc_ids_idle, + .filter = scx_kfunc_context_filter, }; /* diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h index dc35f850481e..8d169d3bbdf9 100644 --- a/kernel/sched/ext_idle.h +++ b/kernel/sched/ext_idle.h @@ -12,6 +12,7 @@ struct sched_ext_ops; +extern struct btf_id_set8 scx_kfunc_ids_idle; extern struct btf_id_set8 scx_kfunc_ids_select_cpu; void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops);