git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched_ext: Refuse cross-task select_cpu_from_kfunc calls
author Tejun Heo <tj@kernel.org>
Sat, 25 Apr 2026 00:31:36 +0000 (14:31 -1000)
committer Tejun Heo <tj@kernel.org>
Sat, 25 Apr 2026 00:31:36 +0000 (14:31 -1000)
select_cpu_from_kfunc() skipped pi_lock for @p when called from
ops.select_cpu() or another rq-locked SCX op, assuming the held lock
protects @p. scx_bpf_select_cpu_dfl() / __scx_bpf_select_cpu_and() accept an
arbitrary KF_RCU task_struct, so a caller in e.g. ops.select_cpu(p1) or
ops.enqueue(p1) can pass some other p2 - the held pi_lock / rq lock is p1's,
not p2's - and reading p2->cpus_ptr / nr_cpus_allowed races with
set_cpus_allowed_ptr() and migrate_disable_switch() on another CPU.

Abort the scheduler on cross-task calls in both branches: for
ops.select_cpu() use scx_kf_arg_task_ok() to verify @p is the wake-up
task recorded in current->scx.kf_tasks[] by SCX_CALL_OP_TASK_RET();
for other rq-locked SCX ops compare task_rq(p) against scx_locked_rq().

v2: Switch the in_select_cpu cross-task check from direct_dispatch_task
    comparison to scx_kf_arg_task_ok(). The former spuriously rejects when
    ops.select_cpu() calls scx_bpf_dsq_insert() first, then calls
    scx_bpf_select_cpu_*() on the same task. (Andrea Righi)

Fixes: 0022b328504d ("sched_ext: Decouple kfunc unlocked-context check from kf_mask")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andrea Righi <arighi@nvidia.com>
kernel/sched/ext_idle.c

index c43d62d90e40fc2df9e95fb0d9fe2acf200b1a3b..7468560a6d80414504b6e1266814c2216706d0cd 100644 (file)
@@ -927,14 +927,24 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
         * Accessing p->cpus_ptr / p->nr_cpus_allowed needs either @p's rq
         * lock or @p's pi_lock. Three cases:
         *
-        *  - inside ops.select_cpu(): try_to_wake_up() holds @p's pi_lock.
+        *  - inside ops.select_cpu(): try_to_wake_up() holds the wake-up
+        *    task's pi_lock; the wake-up task is recorded in kf_tasks[0]
+        *    by SCX_CALL_OP_TASK_RET().
         *  - other rq-locked SCX op: scx_locked_rq() points at the held rq.
         *  - truly unlocked (UNLOCKED ops, SYSCALL, non-SCX struct_ops):
         *    nothing held, take pi_lock ourselves.
+        *
+        * In the first two cases, BPF schedulers may pass an arbitrary task
+        * that the held lock doesn't cover. Refuse those.
         */
        if (this_rq()->scx.in_select_cpu) {
+               if (!scx_kf_arg_task_ok(sch, p))
+                       return -EINVAL;
                lockdep_assert_held(&p->pi_lock);
-       } else if (!scx_locked_rq()) {
+       } else if (scx_locked_rq()) {
+               if (task_rq(p) != scx_locked_rq())
+                       goto cross_task;
+       } else {
                raw_spin_lock_irqsave(&p->pi_lock, irq_flags);
                we_locked = true;
        }
@@ -960,6 +970,11 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
                raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags);
 
        return cpu;
+
+cross_task:
+       scx_error(sch, "select_cpu kfunc called cross-task on %s[%d]",
+                 p->comm, p->pid);
+       return -EINVAL;
 }
 
 /**