From: Tejun Heo Date: Sat, 25 Apr 2026 00:31:35 +0000 (-1000) Subject: sched_ext: Don't disable tasks in scx_sub_enable_workfn() abort path X-Git-Tag: v7.1-rc2~27^2~13 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=21a5a97ba47842ef0c52d6c89e501dce27806550;p=thirdparty%2Fkernel%2Flinux.git sched_ext: Don't disable tasks in scx_sub_enable_workfn() abort path scx_sub_enable_workfn()'s prep loop calls __scx_init_task(sch, p, false) without transitioning task state, then sets SCX_TASK_SUB_INIT. If prep fails partway, the abort path runs __scx_disable_and_exit_task(sch, p) on the marked tasks. Task state is still the parent's ENABLED, so that dispatches to the SCX_TASK_ENABLED arm and calls scx_disable_task(sch, p) - i.e. child->ops.disable() - for tasks on which child->ops.enable() never ran. A BPF sub-scheduler allocating per-task state in enable/freeing in disable would operate on uninitialized state. The dying-task branch in scx_disable_and_exit_task() has the same problem, and scx_enabling_sub_sched was cleared before the abort cleanup loop - a task exiting during cleanup tripped the WARN and skipped both ops.exit_task and the SCX_TASK_SUB_INIT clear, leaking per-task resources and leaving the task stuck. Introduce scx_sub_init_cancel_task() that calls ops.exit_task with cancelled=true - matching what the top-level init path does when init_task itself returns -errno. Use it in the abort loop and in the dying-task branch. scx_enabling_sub_sched now stays set until the abort loop finishes clearing SUB_INIT, so concurrent exits hitting the dying-task branch can still find @sch. That branch also clears SCX_TASK_SUB_INIT unconditionally when seen, leaving the task unmarked even if the WARN fires. Fixes: 337ec00b1d9c ("sched_ext: Implement cgroup sub-sched enabling and disabling") Reported-by: Chris Mason Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f8500ce37b22..dd0539ab9ba8 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3633,6 +3633,22 @@ static void __scx_disable_and_exit_task(struct scx_sched *sch, SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args); } +/* + * Undo a completed __scx_init_task(sch, p, false) when scx_enable_task() never + * ran. The task state has not been transitioned, so this mirrors the + * SCX_TASK_INIT branch in __scx_disable_and_exit_task(). + */ +static void scx_sub_init_cancel_task(struct scx_sched *sch, struct task_struct *p) +{ + struct scx_exit_task_args args = { .cancelled = true }; + + lockdep_assert_held(&p->pi_lock); + lockdep_assert_rq_held(task_rq(p)); + + if (SCX_HAS_OP(sch, exit_task)) + SCX_CALL_OP_TASK(sch, exit_task, task_rq(p), p, &args); +} + static void scx_disable_and_exit_task(struct scx_sched *sch, struct task_struct *p) { @@ -3641,11 +3657,12 @@ static void scx_disable_and_exit_task(struct scx_sched *sch, /* * If set, @p exited between __scx_init_task() and scx_enable_task() in * scx_sub_enable() and is initialized for both the associated sched and - * its parent. Disable and exit for the child too. + * its parent. Exit for the child too - scx_enable_task() never ran for + * it, so undo only init_task. */ - if ((p->scx.flags & SCX_TASK_SUB_INIT) && - !WARN_ON_ONCE(!scx_enabling_sub_sched)) { - __scx_disable_and_exit_task(scx_enabling_sub_sched, p); + if (p->scx.flags & SCX_TASK_SUB_INIT) { + if (!WARN_ON_ONCE(!scx_enabling_sub_sched)) + scx_sub_init_cancel_task(scx_enabling_sub_sched, p); p->scx.flags &= ~SCX_TASK_SUB_INIT; } @@ -7124,16 +7141,23 @@ out_unlock: abort: put_task_struct(p); scx_task_iter_stop(&sti); - scx_enabling_sub_sched = NULL; + /* + * Undo __scx_init_task() for tasks we marked. scx_enable_task() never + * ran for @sch on them, so calling scx_disable_task() here would invoke + * ops.disable() without a matching ops.enable(). scx_enabling_sub_sched + * must stay set until SUB_INIT is cleared from every marked task - + * scx_disable_and_exit_task() reads it when a task exits concurrently. + */ scx_task_iter_start(&sti, sch->cgrp); while ((p = scx_task_iter_next_locked(&sti))) { if (p->scx.flags & SCX_TASK_SUB_INIT) { - __scx_disable_and_exit_task(sch, p); + scx_sub_init_cancel_task(sch, p); p->scx.flags &= ~SCX_TASK_SUB_INIT; } } scx_task_iter_stop(&sti); + scx_enabling_sub_sched = NULL; err_unlock_and_disable: /* we'll soon enter disable path, keep bypass on */ scx_cgroup_unlock();