git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
sched_ext: Fix incorrect sched_class settings for per-cpu migration tasks
author Zqiang <qiang.zhang@linux.dev>
Mon, 29 Dec 2025 19:39:20 +0000 (14:39 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Jan 2026 09:14:56 +0000 (10:14 +0100)
[ Upstream commit 1dd6c84f1c544e552848a8968599220bd464e338 ]

When loading the eBPF scheduler, the tasks in the scx_tasks list are
traversed and __setscheduler_class() is invoked to get each task's new
sched_class. However, this also incorrectly sets the per-cpu migration
tasks' ->sched_class to rt_sched_class, and even after the scheduler is
unloaded, the per-cpu migration tasks' ->sched_class remains
rt_sched_class.

The log for this issue is as follows:

./scx_rustland --stats 1
[  199.245639][  T630] sched_ext: "rustland" does not implement cgroup cpu.weight
[  199.269213][  T630] sched_ext: BPF scheduler "rustland" enabled
04:25:09 [INFO] RustLand scheduler attached

bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/
{ printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }'
Attaching 1 probe...
migration/0:24->rt_sched_class+0x0/0xe0
migration/1:27->rt_sched_class+0x0/0xe0
migration/2:33->rt_sched_class+0x0/0xe0
migration/3:39->rt_sched_class+0x0/0xe0
migration/4:45->rt_sched_class+0x0/0xe0
migration/5:52->rt_sched_class+0x0/0xe0
migration/6:58->rt_sched_class+0x0/0xe0
migration/7:64->rt_sched_class+0x0/0xe0

sched_ext: BPF scheduler "rustland" disabled (unregistered from user space)
EXIT: unregistered from user space
04:25:21 [INFO] Unregister RustLand scheduler

bpftrace -e 'iter:task /strcontains(ctx->task->comm, "migration")/
{ printf("%s:%d->%pS\n", ctx->task->comm, ctx->task->pid, ctx->task->sched_class); }'
Attaching 1 probe...
migration/0:24->rt_sched_class+0x0/0xe0
migration/1:27->rt_sched_class+0x0/0xe0
migration/2:33->rt_sched_class+0x0/0xe0
migration/3:39->rt_sched_class+0x0/0xe0
migration/4:45->rt_sched_class+0x0/0xe0
migration/5:52->rt_sched_class+0x0/0xe0
migration/6:58->rt_sched_class+0x0/0xe0
migration/7:64->rt_sched_class+0x0/0xe0

This commit therefore introduces a new scx_setscheduler_class() helper,
which adds a check for stop_sched_class, and uses it in place of the
direct __setscheduler_class() calls.

Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
[ Adjust context ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
kernel/sched/ext.c

index ad1d438b3085c072c7f2cf2abcd7c435fc63724a..614275e4b05ca1ce538997820c2e979e0a067123 100644 (file)
@@ -1057,6 +1057,14 @@ static struct scx_dispatch_q *find_user_dsq(u64 dsq_id)
        return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params);
 }
 
+/* Class @p should run in per policy/prio; stop-class tasks stay as they are. */
+static const struct sched_class *scx_setscheduler_class(struct task_struct *p)
+{
+       const struct sched_class *cur = p->sched_class;
+
+       return cur == &stop_sched_class ? cur : __setscheduler_class(p->policy, p->prio);
+}
+
 /*
  * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
  * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
@@ -4653,8 +4661,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
        scx_task_iter_start(&sti);
        while ((p = scx_task_iter_next_locked(&sti))) {
                const struct sched_class *old_class = p->sched_class;
-               const struct sched_class *new_class =
-                       __setscheduler_class(p->policy, p->prio);
+               const struct sched_class *new_class = scx_setscheduler_class(p);
                struct sched_enq_and_set_ctx ctx;
 
                if (old_class != new_class && p->se.sched_delayed)
@@ -5368,8 +5375,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
        scx_task_iter_start(&sti);
        while ((p = scx_task_iter_next_locked(&sti))) {
                const struct sched_class *old_class = p->sched_class;
-               const struct sched_class *new_class =
-                       __setscheduler_class(p->policy, p->prio);
+               const struct sched_class *new_class = scx_setscheduler_class(p);
                struct sched_enq_and_set_ctx ctx;
 
                if (!tryget_task_struct(p))