sched_ext: Fix spurious WARN on stale ops_state in ops_dequeue()

author Samuele Mariotti <smariotti@disroot.org>

Thu, 21 May 2026 10:59:11 +0000 (12:59 +0200)

committer Tejun Heo <tj@kernel.org>

Thu, 21 May 2026 16:27:44 +0000 (06:27 -1000)
author Samuele Mariotti <smariotti@disroot.org>
Thu, 21 May 2026 10:59:11 +0000 (12:59 +0200)
committer Tejun Heo <tj@kernel.org>
Thu, 21 May 2026 16:27:44 +0000 (06:27 -1000)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c

index 547ca398f646beb35289ac6fbd8d7b4618055ad8..c1762420cc35cd66df59b45a52174047701740b5 100644 (file)
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2078,6 +2078,7 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
         /* dequeue is always temporary, don't reset runnable_at */
         clr_task_runnable(p, false);
  
+retry:
         /* acquire ensures that we see the preceding updates on QUEUED */
         opss = atomic_long_read_acquire(&p->scx.ops_state);
  
@@ -2091,8 +2092,20 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
                  */
                 BUG();
         case SCX_OPSS_QUEUED:
-               /* A queued task must always be in BPF scheduler's custody */
-               WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_IN_CUSTODY));
+               /*
+                * A queued task is normally in the BPF scheduler's custody. If
+                * SCX_TASK_IN_CUSTODY is clear, finish_dispatch() on another
+                * CPU has already passed call_task_dequeue() (which clears the
+                * flag), but has not yet written SCX_OPSS_NONE. That final
+                * store does not require this rq's lock, so retrying with
+                * cpu_relax() is bounded: a later re-read will observe NONE (or
+                * DISPATCHING, handled by the case QUEUED falls through to).
+                */
+               if (unlikely(!(READ_ONCE(p->scx.flags) & SCX_TASK_IN_CUSTODY))) {
+                       cpu_relax();
+                       goto retry;
+               }
+
                 if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
                                             SCX_OPSS_NONE))
                         break;
author	Samuele Mariotti <smariotti@disroot.org>
	Thu, 21 May 2026 10:59:11 +0000 (12:59 +0200)
committer	Tejun Heo <tj@kernel.org>
	Thu, 21 May 2026 16:27:44 +0000 (06:27 -1000)