git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched_ext: Factor out scx_dispatch_sched()
author: Tejun Heo <tj@kernel.org>
Fri, 6 Mar 2026 17:58:03 +0000 (07:58 -1000)
committer: Tejun Heo <tj@kernel.org>
Fri, 6 Mar 2026 17:58:03 +0000 (07:58 -1000)
In preparation of multiple scheduler support, factor out
scx_dispatch_sched() from balance_one(). The function boundary makes
remembering $prev_on_scx and $prev_on_rq less useful. Open code $prev_on_scx
in balance_one() and $prev_on_rq in both balance_one() and
scx_dispatch_sched().

No functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
kernel/sched/ext.c

index 0bec650c0ab69974846b6b1828722607887c40c2..28ac7ba3ece08ad2a316b2cd75acf272c69f631f 100644 (file)
@@ -2388,67 +2388,22 @@ static inline void maybe_queue_balance_callback(struct rq *rq)
        rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING;
 }
 
-static int balance_one(struct rq *rq, struct task_struct *prev)
+static bool scx_dispatch_sched(struct scx_sched *sch, struct rq *rq,
+                              struct task_struct *prev)
 {
-       struct scx_sched *sch = scx_root;
        struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
        bool prev_on_scx = prev->sched_class == &ext_sched_class;
-       bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
        int nr_loops = SCX_DSP_MAX_LOOPS;
        s32 cpu = cpu_of(rq);
 
-       lockdep_assert_rq_held(rq);
-       rq->scx.flags |= SCX_RQ_IN_BALANCE;
-       rq->scx.flags &= ~SCX_RQ_BAL_KEEP;
-
-       if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
-           unlikely(rq->scx.cpu_released)) {
-               /*
-                * If the previous sched_class for the current CPU was not SCX,
-                * notify the BPF scheduler that it again has control of the
-                * core. This callback complements ->cpu_release(), which is
-                * emitted in switch_class().
-                */
-               if (SCX_HAS_OP(sch, cpu_acquire))
-                       SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
-               rq->scx.cpu_released = false;
-       }
-
-       if (prev_on_scx) {
-               update_curr_scx(rq);
-
-               /*
-                * If @prev is runnable & has slice left, it has priority and
-                * fetching more just increases latency for the fetched tasks.
-                * Tell pick_task_scx() to keep running @prev. If the BPF
-                * scheduler wants to handle this explicitly, it should
-                * implement ->cpu_release().
-                *
-                * See scx_disable_workfn() for the explanation on the bypassing
-                * test.
-                */
-               if (prev_on_rq && prev->scx.slice && !scx_bypassing(sch, cpu)) {
-                       rq->scx.flags |= SCX_RQ_BAL_KEEP;
-                       goto has_tasks;
-               }
-       }
-
-       /* if there already are tasks to run, nothing to do */
-       if (rq->scx.local_dsq.nr)
-               goto has_tasks;
-
        if (consume_global_dsq(sch, rq))
-               goto has_tasks;
+               return true;
 
-       if (scx_bypassing(sch, cpu)) {
-               if (consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu)))
-                       goto has_tasks;
-               else
-                       goto no_tasks;
-       }
+       if (scx_bypassing(sch, cpu))
+               return consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu));
 
        if (unlikely(!SCX_HAS_OP(sch, dispatch)) || !scx_rq_online(rq))
-               goto no_tasks;
+               return false;
 
        dspc->rq = rq;
 
@@ -2467,14 +2422,14 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 
                flush_dispatch_buf(sch, rq);
 
-               if (prev_on_rq && prev->scx.slice) {
+               if ((prev->scx.flags & SCX_TASK_QUEUED) && prev->scx.slice) {
                        rq->scx.flags |= SCX_RQ_BAL_KEEP;
-                       goto has_tasks;
+                       return true;
                }
                if (rq->scx.local_dsq.nr)
-                       goto has_tasks;
+                       return true;
                if (consume_global_dsq(sch, rq))
-                       goto has_tasks;
+                       return true;
 
                /*
                 * ops.dispatch() can trap us in this loop by repeatedly
@@ -2483,7 +2438,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
                 * balance(), we want to complete this scheduling cycle and then
                 * start a new one. IOW, we want to call resched_curr() on the
                 * next, most likely idle, task, not the current one. Use
-                * scx_kick_cpu() for deferred kicking.
+                * __scx_bpf_kick_cpu() for deferred kicking.
                 */
                if (unlikely(!--nr_loops)) {
                        scx_kick_cpu(sch, cpu, 0);
@@ -2491,12 +2446,64 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
                }
        } while (dspc->nr_tasks);
 
-no_tasks:
+       return false;
+}
+
+static int balance_one(struct rq *rq, struct task_struct *prev)
+{
+       struct scx_sched *sch = scx_root;
+       s32 cpu = cpu_of(rq);
+
+       lockdep_assert_rq_held(rq);
+       rq->scx.flags |= SCX_RQ_IN_BALANCE;
+       rq->scx.flags &= ~SCX_RQ_BAL_KEEP;
+
+       if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
+           unlikely(rq->scx.cpu_released)) {
+               /*
+                * If the previous sched_class for the current CPU was not SCX,
+                * notify the BPF scheduler that it again has control of the
+                * core. This callback complements ->cpu_release(), which is
+                * emitted in switch_class().
+                */
+               if (SCX_HAS_OP(sch, cpu_acquire))
+                       SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
+               rq->scx.cpu_released = false;
+       }
+
+       if (prev->sched_class == &ext_sched_class) {
+               update_curr_scx(rq);
+
+               /*
+                * If @prev is runnable & has slice left, it has priority and
+                * fetching more just increases latency for the fetched tasks.
+                * Tell pick_task_scx() to keep running @prev. If the BPF
+                * scheduler wants to handle this explicitly, it should
+                * implement ->cpu_release().
+                *
+                * See scx_disable_workfn() for the explanation on the bypassing
+                * test.
+                */
+               if ((prev->scx.flags & SCX_TASK_QUEUED) && prev->scx.slice &&
+                   !scx_bypassing(sch, cpu)) {
+                       rq->scx.flags |= SCX_RQ_BAL_KEEP;
+                       goto has_tasks;
+               }
+       }
+
+       /* if there already are tasks to run, nothing to do */
+       if (rq->scx.local_dsq.nr)
+               goto has_tasks;
+
+       /* dispatch @sch */
+       if (scx_dispatch_sched(sch, rq, prev))
+               goto has_tasks;
+
        /*
         * Didn't find another task to run. Keep running @prev unless
         * %SCX_OPS_ENQ_LAST is in effect.
         */
-       if (prev_on_rq &&
+       if ((prev->scx.flags & SCX_TASK_QUEUED) &&
            (!(sch->ops.flags & SCX_OPS_ENQ_LAST) || scx_bypassing(sch, cpu))) {
                rq->scx.flags |= SCX_RQ_BAL_KEEP;
                __scx_add_event(sch, SCX_EV_DISPATCH_KEEP_LAST, 1);