sched_ext: Move bypass_dsq into scx_sched_pcpu
author    Tejun Heo <tj@kernel.org>    Fri, 6 Mar 2026 17:58:03 +0000 (07:58 -1000)
committer Tejun Heo <tj@kernel.org>    Fri, 6 Mar 2026 17:58:03 +0000 (07:58 -1000)
To support bypass mode for sub-schedulers, move bypass_dsq from struct scx_rq
to struct scx_sched_pcpu and add a bypass_dsq() helper. Move bypass_dsq
initialization from init_sched_ext_class() to scx_alloc_and_add_sched().
bypass_lb_cpu() now takes a CPU number instead of an rq pointer. All callers
are updated. No behavior change as all tasks currently use the root scheduler.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
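
In short, each scx_sched instance now owns one bypass DSQ per CPU rather than
each runqueue owning a single shared one. A minimal sketch of the access
pattern change, condensed from the hunks below:

    /* before: the bypass DSQ hangs off the task's runqueue */
    dsq = &task_rq(p)->scx.bypass_dsq;

    /* after: resolve it through the scheduler's per-CPU area */
    static struct scx_dispatch_q *bypass_dsq(struct scx_sched *sch, s32 cpu)
    {
            return &per_cpu_ptr(sch->pcpu, cpu)->bypass_dsq;
    }

    dsq = bypass_dsq(sch, task_cpu(p));

Keying the DSQ by (scheduler, CPU) rather than by CPU alone is what lets each
sub-scheduler bypass independently of the root.
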
kernel/sched/ext.c
kernel/sched/ext_internal.h
kernel/sched/sched.h

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 47b51bd31a30d1551ed24bca95e13d384cd824ba..a371877c40bad4c8a9e08e8d8a658c53aaf60bb5 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -359,6 +359,12 @@ static const struct sched_class *scx_setscheduler_class(struct task_struct *p)
        return __setscheduler_class(p->policy, p->prio);
 }
 
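+/* Return @sch's per-CPU bypass DSQ for @cpu */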
+static struct scx_dispatch_q *bypass_dsq(struct scx_sched *sch, s32 cpu)
+{
+       return &per_cpu_ptr(sch->pcpu, cpu)->bypass_dsq;
+}
+
 /*
  * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
  * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
@@ -1632,7 +1637,7 @@ global:
        dsq = find_global_dsq(sch, p);
        goto enqueue;
 bypass:
-       dsq = &task_rq(p)->scx.bypass_dsq;
+       dsq = bypass_dsq(sch, task_cpu(p));
        goto enqueue;
 
 enqueue:
@@ -2443,7 +2448,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
                goto has_tasks;
 
        if (scx_rq_bypassing(rq)) {
-               if (consume_dispatch_q(sch, rq, &rq->scx.bypass_dsq))
+               if (consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu_of(rq))))
                        goto has_tasks;
                else
                        goto no_tasks;
@@ -4210,11 +4215,12 @@ bool scx_hardlockup(int cpu)
        return true;
 }
 
-static u32 bypass_lb_cpu(struct scx_sched *sch, struct rq *rq,
+static u32 bypass_lb_cpu(struct scx_sched *sch, s32 donor,
                         struct cpumask *donee_mask, struct cpumask *resched_mask,
                         u32 nr_donor_target, u32 nr_donee_target)
 {
-       struct scx_dispatch_q *donor_dsq = &rq->scx.bypass_dsq;
+       struct rq *donor_rq = cpu_rq(donor);
+       struct scx_dispatch_q *donor_dsq = bypass_dsq(sch, donor);
        struct task_struct *p, *n;
        struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, 0, 0);
        s32 delta = READ_ONCE(donor_dsq->nr) - nr_donor_target;
@@ -4230,7 +4236,7 @@ static u32 bypass_lb_cpu(struct scx_sched *sch, struct rq *rq,
        if (delta < DIV_ROUND_UP(min_delta_us, READ_ONCE(scx_slice_bypass_us)))
                return 0;
 
-       raw_spin_rq_lock_irq(rq);
+       raw_spin_rq_lock_irq(donor_rq);
        raw_spin_lock(&donor_dsq->lock);
        list_add(&cursor.node, &donor_dsq->list);
 resume:
@@ -4238,7 +4244,6 @@ resume:
        n = nldsq_next_task(donor_dsq, n, false);
 
        while ((p = n)) {
-               struct rq *donee_rq;
                struct scx_dispatch_q *donee_dsq;
                int donee;
 
@@ -4254,14 +4259,13 @@ resume:
                if (donee >= nr_cpu_ids)
                        continue;
 
-               donee_rq = cpu_rq(donee);
-               donee_dsq = &donee_rq->scx.bypass_dsq;
+               donee_dsq = bypass_dsq(sch, donee);
 
                /*
                 * $p's rq is not locked but $p's DSQ lock protects its
                 * scheduling properties making this test safe.
                 */
-               if (!task_can_run_on_remote_rq(sch, p, donee_rq, false))
+               if (!task_can_run_on_remote_rq(sch, p, cpu_rq(donee), false))
                        continue;
 
                /*
@@ -4276,7 +4280,7 @@ resume:
                 * between bypass DSQs.
                 */
                dispatch_dequeue_locked(p, donor_dsq);
-               dispatch_enqueue(sch, donee_rq, donee_dsq, p, SCX_ENQ_NESTED);
+               dispatch_enqueue(sch, cpu_rq(donee), donee_dsq, p, SCX_ENQ_NESTED);
 
                /*
                 * $donee might have been idle and need to be woken up. No need
@@ -4291,9 +4295,9 @@ resume:
                if (!(nr_balanced % SCX_BYPASS_LB_BATCH) && n) {
                        list_move_tail(&cursor.node, &n->scx.dsq_list.node);
                        raw_spin_unlock(&donor_dsq->lock);
-                       raw_spin_rq_unlock_irq(rq);
+                       raw_spin_rq_unlock_irq(donor_rq);
                        cpu_relax();
-                       raw_spin_rq_lock_irq(rq);
+                       raw_spin_rq_lock_irq(donor_rq);
                        raw_spin_lock(&donor_dsq->lock);
                        goto resume;
                }
@@ -4301,7 +4305,7 @@ resume:
 
        list_del_init(&cursor.node);
        raw_spin_unlock(&donor_dsq->lock);
-       raw_spin_rq_unlock_irq(rq);
+       raw_spin_rq_unlock_irq(donor_rq);
 
        return nr_balanced;
 }
@@ -4319,7 +4323,7 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
 
        /* count the target tasks and CPUs */
        for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
-               u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
+               u32 nr = READ_ONCE(bypass_dsq(sch, cpu)->nr);
 
                nr_tasks += nr;
                nr_cpus++;
@@ -4341,24 +4345,21 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
 
        cpumask_clear(donee_mask);
        for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
-               if (READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr) < nr_target)
+               if (READ_ONCE(bypass_dsq(sch, cpu)->nr) < nr_target)
                        cpumask_set_cpu(cpu, donee_mask);
        }
 
        /* iterate !donee CPUs and see if they should be offloaded */
        cpumask_clear(resched_mask);
        for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
-               struct rq *rq = cpu_rq(cpu);
-               struct scx_dispatch_q *donor_dsq = &rq->scx.bypass_dsq;
-
                if (cpumask_empty(donee_mask))
                        break;
                if (cpumask_test_cpu(cpu, donee_mask))
                        continue;
-               if (READ_ONCE(donor_dsq->nr) <= nr_donor_target)
+               if (READ_ONCE(bypass_dsq(sch, cpu)->nr) <= nr_donor_target)
                        continue;
 
-               nr_balanced += bypass_lb_cpu(sch, rq, donee_mask, resched_mask,
+               nr_balanced += bypass_lb_cpu(sch, cpu, donee_mask, resched_mask,
                                             nr_donor_target, nr_target);
        }
 
@@ -4366,7 +4367,7 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
                resched_cpu(cpu);
 
        for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
-               u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
+               u32 nr = READ_ONCE(bypass_dsq(sch, cpu)->nr);
 
                after_min = min(nr, after_min);
                after_max = max(nr, after_max);
@@ -5261,7 +5262,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
 {
        struct scx_sched *sch;
        s32 level = parent ? parent->level + 1 : 0;
-       int node, ret;
+       s32 node, cpu, ret;
 
        sch = kzalloc_flex(*sch, ancestors, level);
        if (!sch)
@@ -5302,6 +5303,9 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
                goto err_free_gdsqs;
        }
 
+       for_each_possible_cpu(cpu)
+               init_dsq(bypass_dsq(sch, cpu), SCX_DSQ_BYPASS, sch);
+
        sch->helper = kthread_run_worker(0, "sched_ext_helper");
        if (IS_ERR(sch->helper)) {
                ret = PTR_ERR(sch->helper);
@@ -5490,7 +5494,6 @@ static void scx_root_enable_workfn(struct kthread_work *work)
                struct rq *rq = cpu_rq(cpu);
 
                rq->scx.local_dsq.sched = sch;
-               rq->scx.bypass_dsq.sched = sch;
                rq->scx.cpuperf_target = SCX_CPUPERF_ONE;
        }
 
@@ -6465,9 +6468,8 @@ void __init init_sched_ext_class(void)
                struct rq *rq = cpu_rq(cpu);
                int  n = cpu_to_node(cpu);
 
-               /* local/bypass dsq's sch will be set during scx_root_enable() */
+               /* local_dsq's sch will be set during scx_root_enable() */
                init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL, NULL);
-               init_dsq(&rq->scx.bypass_dsq, SCX_DSQ_BYPASS, NULL);
 
                INIT_LIST_HEAD(&rq->scx.runnable_list);
                INIT_LIST_HEAD(&rq->scx.ddsp_deferred_locals);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 87f19aed47fb5d416fc9af0e7895b7cea5d92aeb..1901efd405173b577e51d2bd79094772949aeabd 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -932,6 +932,9 @@ struct scx_sched_pcpu {
         * constructed when requested by scx_bpf_events().
         */
        struct scx_event_stats  event_stats;
+
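+       /* per-CPU DSQ used to queue tasks while the scheduler is in bypass mode */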
+       struct scx_dispatch_q   bypass_dsq;
 };
 
 struct scx_sched {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 43bbf0693cca4079e6f71029126a33fad55cb0f5..9e142c2f50f28441acfab9441c624ec2bbb87f82 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -810,7 +810,6 @@ struct scx_rq {
        struct balance_callback deferred_bal_cb;
        struct irq_work         deferred_irq_work;
        struct irq_work         kick_cpus_irq_work;
-       struct scx_dispatch_q   bypass_dsq;
 };
 #endif /* CONFIG_SCHED_CLASS_EXT */