git.ipfire.org Git - thirdparty/linux.git/commitdiff
sched_ext/scx_qmap: Pick idle CPU for direct dispatch on !wakeup enqueues
author Tejun Heo <tj@kernel.org>
Fri, 12 Jul 2024 18:20:33 +0000 (08:20 -1000)
committer Tejun Heo <tj@kernel.org>
Fri, 12 Jul 2024 18:20:33 +0000 (08:20 -1000)
Because there was no way to directly dispatch to the local DSQ of a remote
CPU from ops.enqueue(), scx_qmap skipped looking for an idle CPU on !wakeup
enqueues. This restriction was removed and sched_ext now allows
SCX_DSQ_LOCAL_ON verdicts for direct dispatches.

Factor out pick_direct_dispatch_cpu() from ops.select_cpu() and use it to
direct dispatch from ops.enqueue() on !wakeup enqueues.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
Cc: Changwoo Min <changwoo@igalia.com>
Cc: Andrea Righi <righi.andrea@gmail.com>
tools/sched_ext/scx_qmap.bpf.c
tools/sched_ext/scx_qmap.c

index 27e35066a6029c864bc9e77bd30dd75ce6d3ec9c..892278f12dce904a8f58f77a254271337ab1d509 100644 (file)
@@ -120,11 +120,26 @@ struct {
 } cpu_ctx_stor SEC(".maps");
 
 /* Statistics */
-u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
+u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
 u64 nr_core_sched_execed;
 u32 cpuperf_min, cpuperf_avg, cpuperf_max;
 u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
 
+static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
+{
+       s32 cpu;
+
+       if (p->nr_cpus_allowed == 1 ||
+           scx_bpf_test_and_clear_cpu_idle(prev_cpu))
+               return prev_cpu;
+
+       cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
+       if (cpu >= 0)
+               return cpu;
+
+       return -1;
+}
+
 s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
                   s32 prev_cpu, u64 wake_flags)
 {
@@ -137,17 +152,14 @@ s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
                return -ESRCH;
        }
 
-       if (p->nr_cpus_allowed == 1 ||
-           scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
+       cpu = pick_direct_dispatch_cpu(p, prev_cpu);
+
+       if (cpu >= 0) {
                tctx->force_local = true;
+               return cpu;
+       } else {
                return prev_cpu;
        }
-
-       cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
-       if (cpu >= 0)
-               return cpu;
-
-       return prev_cpu;
 }
 
 static int weight_to_idx(u32 weight)
@@ -172,6 +184,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
        u32 pid = p->pid;
        int idx = weight_to_idx(p->scx.weight);
        void *ring;
+       s32 cpu;
 
        if (p->flags & PF_KTHREAD) {
                if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
@@ -207,6 +220,14 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
                return;
        }
 
+       /* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
+       if (!(enq_flags & SCX_ENQ_WAKEUP) &&
+           (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
+               __sync_fetch_and_add(&nr_ddsp_from_enq, 1);
+               scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
+               return;
+       }
+
        /*
         * If the task was re-enqueued due to the CPU being preempted by a
         * higher priority scheduling class, just re-enqueue the task directly
index 304f0488a386b8d2c64c8404183b9f324d9e991c..c9ca30d62b2b19df5ba9c730a94510b07c1930c1 100644 (file)
@@ -116,10 +116,11 @@ int main(int argc, char **argv)
                long nr_enqueued = skel->bss->nr_enqueued;
                long nr_dispatched = skel->bss->nr_dispatched;
 
-               printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64"\n",
+               printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n",
                       nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
                       skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
-                      skel->bss->nr_core_sched_execed);
+                      skel->bss->nr_core_sched_execed,
+                      skel->bss->nr_ddsp_from_enq);
                if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur"))
                        printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
                               skel->bss->cpuperf_min,