From: Tejun Heo Date: Sat, 7 Mar 2026 15:29:49 +0000 (-1000) Subject: sched_ext: Introduce scx_bpf_dsq_reenq() for remote local DSQ reenqueue X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9c34c5074d1bc22072fc7f9c86b0028f7e273b2c;p=thirdparty%2Fkernel%2Flinux.git sched_ext: Introduce scx_bpf_dsq_reenq() for remote local DSQ reenqueue scx_bpf_reenqueue_local() can only trigger re-enqueue of the current CPU's local DSQ. Introduce scx_bpf_dsq_reenq() which takes a DSQ ID and can target any local DSQ including remote CPUs via SCX_DSQ_LOCAL_ON | cpu. This will be expanded to support user DSQs by future changes. scx_bpf_reenqueue_local() is reimplemented as a simple wrapper around scx_bpf_dsq_reenq(SCX_DSQ_LOCAL, 0) and may be deprecated in the future. Update compat.bpf.h with a compatibility shim and scx_qmap to test the new functionality. Signed-off-by: Tejun Heo Reviewed-by: Andrea Righi --- diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 3548cf61477ac..efcf7ef72a3e6 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1080,6 +1080,31 @@ static void schedule_deferred_locked(struct rq *rq) schedule_deferred(rq); } +static void schedule_dsq_reenq(struct scx_sched *sch, struct scx_dispatch_q *dsq) +{ + /* + * Allowing reenqueues doesn't make sense while bypassing. This also + * prevents new reenqueues from being scheduled on dead scheds. 
+ */ + if (unlikely(READ_ONCE(sch->bypass_depth))) + return; + + if (dsq->id == SCX_DSQ_LOCAL) { + struct rq *rq = container_of(dsq, struct rq, scx.local_dsq); + struct scx_sched_pcpu *sch_pcpu = per_cpu_ptr(sch->pcpu, cpu_of(rq)); + struct scx_deferred_reenq_local *drl = &sch_pcpu->deferred_reenq_local; + + scoped_guard (raw_spinlock_irqsave, &rq->scx.deferred_reenq_lock) { + if (list_empty(&drl->node)) + list_move_tail(&drl->node, &rq->scx.deferred_reenq_locals); + } + + schedule_deferred(rq); + } else { + scx_error(sch, "DSQ 0x%llx not allowed for reenq", dsq->id); + } +} + /** * touch_core_sched - Update timestamp used for core-sched task ordering * @rq: rq to read clock from, must be locked @@ -7774,9 +7799,6 @@ __bpf_kfunc_start_defs(); * Iterate over all of the tasks currently enqueued on the local DSQ of the * caller's CPU, and re-enqueue them in the BPF scheduler. Returns the number of * processed tasks. Can only be called from ops.cpu_release(). - * - * COMPAT: Will be removed in v6.23 along with the ___v2 suffix on the void - * returning variant that can be called from anywhere. */ __bpf_kfunc u32 scx_bpf_reenqueue_local(const struct bpf_prog_aux *aux) { @@ -8206,6 +8228,52 @@ __bpf_kfunc struct task_struct *scx_bpf_dsq_peek(u64 dsq_id, return rcu_dereference(dsq->first_task); } +/** + * scx_bpf_dsq_reenq - Re-enqueue tasks on a DSQ + * @dsq_id: DSQ to re-enqueue + * @reenq_flags: %SCX_RENQ_* + * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs + * + * Iterate over all of the tasks currently enqueued on the DSQ identified by + * @dsq_id, and re-enqueue them in the BPF scheduler. The following DSQs are + * supported: + * + * - Local DSQs (%SCX_DSQ_LOCAL or %SCX_DSQ_LOCAL_ON | $cpu) + * + * Re-enqueues are performed asynchronously. Can be called from anywhere. 
+ */ +__bpf_kfunc void scx_bpf_dsq_reenq(u64 dsq_id, u64 reenq_flags, + const struct bpf_prog_aux *aux) +{ + struct scx_sched *sch; + struct scx_dispatch_q *dsq; + + guard(preempt)(); + + sch = scx_prog_sched(aux); + if (unlikely(!sch)) + return; + + dsq = find_dsq_for_dispatch(sch, this_rq(), dsq_id, smp_processor_id()); + schedule_dsq_reenq(sch, dsq); +} + +/** + * scx_bpf_reenqueue_local - Re-enqueue tasks on a local DSQ + * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs + * + * Iterate over all of the tasks currently enqueued on the local DSQ of the + * caller's CPU, and re-enqueue them in the BPF scheduler. Can be called from + * anywhere. + * + * This is now a special case of scx_bpf_dsq_reenq() and may be removed in the + * future. + */ +__bpf_kfunc void scx_bpf_reenqueue_local___v2(const struct bpf_prog_aux *aux) +{ + scx_bpf_dsq_reenq(SCX_DSQ_LOCAL, 0, aux); +} + __bpf_kfunc_end_defs(); static s32 __bstr_format(struct scx_sched *sch, u64 *data_buf, char *line_buf, @@ -8363,47 +8431,6 @@ __bpf_kfunc void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, ops_dump_flush(); } -/** - * scx_bpf_reenqueue_local - Re-enqueue tasks on a local DSQ - * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs - * - * Iterate over all of the tasks currently enqueued on the local DSQ of the - * caller's CPU, and re-enqueue them in the BPF scheduler. Can be called from - * anywhere. - */ -__bpf_kfunc void scx_bpf_reenqueue_local___v2(const struct bpf_prog_aux *aux) -{ - unsigned long flags; - struct scx_sched *sch; - struct rq *rq; - - raw_local_irq_save(flags); - - sch = scx_prog_sched(aux); - if (unlikely(!sch)) - goto out_irq_restore; - - /* - * Allowing reenqueue-locals doesn't make sense while bypassing. This - * also blocks from new reenqueues to be scheduled on dead scheds. 
- */ - if (unlikely(sch->bypass_depth)) - goto out_irq_restore; - - rq = this_rq(); - scoped_guard (raw_spinlock, &rq->scx.deferred_reenq_lock) { - struct scx_sched_pcpu *pcpu = this_cpu_ptr(sch->pcpu); - - if (list_empty(&pcpu->deferred_reenq_local.node)) - list_move_tail(&pcpu->deferred_reenq_local.node, - &rq->scx.deferred_reenq_locals); - } - - schedule_deferred(rq); -out_irq_restore: - raw_local_irq_restore(flags); -} - /** * scx_bpf_cpuperf_cap - Query the maximum relative capacity of a CPU * @cpu: CPU of interest @@ -8820,13 +8847,14 @@ BTF_ID_FLAGS(func, scx_bpf_kick_cpu, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) BTF_ID_FLAGS(func, scx_bpf_dsq_peek, KF_IMPLICIT_ARGS | KF_RCU_PROTECTED | KF_RET_NULL) +BTF_ID_FLAGS(func, scx_bpf_dsq_reenq, KF_IMPLICIT_ARGS) +BTF_ID_FLAGS(func, scx_bpf_reenqueue_local___v2, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, bpf_iter_scx_dsq_new, KF_IMPLICIT_ARGS | KF_ITER_NEW | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_iter_scx_dsq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_scx_dsq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_dump_bstr, KF_IMPLICIT_ARGS) -BTF_ID_FLAGS(func, scx_bpf_reenqueue_local___v2, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur, KF_IMPLICIT_ARGS) BTF_ID_FLAGS(func, scx_bpf_cpuperf_set, KF_IMPLICIT_ARGS) diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index f2969c3061a75..2d3985be7e2c1 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -375,6 +375,27 @@ static inline void scx_bpf_reenqueue_local(void) scx_bpf_reenqueue_local___v1(); } +/* + * v6.20: New scx_bpf_dsq_reenq() that allows re-enqueues on more DSQs. 
This + * will eventually deprecate scx_bpf_reenqueue_local(). + */ +void scx_bpf_dsq_reenq___compat(u64 dsq_id, u64 reenq_flags, const struct bpf_prog_aux *aux__prog) __ksym __weak; + +static inline bool __COMPAT_has_generic_reenq(void) +{ + return bpf_ksym_exists(scx_bpf_dsq_reenq___compat); +} + +static inline void scx_bpf_dsq_reenq(u64 dsq_id, u64 reenq_flags) +{ + if (bpf_ksym_exists(scx_bpf_dsq_reenq___compat)) + scx_bpf_dsq_reenq___compat(dsq_id, reenq_flags, NULL); + else if (dsq_id == SCX_DSQ_LOCAL && reenq_flags == 0) + scx_bpf_reenqueue_local(); + else + scx_bpf_error("kernel too old to reenqueue foreign local or user DSQs"); +} + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 91b8eac83f527..83e8289e8c0cb 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -131,7 +131,7 @@ struct { } cpu_ctx_stor SEC(".maps"); /* Statistics */ -u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq; +u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_reenqueued_cpu0, nr_dequeued, nr_ddsp_from_enq; u64 nr_core_sched_execed; u64 nr_expedited_local, nr_expedited_remote, nr_expedited_lost, nr_expedited_from_timer; u32 cpuperf_min, cpuperf_avg, cpuperf_max; @@ -206,8 +206,11 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) void *ring; s32 cpu; - if (enq_flags & SCX_ENQ_REENQ) + if (enq_flags & SCX_ENQ_REENQ) { __sync_fetch_and_add(&nr_reenqueued, 1); + if (scx_bpf_task_cpu(p) == 0) + __sync_fetch_and_add(&nr_reenqueued_cpu0, 1); + } if (p->flags & PF_KTHREAD) { if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth)) @@ -561,6 +564,10 @@ int BPF_PROG(qmap_sched_switch, bool preempt, struct task_struct *prev, case 2: /* SCHED_RR */ case 6: /* SCHED_DEADLINE */ scx_bpf_reenqueue_local(); + + /* trigger re-enqueue on CPU0 
just to exercise LOCAL_ON */ + if (__COMPAT_has_generic_reenq()) + scx_bpf_dsq_reenq(SCX_DSQ_LOCAL_ON | 0, 0); } return 0; diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index 5d762d10f4dbd..9252037284d30 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -137,9 +137,10 @@ int main(int argc, char **argv) long nr_enqueued = skel->bss->nr_enqueued; long nr_dispatched = skel->bss->nr_dispatched; - printf("stats : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n", + printf("stats : enq=%lu dsp=%lu delta=%ld reenq/cpu0=%"PRIu64"/%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n", nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched, - skel->bss->nr_reenqueued, skel->bss->nr_dequeued, + skel->bss->nr_reenqueued, skel->bss->nr_reenqueued_cpu0, + skel->bss->nr_dequeued, skel->bss->nr_core_sched_execed, skel->bss->nr_ddsp_from_enq); printf(" exp_local=%"PRIu64" exp_remote=%"PRIu64" exp_timer=%"PRIu64" exp_lost=%"PRIu64"\n",