From: Peter Zijlstra Date: Wed, 1 Oct 2025 13:50:15 +0000 (+0200) Subject: sched: Detect per-class runqueue changes X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1e900f415c6082cd4bcdae4c92515d21fb389473;p=thirdparty%2Flinux.git sched: Detect per-class runqueue changes Have enqueue/dequeue set a per-class bit in rq->queue_mask. This then enables easy tracking of which runqueues are modified over a lock-break. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo --- diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e2199e4db0dc4..9fc990ff68454 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2089,6 +2089,7 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags) */ uclamp_rq_inc(rq, p, flags); + rq->queue_mask |= p->sched_class->queue_mask; p->sched_class->enqueue_task(rq, p, flags); psi_enqueue(p, flags); @@ -2121,6 +2122,7 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags) * and mark the task ->sched_delayed. */ uclamp_rq_dec(rq, p); + rq->queue_mask |= p->sched_class->queue_mask; return p->sched_class->dequeue_task(rq, p, flags); } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1f94994984038..83e6175d79f5f 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -3092,6 +3092,8 @@ static int task_is_throttled_dl(struct task_struct *p, int cpu) DEFINE_SCHED_CLASS(dl) = { + .queue_mask = 8, + .enqueue_task = enqueue_task_dl, .dequeue_task = dequeue_task_dl, .yield_task = yield_task_dl, diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 57170423c3d97..949c3a6e24d4b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3234,6 +3234,8 @@ static void scx_cgroup_unlock(void) {} * their current sched_class. Call them directly from sched core instead. */ DEFINE_SCHED_CLASS(ext) = { + .queue_mask = 1, + .enqueue_task = enqueue_task_scx, .dequeue_task = dequeue_task_scx, .yield_task = yield_task_scx, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 77a713ecde9d1..23ac05cca4a49 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12841,6 +12841,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) } rcu_read_unlock(); + rq_modified_clear(this_rq); raw_spin_rq_unlock(this_rq); t0 = sched_clock_cpu(this_cpu); @@ -12898,8 +12899,8 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) if (this_rq->cfs.h_nr_queued && !pulled_task) pulled_task = 1; - /* Is there a task of a high priority class? */ - if (this_rq->nr_running != this_rq->cfs.h_nr_queued) + /* If a higher prio class was modified, restart the pick */ + if (rq_modified_above(this_rq, &fair_sched_class)) pulled_task = -1; out: @@ -13633,6 +13634,8 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task */ DEFINE_SCHED_CLASS(fair) = { + .queue_mask = 2, + .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index dee6e019dcf81..055b0ddbcd54d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -521,6 +521,8 @@ static void update_curr_idle(struct rq *rq) */ DEFINE_SCHED_CLASS(idle) = { + .queue_mask = 0, + /* no enqueue/yield_task for idle tasks */ /* dequeue is not valid, we print a debug message there: */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c2347e485dc65..9bc828d59121a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2569,6 +2569,8 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu) DEFINE_SCHED_CLASS(rt) = { + .queue_mask = 4, + .enqueue_task = enqueue_task_rt, .dequeue_task = dequeue_task_rt, .yield_task = yield_task_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e3d271013c8b0..f4a323007dced 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1118,6 +1118,8 @@ struct rq { /* runqueue lock: */ raw_spinlock_t __lock; + /* Per class runqueue modification mask; bits in class order. */ + unsigned int queue_mask; unsigned int nr_running; #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; @@ -2414,6 +2416,15 @@ struct sched_class { #ifdef CONFIG_UCLAMP_TASK int uclamp_enabled; #endif + /* + * idle: 0 + * ext: 1 + * fair: 2 + * rt: 4 + * dl: 8 + * stop: 16 + */ + unsigned int queue_mask; /* * move_queued_task/activate_task/enqueue_task: rq->lock @@ -2571,6 +2582,20 @@ struct sched_class { #endif }; +/* + * Does not nest; only used around sched_class::pick_task() rq-lock-breaks. + */ +static inline void rq_modified_clear(struct rq *rq) +{ + rq->queue_mask = 0; +} + +static inline bool rq_modified_above(struct rq *rq, const struct sched_class * class) +{ + unsigned int mask = class->queue_mask; + return rq->queue_mask & ~((mask << 1) - 1); +} + static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { WARN_ON_ONCE(rq->donor != prev); diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 73aa8de190675..d98c453c9b4eb 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -98,6 +98,8 @@ static void update_curr_stop(struct rq *rq) */ DEFINE_SCHED_CLASS(stop) = { + .queue_mask = 16, + .enqueue_task = enqueue_task_stop, .dequeue_task = dequeue_task_stop, .yield_task = yield_task_stop,