sched: Detect per-class runqueue changes
author     Peter Zijlstra <peterz@infradead.org>
           Wed, 1 Oct 2025 13:50:15 +0000 (15:50 +0200)
committer  Peter Zijlstra <peterz@infradead.org>
           Thu, 16 Oct 2025 09:13:55 +0000 (11:13 +0200)
Have enqueue/dequeue set a per-class bit in rq->queue_mask. This makes
it easy to track which sched classes modified a runqueue across a
lock-break.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
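
To make the bookkeeping concrete, below is a minimal userspace sketch of the
mechanism this patch adds; the struct layouts, class instances and the
mark_queued()/main() driver are simplified stand-ins, not kernel code.
Enqueue/dequeue OR the class's bit into rq->queue_mask, a lock-break clears
the mask, and rq_modified_above() afterwards reports whether a higher class
touched the runqueue in between.

/* Hypothetical model of the queue_mask bookkeeping; not kernel code. */
#include <stdio.h>
#include <stdbool.h>

struct sched_class { unsigned int queue_mask; };
struct rq          { unsigned int queue_mask; };

/*
 * Mask values as assigned by the patch, in class order:
 * idle 0, ext 1, fair 2, rt 4, dl 8, stop 16.
 */
static const struct sched_class fair_sched_class = { .queue_mask = 2 };
static const struct sched_class rt_sched_class   = { .queue_mask = 4 };

/* Stand-in for enqueue_task()/dequeue_task() marking the touched class. */
static void mark_queued(struct rq *rq, const struct sched_class *class)
{
        rq->queue_mask |= class->queue_mask;
}

static void rq_modified_clear(struct rq *rq)
{
        rq->queue_mask = 0;
}

/* Did any class with a higher mask bit than @class modify this rq? */
static bool rq_modified_above(struct rq *rq, const struct sched_class *class)
{
        unsigned int mask = class->queue_mask;

        return rq->queue_mask & ~((mask << 1) - 1);
}

int main(void)
{
        struct rq rq = { 0 };

        /*
         * The pattern sched_balance_newidle() uses below: clear the mask,
         * drop the rq lock (simulated here), then check whether anything
         * above fair queued work in the meantime.
         */
        rq_modified_clear(&rq);

        mark_queued(&rq, &fair_sched_class);    /* fair activity only */
        printf("restart pick after fair enqueue? %d\n",
               rq_modified_above(&rq, &fair_sched_class));      /* 0 */

        mark_queued(&rq, &rt_sched_class);      /* an RT task showed up */
        printf("restart pick after rt enqueue?   %d\n",
               rq_modified_above(&rq, &fair_sched_class));      /* 1 */

        return 0;
}

With the bit values from the patch, rq_modified_above(rq, &fair_sched_class)
is non-zero only when rt, dl or stop activity occurred, which is what lets
sched_balance_newidle() replace its nr_running comparison with this check.
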
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/ext.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stop_task.c

index e2199e4db0dc44f50bc7f922cb796c01bed87d38..9fc990ff68454103eaa4aec82bf5b40f55e0fe51 100644 (file)
@@ -2089,6 +2089,7 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
         */
        uclamp_rq_inc(rq, p, flags);
 
+       rq->queue_mask |= p->sched_class->queue_mask;
        p->sched_class->enqueue_task(rq, p, flags);
 
        psi_enqueue(p, flags);
@@ -2121,6 +2122,7 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
         * and mark the task ->sched_delayed.
         */
        uclamp_rq_dec(rq, p);
+       rq->queue_mask |= p->sched_class->queue_mask;
        return p->sched_class->dequeue_task(rq, p, flags);
 }
 
index 1f949949840381f7385021408ed9d3073ee6d443..83e6175d79f5f7c8d6437616cd0456d03c7386e9 100644 (file)
@@ -3092,6 +3092,8 @@ static int task_is_throttled_dl(struct task_struct *p, int cpu)
 
 DEFINE_SCHED_CLASS(dl) = {
 
+       .queue_mask             = 8,
+
        .enqueue_task           = enqueue_task_dl,
        .dequeue_task           = dequeue_task_dl,
        .yield_task             = yield_task_dl,
index 57170423c3d973c601716c1f4c3167304aaf4e40..949c3a6e24d4bc875820986ff4190837780cc480 100644 (file)
@@ -3234,6 +3234,8 @@ static void scx_cgroup_unlock(void) {}
  *   their current sched_class. Call them directly from sched core instead.
  */
 DEFINE_SCHED_CLASS(ext) = {
+       .queue_mask             = 1,
+
        .enqueue_task           = enqueue_task_scx,
        .dequeue_task           = dequeue_task_scx,
        .yield_task             = yield_task_scx,
index 77a713ecde9d10eb00ff214350bd66dbf3bacadb..23ac05cca4a49e8368ccc286c332abb91a1df51c 100644 (file)
@@ -12841,6 +12841,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
        }
        rcu_read_unlock();
 
+       rq_modified_clear(this_rq);
        raw_spin_rq_unlock(this_rq);
 
        t0 = sched_clock_cpu(this_cpu);
@@ -12898,8 +12899,8 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
        if (this_rq->cfs.h_nr_queued && !pulled_task)
                pulled_task = 1;
 
-       /* Is there a task of a high priority class? */
-       if (this_rq->nr_running != this_rq->cfs.h_nr_queued)
+       /* If a higher prio class was modified, restart the pick */
+       if (rq_modified_above(this_rq, &fair_sched_class))
                pulled_task = -1;
 
 out:
@@ -13633,6 +13634,8 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
  */
 DEFINE_SCHED_CLASS(fair) = {
 
+       .queue_mask             = 2,
+
        .enqueue_task           = enqueue_task_fair,
        .dequeue_task           = dequeue_task_fair,
        .yield_task             = yield_task_fair,
index dee6e019dcf81fbdab568d952c00e4a111b07a73..055b0ddbcd54dfb1727d7925c2a2a2bb7e0e5bcd 100644 (file)
@@ -521,6 +521,8 @@ static void update_curr_idle(struct rq *rq)
  */
 DEFINE_SCHED_CLASS(idle) = {
 
+       .queue_mask             = 0,
+
        /* no enqueue/yield_task for idle tasks */
 
        /* dequeue is not valid, we print a debug message there: */
index c2347e485dc651b964379257f8f3991629407e34..9bc828d59121a3165c462dacc80a8094fbc2db5d 100644 (file)
@@ -2569,6 +2569,8 @@ static int task_is_throttled_rt(struct task_struct *p, int cpu)
 
 DEFINE_SCHED_CLASS(rt) = {
 
+       .queue_mask             = 4,
+
        .enqueue_task           = enqueue_task_rt,
        .dequeue_task           = dequeue_task_rt,
        .yield_task             = yield_task_rt,
index e3d271013c8b0f00f68fae21603b0739dc53e105..f4a323007dced65942244eeda33791686615da23 100644 (file)
@@ -1118,6 +1118,8 @@ struct rq {
        /* runqueue lock: */
        raw_spinlock_t          __lock;
 
+       /* Per class runqueue modification mask; bits in class order. */
+       unsigned int            queue_mask;
        unsigned int            nr_running;
 #ifdef CONFIG_NUMA_BALANCING
        unsigned int            nr_numa_running;
@@ -2414,6 +2416,15 @@ struct sched_class {
 #ifdef CONFIG_UCLAMP_TASK
        int uclamp_enabled;
 #endif
+       /*
+        * idle:  0
+        * ext:   1
+        * fair:  2
+        * rt:    4
+        * dl:    8
+        * stop: 16
+        */
+       unsigned int queue_mask;
 
        /*
         * move_queued_task/activate_task/enqueue_task: rq->lock
@@ -2571,6 +2582,20 @@ struct sched_class {
 #endif
 };
 
+/*
+ * Does not nest; only used around sched_class::pick_task() rq-lock-breaks.
+ */
+static inline void rq_modified_clear(struct rq *rq)
+{
+       rq->queue_mask = 0;
+}
+
+static inline bool rq_modified_above(struct rq *rq, const struct sched_class *class)
+{
+       unsigned int mask = class->queue_mask;
+       return rq->queue_mask & ~((mask << 1) - 1);
+}
+
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
        WARN_ON_ONCE(rq->donor != prev);
index 73aa8de190675ca6fae67f6c7ee7bc84cc47f0c4..d98c453c9b4eb50cb66d8452ee191af659bf1b5a 100644 (file)
@@ -98,6 +98,8 @@ static void update_curr_stop(struct rq *rq)
  */
 DEFINE_SCHED_CLASS(stop) = {
 
+       .queue_mask             = 16,
+
        .enqueue_task           = enqueue_task_stop,
        .dequeue_task           = dequeue_task_stop,
        .yield_task             = yield_task_stop,
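
The rq_modified_above() check works because the mask values double in class
order: for a class with mask m, (m << 1) - 1 covers m and every lower class,
so its complement keeps only the bits of higher classes. For fair (m == 2)
the expression is rq->queue_mask & ~3, which ignores ext (1) and fair (2)
themselves (idle contributes nothing with mask 0) and becomes non-zero as
soon as rt (4), dl (8) or stop (16) set their bit after the last
rq_modified_clear(); sched_balance_newidle() then reports -1 so the pick is
restarted.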