git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched: Update rq->avg_idle when a task is moved to an idle CPU
author: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Wed, 21 Jan 2026 09:31:53 +0000 (01:31 -0800)
committer: Peter Zijlstra <peterz@infradead.org>
Thu, 22 Jan 2026 10:11:21 +0000 (11:11 +0100)
Currently, rq->idle_stamp is only used to calculate avg_idle during
wakeups. This means other paths that move a task to an idle CPU such as
fork/clone, execve, or migrations, do not end the CPU's idle status in
the scheduler's eyes, leading to an inaccurate avg_idle.

This patch introduces update_rq_avg_idle() to provide a more accurate
measurement of CPU idle duration. By invoking this helper in
put_prev_task_idle(), we ensure avg_idle is updated whenever a CPU
stops being idle, regardless of how the new task arrived.

Testing on an 80-core Ampere Altra (ARMv8) with 6.19-rc5 baseline:
 - Hackbench : +7.2% performance gain at 16 threads.
 - Schbench: Reduced p99.9 tail latencies at high concurrency.

Signed-off-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20260121-v8-patch-series-v8-1-b7f1cbee5055@os.amperecomputing.com
kernel/sched/core.c
kernel/sched/idle.c
kernel/sched/sched.h

index 3cca012d1259cfced14ed38dcb6759719d9ea185..c5431afe23b05452e8c7ecb262feae1a9a8d7dac 100644 (file)
@@ -3613,6 +3613,18 @@ static inline void ttwu_do_wakeup(struct task_struct *p)
        trace_sched_wakeup(p);
 }
 
+/*
+ * update_rq_avg_idle - fold the idle period that just ended into rq->avg_idle.
+ *
+ * The time elapsed since rq->idle_stamp is passed to update_avg(), the
+ * resulting rq->avg_idle is capped at 2 * rq->max_idle_balance_cost, and
+ * rq->idle_stamp is cleared.
+ *
+ * A zero rq->idle_stamp is treated as "no idle period is being timed" and
+ * the update is skipped, exactly as the wakeup path did before this logic
+ * became a helper. Without the check, delta would be the raw rq_clock()
+ * value rather than an idle duration, and that bogus sample would be
+ * averaged into rq->avg_idle.
+ */
+void update_rq_avg_idle(struct rq *rq)
+{
+       u64 delta, max;
+
+       if (!rq->idle_stamp)
+               return;
+
+       delta = rq_clock(rq) - rq->idle_stamp;
+       max = 2*rq->max_idle_balance_cost;
+
+       update_avg(&rq->avg_idle, delta);
+
+       /* Keep the average bounded by twice the max idle-balance cost. */
+       if (rq->avg_idle > max)
+               rq->avg_idle = max;
+
+       rq->idle_stamp = 0;
+}
+
 static void
 ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
                 struct rq_flags *rf)
@@ -3648,18 +3660,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
                p->sched_class->task_woken(rq, p);
                rq_repin_lock(rq, rf);
        }
-
-       if (rq->idle_stamp) {
-               u64 delta = rq_clock(rq) - rq->idle_stamp;
-               u64 max = 2*rq->max_idle_balance_cost;
-
-               update_avg(&rq->avg_idle, delta);
-
-               if (rq->avg_idle > max)
-                       rq->avg_idle = max;
-
-               rq->idle_stamp = 0;
-       }
 }
 
 /*
index 65eb8f8c1a5d3a05dcdc2945c57fd58420697f68..aba5ad53c07d0fe456cab74750184cb456a4c0ea 100644 (file)
@@ -460,6 +460,7 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct t
 {
        update_curr_idle(rq);
        scx_update_idle(rq, false, true);
+       update_rq_avg_idle(rq);
 }
 
 static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
index 58c9d244f12b07d00c8b769e584ebeee3d2cffc8..127633b1377b5d93349d11f348a1741758409f2b 100644 (file)
@@ -1670,6 +1670,7 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 
 #endif /* !CONFIG_FAIR_GROUP_SCHED */
 
+extern void update_rq_avg_idle(struct rq *rq);
 extern void update_rq_clock(struct rq *rq);
 
 /*