git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched/fair: More complex proportional newidle balance
author: Peter Zijlstra <peterz@infradead.org>
Tue, 27 Jan 2026 15:17:48 +0000 (16:17 +0100)
committer: Peter Zijlstra <peterz@infradead.org>
Mon, 23 Feb 2026 17:04:09 +0000 (18:04 +0100)
It turns out that a few workloads (easyWave, fio) have a fairly low
success rate on newidle balance, but still benefit greatly from having
it anyway.

Luckily these workloads have a fairly low newidle rate, so the cost of
doing the newidle is relatively low, even if unsuccessful.

Add a simple rate-based part to the newidle ratio computation, such that
low-rate newidle will still have a high newidle ratio.

This cures the easyWave and fio workloads while not affecting the
schbench numbers either (which have a very high newidle rate).

Reported-by: Mario Roy <marioeroy@gmail.com>
Reported-by: "Mohamed Abuelfotoh, Hazem" <abuehaze@amazon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Mario Roy <marioeroy@gmail.com>
Tested-by: "Mohamed Abuelfotoh, Hazem" <abuehaze@amazon.com>
Link: https://patch.msgid.link/20260127151748.GA1079264@noisy.programming.kicks-ass.net
include/linux/sched/topology.h
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/topology.c

index 45c0022b91ced38e49c4fd37610bbc2c9c967f89..a1e1032426dcc0426cf5308218892e930f2758c9 100644 (file)
@@ -95,6 +95,7 @@ struct sched_domain {
        unsigned int newidle_call;
        unsigned int newidle_success;
        unsigned int newidle_ratio;
+       u64 newidle_stamp;
        u64 max_newidle_lb_cost;
        unsigned long last_decay_max_lb_cost;
 
index bf948db905ed1b3b385791c5a5fecf4f62a94de3..66afa0ac7396c6ebd9b533b1031e830fbb7c8257 100644 (file)
@@ -12289,7 +12289,30 @@ static inline void update_newidle_stats(struct sched_domain *sd, unsigned int su
        sd->newidle_success += success;
 
        if (sd->newidle_call >= 1024) {
-               sd->newidle_ratio = sd->newidle_success;
+               u64 now = sched_clock();
+               s64 delta = now - sd->newidle_stamp;
+               sd->newidle_stamp = now;
+               int ratio = 0;
+
+               if (delta < 0)
+                       delta = 0;
+
+               if (sched_feat(NI_RATE)) {
+                       /*
+                        * ratio  delta   freq
+                        *
+                        * 1024 -  4  s -  128 Hz
+                        *  512 -  2  s -  256 Hz
+                        *  256 -  1  s -  512 Hz
+                        *  128 - .5  s - 1024 Hz
+                        *   64 - .25 s - 2048 Hz
+                        */
+                       ratio = delta >> 22;
+               }
+
+               ratio += sd->newidle_success;
+
+               sd->newidle_ratio = min(1024, ratio);
                sd->newidle_call /= 2;
                sd->newidle_success /= 2;
        }
@@ -12996,7 +13019,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
                if (sd->flags & SD_BALANCE_NEWIDLE) {
                        unsigned int weight = 1;
 
-                       if (sched_feat(NI_RANDOM)) {
+                       if (sched_feat(NI_RANDOM) && sd->newidle_ratio < 1024) {
                                /*
                                 * Throw a 1k sided dice; and only run
                                 * newidle_balance according to the success
index 136a6584be79743045a2891681beb572c79de8e6..37d5928fa6dd58e261ea374489765fa975a0af14 100644 (file)
@@ -126,3 +126,4 @@ SCHED_FEAT(LATENCY_WARN, false)
  * Do newidle balancing proportional to its success rate using randomization.
  */
 SCHED_FEAT(NI_RANDOM, true)
+SCHED_FEAT(NI_RATE, true)
index 32dcddaead82d986bdf8ac30a636531a3547f6c6..061f8c85f5552ce460d79dc60690af68db29ca60 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/sched/isolation.h>
+#include <linux/sched/clock.h>
 #include <linux/bsearch.h>
 #include "sched.h"
 
@@ -1642,6 +1643,7 @@ sd_init(struct sched_domain_topology_level *tl,
        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
        int sd_id, sd_weight, sd_flags = 0;
        struct cpumask *sd_span;
+       u64 now = sched_clock();
 
        sd_weight = cpumask_weight(tl->mask(tl, cpu));
 
@@ -1679,6 +1681,7 @@ sd_init(struct sched_domain_topology_level *tl,
                .newidle_call           = 512,
                .newidle_success        = 256,
                .newidle_ratio          = 512,
+               .newidle_stamp          = now,
 
                .max_newidle_lb_cost    = 0,
                .last_decay_max_lb_cost = jiffies,