From: Greg Kroah-Hartman
Date: Thu, 8 Jan 2026 10:16:02 +0000 (+0100)
Subject: 6.18-stable patches
X-Git-Tag: v6.1.160~57
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0701db3b388c76c6d38f2ad636ebcb1b512df712;p=thirdparty%2Fkernel%2Fstable-queue.git

6.18-stable patches

added patches:
      sched-fair-proportional-newidle-balance.patch
      sched-fair-small-cleanup-to-sched_balance_newidle.patch
      sched-fair-small-cleanup-to-update_newidle_cost.patch
---

diff --git a/queue-6.18/sched-fair-proportional-newidle-balance.patch b/queue-6.18/sched-fair-proportional-newidle-balance.patch
new file mode 100644
index 0000000000..3d518be510
--- /dev/null
+++ b/queue-6.18/sched-fair-proportional-newidle-balance.patch
@@ -0,0 +1,200 @@
+From 33cf66d88306663d16e4759e9d24766b0aaa2e17 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Fri, 7 Nov 2025 17:01:31 +0100
+Subject: sched/fair: Proportional newidle balance
+
+From: Peter Zijlstra
+
+commit 33cf66d88306663d16e4759e9d24766b0aaa2e17 upstream.
+
+Add a randomized algorithm that runs newidle balancing proportional to
+its success rate.
+
+This improves schbench significantly:
+
+  6.18-rc4:               2.22 Mrps/s
+  6.18-rc4+revert:        2.04 Mrps/s
+  6.18-rc4+revert+random: 2.18 Mrps/S
+
+Conversely, per Adam Li this affects SpecJBB slightly, reducing it by 1%:
+
+  6.17:               -6%
+  6.17+revert:         0%
+  6.17+revert+random: -1%
+
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Dietmar Eggemann
+Tested-by: Dietmar Eggemann
+Tested-by: Chris Mason
+Link: https://lkml.kernel.org/r/6825c50d-7fa7-45d8-9b81-c6e7e25738e2@meta.com
+Link: https://patch.msgid.link/20251107161739.770122091@infradead.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/sched/topology.h |  3 ++
+ kernel/sched/core.c            |  3 ++
+ kernel/sched/fair.c            | 44 +++++++++++++++++++++++++++++++++++++----
+ kernel/sched/features.h        |  5 ++++
+ kernel/sched/sched.h           |  7 ++++++
+ kernel/sched/topology.c        |  6 +++++
+ 6 files changed, 64 insertions(+), 4 deletions(-)
+
+--- a/include/linux/sched/topology.h
++++ b/include/linux/sched/topology.h
+@@ -92,6 +92,9 @@ struct sched_domain {
+         unsigned int nr_balance_failed; /* initialise to 0 */
+ 
+         /* idle_balance() stats */
++        unsigned int newidle_call;
++        unsigned int newidle_success;
++        unsigned int newidle_ratio;
+         u64 max_newidle_lb_cost;
+         unsigned long last_decay_max_lb_cost;
+ 
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -121,6 +121,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_updat
+ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
+ 
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
++DEFINE_PER_CPU(struct rnd_state, sched_rnd_state);
+ 
+ #ifdef CONFIG_SCHED_PROXY_EXEC
+ DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
+@@ -8591,6 +8592,8 @@ void __init sched_init_smp(void)
+ {
+         sched_init_numa(NUMA_NO_NODE);
+ 
++        prandom_init_once(&sched_rnd_state);
++
+         /*
+          * There's no userspace yet to cause hotplug operations; hence all the
+          * CPU masks are stable and all blatant races in the below code cannot
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -12122,11 +12122,27 @@ void update_max_interval(void)
+         max_load_balance_interval = HZ*num_online_cpus()/10;
+ }
+ 
+-static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost)
++static inline void update_newidle_stats(struct sched_domain *sd, unsigned int success)
++{
++        sd->newidle_call++;
++        sd->newidle_success += success;
++
++        if (sd->newidle_call >= 1024) {
++                sd->newidle_ratio = sd->newidle_success;
++                sd->newidle_call /= 2;
++                sd->newidle_success /= 2;
++        }
++}
++
++static inline bool
++update_newidle_cost(struct sched_domain *sd, u64 cost, unsigned int success)
+ {
+         unsigned long next_decay = sd->last_decay_max_lb_cost + HZ;
+         unsigned long now = jiffies;
+ 
++        if (cost)
++                update_newidle_stats(sd, success);
++
+         if (cost > sd->max_newidle_lb_cost) {
+                 /*
+                  * Track max cost of a domain to make sure to not delay the
+@@ -12174,7 +12190,7 @@ static void sched_balance_domains(struct
+                  * Decay the newidle max times here because this is a regular
+                  * visit to all the domains.
+                  */
+-                need_decay = update_newidle_cost(sd, 0);
++                need_decay = update_newidle_cost(sd, 0, 0);
+                 max_cost += sd->max_newidle_lb_cost;
+ 
+                 /*
+@@ -12819,6 +12835,22 @@ static int sched_balance_newidle(struct
+                         break;
+ 
+                 if (sd->flags & SD_BALANCE_NEWIDLE) {
++                        unsigned int weight = 1;
++
++                        if (sched_feat(NI_RANDOM)) {
++                                /*
++                                 * Throw a 1k sided dice; and only run
++                                 * newidle_balance according to the success
++                                 * rate.
++                                 */
++                                u32 d1k = sched_rng() % 1024;
++                                weight = 1 + sd->newidle_ratio;
++                                if (d1k > weight) {
++                                        update_newidle_stats(sd, 0);
++                                        continue;
++                                }
++                                weight = (1024 + weight/2) / weight;
++                        }
+ 
+                         pulled_task = sched_balance_rq(this_cpu, this_rq,
+                                                        sd, CPU_NEWLY_IDLE,
+@@ -12826,10 +12858,14 @@ static int sched_balance_newidle(struct
+ 
+                         t1 = sched_clock_cpu(this_cpu);
+                         domain_cost = t1 - t0;
+-                        update_newidle_cost(sd, domain_cost);
+-
+                         curr_cost += domain_cost;
+                         t0 = t1;
++
++                        /*
++                         * Track max cost of a domain to make sure to not delay the
++                         * next wakeup on the CPU.
++                         */
++                        update_newidle_cost(sd, domain_cost, weight * !!pulled_task);
+                 }
+ 
+                 /*
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -121,3 +121,8 @@ SCHED_FEAT(WA_BIAS, true)
+ SCHED_FEAT(UTIL_EST, true)
+ 
+ SCHED_FEAT(LATENCY_WARN, false)
++
++/*
++ * Do newidle balancing proportional to its success rate using randomization.
++ */
++SCHED_FEAT(NI_RANDOM, true)
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -5,6 +5,7 @@
+ #ifndef _KERNEL_SCHED_SCHED_H
+ #define _KERNEL_SCHED_SCHED_H
+ 
++#include
+ #include
+ #include
+ #include
+@@ -1349,6 +1350,12 @@ static inline bool is_migration_disabled
+ }
+ 
+ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
++DECLARE_PER_CPU(struct rnd_state, sched_rnd_state);
++
++static inline u32 sched_rng(void)
++{
++        return prandom_u32_state(this_cpu_ptr(&sched_rnd_state));
++}
+ 
+ #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
+ #define this_rq() this_cpu_ptr(&runqueues)
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -1662,6 +1662,12 @@ sd_init(struct sched_domain_topology_lev
+ 
+                 .last_balance = jiffies,
+                 .balance_interval = sd_weight,
++
++                /* 50% success rate */
++                .newidle_call = 512,
++                .newidle_success = 256,
++                .newidle_ratio = 512,
++
+                 .max_newidle_lb_cost = 0,
+                 .last_decay_max_lb_cost = jiffies,
+                 .child = child,
diff --git a/queue-6.18/sched-fair-small-cleanup-to-sched_balance_newidle.patch b/queue-6.18/sched-fair-small-cleanup-to-sched_balance_newidle.patch
new file mode 100644
index 0000000000..a219feccd3
--- /dev/null
+++ b/queue-6.18/sched-fair-small-cleanup-to-sched_balance_newidle.patch
@@ -0,0 +1,44 @@
+From e78e70dbf603c1425f15f32b455ca148c932f6c1 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Fri, 7 Nov 2025 17:01:24 +0100
+Subject: sched/fair: Small cleanup to sched_balance_newidle()
+
+From: Peter Zijlstra
+
+commit e78e70dbf603c1425f15f32b455ca148c932f6c1 upstream.
+
+Pull out the !sd check to simplify code.
+
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Dietmar Eggemann
+Tested-by: Dietmar Eggemann
+Tested-by: Chris Mason
+Link: https://patch.msgid.link/20251107161739.525916173@infradead.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched/fair.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -12787,14 +12787,16 @@ static int sched_balance_newidle(struct
+ 
+         rcu_read_lock();
+         sd = rcu_dereference_check_sched_domain(this_rq->sd);
++        if (!sd) {
++                rcu_read_unlock();
++                goto out;
++        }
+ 
+         if (!get_rd_overloaded(this_rq->rd) ||
+-            (sd && this_rq->avg_idle < sd->max_newidle_lb_cost)) {
++            this_rq->avg_idle < sd->max_newidle_lb_cost) {
+ 
+-                if (sd)
+-                        update_next_balance(sd, &next_balance);
++                update_next_balance(sd, &next_balance);
+                 rcu_read_unlock();
+-
+                 goto out;
+         }
+         rcu_read_unlock();
diff --git a/queue-6.18/sched-fair-small-cleanup-to-update_newidle_cost.patch b/queue-6.18/sched-fair-small-cleanup-to-update_newidle_cost.patch
new file mode 100644
index 0000000000..c61af7cbc9
--- /dev/null
+++ b/queue-6.18/sched-fair-small-cleanup-to-update_newidle_cost.patch
@@ -0,0 +1,53 @@
+From 08d473dd8718e4a4d698b1113a14a40ad64a909b Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Fri, 7 Nov 2025 17:01:27 +0100
+Subject: sched/fair: Small cleanup to update_newidle_cost()
+
+From: Peter Zijlstra
+
+commit 08d473dd8718e4a4d698b1113a14a40ad64a909b upstream.
+
+Simplify code by adding a few variables.
+
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Dietmar Eggemann
+Tested-by: Dietmar Eggemann
+Tested-by: Chris Mason
+Link: https://patch.msgid.link/20251107161739.655208666@infradead.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched/fair.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -12124,22 +12124,25 @@ void update_max_interval(void)
+ 
+ static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost)
+ {
++        unsigned long next_decay = sd->last_decay_max_lb_cost + HZ;
++        unsigned long now = jiffies;
++
+         if (cost > sd->max_newidle_lb_cost) {
+                 /*
+                  * Track max cost of a domain to make sure to not delay the
+                  * next wakeup on the CPU.
+                  */
+                 sd->max_newidle_lb_cost = cost;
+-                sd->last_decay_max_lb_cost = jiffies;
+-        } else if (time_after(jiffies, sd->last_decay_max_lb_cost + HZ)) {
++                sd->last_decay_max_lb_cost = now;
++
++        } else if (time_after(now, next_decay)) {
+                 /*
+                  * Decay the newidle max times by ~1% per second to ensure that
+                  * it is not outdated and the current max cost is actually
+                  * shorter.
+                  */
+                 sd->max_newidle_lb_cost = (sd->max_newidle_lb_cost * 253) / 256;
+-                sd->last_decay_max_lb_cost = jiffies;
+-
++                sd->last_decay_max_lb_cost = now;
+                 return true;
+         }
+ 
diff --git a/queue-6.18/series b/queue-6.18/series
index 5858c5d0f8..c4f9404334 100644
--- a/queue-6.18/series
+++ b/queue-6.18/series
@@ -1 +1,4 @@
 mptcp-ensure-context-reset-on-disconnect.patch
+sched-fair-small-cleanup-to-sched_balance_newidle.patch
+sched-fair-small-cleanup-to-update_newidle_cost.patch
+sched-fair-proportional-newidle-balance.patch