From: Greg Kroah-Hartman Date: Thu, 8 Jan 2026 10:15:51 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.1.160~58 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e60b112244aecba0de10451381e7adbb607026a6;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: sched-fair-proportional-newidle-balance.patch sched-fair-small-cleanup-to-sched_balance_newidle.patch sched-fair-small-cleanup-to-update_newidle_cost.patch --- diff --git a/queue-6.12/sched-fair-proportional-newidle-balance.patch b/queue-6.12/sched-fair-proportional-newidle-balance.patch new file mode 100644 index 0000000000..519e5bf26e --- /dev/null +++ b/queue-6.12/sched-fair-proportional-newidle-balance.patch @@ -0,0 +1,202 @@ +From 33cf66d88306663d16e4759e9d24766b0aaa2e17 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 7 Nov 2025 17:01:31 +0100 +Subject: sched/fair: Proportional newidle balance + +From: Peter Zijlstra + +commit 33cf66d88306663d16e4759e9d24766b0aaa2e17 upstream. + +Add a randomized algorithm that runs newidle balancing proportional to +its success rate. + +This improves schbench significantly: + + 6.18-rc4: 2.22 Mrps/s + 6.18-rc4+revert: 2.04 Mrps/s + 6.18-rc4+revert+random: 2.18 Mrps/S + +Conversely, per Adam Li this affects SpecJBB slightly, reducing it by 1%: + + 6.17: -6% + 6.17+revert: 0% + 6.17+revert+random: -1% + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Tested-by: Dietmar Eggemann +Tested-by: Chris Mason +Link: https://lkml.kernel.org/r/6825c50d-7fa7-45d8-9b81-c6e7e25738e2@meta.com +Link: https://patch.msgid.link/20251107161739.770122091@infradead.org +[ Ajay: Modified to apply on v6.12 ] +Signed-off-by: Ajay Kaher +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched/topology.h | 3 ++ + kernel/sched/core.c | 3 ++ + kernel/sched/fair.c | 44 +++++++++++++++++++++++++++++++++++++---- + kernel/sched/features.h | 5 ++++ + kernel/sched/sched.h | 7 ++++++ + kernel/sched/topology.c | 6 +++++ + 6 files changed, 64 insertions(+), 4 deletions(-) + +--- a/include/linux/sched/topology.h ++++ b/include/linux/sched/topology.h +@@ -106,6 +106,9 @@ struct sched_domain { + unsigned int nr_balance_failed; /* initialise to 0 */ + + /* idle_balance() stats */ ++ unsigned int newidle_call; ++ unsigned int newidle_success; ++ unsigned int newidle_ratio; + u64 max_newidle_lb_cost; + unsigned long last_decay_max_lb_cost; + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -118,6 +118,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_updat + EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp); + + DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++DEFINE_PER_CPU(struct rnd_state, sched_rnd_state); + + #ifdef CONFIG_SCHED_DEBUG + /* +@@ -8335,6 +8336,8 @@ void __init sched_init_smp(void) + { + sched_init_numa(NUMA_NO_NODE); + ++ prandom_init_once(&sched_rnd_state); ++ + /* + * There's no userspace yet to cause hotplug operations; hence all the + * CPU masks are stable and all blatant races in the below code cannot +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -12186,11 +12186,27 @@ void update_max_interval(void) + max_load_balance_interval = HZ*num_online_cpus()/10; + } + +-static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost) ++static inline void update_newidle_stats(struct sched_domain *sd, unsigned int success) ++{ ++ sd->newidle_call++; ++ sd->newidle_success += success; ++ ++ if (sd->newidle_call >= 1024) { ++ sd->newidle_ratio = sd->newidle_success; ++ sd->newidle_call /= 2; ++ 
sd->newidle_success /= 2; ++ } ++} ++ ++static inline bool ++update_newidle_cost(struct sched_domain *sd, u64 cost, unsigned int success) + { + unsigned long next_decay = sd->last_decay_max_lb_cost + HZ; + unsigned long now = jiffies; + ++ if (cost) ++ update_newidle_stats(sd, success); ++ + if (cost > sd->max_newidle_lb_cost) { + /* + * Track max cost of a domain to make sure to not delay the +@@ -12238,7 +12254,7 @@ static void sched_balance_domains(struct + * Decay the newidle max times here because this is a regular + * visit to all the domains. + */ +- need_decay = update_newidle_cost(sd, 0); ++ need_decay = update_newidle_cost(sd, 0, 0); + max_cost += sd->max_newidle_lb_cost; + + /* +@@ -12896,6 +12912,22 @@ static int sched_balance_newidle(struct + break; + + if (sd->flags & SD_BALANCE_NEWIDLE) { ++ unsigned int weight = 1; ++ ++ if (sched_feat(NI_RANDOM)) { ++ /* ++ * Throw a 1k sided dice; and only run ++ * newidle_balance according to the success ++ * rate. ++ */ ++ u32 d1k = sched_rng() % 1024; ++ weight = 1 + sd->newidle_ratio; ++ if (d1k > weight) { ++ update_newidle_stats(sd, 0); ++ continue; ++ } ++ weight = (1024 + weight/2) / weight; ++ } + + pulled_task = sched_balance_rq(this_cpu, this_rq, + sd, CPU_NEWLY_IDLE, +@@ -12903,10 +12935,14 @@ static int sched_balance_newidle(struct + + t1 = sched_clock_cpu(this_cpu); + domain_cost = t1 - t0; +- update_newidle_cost(sd, domain_cost); +- + curr_cost += domain_cost; + t0 = t1; ++ ++ /* ++ * Track max cost of a domain to make sure to not delay the ++ * next wakeup on the CPU. ++ */ ++ update_newidle_cost(sd, domain_cost, weight * !!pulled_task); + } + + /* +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -122,3 +122,8 @@ SCHED_FEAT(WA_BIAS, true) + SCHED_FEAT(UTIL_EST, true) + + SCHED_FEAT(LATENCY_WARN, false) ++ ++/* ++ * Do newidle balancing proportional to its success rate using randomization. ++ */ ++SCHED_FEAT(NI_RANDOM, true) +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -5,6 +5,7 @@ + #ifndef _KERNEL_SCHED_SCHED_H + #define _KERNEL_SCHED_SCHED_H + ++#include + #include + #include + #include +@@ -1348,6 +1349,12 @@ static inline bool is_migration_disabled + } + + DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); ++DECLARE_PER_CPU(struct rnd_state, sched_rnd_state); ++ ++static inline u32 sched_rng(void) ++{ ++ return prandom_u32_state(this_cpu_ptr(&sched_rnd_state)); ++} + + #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) + #define this_rq() this_cpu_ptr(&runqueues) +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1632,6 +1632,12 @@ sd_init(struct sched_domain_topology_lev + + .last_balance = jiffies, + .balance_interval = sd_weight, ++ ++ /* 50% success rate */ ++ .newidle_call = 512, ++ .newidle_success = 256, ++ .newidle_ratio = 512, ++ + .max_newidle_lb_cost = 0, + .last_decay_max_lb_cost = jiffies, + .child = child, diff --git a/queue-6.12/sched-fair-small-cleanup-to-sched_balance_newidle.patch b/queue-6.12/sched-fair-small-cleanup-to-sched_balance_newidle.patch new file mode 100644 index 0000000000..e2fd259885 --- /dev/null +++ b/queue-6.12/sched-fair-small-cleanup-to-sched_balance_newidle.patch @@ -0,0 +1,46 @@ +From e78e70dbf603c1425f15f32b455ca148c932f6c1 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 7 Nov 2025 17:01:24 +0100 +Subject: sched/fair: Small cleanup to sched_balance_newidle() + +From: Peter Zijlstra + +commit e78e70dbf603c1425f15f32b455ca148c932f6c1 upstream. + +Pull out the !sd check to simplify code. 
+ +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Tested-by: Dietmar Eggemann +Tested-by: Chris Mason +Link: https://patch.msgid.link/20251107161739.525916173@infradead.org +[ Ajay: Modified to apply on v6.12 ] +Signed-off-by: Ajay Kaher +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -12864,14 +12864,16 @@ static int sched_balance_newidle(struct + + rcu_read_lock(); + sd = rcu_dereference_check_sched_domain(this_rq->sd); ++ if (!sd) { ++ rcu_read_unlock(); ++ goto out; ++ } + + if (!get_rd_overloaded(this_rq->rd) || +- (sd && this_rq->avg_idle < sd->max_newidle_lb_cost)) { ++ this_rq->avg_idle < sd->max_newidle_lb_cost) { + +- if (sd) +- update_next_balance(sd, &next_balance); ++ update_next_balance(sd, &next_balance); + rcu_read_unlock(); +- + goto out; + } + rcu_read_unlock(); diff --git a/queue-6.12/sched-fair-small-cleanup-to-update_newidle_cost.patch b/queue-6.12/sched-fair-small-cleanup-to-update_newidle_cost.patch new file mode 100644 index 0000000000..6ed646aeab --- /dev/null +++ b/queue-6.12/sched-fair-small-cleanup-to-update_newidle_cost.patch @@ -0,0 +1,55 @@ +From 08d473dd8718e4a4d698b1113a14a40ad64a909b Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 7 Nov 2025 17:01:27 +0100 +Subject: sched/fair: Small cleanup to update_newidle_cost() + +From: Peter Zijlstra + +commit 08d473dd8718e4a4d698b1113a14a40ad64a909b upstream. + +Simplify code by adding a few variables. + +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Tested-by: Dietmar Eggemann +Tested-by: Chris Mason +Link: https://patch.msgid.link/20251107161739.655208666@infradead.org +[ Ajay: Modified to apply on v6.12 ] +Signed-off-by: Ajay Kaher +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -12188,22 +12188,25 @@ void update_max_interval(void) + + static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost) + { ++ unsigned long next_decay = sd->last_decay_max_lb_cost + HZ; ++ unsigned long now = jiffies; ++ + if (cost > sd->max_newidle_lb_cost) { + /* + * Track max cost of a domain to make sure to not delay the + * next wakeup on the CPU. + */ + sd->max_newidle_lb_cost = cost; +- sd->last_decay_max_lb_cost = jiffies; +- } else if (time_after(jiffies, sd->last_decay_max_lb_cost + HZ)) { ++ sd->last_decay_max_lb_cost = now; ++ ++ } else if (time_after(now, next_decay)) { + /* + * Decay the newidle max times by ~1% per second to ensure that + * it is not outdated and the current max cost is actually + * shorter. + */ + sd->max_newidle_lb_cost = (sd->max_newidle_lb_cost * 253) / 256; +- sd->last_decay_max_lb_cost = jiffies; +- ++ sd->last_decay_max_lb_cost = now; + return true; + } + diff --git a/queue-6.12/series b/queue-6.12/series index b1248cc2f4..8d5276c600 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -8,3 +8,6 @@ net-phy-mediatek-fix-nvmem-cell-reference-leak-in-mt798x_phy_calibration.patch drm-amdgpu-forward-vmid-reservation-errors.patch cpufreq-intel_pstate-check-ida-only-before-msr_ia32_perf_ctl-writes.patch net-remove-rtnl-dance-for-siocbraddif-and-siocbrdelif.patch +sched-fair-small-cleanup-to-sched_balance_newidle.patch +sched-fair-small-cleanup-to-update_newidle_cost.patch +sched-fair-proportional-newidle-balance.patch
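
Note: the gating added by sched-fair-proportional-newidle-balance.patch can be sanity-checked outside the kernel. The sketch below is a minimal user-space model of that logic, namely the counter folding from update_newidle_stats() and the 1024-sided dice gate with its weight compensation. It is illustrative only: the ni_* names and the simulation harness are invented here, a plain rand() stands in for the per-CPU prandom state the patch seeds in sched_init_smp(), and the 10% pull rate is an assumed workload, not a measurement.

/* ni_random_sketch.c - user-space model of the NI_RANDOM gating; not kernel code. */
#include <stdio.h>
#include <stdlib.h>

struct ni_stats {
	unsigned int call;	/* attempts seen (folded in half at 1024) */
	unsigned int success;	/* weighted successes (folded alongside)  */
	unsigned int ratio;	/* ~successes per 1024 attempts           */
};

/* Mirrors update_newidle_stats(): once 1024 attempts have been seen,
 * publish the accumulated success count as the new ratio and halve
 * both counters so the estimate keeps adapting. */
static void ni_update_stats(struct ni_stats *st, unsigned int success)
{
	st->call++;
	st->success += success;

	if (st->call >= 1024) {
		st->ratio = st->success;
		st->call /= 2;
		st->success /= 2;
	}
}

/* Mirrors the NI_RANDOM gate: roll a 1024-sided die and only run the
 * balance pass with probability proportional to the tracked success
 * ratio.  Returns 0 when the pass is skipped, otherwise the weight a
 * successful pass is credited with (~1024 / probability) so the ratio
 * still approximates successes per real attempt. */
static unsigned int ni_should_balance(struct ni_stats *st)
{
	unsigned int weight = 1 + st->ratio;
	unsigned int d1k = (unsigned int)rand() % 1024;

	if (d1k > weight) {
		ni_update_stats(st, 0);	/* skipped attempt, counts as no success */
		return 0;
	}
	return (1024 + weight / 2) / weight;
}

int main(void)
{
	/* Start from the 50% prior sd_init() uses in the patch. */
	struct ni_stats st = { .call = 512, .success = 256, .ratio = 512 };
	unsigned int attempts = 100000, ran = 0;

	srand(1);
	for (unsigned int i = 0; i < attempts; i++) {
		unsigned int weight = ni_should_balance(&st);

		if (!weight)
			continue;
		ran++;
		/* Assume ~10% of newidle passes actually pull a task. */
		int pulled = (rand() % 100) < 10;
		ni_update_stats(&st, weight * !!pulled);
	}

	printf("ran %u/%u passes, learned ratio ~%u/1024\n",
	       ran, attempts, st.ratio);
	return 0;
}

With the assumed 10% pull rate the learned ratio settles around 100/1024 and roughly one attempt in ten actually runs the balance pass, which is the self-tuning behaviour the patch describes: newidle balancing is performed in proportion to how often it has recently succeeded.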