From cddeea58cdd141ab9855e5228b282d30500c42e7 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Mon, 2 Feb 2026 17:55:57 +0100
Subject: [PATCH] BUG/MINOR: cpu-topo: count cores not cpus to distinguish core
 types

The per-cpu capacity of a cluster was taken into account since 3.2 with
commit 6c88e27cf4 ("MEDIUM: cpu-topo: change "performance" to consider
per-core capacity").

In cpu_policy_performance() and cpu_policy_efficiency(), we're trying to
figure out which cores have more capacity than others by comparing their
cluster's average capacity. However, contrary to what the comment says,
we're not averaging per core but per cpu, which makes a difference for
CPUs mixing SMT with non-SMT cores on the same SoC, such as Intel's 14th
gen CPUs. Indeed, on a machine where cpufreq is not enabled, all CPUs
can be reported with a capacity of 1024, resulting in a big cluster of
16*1024, and 4 small clusters of 4*1024 each, giving an average of 1024
per CPU, making it impossible to distinguish one from the other. In this
situation, both "cpu-policy performance" and "cpu-policy efficiency"
enable all cores.

But this is wrong: what needs to be taken into account in the division is
the number of *cores*, not *cpus*, since that is what allows big clusters
to be distinguished from little ones. This was not noticeable on the ARM
machines the commit above aimed at fixing because there, the number of
CPUs equals the number of cores. And on an x86 machine with cpufreq
enabled, the frequencies continue to help spot which ones are big/little.

Using nb_cores instead of nb_cpu in the comparisons and in the avg_capa
compare function makes it work properly again on x86 without affecting
other machines with 1 CPU per core.

This can be backported to 3.2.
---
 src/cpu_topo.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/cpu_topo.c b/src/cpu_topo.c
index d7d39c90a..1af2e3520 100644
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -686,7 +686,10 @@ int _cmp_cluster_avg_capa(const void *a, const void *b)
 {
 	const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
 	const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
-	return r->capa - l->capa;
+
+	if (!r->nb_cores || !l->nb_cores)
+		return r->nb_cores - l->nb_cores;
+	return r->capa * l->nb_cores - l->capa * r->nb_cores;
 }
 
 /* re-order a cluster array by cluster index only */
@@ -1669,7 +1672,7 @@ static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int
 
 	capa = 0;
 	for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) {
-		if (capa && ha_cpu_clusters[cluster].capa * 10 < ha_cpu_clusters[cluster].nb_cpu * capa * 8) {
+		if (capa && ha_cpu_clusters[cluster].capa * 10 < ha_cpu_clusters[cluster].nb_cores * capa * 8) {
 			/* This cluster is made of cores delivering less than
 			 * 80% of the performance of those of the previous
 			 * cluster, previous one, we're not interested in
@@ -1680,8 +1683,8 @@ static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int
 					ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
 			}
 		}
-		else if (ha_cpu_clusters[cluster].nb_cpu)
-			capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cpu;
+		else if (ha_cpu_clusters[cluster].nb_cores)
+			capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cores;
 		else
 			capa = 0;
 	}
@@ -1714,7 +1717,7 @@ static int cpu_policy_efficiency(int policy, int tmin, int tmax, int gmin, int g
 
 	capa = 0;
 	for (cluster = cpu_topo_maxcpus - 1; cluster >= 0; cluster--) {
-		if (capa && ha_cpu_clusters[cluster].capa * 8 >= ha_cpu_clusters[cluster].nb_cpu * capa * 10) {
+		if (capa && ha_cpu_clusters[cluster].capa * 8 >= ha_cpu_clusters[cluster].nb_cores * capa * 10) {
 			/* This cluster is made of cores each at last 25% faster
 			 * than those of the previous cluster, previous one, we're
 			 * not interested in using it.
@@ -1724,8 +1727,8 @@ static int cpu_policy_efficiency(int policy, int tmin, int tmax, int gmin, int g
 					ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
 			}
 		}
-		else if (ha_cpu_clusters[cluster].nb_cpu)
-			capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cpu;
+		else if (ha_cpu_clusters[cluster].nb_cores)
+			capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cores;
 		else
 			capa = 0;
 	}
-- 
2.47.3