From cddeea58cdd141ab9855e5228b282d30500c42e7 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 2 Feb 2026 17:55:57 +0100 Subject: [PATCH] BUG/MINOR: cpu-topo: count cores not cpus to distinguish core types The per-cpu capacity of a cluster was taken into account since 3.2 with commit 6c88e27cf4 ("MEDIUM: cpu-topo: change "performance" to consider per-core capacity"). In cpu_policy_performance() and cpu_policy_efficiency(), we're trying to figure out which cores have more capacity than others by comparing their cluster's average capacity. However, contrary to what the code comment says, we're not averaging per core but per cpu, which makes a difference for CPUs mixing SMT with non-SMT cores on the same SoC, such as Intel's 14th gen CPUs. Indeed, on a machine where cpufreq is not enabled, all CPUs can be reported with a capacity of 1024, resulting in a big cluster of 16*1024, and 4 small clusters of 4*1024 each, giving an average of 1024 per CPU, making it impossible to distinguish one from the other. In this situation, both "cpu-policy performance" and "cpu-policy efficiency" enable all cores. But this is wrong: what needs to be taken into account in the division is the number of *cores*, not *cpus*, since that is what allows big clusters to be distinguished from little ones. This was not noticeable on the ARM machines the commit above aimed at fixing because there, the number of CPUs equals the number of cores. And on an x86 machine with cpufreq enabled, the frequencies continue to help spotting which ones are big/little. By using nb_cores instead of nb_cpus in the comparison and in the avg_capa compare function, it properly works again on x86 without affecting other machines with 1 CPU per core. This can be backported to 3.2. 
--- src/cpu_topo.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/cpu_topo.c b/src/cpu_topo.c index d7d39c90a..1af2e3520 100644 --- a/src/cpu_topo.c +++ b/src/cpu_topo.c @@ -686,7 +686,10 @@ int _cmp_cluster_avg_capa(const void *a, const void *b) { const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a; const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b; - return r->capa - l->capa; + + if (!r->nb_cores || !l->nb_cores) + return r->nb_cores - l->nb_cores; + return r->capa * l->nb_cores - l->capa * r->nb_cores; } /* re-order a cluster array by cluster index only */ @@ -1669,7 +1672,7 @@ static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int capa = 0; for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) { - if (capa && ha_cpu_clusters[cluster].capa * 10 < ha_cpu_clusters[cluster].nb_cpu * capa * 8) { + if (capa && ha_cpu_clusters[cluster].capa * 10 < ha_cpu_clusters[cluster].nb_cores * capa * 8) { /* This cluster is made of cores delivering less than * 80% of the performance of those of the previous * cluster, previous one, we're not interested in @@ -1680,8 +1683,8 @@ static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED; } } - else if (ha_cpu_clusters[cluster].nb_cpu) - capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cpu; + else if (ha_cpu_clusters[cluster].nb_cores) + capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cores; else capa = 0; } @@ -1714,7 +1717,7 @@ static int cpu_policy_efficiency(int policy, int tmin, int tmax, int gmin, int g capa = 0; for (cluster = cpu_topo_maxcpus - 1; cluster >= 0; cluster--) { - if (capa && ha_cpu_clusters[cluster].capa * 8 >= ha_cpu_clusters[cluster].nb_cpu * capa * 10) { + if (capa && ha_cpu_clusters[cluster].capa * 8 >= ha_cpu_clusters[cluster].nb_cores * capa * 10) { /* This cluster is made of cores each at last 25% faster * 
than those of the previous cluster, previous one, we're * not interested in using it. @@ -1724,8 +1727,8 @@ static int cpu_policy_efficiency(int policy, int tmin, int tmax, int gmin, int g ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED; } } - else if (ha_cpu_clusters[cluster].nb_cpu) - capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cpu; + else if (ha_cpu_clusters[cluster].nb_cores) + capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cores; else capa = 0; } -- 2.47.3