From: Willy Tarreau Date: Thu, 13 Mar 2025 14:18:58 +0000 (+0100) Subject: MINOR: cpu-topo: ignore excess of too small clusters X-Git-Tag: v3.2-dev8~50 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=72633666065c045ef67135fae611ae150dbc37e5;p=thirdparty%2Fhaproxy.git MINOR: cpu-topo: ignore excess of too small clusters On some Arm systems (typically A76/N1) where CPUs can be associated in pairs, clusters are reported while they have no impact on I/O etc. Yet it's possible to have tens of clusters of 2 CPUs each, which is counterproductive since it does not even allow starting enough threads. Let's detect this situation as soon as there are at least 4 clusters each having 2 CPUs or fewer, which is already very suspicious. In this case, all these clusters will be reset as meaningless. In the worst case if needed they'll be re-assigned based on L2/L3. --- diff --git a/src/cpu_topo.c b/src/cpu_topo.c index dc1d446e1..d4bdd9f7e 100644 --- a/src/cpu_topo.c +++ b/src/cpu_topo.c @@ -531,6 +531,7 @@ void cpu_fixup_topology(void) int cpu, cpu2; int curr_id, prev_id; int min_id, neg; + int cl_cpu, small_cl; /* fill the package id, node id and thread_id. First we'll build a bitmap * of all unassigned ones so that we can spot the lowest unassigned one @@ -601,6 +602,73 @@ void cpu_fixup_topology(void) } } + /* Some machines (typically ARM cortex A76 and Neoverse-N1) report 1 + * cluster per pair of cores due to the internal architecture. While + * this can occasionally make sense (i.e. big.LITTLE etc), when there + * are many clusters of few cores, this is totally pointless. Here + * we'll check if there are at least 4 2-cpu clusters, and if so, all + * the 2-cpu clusters will be cancelled. 
+ */ + cpu_reorder_by_cluster(ha_cpu_topo, cpu_topo_maxcpus); + + curr_id = -1; + cl_cpu = small_cl = 0; + for (cpu = cpu2 = 0; cpu <= cpu_topo_lastcpu; cpu++) { + if (ha_cpu_topo[cpu].cl_gid < 0) + continue; + + if (ha_cpu_topo[cpu].st & HA_CPU_F_EXCL_MASK) + continue; + + if (ha_cpu_topo[cpu].cl_gid != curr_id) { + if (curr_id >= 0 && cl_cpu <= 2) + small_cl++; + cl_cpu = 0; + cpu2 = cpu; + curr_id = ha_cpu_topo[cpu].cl_gid; + } + cl_cpu++; + } + + /* last one */ + if (cl_cpu && cl_cpu <= 2) + small_cl++; + + /* here we have the number of small clusters (<=2 cpu) in small_cl */ + if (small_cl >= 4) { + for (cpu = cpu2 = 0; cpu <= cpu_topo_lastcpu; cpu++) { + if (ha_cpu_topo[cpu].cl_gid < 0) + continue; + + if (ha_cpu_topo[cpu].st & HA_CPU_F_EXCL_MASK) + continue; + + if (ha_cpu_topo[cpu].cl_gid != curr_id) { + if (curr_id >= 0 && cl_cpu <= 2) { + /* small cluster found for curr_id */ + while (cpu2 < cpu) { + if (ha_cpu_topo[cpu2].cl_gid == curr_id) + ha_cpu_topo[cpu2].cl_gid = -1; + cpu2++; + } + } + cl_cpu = 0; + cpu2 = cpu; + curr_id = ha_cpu_topo[cpu].cl_gid; + } + cl_cpu++; + } + + /* handle the last cluster */ + while (curr_id >= 0 && cl_cpu <= 2 && cpu2 < cpu) { + if (ha_cpu_topo[cpu2].cl_gid == curr_id) + ha_cpu_topo[cpu2].cl_gid = -1; + cpu2++; + } + } + + cpu_reorder_by_index(ha_cpu_topo, cpu_topo_maxcpus); + /* assign capacity if not filled, based on the number of threads on the * core: in a same package, SMT-capable cores are generally those * optimized for performers while non-SMT ones are generally those