git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: cpu-topo: let the "group-by-cluster" split groups
author: Willy Tarreau <w@1wt.eu>
Thu, 13 Mar 2025 14:41:00 +0000 (15:41 +0100)
committer: Willy Tarreau <w@1wt.eu>
Fri, 14 Mar 2025 17:33:16 +0000 (18:33 +0100)
When a cluster is too large to fit into a single group, let's split it
into several groups of near-equal size (two in the common case), which
will still be allowed to use all the CPUs of the cluster. This allows
haproxy to start all the threads with a minimum number of groups (e.g.
2x40 for 80 cores).

src/cpu_topo.c

index cd823272fdcfeda65e82eda8fb1f29714d4bdba3..b44c1005b7753e4f4f7a5df513f99cdd1779ee39 100644 (file)
@@ -973,6 +973,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
        int cpu, cpu_start;
        int cpu_count;
        int cid, lcid;
+       int thr_per_grp, nb_grp;
        int thr;
 
        if (global.nbthread)
@@ -984,7 +985,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
        /* iterate over each new cluster */
        lcid = -1;
        cpu_start = 0;
-       while (global.nbtgroups < MAX_TGROUPS) {
+       while (global.nbtgroups < MAX_TGROUPS && global.nbthread < MAX_THREADS) {
                ha_cpuset_zero(&node_cpu_set);
                cid = -1; cpu_count = 0;
 
@@ -1010,35 +1011,49 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
                 * number of CPUs in this cluster, and cpu_start is the next
                 * cpu to restart from to scan for new clusters.
                 */
-               if (cid < 0)
+               if (cid < 0 || !cpu_count)
                        break;
 
-               /* check that we're still within limits */
-               if (cpu_count > MAX_THREADS_PER_GROUP)
-                       cpu_count = MAX_THREADS_PER_GROUP;
-
-               if (cpu_count + global.nbthread > MAX_THREADS)
-                       cpu_count = MAX_THREADS - global.nbthread;
-
-               if (cpu_count <= 0)
-                       break;
+               /* check that we're still within limits. If there are too many
+                * CPUs but enough groups left, we'll try to make more smaller
+                * groups, of the closest size each.
+                */
+               nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
+               if (nb_grp > MAX_TGROUPS - global.nbtgroups)
+                       nb_grp = MAX_TGROUPS - global.nbtgroups;
+               thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
+               if (thr_per_grp > MAX_THREADS_PER_GROUP)
+                       thr_per_grp = MAX_THREADS_PER_GROUP;
+
+               while (nb_grp && cpu_count > 0) {
+                       /* create at most thr_per_grp threads */
+                       if (thr_per_grp > cpu_count)
+                               thr_per_grp = cpu_count;
+
+                       if (thr_per_grp + global.nbthread > MAX_THREADS)
+                               thr_per_grp = MAX_THREADS - global.nbthread;
+
+                       /* let's create the new thread group */
+                       ha_tgroup_info[global.nbtgroups].base  = global.nbthread;
+                       ha_tgroup_info[global.nbtgroups].count = thr_per_grp;
+
+                       /* assign to this group the required number of threads */
+                       for (thr = 0; thr < thr_per_grp; thr++) {
+                               ha_thread_info[thr + global.nbthread].tgid = global.nbtgroups + 1;
+                               ha_thread_info[thr + global.nbthread].tg = &ha_tgroup_info[global.nbtgroups];
+                               ha_thread_info[thr + global.nbthread].tg_ctx = &ha_tgroup_ctx[global.nbtgroups];
+                               /* map these threads to all the CPUs */
+                               ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
+                       }
 
-               /* let's create the new thread group */
-               ha_tgroup_info[global.nbtgroups].base  = global.nbthread;
-               ha_tgroup_info[global.nbtgroups].count = cpu_count;
-
-               /* assign to this group the required number of threads */
-               for (thr = 0; thr < cpu_count; thr++) {
-                       ha_thread_info[thr + global.nbthread].tgid = global.nbtgroups + 1;
-                       ha_thread_info[thr + global.nbthread].tg = &ha_tgroup_info[global.nbtgroups];
-                       ha_thread_info[thr + global.nbthread].tg_ctx = &ha_tgroup_ctx[global.nbtgroups];
-                       /* map these threads to all the CPUs */
-                       ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
+                       cpu_count -= thr_per_grp;
+                       global.nbthread += thr_per_grp;
+                       global.nbtgroups++;
+                       if (global.nbtgroups >= MAX_TGROUPS || global.nbthread >= MAX_THREADS)
+                               break;
                }
 
                lcid = cid; // last cluster_id
-               global.nbthread += cpu_count;
-               global.nbtgroups++;
        }
 
        if (global.nbthread)