MEDIUM: cpu-topo: Add a new "max-threads-per-group" global keyword

author Olivier Houchard <ohouchard@haproxy.com>

Mon, 8 Dec 2025 22:13:19 +0000 (23:13 +0100)

committer Olivier Houchard <cognet@ci0.org>

Thu, 18 Dec 2025 17:52:52 +0000 (18:52 +0100)
author Olivier Houchard <ohouchard@haproxy.com>
Mon, 8 Dec 2025 22:13:19 +0000 (23:13 +0100)
committer Olivier Houchard <cognet@ci0.org>
Thu, 18 Dec 2025 17:52:52 +0000 (18:52 +0100)
diff --git a/doc/configuration.txt b/doc/configuration.txt

index 24c7140c8590486a5233df4e77b5046be4854bf6..818383edd7feb5d8146b14845ded461ede614239 100644 (file)
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1787,6 +1787,7 @@ The following keywords are supported in the "global" section :
     - lua-load
     - lua-load-per-thread
     - lua-prepend-path
+   - max-threads-per-group
     - mworker-max-reloads
     - nbthread
     - node
@@ -2997,6 +2998,14 @@ master-worker no-exit-on-failure
    it is only meant for debugging and could put the master process in an
    abnormal state.
  
+max-threads-per-group <number>
+  Defines the maximum number of threads in a thread group. Unless the number
+  of thread groups is fixed with the thread-groups directive, haproxy will
+  create more thread groups if needed. The default and maximum value is 64.
+  Having a lower value means more groups will potentially be created, which
+  can help improve performance, as a number of data structures are per
+  thread group, and that will mean less contention.
+
  mworker-max-reloads <number>
    In master-worker mode, this option limits the number of time a worker can
    survive to a reload. If the worker did not leave after a reload, once its
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h

index d7a1ff5eedd312253b170559178621ec420cd4ab..c8e6dd82da5003f5843fb60a3eaecd8e7cf0a6e5 100644 (file)
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -261,6 +261,7 @@ struct global {
         unsigned int req_count; /* request counter (HTTP or TCP session) for logs and unique_id */
         int last_checks;
         uint32_t anon_key;
+       int maxthrpertgroup; /* Maximum number of threads per thread group */
  
         /* leave this at the end to make sure we don't share this cache line by accident */
         ALWAYS_ALIGN(64);
diff --git a/src/cpu_topo.c b/src/cpu_topo.c

index 1b57d9fc8e70ce93d967ec440001425f3420d124..3dfd9380f364fdbff586f26c36179d643367e540 100644 (file)
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -1255,12 +1255,12 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
                  * CPUs but enough groups left, we'll try to make more smaller
                  * groups, of the closest size each.
                  */
-               nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
+               nb_grp = (cpu_count + global.maxthrpertgroup - 1) / global.maxthrpertgroup;
                 if (nb_grp > MAX_TGROUPS - global.nbtgroups)
                         nb_grp = MAX_TGROUPS - global.nbtgroups;
                 thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
-               if (thr_per_grp > MAX_THREADS_PER_GROUP)
-                       thr_per_grp = MAX_THREADS_PER_GROUP;
+               if (thr_per_grp > global.maxthrpertgroup)
+                       thr_per_grp = global.maxthrpertgroup;
  
                 while (nb_grp && cpu_count > 0) {
                         /* create at most thr_per_grp threads */
@@ -1414,12 +1414,12 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
                  * CPUs but enough groups left, we'll try to make more smaller
                  * groups, of the closest size each.
                  */
-               nb_grp = (cpu_count + MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
+               nb_grp = (cpu_count + global.maxthrpertgroup - 1) / global.maxthrpertgroup;
                 if (nb_grp > MAX_TGROUPS - global.nbtgroups)
                         nb_grp = MAX_TGROUPS - global.nbtgroups;
                 thr_per_grp = (cpu_count + nb_grp - 1) / nb_grp;
-               if (thr_per_grp > MAX_THREADS_PER_GROUP)
-                       thr_per_grp = MAX_THREADS_PER_GROUP;
+               if (thr_per_grp > global.maxthrpertgroup)
+                       thr_per_grp = global.maxthrpertgroup;
  
                 while (nb_grp && cpu_count > 0) {
                         /* create at most thr_per_grp threads */
diff --git a/src/listener.c b/src/listener.c

index a86e946ce3efe5b1e32df6b92ceb452b9224b496..0ad6d7c848061702b4a2c30f7baa66d4960f5451 100644 (file)
--- a/src/listener.c
+++ b/src/listener.c
@@ -229,7 +229,7 @@ REGISTER_POST_DEINIT(accept_queue_deinit);
   */
  int li_init_per_thr(struct listener *li)
  {
-       int nbthr = MIN(global.nbthread, MAX_THREADS_PER_GROUP);
+       int nbthr = MIN(global.nbthread, global.maxthrpertgroup);
         int i;
  
         /* allocate per-thread elements for listener */
@@ -1394,7 +1394,7 @@ void listener_accept(struct listener *l)
                                                         /* no more threads here, switch to
                                                          * last thread of previous group.
                                                          */
-                                                       t2 = MAX_THREADS_PER_GROUP - 1;
+                                                       t2 = global.maxthrpertgroup - 1;
                                                         if (l->rx.shard_info)
                                                                 r2--;
                                                         /* loop again */
@@ -1456,10 +1456,10 @@ void listener_accept(struct listener *l)
                                                 new_li = l->rx.shard_info->members[r1]->owner;
  
                                         t2--;
-                                       if (t2 >= MAX_THREADS_PER_GROUP) {
+                                       if (t2 >= global.maxthrpertgroup) {
                                                 if (l->rx.shard_info)
                                                         r2--;
-                                               t2 = MAX_THREADS_PER_GROUP - 1;
+                                               t2 = global.maxthrpertgroup - 1;
                                         }
                                 }
                                 else if (q1 - q2 > 0) {
@@ -1480,7 +1480,7 @@ void listener_accept(struct listener *l)
                                                 new_li = l->rx.shard_info->members[r1]->owner;
                                 updt_t1:
                                         t1++;
-                                       if (t1 >= MAX_THREADS_PER_GROUP) {
+                                       if (t1 >= global.maxthrpertgroup) {
                                                 if (l->rx.shard_info)
                                                         r1++;
                                                 t1 = 0;
diff --git a/src/thread.c b/src/thread.c

index b5f5d7d97d8422e49640e4b5933fc7c8e5f78ee9..80b84018ae3f4965537be0a49ea2958559f49024 100644 (file)
--- a/src/thread.c
+++ b/src/thread.c
@@ -1415,7 +1415,7 @@ int thread_map_to_groups()
                  */
                 q = ut / ug;
                 r = ut % ug;
-               if ((q + !!r) > MAX_THREADS_PER_GROUP) {
+               if ((q + !!r) > global.maxthrpertgroup) {
                         ha_alert("Too many remaining unassigned threads (%d) for thread groups (%d). Please increase thread-groups or make sure to keep thread numbers contiguous\n", ut, ug);
                         return -1;
                 }
@@ -1645,6 +1645,9 @@ void thread_detect_count(void)
         if (global.nbtgroups)
                 grp_min = grp_max = global.nbtgroups;
  
+       if (!global.maxthrpertgroup)
+               global.maxthrpertgroup = MAX_THREADS_PER_GROUP;
+
  #if defined(USE_THREAD)
         /* Adjust to boot settings if not forced */
         if (thr_min <= thread_cpus_enabled_at_boot && thread_cpus_enabled_at_boot < thr_max)
@@ -1668,13 +1671,13 @@ void thread_detect_count(void)
         if (thr_min < grp_min && thr_max >= grp_min)
                 thr_min = grp_min;
  
-       if (thr_min <= MAX_THREADS_PER_GROUP * grp_max &&
-           thr_max > MAX_THREADS_PER_GROUP * grp_max)
-               thr_max = MAX_THREADS_PER_GROUP * grp_max;
+       if (thr_min <= global.maxthrpertgroup * grp_max &&
+           thr_max > global.maxthrpertgroup * grp_max)
+               thr_max = global.maxthrpertgroup * grp_max;
  
-       if (grp_min < (thr_min +  MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP &&
-           grp_max >= (thr_min +  MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP)
-               grp_min = (thr_min +  MAX_THREADS_PER_GROUP - 1) / MAX_THREADS_PER_GROUP;
+       if (grp_min < (thr_min +  global.maxthrpertgroup - 1) / global.maxthrpertgroup &&
+           grp_max >= (thr_min +  global.maxthrpertgroup - 1) / global.maxthrpertgroup)
+               grp_min = (thr_min +  global.maxthrpertgroup - 1) / global.maxthrpertgroup;
  
         if (grp_max > thr_max && grp_min <= thr_max)
                 grp_max = thr_max;
@@ -1738,10 +1741,10 @@ void thread_detect_count(void)
         if (!global.nbtgroups)
                 global.nbtgroups = 1;
  
-       if (global.nbthread > MAX_THREADS_PER_GROUP * global.nbtgroups) {
+       if (global.nbthread > global.maxthrpertgroup * global.nbtgroups) {
                 ha_diag_warning("nbthread too large or not set, found %d CPUs, limiting to %d threads (maximum is %d per thread group and %d groups). Please set nbthreads and/or increase thread-groups in the global section to silence this warning.\n",
-                               global.nbthread, MAX_THREADS_PER_GROUP * global.nbtgroups, MAX_THREADS_PER_GROUP, MAX_TGROUPS);
-               global.nbthread = MAX_THREADS_PER_GROUP * global.nbtgroups;
+                               global.nbthread, global.maxthrpertgroup * global.nbtgroups, global.maxthrpertgroup, MAX_TGROUPS);
+               global.nbthread = global.maxthrpertgroup * global.nbtgroups;
         }
         return;
  }
@@ -1871,7 +1874,7 @@ int parse_thread_set(const char *arg, struct thread_set *ts, char **err)
                 if (!*set) {
                         /* empty set sets no restriction */
                         min = 1;
-                       max = is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS;
+                       max = is_rel ? global.maxthrpertgroup : MAX_THREADS;
                 }
                 else {
                         if (sep != set && *sep && *sep != '-' && *sep != ',') {
@@ -1899,9 +1902,9 @@ int parse_thread_set(const char *arg, struct thread_set *ts, char **err)
                                         max = min = 0; // throw an error below
                         }
  
-                       if (min < 1 || min > MAX_THREADS || (is_rel && min > MAX_THREADS_PER_GROUP)) {
+                       if (min < 1 || min > MAX_THREADS || (is_rel && min > global.maxthrpertgroup)) {
                                 memprintf(err, "invalid first thread number '%s', permitted range is 1..%d, or 'all', 'odd', 'even'.",
-                                         set, is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS);
+                                         set, is_rel ? global.maxthrpertgroup : MAX_THREADS);
                                 return -1;
                         }
  
@@ -1918,15 +1921,15 @@ int parse_thread_set(const char *arg, struct thread_set *ts, char **err)
                                 v = atoi(set);
  
                                 if (sep == set) { // no digit: to the max
-                                       max = is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS;
+                                       max = is_rel ? global.maxthrpertgroup : MAX_THREADS;
                                         if (*sep && *sep != ',')
                                                 max = 0; // throw an error below
                                 } else
                                         max = v;
  
-                               if (max < 1 || max > MAX_THREADS || (is_rel && max > MAX_THREADS_PER_GROUP)) {
+                               if (max < 1 || max > MAX_THREADS || (is_rel && max > global.maxthrpertgroup)) {
                                         memprintf(err, "invalid last thread number '%s', permitted range is 1..%d.",
-                                                 set, is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS);
+                                                 set, is_rel ? global.maxthrpertgroup : MAX_THREADS);
                                         return -1;
                                 }
                         }
@@ -2138,14 +2141,36 @@ static int cfg_parse_thread_group(char **args, int section_type, struct proxy *c
                 return -1;
         }
  
-       if (ha_tgroup_info[tgroup-1].count > MAX_THREADS_PER_GROUP) {
-               memprintf(err, "'%s %ld' assigned too many threads (%d, max=%d)", args[0], tgroup, tot, MAX_THREADS_PER_GROUP);
+       if (ha_tgroup_info[tgroup-1].count > global.maxthrpertgroup) {
+               memprintf(err, "'%s %ld' assigned too many threads (%d, max=%d)", args[0], tgroup, tot, global.maxthrpertgroup);
                 return -1;
         }
  
         return 0;
  }
  
+/* Parse "max-threads-per-group" (global): one integer in 1..MAX_THREADS_PER_GROUP
+ * stored in global.maxthrpertgroup. Returns 0, or -1 with <err> filled on error.
+ */
+static int cfg_parse_maxthreadpertgroup(char **args, int section_type, struct proxy *curpx,
+                                        const struct proxy *defpx, const char *file, int line,
+                                        char **err)
+{
+       long maxthrpertg;
+       char *errptr;
+
+       if (too_many_args(1, args, err, NULL))
+               return -1;
+       /* reject 0 too: thread_detect_count() treats it as "unset", not as a limit */
+       maxthrpertg = strtol(args[1], &errptr, 10);
+       if (!*args[1] || *errptr || maxthrpertg < 1 || maxthrpertg > MAX_THREADS_PER_GROUP) {
+               memprintf(err, "'%s' value must be an integer between 1 and %d,  got '%s'", args[0], MAX_THREADS_PER_GROUP, args[1]);
+               return -1;
+       }
+       global.maxthrpertgroup = maxthrpertg;
+       return 0;
+}
+
  /* Parse the "thread-groups" global directive, which takes an integer argument
   * that contains the desired number of thread groups.
   */
@@ -2196,6 +2221,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
         { CFG_GLOBAL, "nbthread",       cfg_parse_nbthread, 0 },
         { CFG_GLOBAL, "thread-group",   cfg_parse_thread_group, 0 },
         { CFG_GLOBAL, "thread-groups",  cfg_parse_thread_groups, 0 },
+       { CFG_GLOBAL, "max-threads-per-group", cfg_parse_maxthreadpertgroup, 0 },
         { 0, NULL, NULL }
  }};
author	Olivier Houchard <ohouchard@haproxy.com>
	Mon, 8 Dec 2025 22:13:19 +0000 (23:13 +0100)
committer	Olivier Houchard <cognet@ci0.org>
	Thu, 18 Dec 2025 17:52:52 +0000 (18:52 +0100)
doc/configuration.txt		patch \| blob \| blame \| history
include/haproxy/global-t.h		patch \| blob \| blame \| history
src/cpu_topo.c		patch \| blob \| blame \| history
src/listener.c		patch \| blob \| blame \| history
src/thread.c		patch \| blob \| blame \| history