]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: cpu-topo: Add the "per-thread" cpu_affinity
authorOlivier Houchard <ohouchard@haproxy.com>
Wed, 17 Dec 2025 17:58:51 +0000 (18:58 +0100)
committerOlivier Houchard <cognet@ci0.org>
Thu, 18 Dec 2025 17:52:52 +0000 (18:52 +0100)
Add a new cpu-affinity keyword, "per-thread".
If used, each thread will be bound to only one hardware thread of the
thread group.
If used in conjunction with the "threads-per-core 1" cpu-policy option, then
each thread will be bound on a different core.

doc/configuration.txt
src/cpu_topo.c

index 818383edd7feb5d8146b14845ded461ede614239..ca1c9f41fceba6159c8f062a321cdcbd42b01163 100644 (file)
@@ -2233,6 +2233,9 @@ cpu-affinity <affinity>
     group. This is the default unless threads-per-core 1 is used in cpu-policy.
   - auto, per-group will be used, unless threads-per-core 1 is used in
     cpu-policy, in which case per-core will be used. This is the default.
+  - per-thread, which will bind each thread to one hardware thread only.
+    If threads-per-core 1 is used in cpu-policy, then each thread will be bound
+    to one hardware thread of a different core.
 
 cpu-policy <policy> [threads-per-core 1 | auto]
   Selects the CPU allocation policy to be used.
index 3dfd9380f364fdbff586f26c36179d643367e540..b50f0c651ba012b99375dcc3a91030b023341589 100644 (file)
@@ -23,6 +23,7 @@
 /* cpu_policy_conf affinities */
 #define CPU_AFFINITY_PER_GROUP  (1 << 0)
 #define CPU_AFFINITY_PER_CORE   (1 << 1)
+#define CPU_AFFINITY_PER_THREAD (1 << 2)
 
 /* CPU topology information, ha_cpuset_size() entries, allocated at boot */
 int cpu_topo_maxcpus  = -1;  // max number of CPUs supported by OS/haproxy
@@ -71,6 +72,7 @@ static struct cpu_affinity {
 } ha_cpu_affinity[] = {
        {"per-core", CPU_AFFINITY_PER_CORE},
        {"per-group", CPU_AFFINITY_PER_GROUP},
+       {"per-thread", CPU_AFFINITY_PER_THREAD},
        {"auto", 0},
        {NULL, 0}
 };
@@ -1151,6 +1153,46 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
                                        ha_cpuset_assign(&cpu_map[0].thread[thr], &thrset);
                                same_core--;
                        }
+               } else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_THREAD) {
+                       struct hap_cpuset thrset;
+
+                       for (thr = 0; thr < thr_count; thr++) {
+                               ha_cpuset_zero(&thrset);
+                               /*
+                                * if we're binding per-thread, and we have
+                                * a one thread per core policy, then bind each
+                                * thread on a different core, leaving the
+                                * other hardware threads from the core unused.
+                                */
+                               if (cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) {
+                                       int got_cpu;
+                                       int next_cpu = 0;
+                                       int tsid;
+
+                                       tsid = ha_cpuset_ffs(&visited_tsid) - 1;
+                                       got_cpu = find_next_cpu_tsid(0, tsid);
+                                       while ((got_cpu = find_next_cpu_tsid(next_cpu, tsid)) != -1) {
+                                               if (!(ha_cpu_topo[got_cpu].st & HA_CPU_F_EXCL_MASK))
+                                                       break;
+                                               next_cpu = got_cpu + 1;
+                                       }
+
+                                       if (got_cpu != -1) {
+                                               ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+                                               ha_cpuset_clr(&thrset, ha_cpu_topo[got_cpu].idx);
+                                       }
+
+                               } else {
+                                       int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
+
+                                       if (tid != -1) {
+                                               ha_cpuset_set(&thrset, tid + 1);
+                                               ha_cpuset_clr(&node_cpu_set, tid + 1);
+                                       }
+                               }
+                               if (ha_cpuset_ffs(&thrset) != 0)
+                                       ha_cpuset_assign(&cpu_map[0].thread[thr], &thrset);
+                       }
                } else {
 
                        /* assign all threads of all thread groups to this node */
@@ -1312,6 +1354,35 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
                                                ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
                                        same_core--;
 
+                               } else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_THREAD) {
+                                       ha_cpuset_zero(&thrset);
+                                       if (cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) {
+                                               int got_cpu;
+                                               int next_cpu = 0;
+                                               int tsid;
+
+                                               tsid = ha_cpuset_ffs(&visited_tsid) - 1;
+                                               while ((got_cpu = find_next_cpu_tsid(next_cpu, tsid)) != -1) {
+                                                       if (!(ha_cpu_topo[got_cpu].st & HA_CPU_F_EXCL_MASK))
+                                                               break;
+                                                       next_cpu = got_cpu + 1;
+                                               }
+
+                                               if (got_cpu != -1) {
+                                                       ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+                                                       ha_cpuset_clr(&visited_tsid, tsid);
+                                               }
+                                       } else {
+                                               int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
+
+                                               if (tid != -1) {
+                                                       ha_cpuset_set(&thrset, tid);
+                                                       ha_cpuset_clr(&node_cpu_set, tid);
+                                               }
+                                       }
+                                       if (ha_cpuset_ffs(&thrset) != 0)
+                                               ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
+
                                } else {
                                        /* map these threads to all the CPUs */
                                        ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
@@ -1471,6 +1542,36 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
                                                ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
                                        same_core--;
 
+                               } else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_THREAD) {
+                                       ha_cpuset_zero(&thrset);
+                                       if (cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) {
+                                               int got_cpu;
+                                               int next_cpu = 0;
+                                               int tsid;
+
+                                               tsid = ha_cpuset_ffs(&visited_tsid) - 1;
+                                               while ((got_cpu = find_next_cpu_tsid(next_cpu, tsid)) != -1) {
+                                                       if (!(ha_cpu_topo[got_cpu].st & HA_CPU_F_EXCL_MASK))
+                                                               break;
+                                                       next_cpu = got_cpu + 1;
+                                               }
+
+                                               if (got_cpu != -1) {
+                                                       ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+                                                       ha_cpuset_clr(&visited_tsid, tsid);
+                                               }
+                                       } else {
+                                               int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
+
+                                               if (tid != -1) {
+                                                       ha_cpuset_set(&thrset, tid);
+                                                       ha_cpuset_clr(&node_cpu_set, tid);
+                                               }
+                                       }
+                                       if (ha_cpuset_ffs(&thrset) != 0)
+                                               ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
+
+
                                } else {
                                        /* map these threads to all the CPUs */
                                        ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);