From: Olivier Houchard
Date: Wed, 17 Dec 2025 17:58:51 +0000 (+0100)
Subject: MEDIUM: cpu-topo: Add the "per-thread" cpu_affinity
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c524b181a248520de3d0dc4e3dc4bcc56dafeac0;p=thirdparty%2Fhaproxy.git

MEDIUM: cpu-topo: Add the "per-thread" cpu_affinity

Add a new cpu-affinity keyword, "per-thread". If used, each thread will
be bound to only one hardware thread of its thread group. If used in
conjunction with the "threads-per-core 1" cpu-policy, each thread will
be bound to a different core.
---
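
As an illustration only, a minimal global section combining the new keyword
with the one-thread-per-core policy could look like the sketch below, using
the syntax documented in the configuration.txt hunk; the rest of the
configuration is omitted and the comments are mine:

    global
        cpu-policy threads-per-core 1   # keep a single hardware thread per core
        cpu-affinity per-thread         # bind each thread to one hardware thread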

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 818383edd..ca1c9f41f 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -2233,6 +2233,9 @@ cpu-affinity
       group. This is the default unless threads-per-core 1 is used in
       cpu-policy.
     - auto, per-group will be used, unless threads-per-core 1 is used in
       cpu-policy, in which case per-core will be used. This is the default.
+    - per-thread, each thread will be bound to a single hardware thread only.
+      If threads-per-core 1 is used in cpu-policy, each thread will be bound
+      to one hardware thread of a different core.
 
 cpu-policy [threads-per-core 1 | auto]
   Selects the CPU allocation policy to be used.
diff --git a/src/cpu_topo.c b/src/cpu_topo.c
index 3dfd9380f..b50f0c651 100644
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -23,6 +23,7 @@
 /* cpu_policy_conf affinities */
 #define CPU_AFFINITY_PER_GROUP	(1 << 0)
 #define CPU_AFFINITY_PER_CORE	(1 << 1)
+#define CPU_AFFINITY_PER_THREAD	(1 << 2)
 
 /* CPU topology information, ha_cpuset_size() entries, allocated at boot */
 int cpu_topo_maxcpus = -1;  // max number of CPUs supported by OS/haproxy
@@ -71,6 +72,7 @@ static struct cpu_affinity {
 } ha_cpu_affinity[] = {
 	{"per-core", CPU_AFFINITY_PER_CORE},
 	{"per-group", CPU_AFFINITY_PER_GROUP},
+	{"per-thread", CPU_AFFINITY_PER_THREAD},
 	{"auto", 0},
 	{NULL, 0}
 };
@@ -1151,6 +1153,46 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
 				ha_cpuset_assign(&cpu_map[0].thread[thr], &thrset);
 				same_core--;
 			}
+		} else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_THREAD) {
+			struct hap_cpuset thrset;
+
+			for (thr = 0; thr < thr_count; thr++) {
+				ha_cpuset_zero(&thrset);
+				/*
+				 * If we're binding per-thread and we have a
+				 * one-thread-per-core policy, then bind each
+				 * thread to a different core, leaving the
+				 * other hardware threads of the core unused.
+				 */
+				if (cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) {
+					int got_cpu;
+					int next_cpu = 0;
+					int tsid;
+
+					tsid = ha_cpuset_ffs(&visited_tsid) - 1;
+					got_cpu = find_next_cpu_tsid(0, tsid);
+					while ((got_cpu = find_next_cpu_tsid(next_cpu, tsid)) != -1) {
+						if (!(ha_cpu_topo[got_cpu].st & HA_CPU_F_EXCL_MASK))
+							break;
+						next_cpu = got_cpu + 1;
+					}
+
+					if (got_cpu != -1) {
+						ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+						ha_cpuset_clr(&visited_tsid, tsid);
+					}
+
+				} else {
+					int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
+
+					if (tid != -1) {
+						ha_cpuset_set(&thrset, tid);
+						ha_cpuset_clr(&node_cpu_set, tid);
+					}
+				}
+				if (ha_cpuset_ffs(&thrset) != 0)
+					ha_cpuset_assign(&cpu_map[0].thread[thr], &thrset);
+			}
 		} else {
 			/* assign all threads of all thread groups to this node */
@@ -1312,6 +1354,35 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 				ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
 				same_core--;
+			} else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_THREAD) {
+				ha_cpuset_zero(&thrset);
+				if (cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) {
+					int got_cpu;
+					int next_cpu = 0;
+					int tsid;
+
+					tsid = ha_cpuset_ffs(&visited_tsid) - 1;
+					while ((got_cpu = find_next_cpu_tsid(next_cpu, tsid)) != -1) {
+						if (!(ha_cpu_topo[got_cpu].st & HA_CPU_F_EXCL_MASK))
+							break;
+						next_cpu = got_cpu + 1;
+					}
+
+					if (got_cpu != -1) {
+						ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+						ha_cpuset_clr(&visited_tsid, tsid);
+					}
+				} else {
+					int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
+
+					if (tid != -1) {
+						ha_cpuset_set(&thrset, tid);
+						ha_cpuset_clr(&node_cpu_set, tid);
+					}
+				}
+				if (ha_cpuset_ffs(&thrset) != 0)
+					ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
+
 			} else {
 				/* map these threads to all the CPUs */
 				ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
@@ -1471,6 +1542,36 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
 				ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
 				same_core--;
+			} else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_THREAD) {
+				ha_cpuset_zero(&thrset);
+				if (cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) {
+					int got_cpu;
+					int next_cpu = 0;
+					int tsid;
+
+					tsid = ha_cpuset_ffs(&visited_tsid) - 1;
+					while ((got_cpu = find_next_cpu_tsid(next_cpu, tsid)) != -1) {
+						if (!(ha_cpu_topo[got_cpu].st & HA_CPU_F_EXCL_MASK))
+							break;
+						next_cpu = got_cpu + 1;
+					}
+
+					if (got_cpu != -1) {
+						ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+						ha_cpuset_clr(&visited_tsid, tsid);
+					}
+				} else {
+					int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
+
+					if (tid != -1) {
+						ha_cpuset_set(&thrset, tid);
+						ha_cpuset_clr(&node_cpu_set, tid);
+					}
+				}
+				if (ha_cpuset_ffs(&thrset) != 0)
+					ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
+
+
 			} else {
 				/* map these threads to all the CPUs */
 				ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
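
For readers who want to poke at the selection logic in isolation, here is a
stand-alone sketch of the pattern used by the non-SMT branch above: each
thread takes the lowest usable CPU and removes it from the usable set so that
no other thread can reuse it. It deliberately replaces the hap_cpuset helpers
(ha_cpuset_ffs/ha_cpuset_set/ha_cpuset_clr) with a toy 64-bit mask, and the
thread count is arbitrary; it is an illustration, not code from the patch.

    #include <stdio.h>
    #include <stdint.h>

    #define NB_THREADS 4  /* arbitrary, for the example only */

    int main(void)
    {
        uint64_t node_cpu_set = 0xff;  /* toy cpuset: CPUs 0..7 are usable */
        int thr;

        for (thr = 0; thr < NB_THREADS; thr++) {
            /* GCC/Clang builtin behaving like ha_cpuset_ffs():
             * 1 + index of the lowest set bit, or 0 if the set is empty.
             */
            int cpu = __builtin_ffsll(node_cpu_set) - 1;

            if (cpu == -1)
                break;                           /* no usable CPU left */
            node_cpu_set &= ~(1ULL << cpu);      /* clear it so it is not reused */
            printf("thread %d -> cpu %d\n", thr, cpu);
        }
        return 0;
    }

Compiled with gcc or clang, this prints "thread 0 -> cpu 0" up to
"thread 3 -> cpu 3", one distinct CPU per thread.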