MEDIUM: cpu-topo: Add the "per-ccx" cpu-affinity
author Olivier Houchard <ohouchard@haproxy.com>
Thu, 18 Dec 2025 14:48:49 +0000 (15:48 +0100)
committer Olivier Houchard <cognet@ci0.org>
Thu, 18 Dec 2025 17:52:52 +0000 (18:52 +0100)
Add a new cpu-affinity keyword, "per-ccx".
If used, each thread will be bound to all the hardware threads available
in one CCX of the thread group.
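
For illustration only (not part of this patch), the new value can typically be
combined with one of the existing grouping policies in the global section, for
instance:

    global
        cpu-policy group-by-ccx
        cpu-affinity per-ccx

With such a setup, every thread of a group would be expected to receive the
CPU mask of its whole CCX rather than a single hardware thread (per-thread) or
a single core (per-core); e.g. on a machine with two 8-thread CCXs, each
group's threads would all share the 8 hardware threads of their own CCX.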

doc/configuration.txt
src/cpu_topo.c

diff --git a/doc/configuration.txt b/doc/configuration.txt
index ca1c9f41fceba6159c8f062a321cdcbd42b01163..73fe093c9d62077f48c33de58993ff8c3f33577b 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -2236,6 +2236,7 @@ cpu-affinity <affinity>
   - per-thread, that will bind one thread to one hardware thread only.
     If threads-per-core 1 is used in cpu-policy, then each thread will be bound
     to one hardware thread of a different core.
+  - per-ccx, that will bind each thread to all the hardware threads of one CCX.
 
 cpu-policy <policy> [threads-per-core 1 | auto]
   Selects the CPU allocation policy to be used.
diff --git a/src/cpu_topo.c b/src/cpu_topo.c
index b50f0c651ba012b99375dcc3a91030b023341589..f853fe693dd28f2d13a5f9ff87accc72eaf50953 100644
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -24,6 +24,7 @@
 #define CPU_AFFINITY_PER_GROUP  (1 << 0)
 #define CPU_AFFINITY_PER_CORE   (1 << 1)
 #define CPU_AFFINITY_PER_THREAD (1 << 2)
+#define CPU_AFFINITY_PER_CCX    (1 << 3)
 
 /* CPU topology information, ha_cpuset_size() entries, allocated at boot */
 int cpu_topo_maxcpus  = -1;  // max number of CPUs supported by OS/haproxy
@@ -73,6 +74,7 @@ static struct cpu_affinity {
        {"per-core", CPU_AFFINITY_PER_CORE},
        {"per-group", CPU_AFFINITY_PER_GROUP},
        {"per-thread", CPU_AFFINITY_PER_THREAD},
+       {"per-ccx", CPU_AFFINITY_PER_CCX},
        {"auto", 0},
        {NULL, 0}
 };
@@ -1054,6 +1056,17 @@ static int find_next_cpu_tsid(int start, int tsid)
        return -1;
 }
 
+static int find_next_cpu_ccx(int start, int l3id)
+{
+       int cpu;
+
+       for (cpu = start; cpu <= cpu_topo_lastcpu; cpu++)
+               if (ha_cpu_topo[cpu].ca_id[3] == l3id)
+                       return cpu;
+
+       return -1;
+}
+
 /* the "first-usable-node" cpu-policy: historical one
  *  - does nothing if numa_cpu_mapping is not set
  *  - does nothing if nbthread is set
@@ -1065,6 +1078,7 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
 {
        struct hap_cpuset node_cpu_set;
        struct hap_cpuset visited_tsid;
+       struct hap_cpuset visited_ccx;
        int first_node_id = -1;
        int second_node_id = -1;
        int cpu;
@@ -1106,6 +1120,7 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
         */
        ha_cpuset_zero(&node_cpu_set);
        ha_cpuset_zero(&visited_tsid);
+       ha_cpuset_zero(&visited_ccx);
        for (cpu = cpu_count = 0; cpu <= cpu_topo_lastcpu; cpu++) {
                if (ha_cpu_topo[cpu].no_id != first_node_id)
                        ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
@@ -1113,6 +1128,7 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
                        ha_cpuset_set(&node_cpu_set, ha_cpu_topo[cpu].idx);
                        cpu_count++;
 
+                       ha_cpuset_set(&visited_ccx, ha_cpu_topo[cpu].ca_id[3]);
                        if (!(cpu_policy_conf.flags & CPU_POLICY_ONE_THREAD_PER_CORE) || !ha_cpuset_isset(&visited_tsid, ha_cpu_topo[cpu].ts_id)) {
                                ha_cpuset_set(&visited_tsid, ha_cpu_topo[cpu].ts_id);
                                thr_count++;
@@ -1179,8 +1195,8 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
 
                                        if (got_cpu != -1) {
                                                ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
-                                               ha_cpuset_clr(&thrset, ha_cpu_topo[got_cpu].idx);
                                        }
+                                       ha_cpuset_clr(&visited_tsid, tsid);
 
                                } else {
                                        int tid = ha_cpuset_ffs(&node_cpu_set) - 1;
@@ -1193,6 +1209,29 @@ static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin
                                if (ha_cpuset_ffs(&thrset) != 0)
                                        ha_cpuset_assign(&cpu_map[0].thread[thr], &thrset);
                        }
+               } else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_CCX) {
+                       struct hap_cpuset thrset;
+                       int same_ccx = 0;
+
+                       for (thr = 0; thr < thr_count; thr++) {
+                               int got_cpu;
+                               int next_try = 0;
+
+                               if (same_ccx == 0) {
+                                       int l3id = ha_cpuset_ffs(&visited_ccx) - 1;
+                                       ha_cpuset_zero(&thrset);
+                                       while ((got_cpu = find_next_cpu_ccx(next_try, l3id)) != -1) {
+                                               next_try = got_cpu + 1;
+                                               same_ccx++;
+                                               ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+                                       }
+                                       ha_cpuset_clr(&visited_ccx, l3id);
+                               }
+                               BUG_ON(same_ccx == 0);
+                               if (ha_cpuset_ffs(&thrset) != 0)
+                                       ha_cpuset_assign(&cpu_map[0].thread[thr], &thrset);
+                               same_ccx--;
+                       }
                } else {
 
                        /* assign all threads of all thread groups to this node */
@@ -1231,6 +1270,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
        struct hap_cpuset visited_cl_set;
        struct hap_cpuset node_cpu_set;
        struct hap_cpuset visited_tsid;
+       struct hap_cpuset visited_ccx;
        struct hap_cpuset thrset;
        int cpu, cpu_start;
        int cpu_count;
@@ -1258,6 +1298,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
        while (global.nbtgroups < MAX_TGROUPS && global.nbthread < MAX_THREADS) {
                ha_cpuset_zero(&node_cpu_set);
                ha_cpuset_zero(&visited_tsid);
+               ha_cpuset_zero(&visited_ccx);
                cid = -1; cpu_count = 0;
 
                for (cpu = cpu_start; cpu <= cpu_topo_lastcpu; cpu++) {
@@ -1277,6 +1318,7 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
 
                        /* make a mask of all of this cluster's CPUs */
                        ha_cpuset_set(&node_cpu_set, ha_cpu_topo[cpu].idx);
+                       ha_cpuset_set(&visited_ccx, ha_cpu_topo[cpu].ca_id[3]);
                        if (!ha_cpuset_isset(&visited_tsid, ha_cpu_topo[cpu].ts_id)) {
                                cpu_count++;
                                ha_cpuset_set(&visited_tsid, ha_cpu_topo[cpu].ts_id);
@@ -1383,6 +1425,24 @@ static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin,
                                        if (ha_cpuset_ffs(&thrset) != 0)
                                                ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
 
+                               } else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_CCX) {
+                                       if (same_core == 0) {
+                                               int l3id = ha_cpuset_ffs(&visited_ccx) - 1;
+                                               int got_cpu;
+                                               int next_try = 0;
+                                               ha_cpuset_zero(&thrset);
+
+                                               while ((got_cpu = find_next_cpu_ccx(next_try, l3id)) != -1) {
+                                                       next_try = got_cpu + 1;
+                                                       same_core++;
+                                                       ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+                                               }
+                                               ha_cpuset_clr(&visited_ccx, l3id);
+                                       }
+                                       BUG_ON(same_core == 0);
+                                       if (ha_cpuset_ffs(&thrset) != 0)
+                                               ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
+                                       same_core--;
                                } else {
                                        /* map these threads to all the CPUs */
                                        ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);
@@ -1419,6 +1479,7 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
        struct hap_cpuset visited_ccx_set;
        struct hap_cpuset node_cpu_set;
        struct hap_cpuset visited_tsid;
+       struct hap_cpuset visited_ccx; /* List of CCXs we will be using */
        struct hap_cpuset thrset;
        int cpu, cpu_start;
        int cpu_count;
@@ -1465,6 +1526,7 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
 
                        /* make a mask of all of this cluster's CPUs */
                        ha_cpuset_set(&node_cpu_set, ha_cpu_topo[cpu].idx);
+                       ha_cpuset_set(&visited_ccx, ha_cpu_topo[cpu].ca_id[3]);
                        if (!ha_cpuset_isset(&visited_tsid, ha_cpu_topo[cpu].ts_id)) {
                                cpu_count++;
                                ha_cpuset_set(&visited_tsid, ha_cpu_topo[cpu].ts_id);
@@ -1571,7 +1633,24 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
                                        if (ha_cpuset_ffs(&thrset) != 0)
                                                ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
 
+                               } else if (cpu_policy_conf.affinity & CPU_AFFINITY_PER_CCX) {
+                                       if (same_core == 0) {
+                                               int l3id = ha_cpuset_ffs(&visited_ccx) - 1;
+                                               int got_cpu;
+                                               int next_try = 0;
+                                               ha_cpuset_zero(&thrset);
 
+                                               while ((got_cpu = find_next_cpu_ccx(next_try, l3id)) != -1) {
+                                                       next_try = got_cpu + 1;
+                                                       same_core++;
+                                                       ha_cpuset_set(&thrset, ha_cpu_topo[got_cpu].idx);
+                                               }
+                                               ha_cpuset_clr(&visited_ccx, l3id);
+                                       }
+                                       BUG_ON(same_core == 0);
+                                       if (ha_cpuset_ffs(&thrset) != 0)
+                                               ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &thrset);
+                                       same_core--;
                                } else {
                                        /* map these threads to all the CPUs */
                                        ha_cpuset_assign(&cpu_map[global.nbtgroups].thread[thr], &node_cpu_set);