respected. This is recommended on multi-socket and NUMA
systems, as well as CPUs with bad inter-CCX latencies.
+ - performance exactly like group-by-cluster above, except that CPU
+ clusters whose performance is less than half of the
+ next most performant one are evicted. These are
+ typically "little" or "efficient" cores, whose addition
+ generally doesn't bring significant gains and can
+ easily be counter-productive (e.g. for TLS handshakes).
+ Often, keeping such cores for other tasks such as
+ network handling is much more effective. On development
+ systems, these can also be used to run auxiliary tools
+ such as load generators and monitoring tools (see the
+ example below).
+
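+ For instance, on a hybrid CPU made of one cluster of performance
+ cores and one cluster of efficiency cores, the configuration below
+ should end up with a single thread group bound to the performance
+ cores only (the exact grouping depends on the machine's reported
+ topology):
+
+ Example:
+     global
+         cpu-policy performance
+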
See also: "cpu-map", "cpu-set", "nbthread"
cpu-set <directive>...
/* list of CPU policies for "cpu-policy". The default one is the first one. */
static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
+static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static struct ha_cpu_policy ha_cpu_policy[] = {
{ .name = "none", .desc = "use all available CPUs", .fct = NULL },
{ .name = "first-usable-node", .desc = "use only first usable node if nbthreads not set", .fct = cpu_policy_first_usable_node },
{ .name = "group-by-cluster", .desc = "make one thread group per core cluster", .fct = cpu_policy_group_by_cluster },
+ { .name = "performance", .desc = "make one thread group per perf. core cluster", .fct = cpu_policy_performance },
{ 0 } /* end */
};
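/* Illustration only (not part of the patch): a table like the one above
 * is typically scanned by name when the "cpu-policy" keyword is parsed.
 * A minimal, hypothetical lookup could look as follows; the real parser
 * elsewhere in the code base may differ.
 */
#if 0
static int apply_cpu_policy_by_name(const char *name, int tmin, int tmax,
                                    int gmin, int gmax, char **err)
{
	int i;

	for (i = 0; ha_cpu_policy[i].name; i++) {
		if (strcmp(ha_cpu_policy[i].name, name) != 0)
			continue;
		if (!ha_cpu_policy[i].fct) /* e.g. "none": nothing to do */
			return 0;
		return ha_cpu_policy[i].fct(i, tmin, tmax, gmin, gmax, err);
	}
	return -1; /* unknown policy name */
}
#endif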
qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster_capa);
}
+/* functions below act on ha_cpu_cluster structs */
+
+/* function used by qsort to reorder clusters by index */
+int _cmp_cluster_index(const void *a, const void *b)
+{
+ const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
+ const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
+ return l->idx - r->idx;
+}
+
+/* function used by qsort to order clusters by reverse capacity */
+int _cmp_cluster_capa(const void *a, const void *b)
+{
+ const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
+ const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
+ return r->capa - l->capa;
+}
+
+/* re-order a cluster array by cluster index only */
+void cpu_cluster_reorder_by_index(struct ha_cpu_cluster *clusters, int entries)
+{
+ qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_index);
+}
+
+/* re-order a cluster array by reverse capacity */
+void cpu_cluster_reorder_by_capa(struct ha_cpu_cluster *clusters, int entries)
+{
+ qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_capa);
+}
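/* Illustration only (not part of the patch): given three clusters with
 * (idx, capa) = (0, 300), (1, 800), (2, 700), the helper above orders
 * them 800, 700, 300, and cpu_cluster_reorder_by_index() restores the
 * original 0, 1, 2 order. A standalone sketch, assuming only the .idx
 * and .capa fields of struct ha_cpu_cluster are relevant here:
 */
#if 0
int main(void)
{
	struct ha_cpu_cluster c[3] = {
		{ .idx = 0, .capa = 300 },
		{ .idx = 1, .capa = 800 },
		{ .idx = 2, .capa = 700 },
	};

	cpu_cluster_reorder_by_capa(c, 3);  /* now ordered: idx 1, 2, 0 */
	cpu_cluster_reorder_by_index(c, 3); /* back to: idx 0, 1, 2 */
	return 0;
}
#endif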
+
/* returns an optimal maxcpus for the current system. It will take into
* account what is reported by the OS, if any, otherwise will fall back
* to the cpuset size, which serves as an upper limit in any case.
return 0;
}
+/* the "performance" cpu-policy:
+ * - does nothing if nbthread or thread-groups are set
+ * - evicts clusters whose total capacity is less than half of the
+ *   previous (more performant) one's
+ * - tries to create one thread-group per cluster, with as many
+ * threads as CPUs in the cluster, and bind all the threads of
+ * this group to all the CPUs of the cluster.
+ */
+static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
+{
+ int cpu, cluster;
+ int capa;
+
+ if (global.nbthread || global.nbtgroups)
+ return 0;
+
+ /* sort clusters by reverse capacity */
+ cpu_cluster_reorder_by_capa(ha_cpu_clusters, cpu_topo_maxcpus);
+
+ capa = 0;
+ for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) {
+ if (capa && ha_cpu_clusters[cluster].capa < capa / 2) {
+ /* This cluster's capacity is less than half of the last
+ * kept one's, so we're not interested in using it.
+ */
+ for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
+ if (ha_cpu_topo[cpu].cl_gid == ha_cpu_clusters[cluster].idx)
+ ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
+ }
+ }
+ else
+ capa = ha_cpu_clusters[cluster].capa;
+ }
+
+ cpu_cluster_reorder_by_index(ha_cpu_clusters, cpu_topo_maxcpus);
+
+ /* and finish using the group-by-cluster strategy */
+ return cpu_policy_group_by_cluster(policy, tmin, tmax, gmin, gmax, err);
+}
+
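/* Worked example (illustrative): with three clusters of capacity 800,
 * 700 and 300, sorted in reverse order above, the loop keeps 800
 * (capa = 800), keeps 700 (700 >= 800/2, capa becomes 700) and evicts
 * 300 (300 < 700/2). Note that "capa" tracks the last *kept* cluster,
 * so with 800, 390, 180, both 390 and 180 are compared against 800/2
 * and evicted. A standalone sketch of just this rule:
 */
#if 0
static int clusters_kept(const int *capa_desc, int n)
{
	int capa = 0, i, kept = 0;

	for (i = 0; i < n; i++) {
		if (capa && capa_desc[i] < capa / 2)
			continue; /* evicted: less than half of last kept */
		capa = capa_desc[i];
		kept++;
	}
	return kept; /* {800, 700, 300} -> 2 ; {800, 390, 180} -> 1 */
}
#endif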
/* apply the chosen CPU policy if no cpu-map was forced. Returns < 0 on failure
* with a message in *err that must be freed by the caller if non-null.
*/