MEDIUM: cpu-topo: change "performance" to consider per-core capacity

author Willy Tarreau <w@1wt.eu>

Tue, 13 May 2025 14:12:52 +0000 (16:12 +0200)

committer Willy Tarreau <w@1wt.eu>

Tue, 13 May 2025 14:48:30 +0000 (16:48 +0200)
author Willy Tarreau <w@1wt.eu>
Tue, 13 May 2025 14:12:52 +0000 (16:12 +0200)
committer Willy Tarreau <w@1wt.eu>
Tue, 13 May 2025 14:48:30 +0000 (16:48 +0200)
diff --git a/doc/configuration.txt b/doc/configuration.txt

index 430ab4f82bf41b932de87fcd82b0c34536978aba..c84f7f10c0eb368db663ad55d91c034f0756a49d 100644 (file)
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -2098,15 +2098,16 @@ cpu-policy <policy>
                          admins to validate setups.
  
     - performance        exactly like group-by-cluster above, except that CPU
-                        clusters whose performance is less than half of the
-                        next more performant one are evicted. These are
-                        typically "little" or "efficient" cores, whose addition
-                        generally doesn't bring significant gains and can
-                        easily be counter-productive (e.g. TLS handshakes).
-                        Often, keeping such cores for other tasks such as
-                        network handling is much more effective. On development
-                        systems, these can also be used to run auxiliary tools
-                        such as load generators and monitoring tools.
+                        clusters composed of cores whose performance is less
+                        than 80% of that of the next more performant cluster are
+                        evicted. These are typically "little" or "efficient"
+                        cores, whose addition generally doesn't bring significant
+                        gains and can easily be counter-productive (e.g. TLS
+                        handshakes). Often, keeping such cores for other tasks
+                        such as network handling is much more effective. On
+                        development systems, these can also be used to run
+                        auxiliary tools such as load generators and monitoring
+                        tools.
  
     - resource           this is like group-by-cluster above, except that only
                          the smallest and most efficient CPU cluster will be
diff --git a/src/cpu_topo.c b/src/cpu_topo.c

index 8145309f51c67813433014d7a3eacf97b240644b..759f9fab8b0b5e026608d70bd539eccc32374c64 100644 (file)
--- a/src/cpu_topo.c
+++ b/src/cpu_topo.c
@@ -1316,7 +1316,7 @@ static int cpu_policy_group_by_ccx(int policy, int tmin, int tmax, int gmin, int
  
  /* the "performance" cpu-policy:
   *  - does nothing if nbthread or thread-groups are set
- *  - eliminates clusters whose total capacity is below half of others
+ *  - eliminates clusters whose average capacity is less than 80% that of others
   *  - tries to create one thread-group per cluster, with as many
   *    threads as CPUs in the cluster, and bind all the threads of
   *    this group to all the CPUs of the cluster.
@@ -1329,22 +1329,26 @@ static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int
         if (global.nbthread || global.nbtgroups)
                 return 0;
  
-       /* sort clusters by reverse capacity */
-       cpu_cluster_reorder_by_capa(ha_cpu_clusters, cpu_topo_maxcpus);
+       /* sort clusters by reverse average capacity */
+       cpu_cluster_reorder_by_avg_capa(ha_cpu_clusters, cpu_topo_maxcpus);
  
         capa = 0;
         for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) {
-               if (capa && ha_cpu_clusters[cluster].capa < capa / 2) {
-                       /* This cluster is more than twice as slow as the
-                        * previous one, we're not interested in using it.
+               if (capa && ha_cpu_clusters[cluster].capa * 10 < ha_cpu_clusters[cluster].nb_cpu * capa * 8) {
+                       /* This cluster is made of cores delivering less than
+                        * 80% of the performance of those of the previous
+                        * cluster, so we're not interested in
+                        * using it.
                          */
                         for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
                                 if (ha_cpu_topo[cpu].cl_gid == ha_cpu_clusters[cluster].idx)
                                         ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
                         }
                 }
+               else if (ha_cpu_clusters[cluster].nb_cpu)
+                       capa = ha_cpu_clusters[cluster].capa / ha_cpu_clusters[cluster].nb_cpu;
                 else
-                       capa = ha_cpu_clusters[cluster].capa;
+                       capa = 0;
         }
  
         cpu_cluster_reorder_by_index(ha_cpu_clusters, cpu_topo_maxcpus);
author	Willy Tarreau <w@1wt.eu>
	Tue, 13 May 2025 14:12:52 +0000 (16:12 +0200)
committer	Willy Tarreau <w@1wt.eu>
	Tue, 13 May 2025 14:48:30 +0000 (16:48 +0200)
doc/configuration.txt		patch \| blob \| blame \| history
src/cpu_topo.c		patch \| blob \| blame \| history