respected. This is recommended on multi-socket and NUMA
systems, as well as CPUs with bad inter-CCX latencies.
+ - performance exactly like group-by-cluster above, except that CPU
+ clusters whose performance is less than half of the
+ next most performant one are evicted. These are
+ typically "little" or "efficient" cores, whose addition
+ generally doesn't bring significant gains and can
+ easily be counter-productive (e.g. for TLS handshakes).
+ Often, keeping such cores for other tasks such as
+ network handling is much more effective. On development
+ systems, these can also be used to run auxiliary tools
+ such as load generators and monitoring tools (see the
+ example below).
+
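+ For instance, on a hybrid CPU made of one cluster of performance
+ cores and one cluster of efficiency cores, the configuration below
+ should end up with a single thread group bound to the performance
+ cores only (the exact grouping depends on the machine's reported
+ topology):
+
+ Example:
+     global
+         cpu-policy performance
+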
See also: "cpu-map", "cpu-set", "nbthread"
cpu-set <directive>...
/* list of CPU policies for "cpu-policy". The default one is the first one. */
static int cpu_policy_first_usable_node(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
+static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err);
static struct ha_cpu_policy ha_cpu_policy[] = {
{ .name = "none", .desc = "use all available CPUs", .fct = NULL },
{ .name = "first-usable-node", .desc = "use only first usable node if nbthreads not set", .fct = cpu_policy_first_usable_node },
{ .name = "group-by-cluster", .desc = "make one thread group per core cluster", .fct = cpu_policy_group_by_cluster },
+ { .name = "performance", .desc = "make one thread group per perf. core cluster", .fct = cpu_policy_performance },
{ 0 } /* end */
};
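/* Illustration only (not part of the patch): a table like the one above
 * is typically scanned by name when the "cpu-policy" keyword is parsed.
 * A minimal, hypothetical lookup could look as follows; the real parser
 * elsewhere in the code base may differ.
 */
#if 0
static int apply_cpu_policy_by_name(const char *name, int tmin, int tmax,
                                    int gmin, int gmax, char **err)
{
	int i;

	for (i = 0; ha_cpu_policy[i].name; i++) {
		if (strcmp(ha_cpu_policy[i].name, name) != 0)
			continue;
		if (!ha_cpu_policy[i].fct) /* e.g. "none": nothing to do */
			return 0;
		return ha_cpu_policy[i].fct(i, tmin, tmax, gmin, gmax, err);
	}
	return -1; /* unknown policy name */
}
#endif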
qsort(topo, entries, sizeof(*topo), _cmp_cpu_cluster_capa);
}
+/* functions below act on ha_cpu_cluster structs */
+
+/* function used by qsort to reorder clusters by index */
+int _cmp_cluster_index(const void *a, const void *b)
+{
+ const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
+ const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
+ return l->idx - r->idx;
+}
+
+/* function used by qsort to order clusters by reverse capacity */
+int _cmp_cluster_capa(const void *a, const void *b)
+{
+ const struct ha_cpu_cluster *l = (const struct ha_cpu_cluster *)a;
+ const struct ha_cpu_cluster *r = (const struct ha_cpu_cluster *)b;
+ return r->capa - l->capa;
+}
+
+/* re-order a cluster array by cluster index only */
+void cpu_cluster_reorder_by_index(struct ha_cpu_cluster *clusters, int entries)
+{
+ qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_index);
+}
+
+/* re-order a cluster array by reverse capacity */
+void cpu_cluster_reorder_by_capa(struct ha_cpu_cluster *clusters, int entries)
+{
+ qsort(clusters, entries, sizeof(*clusters), _cmp_cluster_capa);
+}
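/* Illustration only (not part of the patch): given three clusters with
 * (idx, capa) = (0, 300), (1, 800), (2, 700), the helper above orders
 * them 800, 700, 300, and cpu_cluster_reorder_by_index() restores the
 * original 0, 1, 2 order. A standalone sketch, assuming only the .idx
 * and .capa fields of struct ha_cpu_cluster are relevant here:
 */
#if 0
int main(void)
{
	struct ha_cpu_cluster c[3] = {
		{ .idx = 0, .capa = 300 },
		{ .idx = 1, .capa = 800 },
		{ .idx = 2, .capa = 700 },
	};

	cpu_cluster_reorder_by_capa(c, 3);  /* now ordered: idx 1, 2, 0 */
	cpu_cluster_reorder_by_index(c, 3); /* back to: idx 0, 1, 2 */
	return 0;
}
#endif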
+
/* returns an optimal maxcpus for the current system. It will take into
* account what is reported by the OS, if any, otherwise will fall back
* to the cpuset size, which serves as an upper limit in any case.
return 0;
}
+/* the "performance" cpu-policy:
+ * - does nothing if nbthread or thread-groups are set
+ * - evicts clusters whose total capacity is less than half of the
+ *   previous (more performant) one's
+ * - tries to create one thread-group per cluster, with as many
+ * threads as CPUs in the cluster, and bind all the threads of
+ * this group to all the CPUs of the cluster.
+ */
+static int cpu_policy_performance(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
+{
+ int cpu, cluster;
+ int capa;
+
+ if (global.nbthread || global.nbtgroups)
+ return 0;
+
+ /* sort clusters by reverse capacity */
+ cpu_cluster_reorder_by_capa(ha_cpu_clusters, cpu_topo_maxcpus);
+
+ capa = 0;
+ for (cluster = 0; cluster < cpu_topo_maxcpus; cluster++) {
+ if (capa && ha_cpu_clusters[cluster].capa < capa / 2) {
+ /* This cluster's capacity is less than half of the last
+ * kept one's, so we're not interested in using it.
+ */
+ for (cpu = 0; cpu <= cpu_topo_lastcpu; cpu++) {
+ if (ha_cpu_topo[cpu].cl_gid == ha_cpu_clusters[cluster].idx)
+ ha_cpu_topo[cpu].st |= HA_CPU_F_IGNORED;
+ }
+ }
+ else
+ capa = ha_cpu_clusters[cluster].capa;
+ }
+
+ cpu_cluster_reorder_by_index(ha_cpu_clusters, cpu_topo_maxcpus);
+
+ /* and finish using the group-by-cluster strategy */
+ return cpu_policy_group_by_cluster(policy, tmin, tmax, gmin, gmax, err);
+}
+
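/* Worked example (illustrative): with three clusters of capacity 800,
 * 700 and 300, sorted in reverse order above, the loop keeps 800
 * (capa = 800), keeps 700 (700 >= 800/2, capa becomes 700) and evicts
 * 300 (300 < 700/2). Note that "capa" tracks the last *kept* cluster,
 * so with 800, 390, 180, both 390 and 180 are compared against 800/2
 * and evicted. A standalone sketch of just this rule:
 */
#if 0
static int clusters_kept(const int *capa_desc, int n)
{
	int capa = 0, i, kept = 0;

	for (i = 0; i < n; i++) {
		if (capa && capa_desc[i] < capa / 2)
			continue; /* evicted: less than half of last kept */
		capa = capa_desc[i];
		kept++;
	}
	return kept; /* {800, 700, 300} -> 2 ; {800, 390, 180} -> 1 */
}
#endif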
/* apply the chosen CPU policy if no cpu-map was forced. Returns < 0 on failure
* with a message in *err that must be freed by the caller if non-null.
*/