the limit of 32 or 64 depending on the system. This is
the default policy.
+ - group-by-2-clusters same as "group-by-cluster" but creates one group for
+ every two clusters. This can make sense on CPUs having many
+ clusters of few cores each, to avoid creating many
+ groups, or to smooth the distribution a little bit when
+ not all cores are in use. Please note that it can have
+ very bad performance effects when the communication
+ between clusters is slow. This is generally not
+ recommended.
+
+ - group-by-3-clusters same as "group-by-cluster" but creates one group for
+ every three clusters. This can make sense on CPUs having many
+ clusters of few cores each, to avoid creating many
+ groups, or to smooth the distribution a little bit when
+ not all cores are in use. Please note that it can have
+ very bad performance effects when the communication
+ between clusters is slow. This is generally not
+ recommended.
+
+ - group-by-4-clusters same as "group-by-cluster" but creates one group for
+ every four clusters. This can make sense on CPUs having many
+ clusters of few cores each, to avoid creating many
+ groups, or to smooth the distribution a little bit when
+ not all cores are in use. Please note that it can have
+ very bad performance effects when the communication
+ between clusters is slow. This is generally not
+ recommended.
+
- group-by-cluster if neither "nbthread" nor "nbtgroups" were set, then
one thread group is created for each CPU cluster with
available CPUs, each with as many threads as CPUs. All
{ .name = "none", .desc = "use all available CPUs", .fct = NULL },
{ .name = "first-usable-node", .desc = "use only first usable node if nbthreads not set", .fct = cpu_policy_first_usable_node, .arg = 0 },
{ .name = "group-by-cluster", .desc = "make one thread group per core cluster", .fct = cpu_policy_group_by_cluster , .arg = 1 },
+ { .name = "group-by-2-clusters",.desc = "make one thread group per 2 core clusters", .fct = cpu_policy_group_by_cluster , .arg = 2 },
+ { .name = "group-by-3-clusters",.desc = "make one thread group per 3 core clusters", .fct = cpu_policy_group_by_cluster , .arg = 3 },
+ { .name = "group-by-4-clusters",.desc = "make one thread group per 4 core clusters", .fct = cpu_policy_group_by_cluster , .arg = 4 },
{ .name = "performance", .desc = "make one thread group per perf. core cluster", .fct = cpu_policy_performance , .arg = 0 },
{ .name = "efficiency", .desc = "make one thread group per eff. core cluster", .fct = cpu_policy_efficiency , .arg = 0 },
{ .name = "resource", .desc = "make one thread group from the smallest cluster", .fct = cpu_policy_resource , .arg = 0 },
* - otherwise tries to create one thread-group per cluster, with as many
* threads as CPUs in the cluster, and bind all the threads of this group
* to all the CPUs of the cluster.
+ * Also implements the variants "group-by-2-clusters", "group-by-3-clusters"
+ * and "group-by-4-clusters".
*/
static int cpu_policy_group_by_cluster(int policy, int tmin, int tmax, int gmin, int gmax, char **err)
{
int cid, lcid;
int thr_per_grp, nb_grp;
int thr;
+ int div;
if (global.nbthread)
return 0;
/* iterate over each new cluster */
lcid = -1;
cpu_start = 0;
+
+ /* number of clusters per group, used as a divisor of cluster IDs (0 is treated as 1) */
+ div = ha_cpu_policy[policy].arg;
+ div = div ? div : 1;
+
while (global.nbtgroups < MAX_TGROUPS && global.nbthread < MAX_THREADS) {
ha_cpuset_zero(&node_cpu_set);
cid = -1; cpu_count = 0;
/* skip disabled and already visited CPUs */
if (ha_cpu_topo[cpu].st & HA_CPU_F_EXCL_MASK)
continue;
- if (ha_cpu_topo[cpu].cl_gid <= lcid)
+ if ((ha_cpu_topo[cpu].cl_gid / div) <= lcid)
continue;
if (cid < 0) {
- cid = ha_cpu_topo[cpu].cl_gid;
+ cid = ha_cpu_topo[cpu].cl_gid / div;
cpu_start = cpu + 1;
}
- else if (cid != ha_cpu_topo[cpu].cl_gid)
+ else if (cid != ha_cpu_topo[cpu].cl_gid / div)
continue;
/* make a mask of all of this cluster's CPUs */