return ln->ncpus - rn->ncpus;
}
-/*
- * Allocate group number for each node, so that for each node:
- *
- * 1) the allocated number is >= 1
- *
- * 2) the allocated number is <= active CPU number of this node
- *
- * The actual allocated total groups may be less than @numgrps when
- * active total CPU number is less than @numgrps.
- *
- * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
- * for each node.
- */
-static void alloc_nodes_groups(unsigned int numgrps,
- cpumask_var_t *node_to_cpumask,
- const struct cpumask *cpu_mask,
- const nodemask_t nodemsk,
- struct cpumask *nmsk,
- struct node_groups *node_groups)
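+/*
+ * Distribute @numgrps groups among the @num_nodes entries of
+ * @node_groups, which together cover @numcpus active CPUs. Entries are
+ * handled in ascending order of their CPU count, and every entry that
+ * has CPUs is allocated at least one group. Shared by the per-NUMA-node
+ * and per-cluster allocation paths.
+ */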
+static void alloc_groups_to_nodes(unsigned int numgrps,
+ unsigned int numcpus,
+ struct node_groups *node_groups,
+ unsigned int num_nodes)
{
- unsigned n, remaining_ncpus = 0;
-
- for (n = 0; n < nr_node_ids; n++) {
- node_groups[n].id = n;
- node_groups[n].ncpus = UINT_MAX;
- }
-
- for_each_node_mask(n, nodemsk) {
- unsigned ncpus;
-
- cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
- ncpus = cpumask_weight(nmsk);
-
- if (!ncpus)
- continue;
- remaining_ncpus += ncpus;
- node_groups[n].ncpus = ncpus;
- }
+ unsigned int n, remaining_ncpus = numcpus;
+	unsigned int ngroups, ncpus;
+
- numgrps = min_t(unsigned, remaining_ncpus, numgrps);
-
- sort(node_groups, nr_node_ids, sizeof(node_groups[0]),
+ sort(node_groups, num_nodes, sizeof(node_groups[0]),
ncpus_cmp_func, NULL);
/*
* finally for each node X: grps(X) <= ncpu(X).
*
*/
- for (n = 0; n < nr_node_ids; n++) {
- unsigned ngroups, ncpus;
+ for (n = 0; n < num_nodes; n++) {
if (node_groups[n].ncpus == UINT_MAX)
continue;
}
}
+/*
+ * Allocate the number of groups for each node, so that for each node:
+ *
+ * 1) the allocated number is >= 1
+ *
+ * 2) the allocated number is <= the number of active CPUs on this node
+ *
+ * The actual total number of allocated groups may be less than @numgrps
+ * when the total number of active CPUs is less than @numgrps.
+ *
+ * The active CPUs of a node are those in '@cpu_mask AND
+ * @node_to_cpumask[]' for that node.
+ */
+static void alloc_nodes_groups(unsigned int numgrps,
+ cpumask_var_t *node_to_cpumask,
+ const struct cpumask *cpu_mask,
+ const nodemask_t nodemsk,
+ struct cpumask *nmsk,
+ struct node_groups *node_groups)
+{
+ unsigned int n, numcpus = 0;
+
+ for (n = 0; n < nr_node_ids; n++) {
+ node_groups[n].id = n;
+ node_groups[n].ncpus = UINT_MAX;
+ }
+
+ for_each_node_mask(n, nodemsk) {
+ unsigned int ncpus;
+
+ cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+ ncpus = cpumask_weight(nmsk);
+
+ if (!ncpus)
+ continue;
+ numcpus += ncpus;
+ node_groups[n].ncpus = ncpus;
+ }
+
+ numgrps = min_t(unsigned int, numcpus, numgrps);
+ alloc_groups_to_nodes(numgrps, numcpus, node_groups, nr_node_ids);
+}
+
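+/*
+ * Spread the @ncpus CPUs in @nmsk across the nv->ngroups groups
+ * allocated to this node or cluster, filling @masks starting at
+ * *@curgrp and wrapping back to group 0 once @last_grp is reached.
+ */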
+static void assign_cpus_to_groups(unsigned int ncpus,
+ struct cpumask *nmsk,
+ struct node_groups *nv,
+ struct cpumask *masks,
+ unsigned int *curgrp,
+ unsigned int last_grp)
+{
+	unsigned int v, cpus_per_grp, extra_grps;
+
+ /* Account for rounding errors */
+ extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
+
+	/* Spread the allocated groups over the CPUs of this node or cluster */
+ for (v = 0; v < nv->ngroups; v++, *curgrp += 1) {
+ cpus_per_grp = ncpus / nv->ngroups;
+
+ /* Account for extra groups to compensate rounding errors */
+ if (extra_grps) {
+ cpus_per_grp++;
+ --extra_grps;
+ }
+
+		/*
+		 * Wrapping has to be considered since the caller's
+		 * 'startgrp' may start anywhere.
+		 */
+ if (*curgrp >= last_grp)
+ *curgrp = 0;
+ grp_spread_init_one(&masks[*curgrp], nmsk, cpus_per_grp);
+ }
+}
+
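+/*
+ * Probe the CPU clusters covering @node_cpumask (@ncpus CPUs in total,
+ * @msk is scratch space) and distribute @ngroups groups among them. On
+ * success, the per-cluster cpumasks and their group counts are returned
+ * through @clusters_ptr and @cluster_groups_ptr (both kcalloc()'ed and
+ * freed by the caller) and the number of clusters is returned. Returns
+ * 0 when the cluster topology cannot be used, when @ngroups is smaller
+ * than the number of clusters, or on allocation failure.
+ */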
+static int alloc_cluster_groups(unsigned int ncpus,
+ unsigned int ngroups,
+ struct cpumask *node_cpumask,
+ cpumask_var_t msk,
+ const struct cpumask ***clusters_ptr,
+ struct node_groups **cluster_groups_ptr)
+{
+ unsigned int ncluster = 0;
+ unsigned int cpu, nc, n;
+ const struct cpumask *cluster_mask;
+ const struct cpumask **clusters;
+ struct node_groups *cluster_groups;
+
+ cpumask_copy(msk, node_cpumask);
+
+	/* Probe how many clusters there are in this node. */
+ while (1) {
+ cpu = cpumask_first(msk);
+ if (cpu >= nr_cpu_ids)
+ break;
+
+ cluster_mask = topology_cluster_cpumask(cpu);
+		if (cpumask_empty(cluster_mask))
+			goto no_cluster;
+		/* Clear the CPUs of this cluster from the probe mask. */
+ cpumask_andnot(msk, msk, cluster_mask);
+ ncluster++;
+ }
+
+	/* If ngroups < ncluster, cross-cluster groups are inevitable, so skip. */
+ if (ncluster == 0 || ncluster > ngroups)
+ goto no_cluster;
+
+	/* Allocate memory based on the number of clusters. */
+	clusters = kcalloc(ncluster, sizeof(*clusters), GFP_KERNEL);
+	if (!clusters)
+		goto no_cluster;
+	cluster_groups = kcalloc(ncluster, sizeof(*cluster_groups), GFP_KERNEL);
+ if (!cluster_groups)
+ goto fail_cluster_groups;
+
+	/* Fill in the cluster info for later processing. */
+ cpumask_copy(msk, node_cpumask);
+ for (n = 0; n < ncluster; n++) {
+ cpu = cpumask_first(msk);
+ cluster_mask = topology_cluster_cpumask(cpu);
+ nc = cpumask_weight_and(cluster_mask, node_cpumask);
+ clusters[n] = cluster_mask;
+ cluster_groups[n].id = n;
+ cluster_groups[n].ncpus = nc;
+ cpumask_andnot(msk, msk, cluster_mask);
+ }
+
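+	/* Distribute @ngroups groups among the probed clusters. */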
+ alloc_groups_to_nodes(ngroups, ncpus, cluster_groups, ncluster);
+
+ *clusters_ptr = clusters;
+ *cluster_groups_ptr = cluster_groups;
+ return ncluster;
+
+ fail_cluster_groups:
+ kfree(clusters);
+ no_cluster:
+ return 0;
+}
+
+/*
+ * Try to group CPUs evenly for cluster locality within a NUMA node.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool __try_group_cluster_cpus(unsigned int ncpus,
+ unsigned int ngroups,
+ struct cpumask *node_cpumask,
+ struct cpumask *masks,
+ unsigned int *curgrp,
+ unsigned int last_grp)
+{
+ struct node_groups *cluster_groups;
+ const struct cpumask **clusters;
+ unsigned int ncluster;
+ bool ret = false;
+ cpumask_var_t nmsk;
+ unsigned int i, nc;
+
+ if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+ goto fail_nmsk_alloc;
+
+ ncluster = alloc_cluster_groups(ncpus, ngroups, node_cpumask, nmsk,
+ &clusters, &cluster_groups);
+
+ if (ncluster == 0)
+ goto fail_no_clusters;
+
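+	/* Spread the CPUs of each cluster over its allocated groups. */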
+ for (i = 0; i < ncluster; i++) {
+ struct node_groups *nv = &cluster_groups[i];
+
+ /* Get the cpus on this cluster. */
+ cpumask_and(nmsk, node_cpumask, clusters[nv->id]);
+ nc = cpumask_weight(nmsk);
+ if (!nc)
+ continue;
+ WARN_ON_ONCE(nv->ngroups > nc);
+
+ assign_cpus_to_groups(nc, nmsk, nv, masks, curgrp, last_grp);
+ }
+
+ ret = true;
+ kfree(cluster_groups);
+ kfree(clusters);
+ fail_no_clusters:
+ free_cpumask_var(nmsk);
+ fail_nmsk_alloc:
+ return ret;
+}
+
static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
struct cpumask *nmsk, struct cpumask *masks)
{
- unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0;
+ unsigned int i, n, nodes, done = 0;
unsigned int last_grp = numgrps;
unsigned int curgrp = startgrp;
nodemask_t nodemsk = NODE_MASK_NONE;
alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask,
nodemsk, nmsk, node_groups);
for (i = 0; i < nr_node_ids; i++) {
- unsigned int ncpus, v;
+ unsigned int ncpus;
struct node_groups *nv = &node_groups[i];
if (nv->ngroups == UINT_MAX)
WARN_ON_ONCE(nv->ngroups > ncpus);
- /* Account for rounding errors */
- extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
-
- /* Spread allocated groups on CPUs of the current node */
- for (v = 0; v < nv->ngroups; v++, curgrp++) {
- cpus_per_grp = ncpus / nv->ngroups;
-
- /* Account for extra groups to compensate rounding errors */
- if (extra_grps) {
- cpus_per_grp++;
- --extra_grps;
- }
-
- /*
- * wrapping has to be considered given 'startgrp'
- * may start anywhere
- */
- if (curgrp >= last_grp)
- curgrp = 0;
- grp_spread_init_one(&masks[curgrp], nmsk,
- cpus_per_grp);
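+		/*
+		 * Prefer cluster-aware grouping within this node; fall back
+		 * to spreading the node's CPUs directly when the cluster
+		 * topology cannot be used.
+		 */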
+ if (__try_group_cluster_cpus(ncpus, nv->ngroups, nmsk,
+ masks, &curgrp, last_grp)) {
+ done += nv->ngroups;
+ continue;
}
+
+ assign_cpus_to_groups(ncpus, nmsk, nv, masks, &curgrp,
+ last_grp);
done += nv->ngroups;
}
kfree(node_groups);