diff options
Diffstat (limited to 'lib/group_cpus.c')
| -rw-r--r-- | lib/group_cpus.c | 304 |
1 files changed, 225 insertions, 79 deletions
diff --git a/lib/group_cpus.c b/lib/group_cpus.c index ee272c4cefcc..e6e18d7a49bb 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -47,7 +47,7 @@ static cpumask_var_t *alloc_node_to_cpumask(void) cpumask_var_t *masks; int node; - masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); + masks = kzalloc_objs(cpumask_var_t, nr_node_ids); if (!masks) return NULL; @@ -114,48 +114,15 @@ static int ncpus_cmp_func(const void *l, const void *r) return ln->ncpus - rn->ncpus; } -/* - * Allocate group number for each node, so that for each node: - * - * 1) the allocated number is >= 1 - * - * 2) the allocated number is <= active CPU number of this node - * - * The actual allocated total groups may be less than @numgrps when - * active total CPU number is less than @numgrps. - * - * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' - * for each node. - */ -static void alloc_nodes_groups(unsigned int numgrps, - cpumask_var_t *node_to_cpumask, - const struct cpumask *cpu_mask, - const nodemask_t nodemsk, - struct cpumask *nmsk, - struct node_groups *node_groups) +static void alloc_groups_to_nodes(unsigned int numgrps, + unsigned int numcpus, + struct node_groups *node_groups, + unsigned int num_nodes) { - unsigned n, remaining_ncpus = 0; - - for (n = 0; n < nr_node_ids; n++) { - node_groups[n].id = n; - node_groups[n].ncpus = UINT_MAX; - } - - for_each_node_mask(n, nodemsk) { - unsigned ncpus; - - cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); - ncpus = cpumask_weight(nmsk); - - if (!ncpus) - continue; - remaining_ncpus += ncpus; - node_groups[n].ncpus = ncpus; - } + unsigned int n, remaining_ncpus = numcpus; + unsigned int ngroups, ncpus; - numgrps = min_t(unsigned, remaining_ncpus, numgrps); - - sort(node_groups, nr_node_ids, sizeof(node_groups[0]), + sort(node_groups, num_nodes, sizeof(node_groups[0]), ncpus_cmp_func, NULL); /* @@ -226,9 +193,8 @@ static void alloc_nodes_groups(unsigned int numgrps, * finally for each node X: grps(X) <= ncpu(X). * */ - for (n = 0; n < nr_node_ids; n++) { - unsigned ngroups, ncpus; + for (n = 0; n < num_nodes; n++) { if (node_groups[n].ncpus == UINT_MAX) continue; @@ -246,12 +212,201 @@ static void alloc_nodes_groups(unsigned int numgrps, } } +/* + * Allocate group number for each node, so that for each node: + * + * 1) the allocated number is >= 1 + * + * 2) the allocated number is <= active CPU number of this node + * + * The actual allocated total groups may be less than @numgrps when + * active total CPU number is less than @numgrps. + * + * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' + * for each node. + */ +static void alloc_nodes_groups(unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + const nodemask_t nodemsk, + struct cpumask *nmsk, + struct node_groups *node_groups) +{ + unsigned int n, numcpus = 0; + + for (n = 0; n < nr_node_ids; n++) { + node_groups[n].id = n; + node_groups[n].ncpus = UINT_MAX; + } + + for_each_node_mask(n, nodemsk) { + unsigned int ncpus; + + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + ncpus = cpumask_weight(nmsk); + + if (!ncpus) + continue; + numcpus += ncpus; + node_groups[n].ncpus = ncpus; + } + + numgrps = min_t(unsigned int, numcpus, numgrps); + alloc_groups_to_nodes(numgrps, numcpus, node_groups, nr_node_ids); +} + +static void assign_cpus_to_groups(unsigned int ncpus, + struct cpumask *nmsk, + struct node_groups *nv, + struct cpumask *masks, + unsigned int *curgrp, + unsigned int last_grp) +{ + unsigned int v, cpus_per_grp, extra_grps; + /* Account for rounding errors */ + extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); + + /* Spread allocated groups on CPUs of the current node */ + for (v = 0; v < nv->ngroups; v++, *curgrp += 1) { + cpus_per_grp = ncpus / nv->ngroups; + + /* Account for extra groups to compensate rounding errors */ + if (extra_grps) { + cpus_per_grp++; + --extra_grps; + } + + /* + * wrapping has to be considered given 'startgrp' + * may start anywhere + */ + if (*curgrp >= last_grp) + *curgrp = 0; + grp_spread_init_one(&masks[*curgrp], nmsk, cpus_per_grp); + } +} + +static int alloc_cluster_groups(unsigned int ncpus, + unsigned int ngroups, + struct cpumask *node_cpumask, + cpumask_var_t msk, + const struct cpumask ***clusters_ptr, + struct node_groups **cluster_groups_ptr) +{ + unsigned int ncluster = 0; + unsigned int cpu, nc, n; + const struct cpumask *cluster_mask; + const struct cpumask **clusters; + struct node_groups *cluster_groups; + + cpumask_copy(msk, node_cpumask); + + /* Probe how many clusters in this node. */ + while (1) { + cpu = cpumask_first(msk); + if (cpu >= nr_cpu_ids) + break; + + cluster_mask = topology_cluster_cpumask(cpu); + if (!cpumask_weight(cluster_mask)) + goto no_cluster; + /* Clean out CPUs on the same cluster. */ + cpumask_andnot(msk, msk, cluster_mask); + ncluster++; + } + + /* If ngroups < ncluster, cross cluster is inevitable, skip. */ + if (ncluster == 0 || ncluster > ngroups) + goto no_cluster; + + /* Allocate memory based on cluster number. */ + clusters = kzalloc_objs(*clusters, ncluster); + if (!clusters) + goto no_cluster; + cluster_groups = kzalloc_objs(struct node_groups, ncluster); + if (!cluster_groups) + goto fail_cluster_groups; + + /* Filling cluster info for later process. */ + cpumask_copy(msk, node_cpumask); + for (n = 0; n < ncluster; n++) { + cpu = cpumask_first(msk); + cluster_mask = topology_cluster_cpumask(cpu); + nc = cpumask_weight_and(cluster_mask, node_cpumask); + clusters[n] = cluster_mask; + cluster_groups[n].id = n; + cluster_groups[n].ncpus = nc; + cpumask_andnot(msk, msk, cluster_mask); + } + + alloc_groups_to_nodes(ngroups, ncpus, cluster_groups, ncluster); + + *clusters_ptr = clusters; + *cluster_groups_ptr = cluster_groups; + return ncluster; + + fail_cluster_groups: + kfree(clusters); + no_cluster: + return 0; +} + +/* + * Try group CPUs evenly for cluster locality within a NUMA node. + * + * Return: true if success, false otherwise. + */ +static bool __try_group_cluster_cpus(unsigned int ncpus, + unsigned int ngroups, + struct cpumask *node_cpumask, + struct cpumask *masks, + unsigned int *curgrp, + unsigned int last_grp) +{ + struct node_groups *cluster_groups; + const struct cpumask **clusters; + unsigned int ncluster; + bool ret = false; + cpumask_var_t nmsk; + unsigned int i, nc; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + goto fail_nmsk_alloc; + + ncluster = alloc_cluster_groups(ncpus, ngroups, node_cpumask, nmsk, + &clusters, &cluster_groups); + + if (ncluster == 0) + goto fail_no_clusters; + + for (i = 0; i < ncluster; i++) { + struct node_groups *nv = &cluster_groups[i]; + + /* Get the cpus on this cluster. */ + cpumask_and(nmsk, node_cpumask, clusters[nv->id]); + nc = cpumask_weight(nmsk); + if (!nc) + continue; + WARN_ON_ONCE(nv->ngroups > nc); + + assign_cpus_to_groups(nc, nmsk, nv, masks, curgrp, last_grp); + } + + ret = true; + kfree(cluster_groups); + kfree(clusters); + fail_no_clusters: + free_cpumask_var(nmsk); + fail_nmsk_alloc: + return ret; +} + static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct cpumask *masks) { - unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; + unsigned int i, n, nodes, done = 0; unsigned int last_grp = numgrps; unsigned int curgrp = startgrp; nodemask_t nodemsk = NODE_MASK_NONE; @@ -277,9 +432,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, return numgrps; } - node_groups = kcalloc(nr_node_ids, - sizeof(struct node_groups), - GFP_KERNEL); + node_groups = kzalloc_objs(struct node_groups, nr_node_ids); if (!node_groups) return -ENOMEM; @@ -287,7 +440,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, nodemsk, nmsk, node_groups); for (i = 0; i < nr_node_ids; i++) { - unsigned int ncpus, v; + unsigned int ncpus; struct node_groups *nv = &node_groups[i]; if (nv->ngroups == UINT_MAX) @@ -301,28 +454,14 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, WARN_ON_ONCE(nv->ngroups > ncpus); - /* Account for rounding errors */ - extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); - - /* Spread allocated groups on CPUs of the current node */ - for (v = 0; v < nv->ngroups; v++, curgrp++) { - cpus_per_grp = ncpus / nv->ngroups; - - /* Account for extra groups to compensate rounding errors */ - if (extra_grps) { - cpus_per_grp++; - --extra_grps; - } - - /* - * wrapping has to be considered given 'startgrp' - * may start anywhere - */ - if (curgrp >= last_grp) - curgrp = 0; - grp_spread_init_one(&masks[curgrp], nmsk, - cpus_per_grp); + if (__try_group_cluster_cpus(ncpus, nv->ngroups, nmsk, + masks, &curgrp, last_grp)) { + done += nv->ngroups; + continue; } + + assign_cpus_to_groups(ncpus, nmsk, nv, masks, &curgrp, + last_grp); done += nv->ngroups; } kfree(node_groups); @@ -332,9 +471,11 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, /** * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality * @numgrps: number of groups + * @nummasks: number of initialized cpumasks * * Return: cpumask array if successful, NULL otherwise. And each element - * includes CPUs assigned to this group + * includes CPUs assigned to this group. nummasks contains the number + * of initialized masks which can be less than numgrps. * * Try to put close CPUs from viewpoint of CPU and NUMA locality into * same group, and run two-stage grouping: @@ -344,7 +485,7 @@ static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, * We guarantee in the resulted grouping that all CPUs are covered, and * no same CPU is assigned to multiple groups */ -struct cpumask *group_cpus_evenly(unsigned int numgrps) +struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks) { unsigned int curgrp = 0, nr_present = 0, nr_others = 0; cpumask_var_t *node_to_cpumask; @@ -352,6 +493,9 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) int ret = -ENOMEM; struct cpumask *masks = NULL; + if (numgrps == 0) + return NULL; + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; @@ -362,7 +506,7 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) if (!node_to_cpumask) goto fail_npresmsk; - masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + masks = kzalloc_objs(*masks, numgrps); if (!masks) goto fail_node_to_cpumask; @@ -386,7 +530,7 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, npresmsk, nmsk, masks); if (ret < 0) - goto fail_build_affinity; + goto fail_node_to_cpumask; nr_present = ret; /* @@ -405,10 +549,6 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) if (ret >= 0) nr_others = ret; - fail_build_affinity: - if (ret >= 0) - WARN_ON(nr_present + nr_others < numgrps); - fail_node_to_cpumask: free_node_to_cpumask(node_to_cpumask); @@ -421,18 +561,24 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) kfree(masks); return NULL; } + *nummasks = min(nr_present + nr_others, numgrps); return masks; } #else /* CONFIG_SMP */ -struct cpumask *group_cpus_evenly(unsigned int numgrps) +struct cpumask *group_cpus_evenly(unsigned int numgrps, unsigned int *nummasks) { - struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + struct cpumask *masks; + + if (numgrps == 0) + return NULL; + masks = kzalloc_objs(*masks, numgrps); if (!masks) return NULL; /* assign all CPUs(cpu 0) to the 1st group only */ cpumask_copy(&masks[0], cpu_possible_mask); + *nummasks = 1; return masks; } #endif /* CONFIG_SMP */ |
