+++ /dev/null
-From b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd Mon Sep 17 00:00:00 2001
-From: Valentin Schneider <valentin.schneider@arm.com>
-Date: Thu, 29 Apr 2021 22:53:27 -0700
-Subject: ia64: ensure proper NUMA distance and possible map initialization
-
-From: Valentin Schneider <valentin.schneider@arm.com>
-
-commit b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd upstream.
-
-John Paul reported a warning about bogus NUMA distance values spurred by
-commit:
-
- 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort")
-
-In this case, the afflicted machine comes up with a reported 256 possible
-nodes, all of which are 0 distance away from one another. This was
-previously silently ignored, but is now caught by the aforementioned
-commit.
-
-The culprit is ia64's node_possible_map, which remains unchanged from its
-initialization value of NODE_MASK_ALL. In John's case, the machine has
-neither an SRAT nor a SLIT table, but as I understand it the possible map
-remains untouched regardless of which ACPI tables end up being parsed. Thus,
-nodes that are possible but not online keep a bogus distance of 0 (distances
-in [0, 9] are "reserved and have no meaning" as per the ACPI spec).
-
-Follow x86 / drivers/base/arch_numa's example and set the possible map to
-the parsed map, which in this case seems to be the online map.
-
-Link: http://lore.kernel.org/r/255d6b5d-194e-eb0e-ecdd-97477a534441@physik.fu-berlin.de
-Link: https://lkml.kernel.org/r/20210318130617.896309-1-valentin.schneider@arm.com
-Fixes: 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort")
-Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
-Reported-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
-Tested-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
-Tested-by: Sergei Trofimovich <slyfox@gentoo.org>
-Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
-Cc: Ingo Molnar <mingo@kernel.org>
-Cc: Vincent Guittot <vincent.guittot@linaro.org>
-Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
-Cc: Anatoly Pugachev <matorola@gmail.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-[ dannf: minor context adjustment in arch/ia64/kernel/acpi.c ]
-Signed-off-by: dann frazier <dann.frazier@canonical.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- arch/ia64/kernel/acpi.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
---- a/arch/ia64/kernel/acpi.c
-+++ b/arch/ia64/kernel/acpi.c
-@@ -537,7 +537,8 @@ void __init acpi_numa_fixup(void)
- if (srat_num_cpus == 0) {
- node_set_online(0);
- node_cpuid[0].phys_id = hard_smp_processor_id();
-- return;
-+ slit_distance(0, 0) = LOCAL_DISTANCE;
-+ goto out;
- }
-
- /*
-@@ -580,7 +581,7 @@ void __init acpi_numa_fixup(void)
- for (j = 0; j < MAX_NUMNODES; j++)
- node_distance(i, j) = i == j ? LOCAL_DISTANCE :
- REMOTE_DISTANCE;
-- return;
-+ goto out;
- }
-
- memset(numa_slit, -1, sizeof(numa_slit));
-@@ -605,6 +606,8 @@ void __init acpi_numa_fixup(void)
- printk("\n");
- }
- #endif
-+out:
-+ node_possible_map = node_online_map;
- }
- #endif /* CONFIG_ACPI_NUMA */
-
+++ /dev/null
-From 620a6dc40754dc218f5b6389b5d335e9a107fd29 Mon Sep 17 00:00:00 2001
-From: Valentin Schneider <valentin.schneider@arm.com>
-Date: Fri, 22 Jan 2021 12:39:43 +0000
-Subject: sched/topology: Make sched_init_numa() use a set for the deduplicating sort
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Valentin Schneider <valentin.schneider@arm.com>
-
-commit 620a6dc40754dc218f5b6389b5d335e9a107fd29 upstream.
-
-The deduplicating sort in sched_init_numa() assumes that the first line in
-the distance table contains all unique values in the entire table. I've
-been trying to pin down exactly what this means for the topology, but it's
-not straightforward. For instance, topology.c uses this example:
-
- node 0 1 2 3
- 0: 10 20 20 30
- 1: 20 10 20 20
- 2: 20 20 10 20
- 3: 30 20 20 10
-
- 0 ----- 1
- | / |
- | / |
- | / |
- 2 ----- 3
-
-Which works out just fine. However, if we swap nodes 0 and 1:
-
- 1 ----- 0
- | / |
- | / |
- | / |
- 2 ----- 3
-
-we get this distance table:
-
- node 0 1 2 3
- 0: 10 20 20 20
- 1: 20 10 20 30
- 2: 20 20 10 20
- 3: 20 30 20 10
-
-Which breaks the deduplicating sort (non-representative first line). In
-this case this would just be a renumbering exercise, but it so happens that
-we can have a deduplicating sort that goes through the whole table in O(n²)
-at the extra cost of a temporary memory allocation (i.e. any form of set).
-
-The ACPI spec (SLIT) mentions distances are encoded on 8 bits. Following
-this, implement the set as a 256-bit bitmap. Should this not be
-satisfactory (i.e. we want to support 32-bit values), then we'll have to go
-for some other sparse set implementation.
-
-This has the added benefit of letting us allocate just the right amount of
-memory for sched_domains_numa_distance[], rather than an arbitrary
-(nr_node_ids + 1).
-
-Note: DT binding equivalent (distance-map) decodes distances as 32-bit
-values.
-
-Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20210122123943.1217-2-valentin.schneider@arm.com
-Signed-off-by: dann frazier <dann.frazier@canonical.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/topology.h | 1
- kernel/sched/topology.c | 99 ++++++++++++++++++++++-------------------------
- 2 files changed, 49 insertions(+), 51 deletions(-)
-
---- a/include/linux/topology.h
-+++ b/include/linux/topology.h
-@@ -47,6 +47,7 @@ int arch_update_cpu_topology(void);
- /* Conform to ACPI 2.0 SLIT distance definitions */
- #define LOCAL_DISTANCE 10
- #define REMOTE_DISTANCE 20
-+#define DISTANCE_BITS 8
- #ifndef node_distance
- #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
- #endif
---- a/kernel/sched/topology.c
-+++ b/kernel/sched/topology.c
-@@ -1322,66 +1322,58 @@ static void init_numa_topology_type(void
- }
- }
-
-+
-+#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
-+
- void sched_init_numa(void)
- {
-- int next_distance, curr_distance = node_distance(0, 0);
- struct sched_domain_topology_level *tl;
-- int level = 0;
-- int i, j, k;
--
-- sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
-- if (!sched_domains_numa_distance)
-- return;
--
-- /* Includes NUMA identity node at level 0. */
-- sched_domains_numa_distance[level++] = curr_distance;
-- sched_domains_numa_levels = level;
-+ unsigned long *distance_map;
-+ int nr_levels = 0;
-+ int i, j;
-
- /*
- * O(nr_nodes^2) deduplicating selection sort -- in order to find the
- * unique distances in the node_distance() table.
-- *
-- * Assumes node_distance(0,j) includes all distances in
-- * node_distance(i,j) in order to avoid cubic time.
- */
-- next_distance = curr_distance;
-+ distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
-+ if (!distance_map)
-+ return;
-+
-+ bitmap_zero(distance_map, NR_DISTANCE_VALUES);
- for (i = 0; i < nr_node_ids; i++) {
- for (j = 0; j < nr_node_ids; j++) {
-- for (k = 0; k < nr_node_ids; k++) {
-- int distance = node_distance(i, k);
-+ int distance = node_distance(i, j);
-
-- if (distance > curr_distance &&
-- (distance < next_distance ||
-- next_distance == curr_distance))
-- next_distance = distance;
--
-- /*
-- * While not a strong assumption it would be nice to know
-- * about cases where if node A is connected to B, B is not
-- * equally connected to A.
-- */
-- if (sched_debug() && node_distance(k, i) != distance)
-- sched_numa_warn("Node-distance not symmetric");
--
-- if (sched_debug() && i && !find_numa_distance(distance))
-- sched_numa_warn("Node-0 not representative");
-+ if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) {
-+ sched_numa_warn("Invalid distance value range");
-+ return;
- }
-- if (next_distance != curr_distance) {
-- sched_domains_numa_distance[level++] = next_distance;
-- sched_domains_numa_levels = level;
-- curr_distance = next_distance;
-- } else break;
-+
-+ bitmap_set(distance_map, distance, 1);
- }
-+ }
-+ /*
-+ * We can now figure out how many unique distance values there are and
-+ * allocate memory accordingly.
-+ */
-+ nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES);
-
-- /*
-- * In case of sched_debug() we verify the above assumption.
-- */
-- if (!sched_debug())
-- break;
-+ sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
-+ if (!sched_domains_numa_distance) {
-+ bitmap_free(distance_map);
-+ return;
- }
-
-+ for (i = 0, j = 0; i < nr_levels; i++, j++) {
-+ j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j);
-+ sched_domains_numa_distance[i] = j;
-+ }
-+
-+ bitmap_free(distance_map);
-+
- /*
-- * 'level' contains the number of unique distances
-+ * 'nr_levels' contains the number of unique distances
- *
- * The sched_domains_numa_distance[] array includes the actual distance
- * numbers.
-@@ -1390,15 +1382,15 @@ void sched_init_numa(void)
- /*
- * Here, we should temporarily reset sched_domains_numa_levels to 0.
- * If it fails to allocate memory for array sched_domains_numa_masks[][],
-- * the array will contain less then 'level' members. This could be
-+ * the array will contain less then 'nr_levels' members. This could be
- * dangerous when we use it to iterate array sched_domains_numa_masks[][]
- * in other functions.
- *
-- * We reset it to 'level' at the end of this function.
-+ * We reset it to 'nr_levels' at the end of this function.
- */
- sched_domains_numa_levels = 0;
-
-- sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
-+ sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
- if (!sched_domains_numa_masks)
- return;
-
-@@ -1406,7 +1398,7 @@ void sched_init_numa(void)
- * Now for each level, construct a mask per node which contains all
- * CPUs of nodes that are that many hops away from us.
- */
-- for (i = 0; i < level; i++) {
-+ for (i = 0; i < nr_levels; i++) {
- sched_domains_numa_masks[i] =
- kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
- if (!sched_domains_numa_masks[i])
-@@ -1414,12 +1406,17 @@ void sched_init_numa(void)
-
- for (j = 0; j < nr_node_ids; j++) {
- struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
-+ int k;
-+
- if (!mask)
- return;
-
- sched_domains_numa_masks[i][j] = mask;
-
- for_each_node(k) {
-+ if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
-+ sched_numa_warn("Node-distance not symmetric");
-+
- if (node_distance(j, k) > sched_domains_numa_distance[i])
- continue;
-
-@@ -1431,7 +1428,7 @@ void sched_init_numa(void)
- /* Compute default topology size */
- for (i = 0; sched_domain_topology[i].mask; i++);
-
-- tl = kzalloc((i + level + 1) *
-+ tl = kzalloc((i + nr_levels) *
- sizeof(struct sched_domain_topology_level), GFP_KERNEL);
- if (!tl)
- return;
-@@ -1454,7 +1451,7 @@ void sched_init_numa(void)
- /*
- * .. and append 'j' levels of NUMA goodness.
- */
-- for (j = 1; j < level; i++, j++) {
-+ for (j = 1; j < nr_levels; i++, j++) {
- tl[i] = (struct sched_domain_topology_level){
- .mask = sd_numa_mask,
- .sd_flags = cpu_numa_flags,
-@@ -1466,8 +1463,8 @@ void sched_init_numa(void)
-
- sched_domain_topology = tl;
-
-- sched_domains_numa_levels = level;
-- sched_max_numa_distance = sched_domains_numa_distance[level - 1];
-+ sched_domains_numa_levels = nr_levels;
-+ sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
-
- init_numa_topology_type();
- }