--- /dev/null
+From b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd Mon Sep 17 00:00:00 2001
+From: Valentin Schneider <valentin.schneider@arm.com>
+Date: Thu, 29 Apr 2021 22:53:27 -0700
+Subject: ia64: ensure proper NUMA distance and possible map initialization
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+commit b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd upstream.
+
+John Paul reported a warning about bogus NUMA distance values spurred by
+commit:
+
+ 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort")
+
+In this case, the afflicted machine comes up with a reported 256 possible
+nodes, all of which are 0 distance away from one another. This was
+previously silently ignored, but is now caught by the aforementioned
+commit.
+
+The culprit is ia64's node_possible_map which remains unchanged from its
+initialization value of NODE_MASK_ALL. In John's case, the machine
+doesn't have any SRAT nor SLIT table, but AIUI the possible map remains
+untouched regardless of what ACPI tables end up being parsed. Thus,
+!online && possible nodes remain with a bogus distance of 0 (distances \in
+[0, 9] are "reserved and have no meaning" as per the ACPI spec).
+
+Follow x86 / drivers/base/arch_numa's example and set the possible map to
+the parsed map, which in this case seems to be the online map.
+
+Link: http://lore.kernel.org/r/255d6b5d-194e-eb0e-ecdd-97477a534441@physik.fu-berlin.de
+Link: https://lkml.kernel.org/r/20210318130617.896309-1-valentin.schneider@arm.com
+Fixes: 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort")
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Reported-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Tested-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Tested-by: Sergei Trofimovich <slyfox@gentoo.org>
+Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Vincent Guittot <vincent.guittot@linaro.org>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Anatoly Pugachev <matorola@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: dann frazier <dann.frazier@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/ia64/kernel/acpi.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/ia64/kernel/acpi.c
++++ b/arch/ia64/kernel/acpi.c
+@@ -448,7 +448,8 @@ void __init acpi_numa_fixup(void)
+ if (srat_num_cpus == 0) {
+ node_set_online(0);
+ node_cpuid[0].phys_id = hard_smp_processor_id();
+- return;
++ slit_distance(0, 0) = LOCAL_DISTANCE;
++ goto out;
+ }
+
+ /*
+@@ -491,7 +492,7 @@ void __init acpi_numa_fixup(void)
+ for (j = 0; j < MAX_NUMNODES; j++)
+ slit_distance(i, j) = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+- return;
++ goto out;
+ }
+
+ memset(numa_slit, -1, sizeof(numa_slit));
+@@ -516,6 +517,8 @@ void __init acpi_numa_fixup(void)
+ printk("\n");
+ }
+ #endif
++out:
++ node_possible_map = node_online_map;
+ }
+ #endif /* CONFIG_ACPI_NUMA */
+
--- /dev/null
+From 620a6dc40754dc218f5b6389b5d335e9a107fd29 Mon Sep 17 00:00:00 2001
+From: Valentin Schneider <valentin.schneider@arm.com>
+Date: Fri, 22 Jan 2021 12:39:43 +0000
+Subject: sched/topology: Make sched_init_numa() use a set for the deduplicating sort
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+commit 620a6dc40754dc218f5b6389b5d335e9a107fd29 upstream.
+
+The deduplicating sort in sched_init_numa() assumes that the first line in
+the distance table contains all unique values in the entire table. I've
+been trying to pen what this exactly means for the topology, but it's not
+straightforward. For instance, topology.c uses this example:
+
+ node 0 1 2 3
+ 0: 10 20 20 30
+ 1: 20 10 20 20
+ 2: 20 20 10 20
+ 3: 30 20 20 10
+
+ 0 ----- 1
+ | / |
+ | / |
+ | / |
+ 2 ----- 3
+
+Which works out just fine. However, if we swap nodes 0 and 1:
+
+ 1 ----- 0
+ | / |
+ | / |
+ | / |
+ 2 ----- 3
+
+we get this distance table:
+
+ node 0 1 2 3
+ 0: 10 20 20 20
+ 1: 20 10 20 30
+ 2: 20 20 10 20
+ 3: 20 30 20 10
+
+Which breaks the deduplicating sort (non-representative first line). In
+this case this would just be a renumbering exercise, but it so happens that
+we can have a deduplicating sort that goes through the whole table in O(n²)
+at the extra cost of a temporary memory allocation (i.e. any form of set).
+
+The ACPI spec (SLIT) mentions distances are encoded on 8 bits. Following
+this, implement the set as a 256-bits bitmap. Should this not be
+satisfactory (i.e. we want to support 32-bit values), then we'll have to go
+for some other sparse set implementation.
+
+This has the added benefit of letting us allocate just the right amount of
+memory for sched_domains_numa_distance[], rather than an arbitrary
+(nr_node_ids + 1).
+
+Note: DT binding equivalent (distance-map) decodes distances as 32-bit
+values.
+
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210122123943.1217-2-valentin.schneider@arm.com
+Signed-off-by: dann frazier <dann.frazier@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/topology.h | 1
+ kernel/sched/topology.c | 99 ++++++++++++++++++++++-------------------------
+ 2 files changed, 49 insertions(+), 51 deletions(-)
+
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -48,6 +48,7 @@ int arch_update_cpu_topology(void);
+ /* Conform to ACPI 2.0 SLIT distance definitions */
+ #define LOCAL_DISTANCE 10
+ #define REMOTE_DISTANCE 20
++#define DISTANCE_BITS 8
+ #ifndef node_distance
+ #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
+ #endif
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -1552,66 +1552,58 @@ static void init_numa_topology_type(void
+ }
+ }
+
++
++#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
++
+ void sched_init_numa(void)
+ {
+- int next_distance, curr_distance = node_distance(0, 0);
+ struct sched_domain_topology_level *tl;
+- int level = 0;
+- int i, j, k;
+-
+- sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
+- if (!sched_domains_numa_distance)
+- return;
+-
+- /* Includes NUMA identity node at level 0. */
+- sched_domains_numa_distance[level++] = curr_distance;
+- sched_domains_numa_levels = level;
++ unsigned long *distance_map;
++ int nr_levels = 0;
++ int i, j;
+
+ /*
+ * O(nr_nodes^2) deduplicating selection sort -- in order to find the
+ * unique distances in the node_distance() table.
+- *
+- * Assumes node_distance(0,j) includes all distances in
+- * node_distance(i,j) in order to avoid cubic time.
+ */
+- next_distance = curr_distance;
++ distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
++ if (!distance_map)
++ return;
++
++ bitmap_zero(distance_map, NR_DISTANCE_VALUES);
+ for (i = 0; i < nr_node_ids; i++) {
+ for (j = 0; j < nr_node_ids; j++) {
+- for (k = 0; k < nr_node_ids; k++) {
+- int distance = node_distance(i, k);
++ int distance = node_distance(i, j);
+
+- if (distance > curr_distance &&
+- (distance < next_distance ||
+- next_distance == curr_distance))
+- next_distance = distance;
+-
+- /*
+- * While not a strong assumption it would be nice to know
+- * about cases where if node A is connected to B, B is not
+- * equally connected to A.
+- */
+- if (sched_debug() && node_distance(k, i) != distance)
+- sched_numa_warn("Node-distance not symmetric");
+-
+- if (sched_debug() && i && !find_numa_distance(distance))
+- sched_numa_warn("Node-0 not representative");
++ if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) {
++ sched_numa_warn("Invalid distance value range");
++ return;
+ }
+- if (next_distance != curr_distance) {
+- sched_domains_numa_distance[level++] = next_distance;
+- sched_domains_numa_levels = level;
+- curr_distance = next_distance;
+- } else break;
++
++ bitmap_set(distance_map, distance, 1);
+ }
++ }
++ /*
++ * We can now figure out how many unique distance values there are and
++ * allocate memory accordingly.
++ */
++ nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES);
+
+- /*
+- * In case of sched_debug() we verify the above assumption.
+- */
+- if (!sched_debug())
+- break;
++ sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
++ if (!sched_domains_numa_distance) {
++ bitmap_free(distance_map);
++ return;
+ }
+
++ for (i = 0, j = 0; i < nr_levels; i++, j++) {
++ j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j);
++ sched_domains_numa_distance[i] = j;
++ }
++
++ bitmap_free(distance_map);
++
+ /*
+- * 'level' contains the number of unique distances
++ * 'nr_levels' contains the number of unique distances
+ *
+ * The sched_domains_numa_distance[] array includes the actual distance
+ * numbers.
+@@ -1620,15 +1612,15 @@ void sched_init_numa(void)
+ /*
+ * Here, we should temporarily reset sched_domains_numa_levels to 0.
+ * If it fails to allocate memory for array sched_domains_numa_masks[][],
+- * the array will contain less then 'level' members. This could be
++ * the array will contain less then 'nr_levels' members. This could be
+ * dangerous when we use it to iterate array sched_domains_numa_masks[][]
+ * in other functions.
+ *
+- * We reset it to 'level' at the end of this function.
++ * We reset it to 'nr_levels' at the end of this function.
+ */
+ sched_domains_numa_levels = 0;
+
+- sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
++ sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
+ if (!sched_domains_numa_masks)
+ return;
+
+@@ -1636,7 +1628,7 @@ void sched_init_numa(void)
+ * Now for each level, construct a mask per node which contains all
+ * CPUs of nodes that are that many hops away from us.
+ */
+- for (i = 0; i < level; i++) {
++ for (i = 0; i < nr_levels; i++) {
+ sched_domains_numa_masks[i] =
+ kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
+ if (!sched_domains_numa_masks[i])
+@@ -1644,12 +1636,17 @@ void sched_init_numa(void)
+
+ for (j = 0; j < nr_node_ids; j++) {
+ struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
++ int k;
++
+ if (!mask)
+ return;
+
+ sched_domains_numa_masks[i][j] = mask;
+
+ for_each_node(k) {
++ if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
++ sched_numa_warn("Node-distance not symmetric");
++
+ if (node_distance(j, k) > sched_domains_numa_distance[i])
+ continue;
+
+@@ -1661,7 +1658,7 @@ void sched_init_numa(void)
+ /* Compute default topology size */
+ for (i = 0; sched_domain_topology[i].mask; i++);
+
+- tl = kzalloc((i + level + 1) *
++ tl = kzalloc((i + nr_levels) *
+ sizeof(struct sched_domain_topology_level), GFP_KERNEL);
+ if (!tl)
+ return;
+@@ -1684,7 +1681,7 @@ void sched_init_numa(void)
+ /*
+ * .. and append 'j' levels of NUMA goodness.
+ */
+- for (j = 1; j < level; i++, j++) {
++ for (j = 1; j < nr_levels; i++, j++) {
+ tl[i] = (struct sched_domain_topology_level){
+ .mask = sd_numa_mask,
+ .sd_flags = cpu_numa_flags,
+@@ -1696,8 +1693,8 @@ void sched_init_numa(void)
+
+ sched_domain_topology = tl;
+
+- sched_domains_numa_levels = level;
+- sched_max_numa_distance = sched_domains_numa_distance[level - 1];
++ sched_domains_numa_levels = nr_levels;
++ sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
+
+ init_numa_topology_type();
+ }