git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 5 Mar 2022 19:50:36 +0000 (20:50 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 5 Mar 2022 19:50:36 +0000 (20:50 +0100)
added patches:
ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch
sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch
sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch

queue-5.10/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch [new file with mode: 0644]
queue-5.10/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch [new file with mode: 0644]
queue-5.10/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch b/queue-5.10/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch
new file mode 100644 (file)
index 0000000..138ed1d
--- /dev/null
@@ -0,0 +1,79 @@
+From b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd Mon Sep 17 00:00:00 2001
+From: Valentin Schneider <valentin.schneider@arm.com>
+Date: Thu, 29 Apr 2021 22:53:27 -0700
+Subject: ia64: ensure proper NUMA distance and possible map initialization
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+commit b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd upstream.
+
+John Paul reported a warning about bogus NUMA distance values spurred by
+commit:
+
+  620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort")
+
+In this case, the afflicted machine comes up with a reported 256 possible
+nodes, all of which are 0 distance away from one another.  This was
+previously silently ignored, but is now caught by the aforementioned
+commit.
+
+The culprit is ia64's node_possible_map which remains unchanged from its
+initialization value of NODE_MASK_ALL.  In John's case, the machine
+doesn't have any SRAT nor SLIT table, but AIUI the possible map remains
+untouched regardless of what ACPI tables end up being parsed.  Thus,
+!online && possible nodes remain with a bogus distance of 0 (distances \in
+[0, 9] are "reserved and have no meaning" as per the ACPI spec).
+
+Follow x86 / drivers/base/arch_numa's example and set the possible map to
+the parsed map, which in this case seems to be the online map.
+
+Link: http://lore.kernel.org/r/255d6b5d-194e-eb0e-ecdd-97477a534441@physik.fu-berlin.de
+Link: https://lkml.kernel.org/r/20210318130617.896309-1-valentin.schneider@arm.com
+Fixes: 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort")
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Reported-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Tested-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Tested-by: Sergei Trofimovich <slyfox@gentoo.org>
+Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Vincent Guittot <vincent.guittot@linaro.org>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Anatoly Pugachev <matorola@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: dann frazier <dann.frazier@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/ia64/kernel/acpi.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/ia64/kernel/acpi.c
++++ b/arch/ia64/kernel/acpi.c
+@@ -446,7 +446,8 @@ void __init acpi_numa_fixup(void)
+       if (srat_num_cpus == 0) {
+               node_set_online(0);
+               node_cpuid[0].phys_id = hard_smp_processor_id();
+-              return;
++              slit_distance(0, 0) = LOCAL_DISTANCE;
++              goto out;
+       }
+       /*
+@@ -489,7 +490,7 @@ void __init acpi_numa_fixup(void)
+                       for (j = 0; j < MAX_NUMNODES; j++)
+                               slit_distance(i, j) = i == j ?
+                                       LOCAL_DISTANCE : REMOTE_DISTANCE;
+-              return;
++              goto out;
+       }
+       memset(numa_slit, -1, sizeof(numa_slit));
+@@ -514,6 +515,8 @@ void __init acpi_numa_fixup(void)
+               printk("\n");
+       }
+ #endif
++out:
++      node_possible_map = node_online_map;
+ }
+ #endif                                /* CONFIG_ACPI_NUMA */
diff --git a/queue-5.10/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch b/queue-5.10/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch
new file mode 100644 (file)
index 0000000..d32c402
--- /dev/null
@@ -0,0 +1,49 @@
+From 71e5f6644fb2f3304fcb310145ded234a37e7cc1 Mon Sep 17 00:00:00 2001
+From: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Date: Mon, 1 Feb 2021 10:53:53 +0100
+Subject: sched/topology: Fix sched_domain_topology_level alloc in sched_init_numa()
+
+From: Dietmar Eggemann <dietmar.eggemann@arm.com>
+
+commit 71e5f6644fb2f3304fcb310145ded234a37e7cc1 upstream.
+
+Commit "sched/topology: Make sched_init_numa() use a set for the
+deduplicating sort" allocates 'i + nr_levels (level)' instead of
+'i + nr_levels + 1' sched_domain_topology_level.
+
+This led to an Oops (on Arm64 juno with CONFIG_SCHED_DEBUG):
+
+sched_init_domains
+  build_sched_domains()
+    __free_domain_allocs()
+      __sdt_free() {
+       ...
+        for_each_sd_topology(tl)
+         ...
+          sd = *per_cpu_ptr(sdd->sd, j); <--
+         ...
+      }
+
+Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
+Tested-by: Barry Song <song.bao.hua@hisilicon.com>
+Link: https://lkml.kernel.org/r/6000e39e-7d28-c360-9cd6-8798fd22a9bf@arm.com
+Signed-off-by: dann frazier <dann.frazier@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/topology.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -1655,7 +1655,7 @@ void sched_init_numa(void)
+       /* Compute default topology size */
+       for (i = 0; sched_domain_topology[i].mask; i++);
+-      tl = kzalloc((i + nr_levels) *
++      tl = kzalloc((i + nr_levels + 1) *
+                       sizeof(struct sched_domain_topology_level), GFP_KERNEL);
+       if (!tl)
+               return;
diff --git a/queue-5.10/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch b/queue-5.10/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch
new file mode 100644 (file)
index 0000000..6898474
--- /dev/null
@@ -0,0 +1,261 @@
+From 620a6dc40754dc218f5b6389b5d335e9a107fd29 Mon Sep 17 00:00:00 2001
+From: Valentin Schneider <valentin.schneider@arm.com>
+Date: Fri, 22 Jan 2021 12:39:43 +0000
+Subject: sched/topology: Make sched_init_numa() use a set for the deduplicating sort
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+commit 620a6dc40754dc218f5b6389b5d335e9a107fd29 upstream.
+
+The deduplicating sort in sched_init_numa() assumes that the first line in
+the distance table contains all unique values in the entire table. I've
+been trying to pen what this exactly means for the topology, but it's not
+straightforward. For instance, topology.c uses this example:
+
+  node   0   1   2   3
+    0:  10  20  20  30
+    1:  20  10  20  20
+    2:  20  20  10  20
+    3:  30  20  20  10
+
+  0 ----- 1
+  |     / |
+  |   /   |
+  | /     |
+  2 ----- 3
+
+Which works out just fine. However, if we swap nodes 0 and 1:
+
+  1 ----- 0
+  |     / |
+  |   /   |
+  | /     |
+  2 ----- 3
+
+we get this distance table:
+
+  node   0  1  2  3
+    0:  10 20 20 20
+    1:  20 10 20 30
+    2:  20 20 10 20
+    3:  20 30 20 10
+
+Which breaks the deduplicating sort (non-representative first line). In
+this case this would just be a renumbering exercise, but it so happens that
+we can have a deduplicating sort that goes through the whole table in O(n²)
+at the extra cost of a temporary memory allocation (i.e. any form of set).
+
+The ACPI spec (SLIT) mentions distances are encoded on 8 bits. Following
+this, implement the set as a 256-bits bitmap. Should this not be
+satisfactory (i.e. we want to support 32-bit values), then we'll have to go
+for some other sparse set implementation.
+
+This has the added benefit of letting us allocate just the right amount of
+memory for sched_domains_numa_distance[], rather than an arbitrary
+(nr_node_ids + 1).
+
+Note: DT binding equivalent (distance-map) decodes distances as 32-bit
+values.
+
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20210122123943.1217-2-valentin.schneider@arm.com
+Signed-off-by: dann frazier <dann.frazier@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/topology.h |    1 
+ kernel/sched/topology.c  |   99 ++++++++++++++++++++++-------------------------
+ 2 files changed, 49 insertions(+), 51 deletions(-)
+
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -48,6 +48,7 @@ int arch_update_cpu_topology(void);
+ /* Conform to ACPI 2.0 SLIT distance definitions */
+ #define LOCAL_DISTANCE                10
+ #define REMOTE_DISTANCE               20
++#define DISTANCE_BITS           8
+ #ifndef node_distance
+ #define node_distance(from,to)        ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
+ #endif
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -1549,66 +1549,58 @@ static void init_numa_topology_type(void
+       }
+ }
++
++#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS)
++
+ void sched_init_numa(void)
+ {
+-      int next_distance, curr_distance = node_distance(0, 0);
+       struct sched_domain_topology_level *tl;
+-      int level = 0;
+-      int i, j, k;
+-
+-      sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
+-      if (!sched_domains_numa_distance)
+-              return;
+-
+-      /* Includes NUMA identity node at level 0. */
+-      sched_domains_numa_distance[level++] = curr_distance;
+-      sched_domains_numa_levels = level;
++      unsigned long *distance_map;
++      int nr_levels = 0;
++      int i, j;
+       /*
+        * O(nr_nodes^2) deduplicating selection sort -- in order to find the
+        * unique distances in the node_distance() table.
+-       *
+-       * Assumes node_distance(0,j) includes all distances in
+-       * node_distance(i,j) in order to avoid cubic time.
+        */
+-      next_distance = curr_distance;
++      distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL);
++      if (!distance_map)
++              return;
++
++      bitmap_zero(distance_map, NR_DISTANCE_VALUES);
+       for (i = 0; i < nr_node_ids; i++) {
+               for (j = 0; j < nr_node_ids; j++) {
+-                      for (k = 0; k < nr_node_ids; k++) {
+-                              int distance = node_distance(i, k);
++                      int distance = node_distance(i, j);
+-                              if (distance > curr_distance &&
+-                                  (distance < next_distance ||
+-                                   next_distance == curr_distance))
+-                                      next_distance = distance;
+-
+-                              /*
+-                               * While not a strong assumption it would be nice to know
+-                               * about cases where if node A is connected to B, B is not
+-                               * equally connected to A.
+-                               */
+-                              if (sched_debug() && node_distance(k, i) != distance)
+-                                      sched_numa_warn("Node-distance not symmetric");
+-
+-                              if (sched_debug() && i && !find_numa_distance(distance))
+-                                      sched_numa_warn("Node-0 not representative");
++                      if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) {
++                              sched_numa_warn("Invalid distance value range");
++                              return;
+                       }
+-                      if (next_distance != curr_distance) {
+-                              sched_domains_numa_distance[level++] = next_distance;
+-                              sched_domains_numa_levels = level;
+-                              curr_distance = next_distance;
+-                      } else break;
++
++                      bitmap_set(distance_map, distance, 1);
+               }
++      }
++      /*
++       * We can now figure out how many unique distance values there are and
++       * allocate memory accordingly.
++       */
++      nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES);
+-              /*
+-               * In case of sched_debug() we verify the above assumption.
+-               */
+-              if (!sched_debug())
+-                      break;
++      sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL);
++      if (!sched_domains_numa_distance) {
++              bitmap_free(distance_map);
++              return;
+       }
++      for (i = 0, j = 0; i < nr_levels; i++, j++) {
++              j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j);
++              sched_domains_numa_distance[i] = j;
++      }
++
++      bitmap_free(distance_map);
++
+       /*
+-       * 'level' contains the number of unique distances
++       * 'nr_levels' contains the number of unique distances
+        *
+        * The sched_domains_numa_distance[] array includes the actual distance
+        * numbers.
+@@ -1617,15 +1609,15 @@ void sched_init_numa(void)
+       /*
+        * Here, we should temporarily reset sched_domains_numa_levels to 0.
+        * If it fails to allocate memory for array sched_domains_numa_masks[][],
+-       * the array will contain less then 'level' members. This could be
++       * the array will contain less then 'nr_levels' members. This could be
+        * dangerous when we use it to iterate array sched_domains_numa_masks[][]
+        * in other functions.
+        *
+-       * We reset it to 'level' at the end of this function.
++       * We reset it to 'nr_levels' at the end of this function.
+        */
+       sched_domains_numa_levels = 0;
+-      sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
++      sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL);
+       if (!sched_domains_numa_masks)
+               return;
+@@ -1633,7 +1625,7 @@ void sched_init_numa(void)
+        * Now for each level, construct a mask per node which contains all
+        * CPUs of nodes that are that many hops away from us.
+        */
+-      for (i = 0; i < level; i++) {
++      for (i = 0; i < nr_levels; i++) {
+               sched_domains_numa_masks[i] =
+                       kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
+               if (!sched_domains_numa_masks[i])
+@@ -1641,12 +1633,17 @@ void sched_init_numa(void)
+               for (j = 0; j < nr_node_ids; j++) {
+                       struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
++                      int k;
++
+                       if (!mask)
+                               return;
+                       sched_domains_numa_masks[i][j] = mask;
+                       for_each_node(k) {
++                              if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
++                                      sched_numa_warn("Node-distance not symmetric");
++
+                               if (node_distance(j, k) > sched_domains_numa_distance[i])
+                                       continue;
+@@ -1658,7 +1655,7 @@ void sched_init_numa(void)
+       /* Compute default topology size */
+       for (i = 0; sched_domain_topology[i].mask; i++);
+-      tl = kzalloc((i + level + 1) *
++      tl = kzalloc((i + nr_levels) *
+                       sizeof(struct sched_domain_topology_level), GFP_KERNEL);
+       if (!tl)
+               return;
+@@ -1681,7 +1678,7 @@ void sched_init_numa(void)
+       /*
+        * .. and append 'j' levels of NUMA goodness.
+        */
+-      for (j = 1; j < level; i++, j++) {
++      for (j = 1; j < nr_levels; i++, j++) {
+               tl[i] = (struct sched_domain_topology_level){
+                       .mask = sd_numa_mask,
+                       .sd_flags = cpu_numa_flags,
+@@ -1693,8 +1690,8 @@ void sched_init_numa(void)
+       sched_domain_topology = tl;
+-      sched_domains_numa_levels = level;
+-      sched_max_numa_distance = sched_domains_numa_distance[level - 1];
++      sched_domains_numa_levels = nr_levels;
++      sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
+       init_numa_topology_type();
+ }
diff --git a/queue-5.10/series b/queue-5.10/series
index ad804b153ca3f43dc45667a29c9644107ac526de..574e94abe911a3969169d91a4bb30b9262ec8aed 100644 (file)
@@ -54,3 +54,6 @@ net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-cause-by-server.patch
 rcu-nocb-fix-missed-nocb_timer-requeue.patch
 ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch
 ice-fix-concurrent-reset-and-removal-of-vfs.patch
+sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch
+sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch
+ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch