From a88d1f7134fa2913df50ea8c1ca41b5c1eeb07ee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 5 Mar 2022 20:50:21 +0100 Subject: [PATCH] 5.4-stable patches added patches: ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch --- ...ance-and-possible-map-initialization.patch | 79 ++++++ ...ology_level-alloc-in-sched_init_numa.patch | 49 ++++ ...use-a-set-for-the-deduplicating-sort.patch | 261 ++++++++++++++++++ queue-5.4/series | 3 + 4 files changed, 392 insertions(+) create mode 100644 queue-5.4/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch create mode 100644 queue-5.4/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch create mode 100644 queue-5.4/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch diff --git a/queue-5.4/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch b/queue-5.4/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch new file mode 100644 index 00000000000..02f329ea0fc --- /dev/null +++ b/queue-5.4/ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch @@ -0,0 +1,79 @@ +From b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd Mon Sep 17 00:00:00 2001 +From: Valentin Schneider +Date: Thu, 29 Apr 2021 22:53:27 -0700 +Subject: ia64: ensure proper NUMA distance and possible map initialization + +From: Valentin Schneider + +commit b22a8f7b4bde4e4ab73b64908ffd5d90ecdcdbfd upstream. + +John Paul reported a warning about bogus NUMA distance values spurred by +commit: + + 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort") + +In this case, the afflicted machine comes up with a reported 256 possible +nodes, all of which are 0 distance away from one another. This was +previously silently ignored, but is now caught by the aforementioned +commit. + +The culprit is ia64's node_possible_map which remains unchanged from its +initialization value of NODE_MASK_ALL. In John's case, the machine +doesn't have any SRAT nor SLIT table, but AIUI the possible map remains +untouched regardless of what ACPI tables end up being parsed. Thus, +!online && possible nodes remain with a bogus distance of 0 (distances \in +[0, 9] are "reserved and have no meaning" as per the ACPI spec). + +Follow x86 / drivers/base/arch_numa's example and set the possible map to +the parsed map, which in this case seems to be the online map. + +Link: http://lore.kernel.org/r/255d6b5d-194e-eb0e-ecdd-97477a534441@physik.fu-berlin.de +Link: https://lkml.kernel.org/r/20210318130617.896309-1-valentin.schneider@arm.com +Fixes: 620a6dc40754 ("sched/topology: Make sched_init_numa() use a set for the deduplicating sort") +Signed-off-by: Valentin Schneider +Reported-by: John Paul Adrian Glaubitz +Tested-by: John Paul Adrian Glaubitz +Tested-by: Sergei Trofimovich +Cc: "Peter Zijlstra (Intel)" +Cc: Ingo Molnar +Cc: Vincent Guittot +Cc: Dietmar Eggemann +Cc: Anatoly Pugachev +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: dann frazier +Signed-off-by: Greg Kroah-Hartman +--- + arch/ia64/kernel/acpi.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/ia64/kernel/acpi.c ++++ b/arch/ia64/kernel/acpi.c +@@ -448,7 +448,8 @@ void __init acpi_numa_fixup(void) + if (srat_num_cpus == 0) { + node_set_online(0); + node_cpuid[0].phys_id = hard_smp_processor_id(); +- return; ++ slit_distance(0, 0) = LOCAL_DISTANCE; ++ goto out; + } + + /* +@@ -491,7 +492,7 @@ void __init acpi_numa_fixup(void) + for (j = 0; j < MAX_NUMNODES; j++) + slit_distance(i, j) = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; +- return; ++ goto out; + } + + memset(numa_slit, -1, sizeof(numa_slit)); +@@ -516,6 +517,8 @@ void __init acpi_numa_fixup(void) + printk("\n"); + } + #endif ++out: ++ node_possible_map = node_online_map; + } + #endif /* CONFIG_ACPI_NUMA */ + diff --git a/queue-5.4/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch b/queue-5.4/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch new file mode 100644 index 00000000000..29c24e8bdb2 --- /dev/null +++ b/queue-5.4/sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch @@ -0,0 +1,49 @@ +From 71e5f6644fb2f3304fcb310145ded234a37e7cc1 Mon Sep 17 00:00:00 2001 +From: Dietmar Eggemann +Date: Mon, 1 Feb 2021 10:53:53 +0100 +Subject: sched/topology: Fix sched_domain_topology_level alloc in sched_init_numa() + +From: Dietmar Eggemann + +commit 71e5f6644fb2f3304fcb310145ded234a37e7cc1 upstream. + +Commit "sched/topology: Make sched_init_numa() use a set for the +deduplicating sort" allocates 'i + nr_levels (level)' instead of +'i + nr_levels + 1' sched_domain_topology_level. + +This led to an Oops (on Arm64 juno with CONFIG_SCHED_DEBUG): + +sched_init_domains + build_sched_domains() + __free_domain_allocs() + __sdt_free() { + ... + for_each_sd_topology(tl) + ... + sd = *per_cpu_ptr(sdd->sd, j); <-- + ... + } + +Signed-off-by: Dietmar Eggemann +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Ingo Molnar +Tested-by: Vincent Guittot +Tested-by: Barry Song +Link: https://lkml.kernel.org/r/6000e39e-7d28-c360-9cd6-8798fd22a9bf@arm.com +Signed-off-by: dann frazier +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/topology.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1658,7 +1658,7 @@ void sched_init_numa(void) + /* Compute default topology size */ + for (i = 0; sched_domain_topology[i].mask; i++); + +- tl = kzalloc((i + nr_levels) * ++ tl = kzalloc((i + nr_levels + 1) * + sizeof(struct sched_domain_topology_level), GFP_KERNEL); + if (!tl) + return; diff --git a/queue-5.4/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch b/queue-5.4/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch new file mode 100644 index 00000000000..8658ecc7025 --- /dev/null +++ b/queue-5.4/sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch @@ -0,0 +1,261 @@ +From 620a6dc40754dc218f5b6389b5d335e9a107fd29 Mon Sep 17 00:00:00 2001 +From: Valentin Schneider +Date: Fri, 22 Jan 2021 12:39:43 +0000 +Subject: sched/topology: Make sched_init_numa() use a set for the deduplicating sort +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Valentin Schneider + +commit 620a6dc40754dc218f5b6389b5d335e9a107fd29 upstream. + +The deduplicating sort in sched_init_numa() assumes that the first line in +the distance table contains all unique values in the entire table. I've +been trying to pen what this exactly means for the topology, but it's not +straightforward. For instance, topology.c uses this example: + + node 0 1 2 3 + 0: 10 20 20 30 + 1: 20 10 20 20 + 2: 20 20 10 20 + 3: 30 20 20 10 + + 0 ----- 1 + | / | + | / | + | / | + 2 ----- 3 + +Which works out just fine. However, if we swap nodes 0 and 1: + + 1 ----- 0 + | / | + | / | + | / | + 2 ----- 3 + +we get this distance table: + + node 0 1 2 3 + 0: 10 20 20 20 + 1: 20 10 20 30 + 2: 20 20 10 20 + 3: 20 30 20 10 + +Which breaks the deduplicating sort (non-representative first line). In +this case this would just be a renumbering exercise, but it so happens that +we can have a deduplicating sort that goes through the whole table in O(n²) +at the extra cost of a temporary memory allocation (i.e. any form of set). + +The ACPI spec (SLIT) mentions distances are encoded on 8 bits. Following +this, implement the set as a 256-bits bitmap. Should this not be +satisfactory (i.e. we want to support 32-bit values), then we'll have to go +for some other sparse set implementation. + +This has the added benefit of letting us allocate just the right amount of +memory for sched_domains_numa_distance[], rather than an arbitrary +(nr_node_ids + 1). + +Note: DT binding equivalent (distance-map) decodes distances as 32-bit +values. + +Signed-off-by: Valentin Schneider +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20210122123943.1217-2-valentin.schneider@arm.com +Signed-off-by: dann frazier +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/topology.h | 1 + kernel/sched/topology.c | 99 ++++++++++++++++++++++------------------------- + 2 files changed, 49 insertions(+), 51 deletions(-) + +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -48,6 +48,7 @@ int arch_update_cpu_topology(void); + /* Conform to ACPI 2.0 SLIT distance definitions */ + #define LOCAL_DISTANCE 10 + #define REMOTE_DISTANCE 20 ++#define DISTANCE_BITS 8 + #ifndef node_distance + #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) + #endif +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -1552,66 +1552,58 @@ static void init_numa_topology_type(void + } + } + ++ ++#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS) ++ + void sched_init_numa(void) + { +- int next_distance, curr_distance = node_distance(0, 0); + struct sched_domain_topology_level *tl; +- int level = 0; +- int i, j, k; +- +- sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); +- if (!sched_domains_numa_distance) +- return; +- +- /* Includes NUMA identity node at level 0. */ +- sched_domains_numa_distance[level++] = curr_distance; +- sched_domains_numa_levels = level; ++ unsigned long *distance_map; ++ int nr_levels = 0; ++ int i, j; + + /* + * O(nr_nodes^2) deduplicating selection sort -- in order to find the + * unique distances in the node_distance() table. +- * +- * Assumes node_distance(0,j) includes all distances in +- * node_distance(i,j) in order to avoid cubic time. + */ +- next_distance = curr_distance; ++ distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); ++ if (!distance_map) ++ return; ++ ++ bitmap_zero(distance_map, NR_DISTANCE_VALUES); + for (i = 0; i < nr_node_ids; i++) { + for (j = 0; j < nr_node_ids; j++) { +- for (k = 0; k < nr_node_ids; k++) { +- int distance = node_distance(i, k); ++ int distance = node_distance(i, j); + +- if (distance > curr_distance && +- (distance < next_distance || +- next_distance == curr_distance)) +- next_distance = distance; +- +- /* +- * While not a strong assumption it would be nice to know +- * about cases where if node A is connected to B, B is not +- * equally connected to A. +- */ +- if (sched_debug() && node_distance(k, i) != distance) +- sched_numa_warn("Node-distance not symmetric"); +- +- if (sched_debug() && i && !find_numa_distance(distance)) +- sched_numa_warn("Node-0 not representative"); ++ if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) { ++ sched_numa_warn("Invalid distance value range"); ++ return; + } +- if (next_distance != curr_distance) { +- sched_domains_numa_distance[level++] = next_distance; +- sched_domains_numa_levels = level; +- curr_distance = next_distance; +- } else break; ++ ++ bitmap_set(distance_map, distance, 1); + } ++ } ++ /* ++ * We can now figure out how many unique distance values there are and ++ * allocate memory accordingly. ++ */ ++ nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES); + +- /* +- * In case of sched_debug() we verify the above assumption. +- */ +- if (!sched_debug()) +- break; ++ sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL); ++ if (!sched_domains_numa_distance) { ++ bitmap_free(distance_map); ++ return; + } + ++ for (i = 0, j = 0; i < nr_levels; i++, j++) { ++ j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j); ++ sched_domains_numa_distance[i] = j; ++ } ++ ++ bitmap_free(distance_map); ++ + /* +- * 'level' contains the number of unique distances ++ * 'nr_levels' contains the number of unique distances + * + * The sched_domains_numa_distance[] array includes the actual distance + * numbers. +@@ -1620,15 +1612,15 @@ void sched_init_numa(void) + /* + * Here, we should temporarily reset sched_domains_numa_levels to 0. + * If it fails to allocate memory for array sched_domains_numa_masks[][], +- * the array will contain less then 'level' members. This could be ++ * the array will contain less then 'nr_levels' members. This could be + * dangerous when we use it to iterate array sched_domains_numa_masks[][] + * in other functions. + * +- * We reset it to 'level' at the end of this function. ++ * We reset it to 'nr_levels' at the end of this function. + */ + sched_domains_numa_levels = 0; + +- sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); ++ sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL); + if (!sched_domains_numa_masks) + return; + +@@ -1636,7 +1628,7 @@ void sched_init_numa(void) + * Now for each level, construct a mask per node which contains all + * CPUs of nodes that are that many hops away from us. + */ +- for (i = 0; i < level; i++) { ++ for (i = 0; i < nr_levels; i++) { + sched_domains_numa_masks[i] = + kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL); + if (!sched_domains_numa_masks[i]) +@@ -1644,12 +1636,17 @@ void sched_init_numa(void) + + for (j = 0; j < nr_node_ids; j++) { + struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); ++ int k; ++ + if (!mask) + return; + + sched_domains_numa_masks[i][j] = mask; + + for_each_node(k) { ++ if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) ++ sched_numa_warn("Node-distance not symmetric"); ++ + if (node_distance(j, k) > sched_domains_numa_distance[i]) + continue; + +@@ -1661,7 +1658,7 @@ void sched_init_numa(void) + /* Compute default topology size */ + for (i = 0; sched_domain_topology[i].mask; i++); + +- tl = kzalloc((i + level + 1) * ++ tl = kzalloc((i + nr_levels) * + sizeof(struct sched_domain_topology_level), GFP_KERNEL); + if (!tl) + return; +@@ -1684,7 +1681,7 @@ void sched_init_numa(void) + /* + * .. and append 'j' levels of NUMA goodness. + */ +- for (j = 1; j < level; i++, j++) { ++ for (j = 1; j < nr_levels; i++, j++) { + tl[i] = (struct sched_domain_topology_level){ + .mask = sd_numa_mask, + .sd_flags = cpu_numa_flags, +@@ -1696,8 +1693,8 @@ void sched_init_numa(void) + + sched_domain_topology = tl; + +- sched_domains_numa_levels = level; +- sched_max_numa_distance = sched_domains_numa_distance[level - 1]; ++ sched_domains_numa_levels = nr_levels; ++ sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1]; + + init_numa_topology_type(); + } diff --git a/queue-5.4/series b/queue-5.4/series index fe2c3974a0e..725cff67f73 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -28,3 +28,6 @@ net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-generated-by-client.patch net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-cause-by-server.patch block-fix-fsync-always-failed-if-once-failed.patch xen-netfront-destroy-queues-before-real_num_tx_queues-is-zeroed.patch +sched-topology-make-sched_init_numa-use-a-set-for-the-deduplicating-sort.patch +sched-topology-fix-sched_domain_topology_level-alloc-in-sched_init_numa.patch +ia64-ensure-proper-numa-distance-and-possible-map-initialization.patch -- 2.47.2