git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author     Sasha Levin <sashal@kernel.org>
           Tue, 18 Apr 2023 01:24:52 +0000 (21:24 -0400)
committer  Sasha Levin <sashal@kernel.org>
           Tue, 18 Apr 2023 01:24:52 +0000 (21:24 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
14 files changed:
queue-5.10/i2c-ocores-generate-stop-condition-after-timeout-in-.patch [new file with mode: 0644]
queue-5.10/mtd-ubi-wl-fix-a-couple-of-kernel-doc-issues.patch [new file with mode: 0644]
queue-5.10/powerpc-papr_scm-update-the-numa-distance-table-for-.patch [new file with mode: 0644]
queue-5.10/powerpc-pseries-add-a-helper-for-form1-cpu-distance.patch [new file with mode: 0644]
queue-5.10/powerpc-pseries-add-support-for-form2-associativity.patch [new file with mode: 0644]
queue-5.10/powerpc-pseries-consolidate-different-numa-distance-.patch [new file with mode: 0644]
queue-5.10/powerpc-pseries-rename-min_common_depth-to-primary_d.patch [new file with mode: 0644]
queue-5.10/powerpc-pseries-rename-type1_affinity-to-form1_affin.patch [new file with mode: 0644]
queue-5.10/sched-fair-fix-imbalance-overflow.patch [new file with mode: 0644]
queue-5.10/sched-fair-move-calculate-of-avg_load-to-a-better-lo.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/ubi-fix-deadlock-caused-by-recursively-holding-work_.patch [new file with mode: 0644]
queue-5.10/ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch [new file with mode: 0644]
queue-5.10/x86-rtc-remove-__init-for-runtime-functions.patch [new file with mode: 0644]

diff --git a/queue-5.10/i2c-ocores-generate-stop-condition-after-timeout-in-.patch b/queue-5.10/i2c-ocores-generate-stop-condition-after-timeout-in-.patch
new file mode 100644 (file)
index 0000000..3deafdb
--- /dev/null
@@ -0,0 +1,106 @@
+From fcae622444dca8b418e9de3b0ac59e78e9435025 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Apr 2023 11:37:37 +0200
+Subject: i2c: ocores: generate stop condition after timeout in polling mode
+
+From: Gregor Herburger <gregor.herburger@tq-group.com>
+
+[ Upstream commit f8160d3b35fc94491bb0cb974dbda310ef96c0e2 ]
+
+In polling mode, no stop condition is generated after a timeout. This
+causes SCL to remain low and thereby block the bus. If this happens
+during a transfer it can cause slaves to misinterpret the subsequent
+transfer and return wrong values.
+
+To solve this, pass the ETIMEDOUT error up from ocores_process_polling()
+instead of setting STATE_ERROR directly. The caller is adjusted to call
+ocores_process_timeout() on error both in polling and in IRQ mode, which
+will set STATE_ERROR and generate a stop condition.
+
+Fixes: 69c8c0c0efa8 ("i2c: ocores: add polling interface")
+Signed-off-by: Gregor Herburger <gregor.herburger@tq-group.com>
+Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
+Acked-by: Peter Korsgaard <peter@korsgaard.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Federico Vaga <federico.vaga@cern.ch>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-ocores.c | 35 ++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
+index f5fc75b65a194..71e26aa6bd8ff 100644
+--- a/drivers/i2c/busses/i2c-ocores.c
++++ b/drivers/i2c/busses/i2c-ocores.c
+@@ -343,18 +343,18 @@ static int ocores_poll_wait(struct ocores_i2c *i2c)
+  * ocores_isr(), we just add our polling code around it.
+  *
+  * It can run in atomic context
++ *
++ * Return: 0 on success, -ETIMEDOUT on timeout
+  */
+-static void ocores_process_polling(struct ocores_i2c *i2c)
++static int ocores_process_polling(struct ocores_i2c *i2c)
+ {
+-      while (1) {
+-              irqreturn_t ret;
+-              int err;
++      irqreturn_t ret;
++      int err = 0;
++      while (1) {
+               err = ocores_poll_wait(i2c);
+-              if (err) {
+-                      i2c->state = STATE_ERROR;
++              if (err)
+                       break; /* timeout */
+-              }
+               ret = ocores_isr(-1, i2c);
+               if (ret == IRQ_NONE)
+@@ -365,13 +365,15 @@ static void ocores_process_polling(struct ocores_i2c *i2c)
+                                       break;
+               }
+       }
++
++      return err;
+ }
+ static int ocores_xfer_core(struct ocores_i2c *i2c,
+                           struct i2c_msg *msgs, int num,
+                           bool polling)
+ {
+-      int ret;
++      int ret = 0;
+       u8 ctrl;
+       ctrl = oc_getreg(i2c, OCI2C_CONTROL);
+@@ -389,15 +391,16 @@ static int ocores_xfer_core(struct ocores_i2c *i2c,
+       oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_START);
+       if (polling) {
+-              ocores_process_polling(i2c);
++              ret = ocores_process_polling(i2c);
+       } else {
+-              ret = wait_event_timeout(i2c->wait,
+-                                       (i2c->state == STATE_ERROR) ||
+-                                       (i2c->state == STATE_DONE), HZ);
+-              if (ret == 0) {
+-                      ocores_process_timeout(i2c);
+-                      return -ETIMEDOUT;
+-              }
++              if (wait_event_timeout(i2c->wait,
++                                     (i2c->state == STATE_ERROR) ||
++                                     (i2c->state == STATE_DONE), HZ) == 0)
++                      ret = -ETIMEDOUT;
++      }
++      if (ret) {
++              ocores_process_timeout(i2c);
++              return ret;
+       }
+       return (i2c->state == STATE_DONE) ? num : -EIO;
+-- 
+2.39.2
+
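Editor's note: the fix above comes down to an error-propagation pattern: the polling helper now returns -ETIMEDOUT instead of setting STATE_ERROR itself, so the caller can run one timeout cleanup (the place where the real driver issues the STOP condition and releases SCL) for both the polling and the IRQ path. The following is a minimal, compilable userspace sketch of that pattern, not the driver code itself; process_polling(), demo_xfer() and the poll "budget" are invented for illustration.

#include <errno.h>
#include <stdio.h>

enum xfer_state { STATE_DONE, STATE_ERROR };

/* Stand-in for ocores_process_polling(): poll until the transfer
 * "finishes" or the poll budget runs out, and report the timeout to
 * the caller instead of flagging the error here. */
static int process_polling(int budget, int done_after)
{
	int err = 0;
	int polls = 0;

	while (1) {
		if (polls >= budget) {
			err = -ETIMEDOUT;	/* let the caller clean up */
			break;
		}
		polls++;
		if (polls >= done_after)	/* stand-in for "transfer done" */
			break;
	}
	return err;
}

/* Stand-in for ocores_xfer_core(): any timeout, from either completion
 * mechanism, funnels through this single cleanup spot. */
static int demo_xfer(enum xfer_state *state, int budget, int done_after)
{
	int ret = process_polling(budget, done_after);

	if (ret) {
		*state = STATE_ERROR;
		printf("timeout -> issue STOP condition, release SCL\n");
		return ret;
	}
	*state = STATE_DONE;
	return 0;
}

int main(void)
{
	enum xfer_state state;

	printf("short transfer:   %d\n", demo_xfer(&state, 5, 3));
	printf("stalled transfer: %d\n", demo_xfer(&state, 5, 100));
	return 0;
}

Centralising the cleanup in the caller is what guarantees a STOP condition on every timeout, whichever completion mechanism was in use.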
diff --git a/queue-5.10/mtd-ubi-wl-fix-a-couple-of-kernel-doc-issues.patch b/queue-5.10/mtd-ubi-wl-fix-a-couple-of-kernel-doc-issues.patch
new file mode 100644 (file)
index 0000000..8b3f28d
--- /dev/null
@@ -0,0 +1,51 @@
+From 10c187783fcd90715eb08de38c99757762291e08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Nov 2020 18:21:55 +0000
+Subject: mtd: ubi: wl: Fix a couple of kernel-doc issues
+
+From: Lee Jones <lee.jones@linaro.org>
+
+[ Upstream commit ab4e4de9fd8b469823a645f05f2c142e9270b012 ]
+
+Fixes the following W=1 kernel build warning(s):
+
+ drivers/mtd/ubi/wl.c:584: warning: Function parameter or member 'nested' not described in 'schedule_erase'
+ drivers/mtd/ubi/wl.c:1075: warning: Excess function parameter 'shutdown' description in '__erase_worker'
+
+Cc: Richard Weinberger <richard@nod.at>
+Cc: Miquel Raynal <miquel.raynal@bootlin.com>
+Cc: Vignesh Raghavendra <vigneshr@ti.com>
+Cc: linux-mtd@lists.infradead.org
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20201109182206.3037326-13-lee.jones@linaro.org
+Stable-dep-of: f773f0a331d6 ("ubi: Fix deadlock caused by recursively holding work_sem")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/ubi/wl.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
+index 6da09263e0b9f..2ee0e60c43c2e 100644
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -575,6 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+  * @vol_id: the volume ID that last used this PEB
+  * @lnum: the last used logical eraseblock number for the PEB
+  * @torture: if the physical eraseblock has to be tortured
++ * @nested: denotes whether the work_sem is already held in read mode
+  *
+  * This function returns zero in case of success and a %-ENOMEM in case of
+  * failure.
+@@ -1066,8 +1067,6 @@ static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
+  * __erase_worker - physical eraseblock erase worker function.
+  * @ubi: UBI device description object
+  * @wl_wrk: the work object
+- * @shutdown: non-zero if the worker has to free memory and exit
+- * because the WL sub-system is shutting down
+  *
+  * This function erases a physical eraseblock and perform torture testing if
+  * needed. It also takes care about marking the physical eraseblock bad if
+-- 
+2.39.2
+
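Editor's note: for readers unfamiliar with the warning class being silenced, here is a small, compilable example of the kernel-doc convention involved: every parameter in the prototype needs a matching @name: line, and a line for a parameter that was removed from the prototype must also be removed from the comment. The function below is hypothetical and not part of the UBI code.

#include <stdbool.h>
#include <stdio.h>

/**
 * schedule_demo_work - queue a demo work item (hypothetical function)
 * @id:     identifier of the object the work applies to
 * @nested: denotes whether the caller already holds the work lock
 *
 * A missing @name: line (like @nested in the patch above) triggers a
 * W=1 "Function parameter or member ... not described" warning, and a
 * stale line for a dropped parameter (like @shutdown) triggers an
 * "Excess function parameter" warning.
 *
 * Return: 0 in case of success and %-ENOMEM in case of failure.
 */
static int schedule_demo_work(int id, bool nested)
{
	printf("work %d scheduled (nested=%d)\n", id, nested);
	return 0;
}

int main(void)
{
	return schedule_demo_work(1, false);
}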
diff --git a/queue-5.10/powerpc-papr_scm-update-the-numa-distance-table-for-.patch b/queue-5.10/powerpc-papr_scm-update-the-numa-distance-table-for-.patch
new file mode 100644 (file)
index 0000000..267ef2d
--- /dev/null
@@ -0,0 +1,84 @@
+From 9143366ddb48aa1f932bfeec6ebfdaa968831ee4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 09:44:33 +0530
+Subject: powerpc/papr_scm: Update the NUMA distance table for the target node
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit b277fc793daf258877b4c0744b52f69d6e6ba22e ]
+
+Platform device helper routines won't update the NUMA distance table
+while creating a platform device, even if the device is present on a
+NUMA node that doesn't have memory or CPU. This is especially true for
+pmem devices. If the target node of the pmem device is not online, we
+find the nearest online node to the device and associate the pmem device
+with that online node. To find the nearest online node, we should have
+the numa distance table updated correctly. Update the distance
+information during the device probe.
+
+For a papr scm device on NUMA node 3 distance_lookup_table value for
+distance_ref_points_depth = 2 before and after fix is below:
+
+Before fix:
+  node 3 distance depth 0  - 0
+  node 3 distance depth 1  - 0
+  node 4 distance depth 0  - 4
+  node 4 distance depth 1  - 2
+  node 5 distance depth 0  - 5
+  node 5 distance depth 1  - 1
+
+After fix
+  node 3 distance depth 0  - 3
+  node 3 distance depth 1  - 1
+  node 4 distance depth 0  - 4
+  node 4 distance depth 1  - 2
+  node 5 distance depth 0  - 5
+  node 5 distance depth 1  - 1
+
+Without the fix, the nearest numa node to the pmem device (NUMA node 3)
+will be picked as 4. After the fix, we get the correct numa node which
+is 5.
+
+Fixes: da1115fdbd6e ("powerpc/nvdimm: Pick nearby online node if the device node is not online")
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20230404041433.1781804-1-aneesh.kumar@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/numa.c                    | 1 +
+ arch/powerpc/platforms/pseries/papr_scm.c | 7 +++++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index cfc170935a58b..ce8569e16f0c4 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -372,6 +372,7 @@ void update_numa_distance(struct device_node *node)
+       WARN(numa_distance_table[nid][nid] == -1,
+            "NUMA distance details for node %d not provided\n", nid);
+ }
++EXPORT_SYMBOL_GPL(update_numa_distance);
+ /*
+  * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
+index 057acbb9116dd..e3b7698b4762c 100644
+--- a/arch/powerpc/platforms/pseries/papr_scm.c
++++ b/arch/powerpc/platforms/pseries/papr_scm.c
+@@ -1079,6 +1079,13 @@ static int papr_scm_probe(struct platform_device *pdev)
+               return -ENODEV;
+       }
++      /*
++       * open firmware platform device create won't update the NUMA 
++       * distance table. For PAPR SCM devices we use numa_map_to_online_node()
++       * to find the nearest online NUMA node and that requires correct
++       * distance table information.
++       */
++      update_numa_distance(dn);
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+-- 
+2.39.2
+
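Editor's note: the changelog above hinges on one idea: when a device's own NUMA node is offline, the kernel (via numa_map_to_online_node()) falls back to the nearest online node, and "nearest" is only meaningful if the distance row for that node has been filled in. The sketch below is a standalone userspace model of that lookup, not kernel code; the node layout and distances are illustrative, loosely following the "after fix" numbers in the changelog, where node 5 is the correct answer for a device on offline node 3.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 6

static int nearest_online_node(int target, const int dist[NR_NODES][NR_NODES],
			       const bool online[NR_NODES])
{
	int best = -1, best_dist = INT_MAX;

	for (int n = 0; n < NR_NODES; n++) {
		if (!online[n] || dist[target][n] >= best_dist)
			continue;
		best = n;
		best_dist = dist[target][n];
	}
	return best;
}

int main(void)
{
	/* Node 3 (the pmem node) is offline; 0, 4 and 5 are online. */
	const bool online[NR_NODES] = { [0] = true, [4] = true, [5] = true };
	/* Distances from node 3 once its row in the table is populated. */
	const int dist[NR_NODES][NR_NODES] = {
		[3] = { [0] = 40, [3] = 10, [4] = 40, [5] = 20 },
	};

	printf("nearest online node to 3: %d\n",
	       nearest_online_node(3, dist, online));	/* prints 5 */
	return 0;
}

Without the probe-time update, that row would stay empty and the fallback could land on a farther node, which is exactly the misplacement the patch fixes.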
diff --git a/queue-5.10/powerpc-pseries-add-a-helper-for-form1-cpu-distance.patch b/queue-5.10/powerpc-pseries-add-a-helper-for-form1-cpu-distance.patch
new file mode 100644 (file)
index 0000000..d11bdc9
--- /dev/null
@@ -0,0 +1,101 @@
+From 6e4a8a54e0e81098fadb33328237141739504aa6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 18:52:22 +0530
+Subject: powerpc/pseries: Add a helper for form1 cpu distance
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit ef31cb83d19c4c589d650747cd5a7e502be9f665 ]
+
+This helper is only used with the dispatch trace log collection.
+A later patch will add Form2 affinity support and this change helps
+in keeping that simpler. Also add a comment explaining we don't expect
+the code to be called with FORM0
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210812132223.225214-5-aneesh.kumar@linux.ibm.com
+Stable-dep-of: b277fc793daf ("powerpc/papr_scm: Update the NUMA distance table for the target node")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/topology.h   |  4 ++--
+ arch/powerpc/mm/numa.c                | 10 +++++++++-
+ arch/powerpc/platforms/pseries/lpar.c |  4 ++--
+ 3 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
+index 1604920d8d2de..b239ef589ae06 100644
+--- a/arch/powerpc/include/asm/topology.h
++++ b/arch/powerpc/include/asm/topology.h
+@@ -36,7 +36,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
+                                cpu_all_mask :                         \
+                                cpumask_of_node(pcibus_to_node(bus)))
+-extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
++int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
+ extern int __node_distance(int, int);
+ #define node_distance(a, b) __node_distance(a, b)
+@@ -84,7 +84,7 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
+ static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+-static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
++static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+ {
+       return 0;
+ }
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index e61593ae25c9e..010476abec344 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -166,7 +166,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
+ }
+ #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
+-int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
++static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+ {
+       int dist = 0;
+@@ -182,6 +182,14 @@ int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+       return dist;
+ }
++int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
++{
++      /* We should not get called with FORM0 */
++      VM_WARN_ON(affinity_form == FORM0_AFFINITY);
++
++      return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
++}
++
+ /* must hold reference to node during call */
+ static const __be32 *of_get_associativity(struct device_node *dev)
+ {
+diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
+index 115d196560b8b..28396a7e77d6f 100644
+--- a/arch/powerpc/platforms/pseries/lpar.c
++++ b/arch/powerpc/platforms/pseries/lpar.c
+@@ -261,7 +261,7 @@ static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
+       if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
+               return -EIO;
+-      return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
++      return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+ }
+ static int cpu_home_node_dispatch_distance(int disp_cpu)
+@@ -281,7 +281,7 @@ static int cpu_home_node_dispatch_distance(int disp_cpu)
+       if (!disp_cpu_assoc || !vcpu_assoc)
+               return -EIO;
+-      return cpu_distance(disp_cpu_assoc, vcpu_assoc);
++      return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
+ }
+ static void update_vcpu_disp_stat(int disp_cpu)
+-- 
+2.39.2
+
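Editor's note: the helper introduced above computes a "relative distance" by walking the associativity reference points and counting how many grouping levels differ before the two CPUs first share a domain. A compilable userspace model of that counting loop is sketched below; it is not the kernel function, and the associativity arrays, reference-point ordering and level names are hypothetical.

#include <stdio.h>

static int form1_relative_distance(const unsigned int *a, const unsigned int *b,
				   const int *ref_points, int depth)
{
	int dist = 0;

	for (int i = 0; i < depth; i++) {
		int idx = ref_points[i];

		if (a[idx] == b[idx])	/* first shared domain: stop counting */
			break;
		dist++;
	}
	return dist;
}

int main(void)
{
	/* Hypothetical associativity arrays: { board, socket, core-group }. */
	const unsigned int cpu0[] = { 0, 2, 7 };
	const unsigned int cpu1[] = { 0, 2, 9 };
	const unsigned int cpu2[] = { 1, 5, 3 };
	const int ref_points[] = { 2, 1, 0 };	/* finest grouping first */

	printf("cpu0 vs cpu1: %d\n", form1_relative_distance(cpu0, cpu1, ref_points, 3));
	printf("cpu0 vs cpu2: %d\n", form1_relative_distance(cpu0, cpu2, ref_points, 3));
	return 0;
}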
diff --git a/queue-5.10/powerpc-pseries-add-support-for-form2-associativity.patch b/queue-5.10/powerpc-pseries-add-support-for-form2-associativity.patch
new file mode 100644 (file)
index 0000000..a7a0c5a
--- /dev/null
@@ -0,0 +1,471 @@
+From f242c177c86e9ce6dfea8000bcf601494f3fdeec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 18:52:23 +0530
+Subject: powerpc/pseries: Add support for FORM2 associativity
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit 1c6b5a7e74052768977855f95d6b8812f6e7772c ]
+
+PAPR interface currently supports two different ways of communicating resource
+grouping details to the OS. These are referred to as Form 0 and Form 1
+associativity grouping. Form 0 is the older format and is now considered
+deprecated. This patch adds another resource grouping named FORM2.
+
+Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210812132223.225214-6-aneesh.kumar@linux.ibm.com
+Stable-dep-of: b277fc793daf ("powerpc/papr_scm: Update the NUMA distance table for the target node")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/powerpc/associativity.rst   | 104 ++++++++++++
+ arch/powerpc/include/asm/firmware.h       |   3 +-
+ arch/powerpc/include/asm/prom.h           |   1 +
+ arch/powerpc/kernel/prom_init.c           |   3 +-
+ arch/powerpc/mm/numa.c                    | 187 ++++++++++++++++++----
+ arch/powerpc/platforms/pseries/firmware.c |   1 +
+ 6 files changed, 262 insertions(+), 37 deletions(-)
+ create mode 100644 Documentation/powerpc/associativity.rst
+
+diff --git a/Documentation/powerpc/associativity.rst b/Documentation/powerpc/associativity.rst
+new file mode 100644
+index 0000000000000..07e7dd3d6c87e
+--- /dev/null
++++ b/Documentation/powerpc/associativity.rst
+@@ -0,0 +1,104 @@
++============================
++NUMA resource associativity
++=============================
++
++Associativity represents the groupings of the various platform resources into
++domains of substantially similar mean performance relative to resources outside
++of that domain. Resources subsets of a given domain that exhibit better
++performance relative to each other than relative to other resources subsets
++are represented as being members of a sub-grouping domain. This performance
++characteristic is presented in terms of NUMA node distance within the Linux kernel.
++From the platform view, these groups are also referred to as domains.
++
++PAPR interface currently supports different ways of communicating these resource
++grouping details to the OS. These are referred to as Form 0, Form 1 and Form2
++associativity grouping. Form 0 is the oldest format and is now considered deprecated.
++
++Hypervisor indicates the type/form of associativity used via "ibm,architecture-vec-5 property".
++Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1.
++A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity
++bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used.
++
++Form 0
++-----
++Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE).
++
++Form 1
++-----
++With Form 1 a combination of ibm,associativity-reference-points, and ibm,associativity
++device tree properties are used to determine the NUMA distance between resource groups/domains.
++
++The “ibm,associativity” property contains a list of one or more numbers (domainID)
++representing the resource’s platform grouping domains.
++
++The “ibm,associativity-reference-points” property contains a list of one or more numbers
++(domainID index) that represents the 1 based ordinal in the associativity lists.
++The list of domainID indexes represents an increasing hierarchy of resource grouping.
++
++ex:
++{ primary domainID index, secondary domainID index, tertiary domainID index.. }
++
++Linux kernel uses the domainID at the primary domainID index as the NUMA node id.
++Linux kernel computes NUMA distance between two domains by recursively comparing
++if they belong to the same higher-level domains. For mismatch at every higher
++level of the resource group, the kernel doubles the NUMA distance between the
++comparing domains.
++
++Form 2
++-------
++Form 2 associativity format adds separate device tree properties representing NUMA node distance
++thereby making the node distance computation flexible. Form 2 also allows flexible primary
++domain numbering. With numa distance computation now detached from the index value in
++"ibm,associativity-reference-points" property, Form 2 allows a large number of primary domain
++ids at the same domainID index representing resource groups of different performance/latency
++characteristics.
++
++Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the
++"ibm,architecture-vec-5" property.
++
++"ibm,numa-lookup-index-table" property contains a list of one or more numbers representing
++the domainIDs present in the system. The offset of the domainID in this property is
++used as an index while computing numa distance information via "ibm,numa-distance-table".
++
++prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by
++N domainID encoded as with encode-int
++
++For ex:
++"ibm,numa-lookup-index-table" =  {4, 0, 8, 250, 252}. The offset of domainID 8 (2) is used when
++computing the distance of domain 8 from other domains present in the system. For the rest of
++this document, this offset will be referred to as domain distance offset.
++
++"ibm,numa-distance-table" property contains a list of one or more numbers representing the NUMA
++distance between resource groups/domains present in the system.
++
++prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
++N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
++The number N must be equal to the square of m where m is the number of domainIDs in the
++numa-lookup-index-table.
++
++For ex:
++ibm,numa-lookup-index-table = <3 0 8 40>;
++ibm,numa-distace-table = <9>, /bits/ 8 < 10  20  80
++                                       20  10 160
++                                       80 160  10>;
++  | 0    8   40
++--|------------
++  |
++0 | 10   20  80
++  |
++8 | 20   10  160
++  |
++40| 80   160  10
++
++A possible "ibm,associativity" property for resources in node 0, 8 and 40
++
++{ 3, 6, 7, 0 }
++{ 3, 6, 9, 8 }
++{ 3, 6, 7, 40}
++
++With "ibm,associativity-reference-points"  { 0x3 }
++
++"ibm,lookup-index-table" helps in having a compact representation of distance matrix.
++Since domainID can be sparse, the matrix of distances can also be effectively sparse.
++With "ibm,lookup-index-table" we can achieve a compact representation of
++distance information.
+diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
+index 0cf648d829f15..89a31f1c7b118 100644
+--- a/arch/powerpc/include/asm/firmware.h
++++ b/arch/powerpc/include/asm/firmware.h
+@@ -53,6 +53,7 @@
+ #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000)
+ #define FW_FEATURE_STUFF_TCE  ASM_CONST(0x0000008000000000)
+ #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000)
++#define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000)
+ #ifndef __ASSEMBLY__
+@@ -73,7 +74,7 @@ enum {
+               FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+               FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
+               FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
+-              FW_FEATURE_RPT_INVALIDATE,
++              FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY,
+       FW_FEATURE_PSERIES_ALWAYS = 0,
+       FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
+       FW_FEATURE_POWERNV_ALWAYS = 0,
+diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
+index df9fec9d232cb..5c80152e8f188 100644
+--- a/arch/powerpc/include/asm/prom.h
++++ b/arch/powerpc/include/asm/prom.h
+@@ -149,6 +149,7 @@ extern int of_read_drc_info_cell(struct property **prop,
+ #define OV5_XCMO              0x0440  /* Page Coalescing */
+ #define OV5_FORM1_AFFINITY    0x0580  /* FORM1 NUMA affinity */
+ #define OV5_PRRN              0x0540  /* Platform Resource Reassignment */
++#define OV5_FORM2_AFFINITY    0x0520  /* Form2 NUMA affinity */
+ #define OV5_HP_EVT            0x0604  /* Hot Plug Event support */
+ #define OV5_RESIZE_HPT                0x0601  /* Hash Page Table resizing */
+ #define OV5_PFO_HW_RNG                0x1180  /* PFO Random Number Generator */
+diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
+index a3bf3587a4162..6f7ad80763159 100644
+--- a/arch/powerpc/kernel/prom_init.c
++++ b/arch/powerpc/kernel/prom_init.c
+@@ -1069,7 +1069,8 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
+ #else
+               0,
+ #endif
+-              .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN),
++              .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) |
++              OV5_FEAT(OV5_FORM2_AFFINITY),
+               .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT),
+               .micro_checkpoint = 0,
+               .reserved0 = 0,
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index 010476abec344..cfc170935a58b 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -56,12 +56,17 @@ static int n_mem_addr_cells, n_mem_size_cells;
+ #define FORM0_AFFINITY 0
+ #define FORM1_AFFINITY 1
++#define FORM2_AFFINITY 2
+ static int affinity_form;
+ #define MAX_DISTANCE_REF_POINTS 4
+ static int distance_ref_points_depth;
+ static const __be32 *distance_ref_points;
+ static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
++static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
++      [0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
++};
++static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
+ /*
+  * Allocate node_to_cpumask_map based on number of available nodes
+@@ -166,6 +171,54 @@ static void unmap_cpu_from_node(unsigned long cpu)
+ }
+ #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
++static int __associativity_to_nid(const __be32 *associativity,
++                                int max_array_sz)
++{
++      int nid;
++      /*
++       * primary_domain_index is 1 based array index.
++       */
++      int index = primary_domain_index  - 1;
++
++      if (!numa_enabled || index >= max_array_sz)
++              return NUMA_NO_NODE;
++
++      nid = of_read_number(&associativity[index], 1);
++
++      /* POWER4 LPAR uses 0xffff as invalid node */
++      if (nid == 0xffff || nid >= nr_node_ids)
++              nid = NUMA_NO_NODE;
++      return nid;
++}
++/*
++ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
++ * info is found.
++ */
++static int associativity_to_nid(const __be32 *associativity)
++{
++      int array_sz = of_read_number(associativity, 1);
++
++      /* Skip the first element in the associativity array */
++      return __associativity_to_nid((associativity + 1), array_sz);
++}
++
++static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
++{
++      int dist;
++      int node1, node2;
++
++      node1 = associativity_to_nid(cpu1_assoc);
++      node2 = associativity_to_nid(cpu2_assoc);
++
++      dist = numa_distance_table[node1][node2];
++      if (dist <= LOCAL_DISTANCE)
++              return 0;
++      else if (dist <= REMOTE_DISTANCE)
++              return 1;
++      else
++              return 2;
++}
++
+ static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+ {
+       int dist = 0;
+@@ -186,8 +239,9 @@ int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
+ {
+       /* We should not get called with FORM0 */
+       VM_WARN_ON(affinity_form == FORM0_AFFINITY);
+-
+-      return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
++      if (affinity_form == FORM1_AFFINITY)
++              return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
++      return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
+ }
+ /* must hold reference to node during call */
+@@ -201,7 +255,9 @@ int __node_distance(int a, int b)
+       int i;
+       int distance = LOCAL_DISTANCE;
+-      if (affinity_form == FORM0_AFFINITY)
++      if (affinity_form == FORM2_AFFINITY)
++              return numa_distance_table[a][b];
++      else if (affinity_form == FORM0_AFFINITY)
+               return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
+       for (i = 0; i < distance_ref_points_depth; i++) {
+@@ -216,37 +272,6 @@ int __node_distance(int a, int b)
+ }
+ EXPORT_SYMBOL(__node_distance);
+-static int __associativity_to_nid(const __be32 *associativity,
+-                                int max_array_sz)
+-{
+-      int nid;
+-      /*
+-       * primary_domain_index is 1 based array index.
+-       */
+-      int index = primary_domain_index  - 1;
+-
+-      if (!numa_enabled || index >= max_array_sz)
+-              return NUMA_NO_NODE;
+-
+-      nid = of_read_number(&associativity[index], 1);
+-
+-      /* POWER4 LPAR uses 0xffff as invalid node */
+-      if (nid == 0xffff || nid >= nr_node_ids)
+-              nid = NUMA_NO_NODE;
+-      return nid;
+-}
+-/*
+- * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
+- * info is found.
+- */
+-static int associativity_to_nid(const __be32 *associativity)
+-{
+-      int array_sz = of_read_number(associativity, 1);
+-
+-      /* Skip the first element in the associativity array */
+-      return __associativity_to_nid((associativity + 1), array_sz);
+-}
+-
+ /* Returns the nid associated with the given device tree node,
+  * or -1 if not found.
+  */
+@@ -320,6 +345,8 @@ static void initialize_form1_numa_distance(const __be32 *associativity)
+  */
+ void update_numa_distance(struct device_node *node)
+ {
++      int nid;
++
+       if (affinity_form == FORM0_AFFINITY)
+               return;
+       else if (affinity_form == FORM1_AFFINITY) {
+@@ -332,6 +359,84 @@ void update_numa_distance(struct device_node *node)
+               initialize_form1_numa_distance(associativity);
+               return;
+       }
++
++      /* FORM2 affinity  */
++      nid = of_node_to_nid_single(node);
++      if (nid == NUMA_NO_NODE)
++              return;
++
++      /*
++       * With FORM2 we expect NUMA distance of all possible NUMA
++       * nodes to be provided during boot.
++       */
++      WARN(numa_distance_table[nid][nid] == -1,
++           "NUMA distance details for node %d not provided\n", nid);
++}
++
++/*
++ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
++ * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
++ */
++static void initialize_form2_numa_distance_lookup_table(void)
++{
++      int i, j;
++      struct device_node *root;
++      const __u8 *numa_dist_table;
++      const __be32 *numa_lookup_index;
++      int numa_dist_table_length;
++      int max_numa_index, distance_index;
++
++      if (firmware_has_feature(FW_FEATURE_OPAL))
++              root = of_find_node_by_path("/ibm,opal");
++      else
++              root = of_find_node_by_path("/rtas");
++      if (!root)
++              root = of_find_node_by_path("/");
++
++      numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
++      max_numa_index = of_read_number(&numa_lookup_index[0], 1);
++
++      /* first element of the array is the size and is encode-int */
++      numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL);
++      numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1);
++      /* Skip the size which is encoded int */
++      numa_dist_table += sizeof(__be32);
++
++      pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n",
++               numa_dist_table_length, max_numa_index);
++
++      for (i = 0; i < max_numa_index; i++)
++              /* +1 skip the max_numa_index in the property */
++              numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
++
++
++      if (numa_dist_table_length != max_numa_index * max_numa_index) {
++              WARN(1, "Wrong NUMA distance information\n");
++              /* consider everybody else just remote. */
++              for (i = 0;  i < max_numa_index; i++) {
++                      for (j = 0; j < max_numa_index; j++) {
++                              int nodeA = numa_id_index_table[i];
++                              int nodeB = numa_id_index_table[j];
++
++                              if (nodeA == nodeB)
++                                      numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE;
++                              else
++                                      numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE;
++                      }
++              }
++      }
++
++      distance_index = 0;
++      for (i = 0;  i < max_numa_index; i++) {
++              for (j = 0; j < max_numa_index; j++) {
++                      int nodeA = numa_id_index_table[i];
++                      int nodeB = numa_id_index_table[j];
++
++                      numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++];
++                      pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]);
++              }
++      }
++      of_node_put(root);
+ }
+ static int __init find_primary_domain_index(void)
+@@ -344,6 +449,9 @@ static int __init find_primary_domain_index(void)
+        */
+       if (firmware_has_feature(FW_FEATURE_OPAL)) {
+               affinity_form = FORM1_AFFINITY;
++      } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
++              dbg("Using form 2 affinity\n");
++              affinity_form = FORM2_AFFINITY;
+       } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
+               dbg("Using form 1 affinity\n");
+               affinity_form = FORM1_AFFINITY;
+@@ -388,9 +496,12 @@ static int __init find_primary_domain_index(void)
+               index = of_read_number(&distance_ref_points[1], 1);
+       } else {
++              /*
++               * Both FORM1 and FORM2 affinity find the primary domain details
++               * at the same offset.
++               */
+               index = of_read_number(distance_ref_points, 1);
+       }
+-
+       /*
+        * Warn and cap if the hardware supports more than
+        * MAX_DISTANCE_REF_POINTS domains.
+@@ -819,6 +930,12 @@ static int __init parse_numa_properties(void)
+       dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index);
++      /*
++       * If it is FORM2 initialize the distance table here.
++       */
++      if (affinity_form == FORM2_AFFINITY)
++              initialize_form2_numa_distance_lookup_table();
++
+       /*
+        * Even though we connect cpus to numa domains later in SMP
+        * init, we need to know the node ids now. This is because
+diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
+index 5d4c2bc20bbab..f162156b7b68d 100644
+--- a/arch/powerpc/platforms/pseries/firmware.c
++++ b/arch/powerpc/platforms/pseries/firmware.c
+@@ -123,6 +123,7 @@ vec5_fw_features_table[] = {
+       {FW_FEATURE_PRRN,               OV5_PRRN},
+       {FW_FEATURE_DRMEM_V2,           OV5_DRMEM_V2},
+       {FW_FEATURE_DRC_INFO,           OV5_DRC_INFO},
++      {FW_FEATURE_FORM2_AFFINITY,     OV5_FORM2_AFFINITY},
+ };
+ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
+-- 
+2.39.2
+
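Editor's note: the core of Form 2 is that the distance matrix is given explicitly: "ibm,numa-lookup-index-table" maps a compact index to a (possibly sparse) domain ID, and the N*N byte "ibm,numa-distance-table" is read row-major using those indexes. The sketch below is a standalone userspace model of that combination, reusing the example matrix from the documentation added in the patch (domains 0, 8 and 40); it deliberately ignores the device-tree cell encoding that of_read_number() handles in the real parser.

#include <stdio.h>

#define MAX_NODE 64

int main(void)
{
	/* {N, domainid...}: 3 domains with sparse IDs 0, 8 and 40. */
	const int lookup_index[] = { 3, 0, 8, 40 };
	/* N*N distances, row-major, in lookup-table order. */
	const unsigned char dist_table[] = {
		10,  20,  80,
		20,  10, 160,
		80, 160,  10,
	};

	static int node_distance[MAX_NODE][MAX_NODE];
	int n = lookup_index[0], pos = 0;

	for (int i = 0; i < n; i++)
		for (int j = 0; j < n; j++)
			node_distance[lookup_index[i + 1]][lookup_index[j + 1]] =
				dist_table[pos++];

	printf("distance(8, 40)  = %d\n", node_distance[8][40]);   /* 160 */
	printf("distance(40, 40) = %d\n", node_distance[40][40]);  /* 10  */
	return 0;
}

Because the lookup table decouples matrix position from domain ID, sparse or large domain IDs cost nothing extra, which is the compactness argument made at the end of the documentation above.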
diff --git a/queue-5.10/powerpc-pseries-consolidate-different-numa-distance-.patch b/queue-5.10/powerpc-pseries-consolidate-different-numa-distance-.patch
new file mode 100644 (file)
index 0000000..97d94e4
--- /dev/null
@@ -0,0 +1,392 @@
+From 4b6bf61e15cd9ede4d014c47e0f3c90140818c8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 18:52:21 +0530
+Subject: powerpc/pseries: Consolidate different NUMA distance update code
+ paths
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit 8ddc6448ec5a5ef50eaa581a7dec0e12a02850ff ]
+
+The associativity details of the newly added resourced are collected from
+the hypervisor via "ibm,configure-connector" rtas call. Update the numa
+distance details of the newly added numa node after the above call.
+
+Instead of updating NUMA distance every time we lookup a node id
+from the associativity property, add helpers that can be used
+during boot which does this only once. Also remove the distance
+update from node id lookup helpers.
+
+Currently, we duplicate parsing code for ibm,associativity and
+ibm,associativity-lookup-arrays in the kernel. The associativity array provided
+by these device tree properties are very similar and hence can use
+a helper to parse the node id and numa distance details.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210812132223.225214-4-aneesh.kumar@linux.ibm.com
+Stable-dep-of: b277fc793daf ("powerpc/papr_scm: Update the NUMA distance table for the target node")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/topology.h           |   2 +
+ arch/powerpc/mm/numa.c                        | 212 +++++++++++++-----
+ arch/powerpc/platforms/pseries/hotplug-cpu.c  |   2 +
+ .../platforms/pseries/hotplug-memory.c        |   2 +
+ 4 files changed, 161 insertions(+), 57 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
+index 3beeb030cd78e..1604920d8d2de 100644
+--- a/arch/powerpc/include/asm/topology.h
++++ b/arch/powerpc/include/asm/topology.h
+@@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu)
+ }
+ int of_drconf_to_nid_single(struct drmem_lmb *lmb);
++void update_numa_distance(struct device_node *node);
+ #else
+@@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+       return first_online_node;
+ }
++static inline void update_numa_distance(struct device_node *node) {}
+ #endif /* CONFIG_NUMA */
+ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index 415cd3d258ff8..e61593ae25c9e 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -208,50 +208,35 @@ int __node_distance(int a, int b)
+ }
+ EXPORT_SYMBOL(__node_distance);
+-static void initialize_distance_lookup_table(int nid,
+-              const __be32 *associativity)
++static int __associativity_to_nid(const __be32 *associativity,
++                                int max_array_sz)
+ {
+-      int i;
++      int nid;
++      /*
++       * primary_domain_index is 1 based array index.
++       */
++      int index = primary_domain_index  - 1;
+-      if (affinity_form != FORM1_AFFINITY)
+-              return;
++      if (!numa_enabled || index >= max_array_sz)
++              return NUMA_NO_NODE;
+-      for (i = 0; i < distance_ref_points_depth; i++) {
+-              const __be32 *entry;
++      nid = of_read_number(&associativity[index], 1);
+-              entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
+-              distance_lookup_table[nid][i] = of_read_number(entry, 1);
+-      }
++      /* POWER4 LPAR uses 0xffff as invalid node */
++      if (nid == 0xffff || nid >= nr_node_ids)
++              nid = NUMA_NO_NODE;
++      return nid;
+ }
+-
+ /*
+  * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
+  * info is found.
+  */
+ static int associativity_to_nid(const __be32 *associativity)
+ {
+-      int nid = NUMA_NO_NODE;
+-
+-      if (!numa_enabled)
+-              goto out;
+-
+-      if (of_read_number(associativity, 1) >= primary_domain_index)
+-              nid = of_read_number(&associativity[primary_domain_index], 1);
+-
+-      /* POWER4 LPAR uses 0xffff as invalid node */
+-      if (nid == 0xffff || nid >= nr_node_ids)
+-              nid = NUMA_NO_NODE;
+-
+-      if (nid > 0 &&
+-              of_read_number(associativity, 1) >= distance_ref_points_depth) {
+-              /*
+-               * Skip the length field and send start of associativity array
+-               */
+-              initialize_distance_lookup_table(nid, associativity + 1);
+-      }
++      int array_sz = of_read_number(associativity, 1);
+-out:
+-      return nid;
++      /* Skip the first element in the associativity array */
++      return __associativity_to_nid((associativity + 1), array_sz);
+ }
+ /* Returns the nid associated with the given device tree node,
+@@ -287,6 +272,60 @@ int of_node_to_nid(struct device_node *device)
+ }
+ EXPORT_SYMBOL(of_node_to_nid);
++static void __initialize_form1_numa_distance(const __be32 *associativity,
++                                           int max_array_sz)
++{
++      int i, nid;
++
++      if (affinity_form != FORM1_AFFINITY)
++              return;
++
++      nid = __associativity_to_nid(associativity, max_array_sz);
++      if (nid != NUMA_NO_NODE) {
++              for (i = 0; i < distance_ref_points_depth; i++) {
++                      const __be32 *entry;
++                      int index = be32_to_cpu(distance_ref_points[i]) - 1;
++
++                      /*
++                       * broken hierarchy, return with broken distance table
++                       */
++                      if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
++                              return;
++
++                      entry = &associativity[index];
++                      distance_lookup_table[nid][i] = of_read_number(entry, 1);
++              }
++      }
++}
++
++static void initialize_form1_numa_distance(const __be32 *associativity)
++{
++      int array_sz;
++
++      array_sz = of_read_number(associativity, 1);
++      /* Skip the first element in the associativity array */
++      __initialize_form1_numa_distance(associativity + 1, array_sz);
++}
++
++/*
++ * Used to update distance information w.r.t newly added node.
++ */
++void update_numa_distance(struct device_node *node)
++{
++      if (affinity_form == FORM0_AFFINITY)
++              return;
++      else if (affinity_form == FORM1_AFFINITY) {
++              const __be32 *associativity;
++
++              associativity = of_get_associativity(node);
++              if (!associativity)
++                      return;
++
++              initialize_form1_numa_distance(associativity);
++              return;
++      }
++}
++
+ static int __init find_primary_domain_index(void)
+ {
+       int index;
+@@ -433,6 +472,38 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
+       return 0;
+ }
++static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
++{
++      struct assoc_arrays aa = { .arrays = NULL };
++      int default_nid = NUMA_NO_NODE;
++      int nid = default_nid;
++      int rc, index;
++
++      if ((primary_domain_index < 0) || !numa_enabled)
++              return default_nid;
++
++      rc = of_get_assoc_arrays(&aa);
++      if (rc)
++              return default_nid;
++
++      if (primary_domain_index <= aa.array_sz &&
++          !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
++              const __be32 *associativity;
++
++              index = lmb->aa_index * aa.array_sz;
++              associativity = &aa.arrays[index];
++              nid = __associativity_to_nid(associativity, aa.array_sz);
++              if (nid > 0 && affinity_form == FORM1_AFFINITY) {
++                      /*
++                       * lookup array associativity entries have
++                       * no length of the array as the first element.
++                       */
++                      __initialize_form1_numa_distance(associativity, aa.array_sz);
++              }
++      }
++      return nid;
++}
++
+ /*
+  * This is like of_node_to_nid_single() for memory represented in the
+  * ibm,dynamic-reconfiguration-memory node.
+@@ -453,26 +524,19 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+       if (primary_domain_index <= aa.array_sz &&
+           !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+-              index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
+-              nid = of_read_number(&aa.arrays[index], 1);
+-
+-              if (nid == 0xffff || nid >= nr_node_ids)
+-                      nid = default_nid;
++              const __be32 *associativity;
+-              if (nid > 0) {
+-                      index = lmb->aa_index * aa.array_sz;
+-                      initialize_distance_lookup_table(nid,
+-                                                      &aa.arrays[index]);
+-              }
++              index = lmb->aa_index * aa.array_sz;
++              associativity = &aa.arrays[index];
++              nid = __associativity_to_nid(associativity, aa.array_sz);
+       }
+-
+       return nid;
+ }
+ #ifdef CONFIG_PPC_SPLPAR
+-static int vphn_get_nid(long lcpu)
++
++static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+ {
+-      __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+       long rc, hwid;
+       /*
+@@ -492,12 +556,30 @@ static int vphn_get_nid(long lcpu)
+               rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
+               if (rc == H_SUCCESS)
+-                      return associativity_to_nid(associativity);
++                      return 0;
+       }
++      return -1;
++}
++
++static int vphn_get_nid(long lcpu)
++{
++      __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
++
++
++      if (!__vphn_get_associativity(lcpu, associativity))
++              return associativity_to_nid(associativity);
++
+       return NUMA_NO_NODE;
++
+ }
+ #else
++
++static int __vphn_get_associativity(long lcpu, __be32 *associativity)
++{
++      return -1;
++}
++
+ static int vphn_get_nid(long unused)
+ {
+       return NUMA_NO_NODE;
+@@ -692,7 +774,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+                       size = read_n_cells(n_mem_size_cells, usm);
+               }
+-              nid = of_drconf_to_nid_single(lmb);
++              nid = get_nid_and_numa_distance(lmb);
+               fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
+                                         &nid);
+               node_set_online(nid);
+@@ -709,6 +791,7 @@ static int __init parse_numa_properties(void)
+       struct device_node *memory;
+       int default_nid = 0;
+       unsigned long i;
++      const __be32 *associativity;
+       if (numa_enabled == 0) {
+               printk(KERN_WARNING "NUMA disabled by user\n");
+@@ -734,18 +817,30 @@ static int __init parse_numa_properties(void)
+        * each node to be onlined must have NODE_DATA etc backing it.
+        */
+       for_each_present_cpu(i) {
++              __be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
+               struct device_node *cpu;
+-              int nid = vphn_get_nid(i);
++              int nid = NUMA_NO_NODE;
+-              /*
+-               * Don't fall back to default_nid yet -- we will plug
+-               * cpus into nodes once the memory scan has discovered
+-               * the topology.
+-               */
+-              if (nid == NUMA_NO_NODE) {
++              memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
++
++              if (__vphn_get_associativity(i, vphn_assoc) == 0) {
++                      nid = associativity_to_nid(vphn_assoc);
++                      initialize_form1_numa_distance(vphn_assoc);
++              } else {
++
++                      /*
++                       * Don't fall back to default_nid yet -- we will plug
++                       * cpus into nodes once the memory scan has discovered
++                       * the topology.
++                       */
+                       cpu = of_get_cpu_node(i, NULL);
+                       BUG_ON(!cpu);
+-                      nid = of_node_to_nid_single(cpu);
++
++                      associativity = of_get_associativity(cpu);
++                      if (associativity) {
++                              nid = associativity_to_nid(associativity);
++                              initialize_form1_numa_distance(associativity);
++                      }
+                       of_node_put(cpu);
+               }
+@@ -783,8 +878,11 @@ static int __init parse_numa_properties(void)
+                * have associativity properties.  If none, then
+                * everything goes to default_nid.
+                */
+-              nid = of_node_to_nid_single(memory);
+-              if (nid < 0)
++              associativity = of_get_associativity(memory);
++              if (associativity) {
++                      nid = associativity_to_nid(associativity);
++                      initialize_form1_numa_distance(associativity);
++              } else
+                       nid = default_nid;
+               fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
+diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
+index 325f3b220f360..1f8f97210d143 100644
+--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
++++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
+@@ -484,6 +484,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
+               return saved_rc;
+       }
++      update_numa_distance(dn);
++
+       rc = dlpar_online_cpu(dn);
+       if (rc) {
+               saved_rc = rc;
+diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
+index 7efe6ec5d14a4..a5f968b5fa3a8 100644
+--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
++++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
+@@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
+               return -ENODEV;
+       }
++      update_numa_distance(lmb_node);
++
+       dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+       if (!dr_node) {
+               dlpar_free_cc_nodes(lmb_node);
+-- 
+2.39.2
+
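Editor's note: the consolidation above follows one pattern: a single core helper takes the associativity array plus an explicit length, and thin wrappers adapt the two property layouts — the length-prefixed "ibm,associativity" array and the fixed-width "ibm,associativity-lookup-arrays" rows that carry no prefix. Below is a minimal userspace model of that pattern, not kernel code; it drops the numa_enabled and 0xffff invalid-node checks, and all values are hypothetical.

#include <stdio.h>

#define NUMA_NO_NODE (-1)

static int primary_domain_index = 3;	/* 1-based, as in the kernel */

static int __associativity_to_nid(const unsigned int *assoc, int max_array_sz)
{
	int index = primary_domain_index - 1;

	if (index >= max_array_sz)
		return NUMA_NO_NODE;
	return (int)assoc[index];
}

/* "ibm,associativity": element 0 is the array length, so skip it. */
static int associativity_to_nid(const unsigned int *assoc)
{
	return __associativity_to_nid(assoc + 1, (int)assoc[0]);
}

/* Lookup-array row: the caller already knows the row width. */
static int lookup_array_to_nid(const unsigned int *row, int row_width)
{
	return __associativity_to_nid(row, row_width);
}

int main(void)
{
	const unsigned int assoc[] = { 3, 4, 5, 9 };	/* length-prefixed */
	const unsigned int row[] = { 4, 5, 9 };		/* no prefix */

	printf("from ibm,associativity: %d\n", associativity_to_nid(assoc));
	printf("from lookup array row:  %d\n", lookup_array_to_nid(row, 3));
	return 0;
}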
diff --git a/queue-5.10/powerpc-pseries-rename-min_common_depth-to-primary_d.patch b/queue-5.10/powerpc-pseries-rename-min_common_depth-to-primary_d.patch
new file mode 100644 (file)
index 0000000..13e3dcc
--- /dev/null
@@ -0,0 +1,158 @@
+From 9c3309edcac097af0deead823da6e4fa056691d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 18:52:19 +0530
+Subject: powerpc/pseries: rename min_common_depth to primary_domain_index
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit 7e35ef662ca05c42dbc2f401bb76d9219dd7fd02 ]
+
+No functional change in this patch.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210812132223.225214-2-aneesh.kumar@linux.ibm.com
+Stable-dep-of: b277fc793daf ("powerpc/papr_scm: Update the NUMA distance table for the target node")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/numa.c | 38 +++++++++++++++++++-------------------
+ 1 file changed, 19 insertions(+), 19 deletions(-)
+
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index 275c60f92a7ce..a21f62fcda1e8 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -51,7 +51,7 @@ EXPORT_SYMBOL(numa_cpu_lookup_table);
+ EXPORT_SYMBOL(node_to_cpumask_map);
+ EXPORT_SYMBOL(node_data);
+-static int min_common_depth;
++static int primary_domain_index;
+ static int n_mem_addr_cells, n_mem_size_cells;
+ static int form1_affinity;
+@@ -232,8 +232,8 @@ static int associativity_to_nid(const __be32 *associativity)
+       if (!numa_enabled)
+               goto out;
+-      if (of_read_number(associativity, 1) >= min_common_depth)
+-              nid = of_read_number(&associativity[min_common_depth], 1);
++      if (of_read_number(associativity, 1) >= primary_domain_index)
++              nid = of_read_number(&associativity[primary_domain_index], 1);
+       /* POWER4 LPAR uses 0xffff as invalid node */
+       if (nid == 0xffff || nid >= nr_node_ids)
+@@ -284,9 +284,9 @@ int of_node_to_nid(struct device_node *device)
+ }
+ EXPORT_SYMBOL(of_node_to_nid);
+-static int __init find_min_common_depth(void)
++static int __init find_primary_domain_index(void)
+ {
+-      int depth;
++      int index;
+       struct device_node *root;
+       if (firmware_has_feature(FW_FEATURE_OPAL))
+@@ -326,7 +326,7 @@ static int __init find_min_common_depth(void)
+       }
+       if (form1_affinity) {
+-              depth = of_read_number(distance_ref_points, 1);
++              index = of_read_number(distance_ref_points, 1);
+       } else {
+               if (distance_ref_points_depth < 2) {
+                       printk(KERN_WARNING "NUMA: "
+@@ -334,7 +334,7 @@ static int __init find_min_common_depth(void)
+                       goto err;
+               }
+-              depth = of_read_number(&distance_ref_points[1], 1);
++              index = of_read_number(&distance_ref_points[1], 1);
+       }
+       /*
+@@ -348,7 +348,7 @@ static int __init find_min_common_depth(void)
+       }
+       of_node_put(root);
+-      return depth;
++      return index;
+ err:
+       of_node_put(root);
+@@ -437,16 +437,16 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+       int nid = default_nid;
+       int rc, index;
+-      if ((min_common_depth < 0) || !numa_enabled)
++      if ((primary_domain_index < 0) || !numa_enabled)
+               return default_nid;
+       rc = of_get_assoc_arrays(&aa);
+       if (rc)
+               return default_nid;
+-      if (min_common_depth <= aa.array_sz &&
++      if (primary_domain_index <= aa.array_sz &&
+           !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+-              index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
++              index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
+               nid = of_read_number(&aa.arrays[index], 1);
+               if (nid == 0xffff || nid >= nr_node_ids)
+@@ -708,18 +708,18 @@ static int __init parse_numa_properties(void)
+               return -1;
+       }
+-      min_common_depth = find_min_common_depth();
++      primary_domain_index = find_primary_domain_index();
+-      if (min_common_depth < 0) {
++      if (primary_domain_index < 0) {
+               /*
+-               * if we fail to parse min_common_depth from device tree
++               * if we fail to parse primary_domain_index from device tree
+                * mark the numa disabled, boot with numa disabled.
+                */
+               numa_enabled = false;
+-              return min_common_depth;
++              return primary_domain_index;
+       }
+-      dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
++      dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index);
+       /*
+        * Even though we connect cpus to numa domains later in SMP
+@@ -926,7 +926,7 @@ static void __init find_possible_nodes(void)
+                       goto out;
+       }
+-      max_nodes = of_read_number(&domains[min_common_depth], 1);
++      max_nodes = of_read_number(&domains[primary_domain_index], 1);
+       pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
+       for (i = 0; i < max_nodes; i++) {
+@@ -935,7 +935,7 @@ static void __init find_possible_nodes(void)
+       }
+       prop_length /= sizeof(int);
+-      if (prop_length > min_common_depth + 2)
++      if (prop_length > primary_domain_index + 2)
+               coregroup_enabled = 1;
+ out:
+@@ -1268,7 +1268,7 @@ int cpu_to_coregroup_id(int cpu)
+               goto out;
+       index = of_read_number(associativity, 1);
+-      if (index > min_common_depth + 1)
++      if (index > primary_domain_index + 1)
+               return of_read_number(&associativity[index - 1], 1);
+ out:
+-- 
+2.39.2
+
diff --git a/queue-5.10/powerpc-pseries-rename-type1_affinity-to-form1_affin.patch b/queue-5.10/powerpc-pseries-rename-type1_affinity-to-form1_affin.patch
new file mode 100644 (file)
index 0000000..95ff6b9
--- /dev/null
@@ -0,0 +1,170 @@
+From cacb3c31cf1481761b80e46f88551ddbb0df9d44 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Aug 2021 18:52:20 +0530
+Subject: powerpc/pseries: Rename TYPE1_AFFINITY to FORM1_AFFINITY
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit 0eacd06bb8adea8dd9edb0a30144166d9f227e64 ]
+
+Also make related code cleanups that will allow adding FORM2_AFFINITY in
+later patches. No functional change in this patch.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210812132223.225214-3-aneesh.kumar@linux.ibm.com
+Stable-dep-of: b277fc793daf ("powerpc/papr_scm: Update the NUMA distance table for the target node")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/firmware.h       |  4 +--
+ arch/powerpc/include/asm/prom.h           |  2 +-
+ arch/powerpc/kernel/prom_init.c           |  2 +-
+ arch/powerpc/mm/numa.c                    | 35 ++++++++++++++---------
+ arch/powerpc/platforms/pseries/firmware.c |  2 +-
+ 5 files changed, 26 insertions(+), 19 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
+index aa6a5ef5d4830..0cf648d829f15 100644
+--- a/arch/powerpc/include/asm/firmware.h
++++ b/arch/powerpc/include/asm/firmware.h
+@@ -44,7 +44,7 @@
+ #define FW_FEATURE_OPAL               ASM_CONST(0x0000000010000000)
+ #define FW_FEATURE_SET_MODE   ASM_CONST(0x0000000040000000)
+ #define FW_FEATURE_BEST_ENERGY        ASM_CONST(0x0000000080000000)
+-#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
++#define FW_FEATURE_FORM1_AFFINITY ASM_CONST(0x0000000100000000)
+ #define FW_FEATURE_PRRN               ASM_CONST(0x0000000200000000)
+ #define FW_FEATURE_DRMEM_V2   ASM_CONST(0x0000000400000000)
+ #define FW_FEATURE_DRC_INFO   ASM_CONST(0x0000000800000000)
+@@ -69,7 +69,7 @@ enum {
+               FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
+               FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
+               FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
+-              FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
++              FW_FEATURE_FORM1_AFFINITY | FW_FEATURE_PRRN |
+               FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+               FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
+               FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
+diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
+index 324a13351749a..df9fec9d232cb 100644
+--- a/arch/powerpc/include/asm/prom.h
++++ b/arch/powerpc/include/asm/prom.h
+@@ -147,7 +147,7 @@ extern int of_read_drc_info_cell(struct property **prop,
+ #define OV5_MSI                       0x0201  /* PCIe/MSI support */
+ #define OV5_CMO                       0x0480  /* Cooperative Memory Overcommitment */
+ #define OV5_XCMO              0x0440  /* Page Coalescing */
+-#define OV5_TYPE1_AFFINITY    0x0580  /* Type 1 NUMA affinity */
++#define OV5_FORM1_AFFINITY    0x0580  /* FORM1 NUMA affinity */
+ #define OV5_PRRN              0x0540  /* Platform Resource Reassignment */
+ #define OV5_HP_EVT            0x0604  /* Hot Plug Event support */
+ #define OV5_RESIZE_HPT                0x0601  /* Hash Page Table resizing */
+diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
+index 9e71c0739f08d..a3bf3587a4162 100644
+--- a/arch/powerpc/kernel/prom_init.c
++++ b/arch/powerpc/kernel/prom_init.c
+@@ -1069,7 +1069,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
+ #else
+               0,
+ #endif
+-              .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
++              .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN),
+               .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT),
+               .micro_checkpoint = 0,
+               .reserved0 = 0,
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index a21f62fcda1e8..415cd3d258ff8 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -53,7 +53,10 @@ EXPORT_SYMBOL(node_data);
+ static int primary_domain_index;
+ static int n_mem_addr_cells, n_mem_size_cells;
+-static int form1_affinity;
++
++#define FORM0_AFFINITY 0
++#define FORM1_AFFINITY 1
++static int affinity_form;
+ #define MAX_DISTANCE_REF_POINTS 4
+ static int distance_ref_points_depth;
+@@ -190,7 +193,7 @@ int __node_distance(int a, int b)
+       int i;
+       int distance = LOCAL_DISTANCE;
+-      if (!form1_affinity)
++      if (affinity_form == FORM0_AFFINITY)
+               return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
+       for (i = 0; i < distance_ref_points_depth; i++) {
+@@ -210,7 +213,7 @@ static void initialize_distance_lookup_table(int nid,
+ {
+       int i;
+-      if (!form1_affinity)
++      if (affinity_form != FORM1_AFFINITY)
+               return;
+       for (i = 0; i < distance_ref_points_depth; i++) {
+@@ -289,6 +292,17 @@ static int __init find_primary_domain_index(void)
+       int index;
+       struct device_node *root;
++      /*
++       * Check for which form of affinity.
++       */
++      if (firmware_has_feature(FW_FEATURE_OPAL)) {
++              affinity_form = FORM1_AFFINITY;
++      } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
++              dbg("Using form 1 affinity\n");
++              affinity_form = FORM1_AFFINITY;
++      } else
++              affinity_form = FORM0_AFFINITY;
++
+       if (firmware_has_feature(FW_FEATURE_OPAL))
+               root = of_find_node_by_path("/ibm,opal");
+       else
+@@ -318,23 +332,16 @@ static int __init find_primary_domain_index(void)
+       }
+       distance_ref_points_depth /= sizeof(int);
+-
+-      if (firmware_has_feature(FW_FEATURE_OPAL) ||
+-          firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
+-              dbg("Using form 1 affinity\n");
+-              form1_affinity = 1;
+-      }
+-
+-      if (form1_affinity) {
+-              index = of_read_number(distance_ref_points, 1);
+-      } else {
++      if (affinity_form == FORM0_AFFINITY) {
+               if (distance_ref_points_depth < 2) {
+                       printk(KERN_WARNING "NUMA: "
+-                              "short ibm,associativity-reference-points\n");
++                             "short ibm,associativity-reference-points\n");
+                       goto err;
+               }
+               index = of_read_number(&distance_ref_points[1], 1);
++      } else {
++              index = of_read_number(distance_ref_points, 1);
+       }
+       /*
+diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
+index 4c7b7f5a2ebca..5d4c2bc20bbab 100644
+--- a/arch/powerpc/platforms/pseries/firmware.c
++++ b/arch/powerpc/platforms/pseries/firmware.c
+@@ -119,7 +119,7 @@ struct vec5_fw_feature {
+ static __initdata struct vec5_fw_feature
+ vec5_fw_features_table[] = {
+-      {FW_FEATURE_TYPE1_AFFINITY,     OV5_TYPE1_AFFINITY},
++      {FW_FEATURE_FORM1_AFFINITY,     OV5_FORM1_AFFINITY},
+       {FW_FEATURE_PRRN,               OV5_PRRN},
+       {FW_FEATURE_DRMEM_V2,           OV5_DRMEM_V2},
+       {FW_FEATURE_DRC_INFO,           OV5_DRC_INFO},
+-- 
+2.39.2
+
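For readers unfamiliar with the affinity forms renamed in the patch above: under FORM1 the
distance between two nodes grows with the number of leading ibm,associativity-reference-points
domains at which they differ, while FORM0 only distinguishes local from remote. The following is
a minimal userspace sketch that paraphrases the FORM1 branch of __node_distance() with made-up
domain IDs (the real code fills distance_lookup_table[] from the device tree), assuming two
reference points:

#include <stdio.h>

#define LOCAL_DISTANCE	10	/* same constants the kernel uses */
#define REMOTE_DISTANCE	20

/* Paraphrase of the FORM1 branch of __node_distance(): walk the reference
 * points and double the distance for every leading level at which the two
 * nodes' associativity domains differ, stopping at the first match. */
static int form1_distance(const unsigned int *a, const unsigned int *b, int depth)
{
	int i, distance = LOCAL_DISTANCE;

	for (i = 0; i < depth; i++) {
		if (a[i] == b[i])
			break;
		distance *= 2;
	}
	return distance;
}

int main(void)
{
	/* Hypothetical per-node domain IDs at two reference points. */
	unsigned int node0[] = { 1, 7 };
	unsigned int node1[] = { 2, 7 };	/* differs at the first level only */
	unsigned int node2[] = { 3, 9 };	/* differs at both levels */

	printf("d(node0, node0) = %d\n", form1_distance(node0, node0, 2)); /* 10 */
	printf("d(node0, node1) = %d\n", form1_distance(node0, node1, 2)); /* 20 */
	printf("d(node0, node2) = %d\n", form1_distance(node0, node2, 2)); /* 40 */

	/* FORM0 has no such table: distance is simply local vs. remote. */
	printf("FORM0: %d or %d\n", LOCAL_DISTANCE, REMOTE_DISTANCE);
	return 0;
}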
diff --git a/queue-5.10/sched-fair-fix-imbalance-overflow.patch b/queue-5.10/sched-fair-fix-imbalance-overflow.patch
new file mode 100644 (file)
index 0000000..49e4add
--- /dev/null
@@ -0,0 +1,48 @@
+From 136d511b9163abd01b1b6231b3562ea166e97929 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Apr 2023 11:06:11 +0200
+Subject: sched/fair: Fix imbalance overflow
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 91dcf1e8068e9a8823e419a7a34ff4341275fb70 ]
+
+When the local group is fully busy but its average load is above the system
+load, computing the imbalance will overflow and the local group is not the
+best target for pulling this load.
+
+Fixes: 0b0695f2b34a ("sched/fair: Rework load_balance()")
+Reported-by: Tingjia Cao <tjcao980311@gmail.com>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Tingjia Cao <tjcao980311@gmail.com>
+Link: https://lore.kernel.org/lkml/CABcWv9_DAhVBOq2=W=2ypKE9dKM5s2DvoV8-U0+GDwwuKZ89jQ@mail.gmail.com/T/
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 22139e97b2a8e..57a58bc48021a 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9353,6 +9353,16 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+               sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
+                               sds->total_capacity;
++
++              /*
++               * If the local group is more loaded than the average system
++               * load, don't try to pull any tasks.
++               */
++              if (local->avg_load >= sds->avg_load) {
++                      env->imbalance = 0;
++                      return;
++              }
++
+       }
+       /*
+-- 
+2.39.2
+
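To see why the early bail-out in the patch above is needed: the average-load fields in
sg_lb_stats/sd_lb_stats are unsigned long, and calculate_imbalance() goes on to derive
env->imbalance from differences of those averages, so (sds->avg_load - local->avg_load) wraps
around when the local group's average is already above the system average. A small userspace
sketch with made-up numbers (variable names only mirror the kernel fields; the expression is a
simplified paraphrase, not the exact kernel formula):

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Made-up loads in the problematic configuration: the local group's
	 * average load is above the system-wide average. */
	unsigned long local_avg_load   = 1300;
	unsigned long busiest_avg_load = 1240;
	unsigned long sds_avg_load     = 1250;
	unsigned long local_capacity   = 1024;
	unsigned long busiest_capacity = 1024;

	/* With these numbers both subtractions wrap instead of going negative... */
	unsigned long pull = (busiest_avg_load - sds_avg_load) * busiest_capacity;
	unsigned long room = (sds_avg_load - local_avg_load) * local_capacity;

	/* ...so the resulting "imbalance" is a huge bogus value. */
	printf("imbalance = %lu\n", min_ul(pull, room) / SCHED_CAPACITY_SCALE);

	/* The fix returns early with an imbalance of 0 in this situation. */
	if (local_avg_load >= sds_avg_load)
		printf("with the fix: imbalance = 0\n");
	return 0;
}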
diff --git a/queue-5.10/sched-fair-move-calculate-of-avg_load-to-a-better-lo.patch b/queue-5.10/sched-fair-move-calculate-of-avg_load-to-a-better-lo.patch
new file mode 100644 (file)
index 0000000..db3f927
--- /dev/null
@@ -0,0 +1,50 @@
+From 59704a62f12269ef7882e6c95a2e60b39f2adc84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Apr 2022 17:57:05 +0800
+Subject: sched/fair: Move calculate of avg_load to a better location
+
+From: zgpeng <zgpeng.linux@gmail.com>
+
+[ Upstream commit 06354900787f25bf5be3c07a68e3cdbc5bf0fa69 ]
+
+In calculate_imbalance(), when local->avg_load is greater than or equal to
+busiest->avg_load, the calculated sds->avg_load is not used, so the
+calculation can be moved to a more appropriate position.
+
+Signed-off-by: zgpeng <zgpeng@tencent.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Samuel Liao <samuelliao@tencent.com>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/1649239025-10010-1-git-send-email-zgpeng@tencent.com
+Stable-dep-of: 91dcf1e8068e ("sched/fair: Fix imbalance overflow")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index bb70a7856277f..22139e97b2a8e 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -9342,8 +9342,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+               local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) /
+                                 local->group_capacity;
+-              sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
+-                              sds->total_capacity;
+               /*
+                * If the local group is more loaded than the selected
+                * busiest group don't try to pull any tasks.
+@@ -9352,6 +9350,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+                       env->imbalance = 0;
+                       return;
+               }
++
++              sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
++                              sds->total_capacity;
+       }
+       /*
+-- 
+2.39.2
+
diff --git a/queue-5.10/series b/queue-5.10/series
index e3bcbf83bb0e230560613e925b1d6c88daef7a60..1bfc575cee3062395788e4c459220992e83721bf 100644 (file)
@@ -95,3 +95,16 @@ net-sfp-initialize-sfp-i2c_block_size-at-sfp-allocation.patch
 scsi-ses-handle-enclosure-with-just-a-primary-component-gracefully.patch
 x86-pci-add-quirk-for-amd-xhci-controller-that-loses-msi-x-state-in-d3hot.patch
 cgroup-cpuset-wake-up-cpuset_attach_wq-tasks-in-cpuset_cancel_attach.patch
+mtd-ubi-wl-fix-a-couple-of-kernel-doc-issues.patch
+ubi-fix-deadlock-caused-by-recursively-holding-work_.patch
+ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch
+powerpc-pseries-rename-min_common_depth-to-primary_d.patch
+powerpc-pseries-rename-type1_affinity-to-form1_affin.patch
+powerpc-pseries-consolidate-different-numa-distance-.patch
+powerpc-pseries-add-a-helper-for-form1-cpu-distance.patch
+powerpc-pseries-add-support-for-form2-associativity.patch
+powerpc-papr_scm-update-the-numa-distance-table-for-.patch
+sched-fair-move-calculate-of-avg_load-to-a-better-lo.patch
+sched-fair-fix-imbalance-overflow.patch
+x86-rtc-remove-__init-for-runtime-functions.patch
+i2c-ocores-generate-stop-condition-after-timeout-in-.patch
diff --git a/queue-5.10/ubi-fix-deadlock-caused-by-recursively-holding-work_.patch b/queue-5.10/ubi-fix-deadlock-caused-by-recursively-holding-work_.patch
new file mode 100644 (file)
index 0000000..c2fe119
--- /dev/null
@@ -0,0 +1,66 @@
+From 86d1fe87a0c24152f0e88d1b54c77c2618f9e2e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Mar 2023 09:41:41 +0800
+Subject: ubi: Fix deadlock caused by recursively holding work_sem
+
+From: ZhaoLong Wang <wangzhaolong1@huawei.com>
+
+[ Upstream commit f773f0a331d6c41733b17bebbc1b6cae12e016f5 ]
+
+While the background thread (bgt) is processing work, if sync_erase()
+returns -EBUSY or some other error code in __erase_worker(),
+schedule_erase() is called again, so down_read(ubi->work_sem) is taken a
+second time and may then block on down_write(ubi->work_sem) in
+ubi_update_fastmap(), which causes a deadlock.
+
+          ubi bgt                        other task
+ do_work
+  down_read(&ubi->work_sem)          ubi_update_fastmap
+  erase_worker                         # Blocked by down_read
+   __erase_worker                      down_write(&ubi->work_sem)
+    schedule_erase
+     schedule_ubi_work
+      down_read(&ubi->work_sem)
+
+Fix this by changing the @nested input parameter of schedule_erase() to
+'true' to avoid recursively acquiring down_read(&ubi->work_sem).
+
+Also fix the incorrect comment about the @nested parameter of
+schedule_erase(): when down_write(ubi->work_sem) is held, @nested also
+needs to be true.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217093
+Fixes: 2e8f08deabbc ("ubi: Fix races around ubi_refill_pools()")
+Signed-off-by: ZhaoLong Wang <wangzhaolong1@huawei.com>
+Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/ubi/wl.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
+index 2ee0e60c43c2e..4427018ad4d9b 100644
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -575,7 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+  * @vol_id: the volume ID that last used this PEB
+  * @lnum: the last used logical eraseblock number for the PEB
+  * @torture: if the physical eraseblock has to be tortured
+- * @nested: denotes whether the work_sem is already held in read mode
++ * @nested: denotes whether the work_sem is already held
+  *
+  * This function returns zero in case of success and a %-ENOMEM in case of
+  * failure.
+@@ -1121,7 +1121,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
+               int err1;
+               /* Re-schedule the LEB for erasure */
+-              err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
++              err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true);
+               if (err1) {
+                       spin_lock(&ubi->wl_lock);
+                       wl_entry_destroy(ubi, e);
+-- 
+2.39.2
+
diff --git a/queue-5.10/ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch b/queue-5.10/ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch
new file mode 100644 (file)
index 0000000..fa94e16
--- /dev/null
@@ -0,0 +1,79 @@
+From 4a6d08de55811e838cffbbdba0beb842f635d9f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Mar 2023 09:33:08 +0800
+Subject: ubi: Fix failure attaching when vid_hdr offset equals to (sub)page
+ size
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit 1e020e1b96afdecd20680b5b5be2a6ffc3d27628 ]
+
+Following process will make ubi attaching failed since commit
+1b42b1a36fc946 ("ubi: ensure that VID header offset ... size"):
+
+ID="0xec,0xa1,0x00,0x15" # 128M 128KB 2KB
+modprobe nandsim id_bytes=$ID
+flash_eraseall /dev/mtd0
+modprobe ubi mtd="0,2048"  # set vid_hdr offset as 2048 (one page)
+(dmesg):
+  ubi0 error: ubi_attach_mtd_dev [ubi]: VID header offset 2048 too large.
+  UBI error: cannot attach mtd0
+  UBI error: cannot initialize UBI, error -22
+
+Rework the original solution: the key point is to make sure that
+'vid_hdr_shift + UBI_VID_HDR_SIZE < ubi->vid_hdr_alsize',
+so the check should be on vid_hdr_shift rather than vid_hdr_offset.
+UBI then still supports a (sub)page-aligned VID header offset.
+
+Fixes: 1b42b1a36fc946 ("ubi: ensure that VID header offset ... size")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Tested-by: Nicolas Schichan <nschichan@freebox.fr>
+Tested-by: Miquel Raynal <miquel.raynal@bootlin.com> # v5.10, v4.19
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/ubi/build.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
+index e45fdc1bf66a4..929ce489b0629 100644
+--- a/drivers/mtd/ubi/build.c
++++ b/drivers/mtd/ubi/build.c
+@@ -665,12 +665,6 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
+       ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
+       ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
+-      if (ubi->vid_hdr_offset && ((ubi->vid_hdr_offset + UBI_VID_HDR_SIZE) >
+-          ubi->vid_hdr_alsize)) {
+-              ubi_err(ubi, "VID header offset %d too large.", ubi->vid_hdr_offset);
+-              return -EINVAL;
+-      }
+-
+       dbg_gen("min_io_size      %d", ubi->min_io_size);
+       dbg_gen("max_write_size   %d", ubi->max_write_size);
+       dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
+@@ -688,6 +682,21 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
+                                               ubi->vid_hdr_aloffset;
+       }
++      /*
++       * Memory allocation for VID header is ubi->vid_hdr_alsize
++       * which is described in comments in io.c.
++       * Make sure VID header shift + UBI_VID_HDR_SIZE not exceeds
++       * ubi->vid_hdr_alsize, so that all vid header operations
++       * won't access memory out of bounds.
++       */
++      if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) {
++              ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)"
++                      " + VID header size(%zu) > VID header aligned size(%d).",
++                      ubi->vid_hdr_offset, ubi->vid_hdr_shift,
++                      UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize);
++              return -EINVAL;
++      }
++
+       /* Similar for the data offset */
+       ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE;
+       ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
+-- 
+2.39.2
+
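A worked example of the arithmetic for the reproducer in the patch above (2048-byte pages, VID
header offset 2048). This is a userspace sketch of the values io_init() computes, assuming
hdrs_min_io_size equals the page size (no sub-page writes) and the 64-byte on-flash VID header;
it shows why the old check rejected a page-aligned offset while the reworked one accepts it:

#include <stdio.h>

#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define UBI_VID_HDR_SIZE	64u	/* size of the on-flash VID header */

int main(void)
{
	unsigned int hdrs_min_io_size = 2048;	/* assumption: no sub-pages */
	unsigned int vid_hdr_offset   = 2048;	/* mtd="0,2048" from the reproducer */

	unsigned int vid_hdr_alsize   = ALIGN(UBI_VID_HDR_SIZE, hdrs_min_io_size);
	unsigned int vid_hdr_aloffset = vid_hdr_offset & ~(hdrs_min_io_size - 1);
	unsigned int vid_hdr_shift    = vid_hdr_offset - vid_hdr_aloffset;

	/* Old check: compared the absolute offset against the aligned size. */
	printf("old: %u + %u > %u -> %s\n", vid_hdr_offset, UBI_VID_HDR_SIZE,
	       vid_hdr_alsize,
	       vid_hdr_offset + UBI_VID_HDR_SIZE > vid_hdr_alsize ?
	       "rejected (-EINVAL)" : "accepted");

	/* New check: only the shift within the aligned block matters. */
	printf("new: %u + %u > %u -> %s\n", vid_hdr_shift, UBI_VID_HDR_SIZE,
	       vid_hdr_alsize,
	       vid_hdr_shift + UBI_VID_HDR_SIZE > vid_hdr_alsize ?
	       "rejected (-EINVAL)" : "accepted");
	return 0;
}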
diff --git a/queue-5.10/x86-rtc-remove-__init-for-runtime-functions.patch b/queue-5.10/x86-rtc-remove-__init-for-runtime-functions.patch
new file mode 100644 (file)
index 0000000..b2181b4
--- /dev/null
@@ -0,0 +1,53 @@
+From c6cd6c861acab70decc185158b61542c40fe1952 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Apr 2023 08:26:52 +0200
+Subject: x86/rtc: Remove __init for runtime functions
+
+From: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
+
+[ Upstream commit 775d3c514c5b2763a50ab7839026d7561795924d ]
+
+set_rtc_noop() and get_rtc_noop() can be called after booting, therefore
+their __init annotation is wrong.
+
+A crash was observed on an x86 platform where CMOS RTC is unused and
+disabled via device tree. set_rtc_noop() was invoked from ntp:
+sync_hw_clock(), although CONFIG_RTC_SYSTOHC=n, however sync_cmos_clock()
+doesn't honour that.
+
+  Workqueue: events_power_efficient sync_hw_clock
+  RIP: 0010:set_rtc_noop
+  Call Trace:
+   update_persistent_clock64
+   sync_hw_clock
+
+Fix this by dropping the __init annotation from set/get_rtc_noop().
+
+Fixes: c311ed6183f4 ("x86/init: Allow DT configured systems to disable RTC at boot time")
+Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/59f7ceb1-446b-1d3d-0bc8-1f0ee94b1e18@nokia.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/x86_init.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
+index a3038d8deb6a4..b758eeea6090b 100644
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -32,8 +32,8 @@ static int __init iommu_init_noop(void) { return 0; }
+ static void iommu_shutdown_noop(void) { }
+ bool __init bool_x86_init_noop(void) { return false; }
+ void x86_op_int_noop(int cpu) { }
+-static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+-static __init void get_rtc_noop(struct timespec64 *now) { }
++static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
++static void get_rtc_noop(struct timespec64 *now) { }
+ static __initconst const struct of_device_id of_cmos_match[] = {
+       { .compatible = "motorola,mc146818" },
+-- 
+2.39.2
+