git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.0
author Sasha Levin <sashal@kernel.org>
Sun, 4 Dec 2022 02:26:48 +0000 (21:26 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 4 Dec 2022 02:26:48 +0000 (21:26 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
15 files changed:
queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch [new file with mode: 0644]
queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch [new file with mode: 0644]
queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch [new file with mode: 0644]
queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch [new file with mode: 0644]
queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch [new file with mode: 0644]
queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch [new file with mode: 0644]
queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch [new file with mode: 0644]
queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch [new file with mode: 0644]
queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch [new file with mode: 0644]
queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch [new file with mode: 0644]
queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch [new file with mode: 0644]
queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch [new file with mode: 0644]
queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch [new file with mode: 0644]
queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch [new file with mode: 0644]
queue-6.0/series

diff --git a/queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch b/queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch
new file mode 100644 (file)
index 0000000..ccf94b2
--- /dev/null
@@ -0,0 +1,39 @@
+From 6b50f2955b935ca4b4bf670333b1b34fce80dffb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 May 2022 14:41:36 +0100
+Subject: ASoC: ops: Fix bounds check for _sx controls
+
+From: Mark Brown <broonie@kernel.org>
+
+[ Upstream commit 698813ba8c580efb356ace8dbf55f61dac6063a8 ]
+
+For _sx controls the semantics of the max field is not the usual one, max
+is the number of steps rather than the maximum value. This means that our
+check in snd_soc_put_volsw_sx() needs to just check against the maximum
+value.
+
+Fixes: 4f1e50d6a9cf9c1b ("ASoC: ops: Reject out of bounds values in snd_soc_put_volsw_sx()")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/20220511134137.169575-1-broonie@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/soc-ops.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
+index bd88de056358..47691119306f 100644
+--- a/sound/soc/soc-ops.c
++++ b/sound/soc/soc-ops.c
+@@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
+       val = ucontrol->value.integer.value[0];
+       if (mc->platform_max && val > mc->platform_max)
+               return -EINVAL;
+-      if (val > max - min)
++      if (val > max)
+               return -EINVAL;
+       val_mask = mask << shift;
+       val = (val + min) & mask;
+-- 
+2.35.1
+
diff --git a/queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch b/queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch
new file mode 100644 (file)
index 0000000..5b06bc0
--- /dev/null
@@ -0,0 +1,75 @@
+From 5bdcaab19aa501176d7ff4c5b00d2c7883c8ed64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 May 2022 15:46:40 +0800
+Subject: ASoC: tlv320adc3xxx: Fix build error for implicit function
+ declaration
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hui Tang <tanghui20@huawei.com>
+
+[ Upstream commit 19c5bda74dc45fee598a57600b550c9ea7662f10 ]
+
+sound/soc/codecs/tlv320adc3xxx.c: In function ‘adc3xxx_i2c_probe’:
+sound/soc/codecs/tlv320adc3xxx.c:1359:21: error: implicit declaration of function ‘devm_gpiod_get’; did you mean ‘devm_gpio_free’? [-Werror=implicit-function-declaration]
+  adc3xxx->rst_pin = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+                     ^~~~~~~~~~~~~~
+                     devm_gpio_free
+  CC [M]  drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.o
+  LD [M]  sound/soc/codecs/snd-soc-ak4671.o
+  LD [M]  sound/soc/codecs/snd-soc-arizona.o
+  LD [M]  sound/soc/codecs/snd-soc-cros-ec-codec.o
+  LD [M]  sound/soc/codecs/snd-soc-ak4641.o
+  LD [M]  sound/soc/codecs/snd-soc-alc5632.o
+sound/soc/codecs/tlv320adc3xxx.c:1359:50: error: ‘GPIOD_OUT_LOW’ undeclared (first use in this function); did you mean ‘GPIOF_INIT_LOW’?
+  adc3xxx->rst_pin = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+                                                  ^~~~~~~~~~~~~
+                                                  GPIOF_INIT_LOW
+sound/soc/codecs/tlv320adc3xxx.c:1359:50: note: each undeclared identifier is reported only once for each function it appears in
+  LD [M]  sound/soc/codecs/snd-soc-cs35l32.o
+sound/soc/codecs/tlv320adc3xxx.c:1408:2: error: implicit declaration of function ‘gpiod_set_value_cansleep’; did you mean ‘gpio_set_value_cansleep’? [-Werror=implicit-function-declaration]
+  gpiod_set_value_cansleep(adc3xxx->rst_pin, 1);
+  ^~~~~~~~~~~~~~~~~~~~~~~~
+  gpio_set_value_cansleep
+  LD [M]  sound/soc/codecs/snd-soc-cs35l41-lib.o
+  LD [M]  sound/soc/codecs/snd-soc-cs35l36.o
+  LD [M]  sound/soc/codecs/snd-soc-cs35l34.o
+  LD [M]  sound/soc/codecs/snd-soc-cs35l41.o
+  CC [M]  drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.o
+cc1: all warnings being treated as errors
+
+Fixes: e9a3b57efd28 ("ASoC: codec: tlv320adc3xxx: New codec driver")
+Signed-off-by: Hui Tang <tanghui20@huawei.com>
+Link: https://lore.kernel.org/r/20220512074640.75550-3-tanghui20@huawei.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/tlv320adc3xxx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c
+index 8a0965cd3e66..297c458c4d8b 100644
+--- a/sound/soc/codecs/tlv320adc3xxx.c
++++ b/sound/soc/codecs/tlv320adc3xxx.c
+@@ -14,6 +14,7 @@
+ #include <dt-bindings/sound/tlv320adc3xxx.h>
+ #include <linux/clk.h>
++#include <linux/gpio/consumer.h>
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/io.h>
+@@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = {
+ static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx)
+ {
++#ifdef CONFIG_GPIOLIB
+       gpiochip_remove(&adc3xxx->gpio_chip);
++#endif
+ }
+ static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx)
+-- 
+2.35.1
+
diff --git a/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch b/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch
new file mode 100644 (file)
index 0000000..828faa9
--- /dev/null
@@ -0,0 +1,43 @@
+From 31a65fdd67f7bc836e005122829218c895845185 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Dec 2022 12:01:27 +0800
+Subject: iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init()
+
+From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+
+[ Upstream commit 4bedbbd782ebbe7287231fea862c158d4f08a9e3 ]
+
+for_each_pci_dev() is implemented by pci_get_device(). The comment of
+pci_get_device() says that it will increase the reference count for the
+returned pci_dev and also decrease the reference count for the input
+pci_dev @from if it is not NULL.
+
+If we break for_each_pci_dev() loop with pdev not NULL, we need to call
+pci_dev_put() to decrease the reference count. Add the missing
+pci_dev_put() for the error path to avoid reference count leak.
+
+Fixes: 2e4552893038 ("iommu/vt-d: Unify the way to process DMAR device scope array")
+Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+Link: https://lore.kernel.org/r/20221121113649.190393-3-wangxiongfeng2@huawei.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/dmar.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
+index 5a8f780e7ffd..bc94059a5b87 100644
+--- a/drivers/iommu/intel/dmar.c
++++ b/drivers/iommu/intel/dmar.c
+@@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void)
+                       info = dmar_alloc_pci_notify_info(dev,
+                                       BUS_NOTIFY_ADD_DEVICE);
+                       if (!info) {
++                              pci_dev_put(dev);
+                               return dmar_dev_scope_status;
+                       } else {
+                               dmar_pci_bus_add_dev(info);
+-- 
+2.35.1
+
diff --git a/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch b/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch
new file mode 100644 (file)
index 0000000..9032ae9
--- /dev/null
@@ -0,0 +1,47 @@
+From 90897a7c53d24bec9408648da2a1f98d8d5717e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Dec 2022 12:01:26 +0800
+Subject: iommu/vt-d: Fix PCI device refcount leak in has_external_pci()
+
+From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+
+[ Upstream commit afca9e19cc720bfafc75dc5ce429c185ca93f31d ]
+
+for_each_pci_dev() is implemented by pci_get_device(). The comment of
+pci_get_device() says that it will increase the reference count for the
+returned pci_dev and also decrease the reference count for the input
+pci_dev @from if it is not NULL.
+
+If we break for_each_pci_dev() loop with pdev not NULL, we need to call
+pci_dev_put() to decrease the reference count. Add the missing
+pci_dev_put() before 'return true' to avoid reference count leak.
+
+Fixes: 89a6079df791 ("iommu/vt-d: Force IOMMU on for platform opt in hint")
+Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+Link: https://lore.kernel.org/r/20221121113649.190393-2-wangxiongfeng2@huawei.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/iommu.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index e47700674978..412b106d2a39 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -3844,8 +3844,10 @@ static inline bool has_external_pci(void)
+       struct pci_dev *pdev = NULL;
+       for_each_pci_dev(pdev)
+-              if (pdev->external_facing)
++              if (pdev->external_facing) {
++                      pci_dev_put(pdev);
+                       return true;
++              }
+       return false;
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch b/queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch
new file mode 100644 (file)
index 0000000..49234af
--- /dev/null
@@ -0,0 +1,118 @@
+From 845eb16dd753154d97d904758f7eeae06d8cce74 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 23:09:32 +0200
+Subject: ipv4: Fix route deletion when nexthop info is not specified
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit d5082d386eee7e8ec46fa8581932c81a4961dcef ]
+
+When the kernel receives a route deletion request from user space it
+tries to delete a route that matches the route attributes specified in
+the request.
+
+If only prefix information is specified in the request, the kernel
+should delete the first matching FIB alias regardless of its associated
+FIB info. However, an error is currently returned when the FIB info is
+backed by a nexthop object:
+
+ # ip nexthop add id 1 via 192.0.2.2 dev dummy10
+ # ip route add 198.51.100.0/24 nhid 1
+ # ip route del 198.51.100.0/24
+ RTNETLINK answers: No such process
+
+Fix by matching on such a FIB info when legacy nexthop attributes are
+not specified in the request. An earlier check already covers the case
+where a nexthop ID is specified in the request.
+
+Add tests that cover these flows. Before the fix:
+
+ # ./fib_nexthops.sh -t ipv4_fcnal
+ ...
+ TEST: Delete route when not specifying nexthop attributes           [FAIL]
+
+ Tests passed:  11
+ Tests failed:   1
+
+After the fix:
+
+ # ./fib_nexthops.sh -t ipv4_fcnal
+ ...
+ TEST: Delete route when not specifying nexthop attributes           [ OK ]
+
+ Tests passed:  12
+ Tests failed:   0
+
+No regressions in other tests:
+
+ # ./fib_nexthops.sh
+ ...
+ Tests passed: 228
+ Tests failed:   0
+
+ # ./fib_tests.sh
+ ...
+ Tests passed: 186
+ Tests failed:   0
+
+Cc: stable@vger.kernel.org
+Reported-by: Jonas Gorski <jonas.gorski@gmail.com>
+Tested-by: Jonas Gorski <jonas.gorski@gmail.com>
+Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects")
+Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning")
+Fixes: 61b91eb33a69 ("ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20221124210932.2470010-1-idosch@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_semantics.c                    |  8 +++++---
+ tools/testing/selftests/net/fib_nexthops.sh | 11 +++++++++++
+ 2 files changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index e9a7f70a54df..cb24260692e1 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
+               return 1;
+       }
+-      /* cannot match on nexthop object attributes */
+-      if (fi->nh)
+-              return 1;
++      if (fi->nh) {
++              if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp)
++                      return 1;
++              return 0;
++      }
+       if (cfg->fc_oif || cfg->fc_gw_family) {
+               struct fib_nh *nh;
+diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
+index ee5e98204d3d..a47b26ab48f2 100755
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -1228,6 +1228,17 @@ ipv4_fcnal()
+       run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
+       run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
+       log_test $? 2 "Delete multipath route with only nh id based entry"
++
++      run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1"
++      run_cmd "$IP ro add 172.16.102.0/24 nhid 22"
++      run_cmd "$IP ro del 172.16.102.0/24 dev veth1"
++      log_test $? 2 "Delete route when specifying only nexthop device"
++
++      run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6"
++      log_test $? 2 "Delete route when specifying only gateway"
++
++      run_cmd "$IP ro del 172.16.102.0/24"
++      log_test $? 0 "Delete route when not specifying nexthop attributes"
+ }
+ ipv4_grp_fcnal()
+-- 
+2.35.1
+
diff --git a/queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch b/queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch
new file mode 100644 (file)
index 0000000..039a2b2
--- /dev/null
@@ -0,0 +1,75 @@
+From 740188d99924efa8cfa6ef0e0d9e1030f60a482e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Oct 2022 10:48:49 -0600
+Subject: ipv4: Handle attempt to delete multipath route when fib_info contains
+ an nh reference
+
+From: David Ahern <dsahern@kernel.org>
+
+[ Upstream commit 61b91eb33a69c3be11b259c5ea484505cd79f883 ]
+
+Gwangun Jung reported a slab-out-of-bounds access in fib_nh_match:
+    fib_nh_match+0xf98/0x1130 linux-6.0-rc7/net/ipv4/fib_semantics.c:961
+    fib_table_delete+0x5f3/0xa40 linux-6.0-rc7/net/ipv4/fib_trie.c:1753
+    inet_rtm_delroute+0x2b3/0x380 linux-6.0-rc7/net/ipv4/fib_frontend.c:874
+
+Separate nexthop objects are mutually exclusive with the legacy
+multipath spec. Fix fib_nh_match to return if the config for the
+to be deleted route contains a multipath spec while the fib_info
+is using a nexthop object.
+
+Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects")
+Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning")
+Reported-by: Gwangun Jung <exsociety@gmail.com>
+Signed-off-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Tested-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: d5082d386eee ("ipv4: Fix route deletion when nexthop info is not specified")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_semantics.c                    | 8 ++++----
+ tools/testing/selftests/net/fib_nexthops.sh | 5 +++++
+ 2 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+index 2dc97583d279..e9a7f70a54df 100644
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -888,13 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
+               return 1;
+       }
++      /* cannot match on nexthop object attributes */
++      if (fi->nh)
++              return 1;
++
+       if (cfg->fc_oif || cfg->fc_gw_family) {
+               struct fib_nh *nh;
+-              /* cannot match on nexthop object attributes */
+-              if (fi->nh)
+-                      return 1;
+-
+               nh = fib_info_nh(fi, 0);
+               if (cfg->fc_encap) {
+                       if (fib_encap_match(net, cfg->fc_encap_type,
+diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
+index d5a0dd548989..ee5e98204d3d 100755
+--- a/tools/testing/selftests/net/fib_nexthops.sh
++++ b/tools/testing/selftests/net/fib_nexthops.sh
+@@ -1223,6 +1223,11 @@ ipv4_fcnal()
+       log_test $rc 0 "Delete nexthop route warning"
+       run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
+       run_cmd "$IP nexthop del id 12"
++
++      run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1"
++      run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
++      run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
++      log_test $? 2 "Delete multipath route with only nh id based entry"
+ }
+ ipv4_grp_fcnal()
+-- 
+2.35.1
+
diff --git a/queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch b/queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch
new file mode 100644 (file)
index 0000000..c9f3ac3
--- /dev/null
@@ -0,0 +1,409 @@
+From b5cf5c6a4e8df453e32dafa2bc07fc38593fe43a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 19:14:43 +0000
+Subject: mm/damon: introduce struct damos_access_pattern
+
+From: Yajun Deng <yajun.deng@linux.dev>
+
+[ Upstream commit f5a79d7c0c87c8d88bb5e3f3c898258fdf1b3b05 ]
+
+damon_new_scheme() has too many parameters, so introduce struct
+damos_access_pattern to simplify it.
+
+In additon, we can't use a bpf trace kprobe that has more than 5
+parameters.
+
+Link: https://lkml.kernel.org/r/20220908191443.129534-1-sj@kernel.org
+Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 95bc35f9bee5 ("mm/damon/sysfs: fix wrong empty schemes assumption under online tuning in damon_sysfs_set_schemes()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/damon.h | 37 ++++++++++++++++++----------------
+ mm/damon/core.c       | 31 ++++++++++++++---------------
+ mm/damon/dbgfs.c      | 27 +++++++++++++++----------
+ mm/damon/lru_sort.c   | 46 ++++++++++++++++++++++++++-----------------
+ mm/damon/reclaim.c    | 23 +++++++++++++---------
+ mm/damon/sysfs.c      | 17 +++++++++++-----
+ 6 files changed, 106 insertions(+), 75 deletions(-)
+
+diff --git a/include/linux/damon.h b/include/linux/damon.h
+index 7b1f4a488230..98e622c34d44 100644
+--- a/include/linux/damon.h
++++ b/include/linux/damon.h
+@@ -216,13 +216,26 @@ struct damos_stat {
+ };
+ /**
+- * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
++ * struct damos_access_pattern - Target access pattern of the given scheme.
+  * @min_sz_region:    Minimum size of target regions.
+  * @max_sz_region:    Maximum size of target regions.
+  * @min_nr_accesses:  Minimum ``->nr_accesses`` of target regions.
+  * @max_nr_accesses:  Maximum ``->nr_accesses`` of target regions.
+  * @min_age_region:   Minimum age of target regions.
+  * @max_age_region:   Maximum age of target regions.
++ */
++struct damos_access_pattern {
++      unsigned long min_sz_region;
++      unsigned long max_sz_region;
++      unsigned int min_nr_accesses;
++      unsigned int max_nr_accesses;
++      unsigned int min_age_region;
++      unsigned int max_age_region;
++};
++
++/**
++ * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
++ * @pattern:          Access pattern of target regions.
+  * @action:           &damo_action to be applied to the target regions.
+  * @quota:            Control the aggressiveness of this scheme.
+  * @wmarks:           Watermarks for automated (in)activation of this scheme.
+@@ -230,10 +243,8 @@ struct damos_stat {
+  * @list:             List head for siblings.
+  *
+  * For each aggregation interval, DAMON finds regions which fit in the
+- * condition (&min_sz_region, &max_sz_region, &min_nr_accesses,
+- * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to
+- * those.  To avoid consuming too much CPU time or IO resources for the
+- * &action, &quota is used.
++ * &pattern and applies &action to those. To avoid consuming too much
++ * CPU time or IO resources for the &action, &quota is used.
+  *
+  * To do the work only when needed, schemes can be activated for specific
+  * system situations using &wmarks.  If all schemes that registered to the
+@@ -248,12 +259,7 @@ struct damos_stat {
+  * &action is applied.
+  */
+ struct damos {
+-      unsigned long min_sz_region;
+-      unsigned long max_sz_region;
+-      unsigned int min_nr_accesses;
+-      unsigned int max_nr_accesses;
+-      unsigned int min_age_region;
+-      unsigned int max_age_region;
++      struct damos_access_pattern pattern;
+       enum damos_action action;
+       struct damos_quota quota;
+       struct damos_watermarks wmarks;
+@@ -501,12 +507,9 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t);
+ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
+               unsigned int nr_ranges);
+-struct damos *damon_new_scheme(
+-              unsigned long min_sz_region, unsigned long max_sz_region,
+-              unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+-              unsigned int min_age_region, unsigned int max_age_region,
+-              enum damos_action action, struct damos_quota *quota,
+-              struct damos_watermarks *wmarks);
++struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
++                      enum damos_action action, struct damos_quota *quota,
++                      struct damos_watermarks *wmarks);
+ void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
+ void damon_destroy_scheme(struct damos *s);
+diff --git a/mm/damon/core.c b/mm/damon/core.c
+index 7d25dc582fe3..7d5a9ae6f4ac 100644
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -230,24 +230,21 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
+       return 0;
+ }
+-struct damos *damon_new_scheme(
+-              unsigned long min_sz_region, unsigned long max_sz_region,
+-              unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+-              unsigned int min_age_region, unsigned int max_age_region,
+-              enum damos_action action, struct damos_quota *quota,
+-              struct damos_watermarks *wmarks)
++struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
++                      enum damos_action action, struct damos_quota *quota,
++                      struct damos_watermarks *wmarks)
+ {
+       struct damos *scheme;
+       scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+       if (!scheme)
+               return NULL;
+-      scheme->min_sz_region = min_sz_region;
+-      scheme->max_sz_region = max_sz_region;
+-      scheme->min_nr_accesses = min_nr_accesses;
+-      scheme->max_nr_accesses = max_nr_accesses;
+-      scheme->min_age_region = min_age_region;
+-      scheme->max_age_region = max_age_region;
++      scheme->pattern.min_sz_region = pattern->min_sz_region;
++      scheme->pattern.max_sz_region = pattern->max_sz_region;
++      scheme->pattern.min_nr_accesses = pattern->min_nr_accesses;
++      scheme->pattern.max_nr_accesses = pattern->max_nr_accesses;
++      scheme->pattern.min_age_region = pattern->min_age_region;
++      scheme->pattern.max_age_region = pattern->max_age_region;
+       scheme->action = action;
+       scheme->stat = (struct damos_stat){};
+       INIT_LIST_HEAD(&scheme->list);
+@@ -667,10 +664,12 @@ static bool __damos_valid_target(struct damon_region *r, struct damos *s)
+       unsigned long sz;
+       sz = r->ar.end - r->ar.start;
+-      return s->min_sz_region <= sz && sz <= s->max_sz_region &&
+-              s->min_nr_accesses <= r->nr_accesses &&
+-              r->nr_accesses <= s->max_nr_accesses &&
+-              s->min_age_region <= r->age && r->age <= s->max_age_region;
++      return s->pattern.min_sz_region <= sz &&
++              sz <= s->pattern.max_sz_region &&
++              s->pattern.min_nr_accesses <= r->nr_accesses &&
++              r->nr_accesses <= s->pattern.max_nr_accesses &&
++              s->pattern.min_age_region <= r->age &&
++              r->age <= s->pattern.max_age_region;
+ }
+ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
+diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
+index dafe7e71329b..61214cb9a5d3 100644
+--- a/mm/damon/dbgfs.c
++++ b/mm/damon/dbgfs.c
+@@ -131,9 +131,12 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
+       damon_for_each_scheme(s, c) {
+               rc = scnprintf(&buf[written], len - written,
+                               "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
+-                              s->min_sz_region, s->max_sz_region,
+-                              s->min_nr_accesses, s->max_nr_accesses,
+-                              s->min_age_region, s->max_age_region,
++                              s->pattern.min_sz_region,
++                              s->pattern.max_sz_region,
++                              s->pattern.min_nr_accesses,
++                              s->pattern.max_nr_accesses,
++                              s->pattern.min_age_region,
++                              s->pattern.max_age_region,
+                               damos_action_to_dbgfs_scheme_action(s->action),
+                               s->quota.ms, s->quota.sz,
+                               s->quota.reset_interval,
+@@ -221,8 +224,6 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
+       struct damos *scheme, **schemes;
+       const int max_nr_schemes = 256;
+       int pos = 0, parsed, ret;
+-      unsigned long min_sz, max_sz;
+-      unsigned int min_nr_a, max_nr_a, min_age, max_age;
+       unsigned int action_input;
+       enum damos_action action;
+@@ -233,13 +234,18 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
+       *nr_schemes = 0;
+       while (pos < len && *nr_schemes < max_nr_schemes) {
++              struct damos_access_pattern pattern = {};
+               struct damos_quota quota = {};
+               struct damos_watermarks wmarks;
+               ret = sscanf(&str[pos],
+                               "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n",
+-                              &min_sz, &max_sz, &min_nr_a, &max_nr_a,
+-                              &min_age, &max_age, &action_input, &quota.ms,
++                              &pattern.min_sz_region, &pattern.max_sz_region,
++                              &pattern.min_nr_accesses,
++                              &pattern.max_nr_accesses,
++                              &pattern.min_age_region,
++                              &pattern.max_age_region,
++                              &action_input, &quota.ms,
+                               &quota.sz, &quota.reset_interval,
+                               &quota.weight_sz, &quota.weight_nr_accesses,
+                               &quota.weight_age, &wmarks.metric,
+@@ -251,7 +257,9 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
+               if ((int)action < 0)
+                       goto fail;
+-              if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age)
++              if (pattern.min_sz_region > pattern.max_sz_region ||
++                  pattern.min_nr_accesses > pattern.max_nr_accesses ||
++                  pattern.min_age_region > pattern.max_age_region)
+                       goto fail;
+               if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low ||
+@@ -259,8 +267,7 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
+                       goto fail;
+               pos += parsed;
+-              scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
+-                              min_age, max_age, action, &quota, &wmarks);
++              scheme = damon_new_scheme(&pattern, action, &quota, &wmarks);
+               if (!scheme)
+                       goto fail;
+diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
+index 9de6f00a71c5..0184ed4828b7 100644
+--- a/mm/damon/lru_sort.c
++++ b/mm/damon/lru_sort.c
+@@ -293,6 +293,17 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end)
+ /* Create a DAMON-based operation scheme for hot memory regions */
+ static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
+ {
++      struct damos_access_pattern pattern = {
++              /* Find regions having PAGE_SIZE or larger size */
++              .min_sz_region = PAGE_SIZE,
++              .max_sz_region = ULONG_MAX,
++              /* and accessed for more than the threshold */
++              .min_nr_accesses = hot_thres,
++              .max_nr_accesses = UINT_MAX,
++              /* no matter its age */
++              .min_age_region = 0,
++              .max_age_region = UINT_MAX,
++      };
+       struct damos_watermarks wmarks = {
+               .metric = DAMOS_WMARK_FREE_MEM_RATE,
+               .interval = wmarks_interval,
+@@ -313,26 +324,31 @@ static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
+               .weight_nr_accesses = 1,
+               .weight_age = 0,
+       };
+-      struct damos *scheme = damon_new_scheme(
+-                      /* Find regions having PAGE_SIZE or larger size */
+-                      PAGE_SIZE, ULONG_MAX,
+-                      /* and accessed for more than the threshold */
+-                      hot_thres, UINT_MAX,
+-                      /* no matter its age */
+-                      0, UINT_MAX,
++
++      return damon_new_scheme(
++                      &pattern,
+                       /* prioritize those on LRU lists, as soon as found */
+                       DAMOS_LRU_PRIO,
+                       /* under the quota. */
+                       &quota,
+                       /* (De)activate this according to the watermarks. */
+                       &wmarks);
+-
+-      return scheme;
+ }
+ /* Create a DAMON-based operation scheme for cold memory regions */
+ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
+ {
++      struct damos_access_pattern pattern = {
++              /* Find regions having PAGE_SIZE or larger size */
++              .min_sz_region = PAGE_SIZE,
++              .max_sz_region = ULONG_MAX,
++              /* and not accessed at all */
++              .min_nr_accesses = 0,
++              .max_nr_accesses = 0,
++              /* for min_age or more micro-seconds */
++              .min_age_region = cold_thres,
++              .max_age_region = UINT_MAX,
++      };
+       struct damos_watermarks wmarks = {
+               .metric = DAMOS_WMARK_FREE_MEM_RATE,
+               .interval = wmarks_interval,
+@@ -354,21 +370,15 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
+               .weight_nr_accesses = 0,
+               .weight_age = 1,
+       };
+-      struct damos *scheme = damon_new_scheme(
+-                      /* Find regions having PAGE_SIZE or larger size */
+-                      PAGE_SIZE, ULONG_MAX,
+-                      /* and not accessed at all */
+-                      0, 0,
+-                      /* for cold_thres or more micro-seconds, and */
+-                      cold_thres, UINT_MAX,
++
++      return damon_new_scheme(
++                      &pattern,
+                       /* mark those as not accessed, as soon as found */
+                       DAMOS_LRU_DEPRIO,
+                       /* under the quota. */
+                       &quota,
+                       /* (De)activate this according to the watermarks. */
+                       &wmarks);
+-
+-      return scheme;
+ }
+ static int damon_lru_sort_apply_parameters(void)
+diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
+index a7faf51b4bd4..5aeca0b9e88e 100644
+--- a/mm/damon/reclaim.c
++++ b/mm/damon/reclaim.c
+@@ -264,6 +264,17 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end)
+ static struct damos *damon_reclaim_new_scheme(void)
+ {
++      struct damos_access_pattern pattern = {
++              /* Find regions having PAGE_SIZE or larger size */
++              .min_sz_region = PAGE_SIZE,
++              .max_sz_region = ULONG_MAX,
++              /* and not accessed at all */
++              .min_nr_accesses = 0,
++              .max_nr_accesses = 0,
++              /* for min_age or more micro-seconds */
++              .min_age_region = min_age / aggr_interval,
++              .max_age_region = UINT_MAX,
++      };
+       struct damos_watermarks wmarks = {
+               .metric = DAMOS_WMARK_FREE_MEM_RATE,
+               .interval = wmarks_interval,
+@@ -284,21 +295,15 @@ static struct damos *damon_reclaim_new_scheme(void)
+               .weight_nr_accesses = 0,
+               .weight_age = 1
+       };
+-      struct damos *scheme = damon_new_scheme(
+-                      /* Find regions having PAGE_SIZE or larger size */
+-                      PAGE_SIZE, ULONG_MAX,
+-                      /* and not accessed at all */
+-                      0, 0,
+-                      /* for min_age or more micro-seconds, and */
+-                      min_age / aggr_interval, UINT_MAX,
++
++      return damon_new_scheme(
++                      &pattern,
+                       /* page out those, as soon as found */
+                       DAMOS_PAGEOUT,
+                       /* under the quota. */
+                       &quota,
+                       /* (De)activate this according to the watermarks. */
+                       &wmarks);
+-
+-      return scheme;
+ }
+ static int damon_reclaim_apply_parameters(void)
+diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
+index b4b9614eecbe..ec88644c51df 100644
+--- a/mm/damon/sysfs.c
++++ b/mm/damon/sysfs.c
+@@ -2259,11 +2259,20 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
+ static struct damos *damon_sysfs_mk_scheme(
+               struct damon_sysfs_scheme *sysfs_scheme)
+ {
+-      struct damon_sysfs_access_pattern *pattern =
++      struct damon_sysfs_access_pattern *access_pattern =
+               sysfs_scheme->access_pattern;
+       struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+       struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+       struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
++
++      struct damos_access_pattern pattern = {
++              .min_sz_region = access_pattern->sz->min,
++              .max_sz_region = access_pattern->sz->max,
++              .min_nr_accesses = access_pattern->nr_accesses->min,
++              .max_nr_accesses = access_pattern->nr_accesses->max,
++              .min_age_region = access_pattern->age->min,
++              .max_age_region = access_pattern->age->max,
++      };
+       struct damos_quota quota = {
+               .ms = sysfs_quotas->ms,
+               .sz = sysfs_quotas->sz,
+@@ -2280,10 +2289,8 @@ static struct damos *damon_sysfs_mk_scheme(
+               .low = sysfs_wmarks->low,
+       };
+-      return damon_new_scheme(pattern->sz->min, pattern->sz->max,
+-                      pattern->nr_accesses->min, pattern->nr_accesses->max,
+-                      pattern->age->min, pattern->age->max,
+-                      sysfs_scheme->action, &quota, &wmarks);
++      return damon_new_scheme(&pattern, sysfs_scheme->action, &quota,
++                      &wmarks);
+ }
+ static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+-- 
+2.35.1
+
diff --git a/queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch b/queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch
new file mode 100644 (file)
index 0000000..d36df8e
--- /dev/null
@@ -0,0 +1,97 @@
+From bbe9e58d36e6734175c00c7d6975f8fd71d2dbdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Nov 2022 19:48:31 +0000
+Subject: mm/damon/sysfs: fix wrong empty schemes assumption under online
+ tuning in damon_sysfs_set_schemes()
+
+From: SeongJae Park <sj@kernel.org>
+
+[ Upstream commit 95bc35f9bee5220dad4e8567654ab3288a181639 ]
+
+Commit da87878010e5 ("mm/damon/sysfs: support online inputs update") made
+'damon_sysfs_set_schemes()' to be called for running DAMON context, which
+could have schemes.  In the case, DAMON sysfs interface is supposed to
+update, remove, or add schemes to reflect the sysfs files.  However, the
+code is assuming the DAMON context wouldn't have schemes at all, and
+therefore creates and adds new schemes.  As a result, the code doesn't
+work as intended for online schemes tuning and could have more than
+expected memory footprint.  The schemes are all in the DAMON context, so
+it doesn't leak the memory, though.
+
+Remove the wrong asssumption (the DAMON context wouldn't have schemes) in
+'damon_sysfs_set_schemes()' to fix the bug.
+
+Link: https://lkml.kernel.org/r/20221122194831.3472-1-sj@kernel.org
+Fixes: da87878010e5 ("mm/damon/sysfs: support online inputs update")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>   [5.19+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/damon/sysfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 44 insertions(+), 2 deletions(-)
+
+diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
+index ec88644c51df..1b782ca41396 100644
+--- a/mm/damon/sysfs.c
++++ b/mm/damon/sysfs.c
+@@ -2293,12 +2293,54 @@ static struct damos *damon_sysfs_mk_scheme(
+                       &wmarks);
+ }
++static void damon_sysfs_update_scheme(struct damos *scheme,
++              struct damon_sysfs_scheme *sysfs_scheme)
++{
++      struct damon_sysfs_access_pattern *access_pattern =
++              sysfs_scheme->access_pattern;
++      struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
++      struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
++      struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
++
++      scheme->pattern.min_sz_region = access_pattern->sz->min;
++      scheme->pattern.max_sz_region = access_pattern->sz->max;
++      scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
++      scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
++      scheme->pattern.min_age_region = access_pattern->age->min;
++      scheme->pattern.max_age_region = access_pattern->age->max;
++
++      scheme->action = sysfs_scheme->action;
++
++      scheme->quota.ms = sysfs_quotas->ms;
++      scheme->quota.sz = sysfs_quotas->sz;
++      scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
++      scheme->quota.weight_sz = sysfs_weights->sz;
++      scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
++      scheme->quota.weight_age = sysfs_weights->age;
++
++      scheme->wmarks.metric = sysfs_wmarks->metric;
++      scheme->wmarks.interval = sysfs_wmarks->interval_us;
++      scheme->wmarks.high = sysfs_wmarks->high;
++      scheme->wmarks.mid = sysfs_wmarks->mid;
++      scheme->wmarks.low = sysfs_wmarks->low;
++}
++
+ static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+               struct damon_sysfs_schemes *sysfs_schemes)
+ {
+-      int i;
++      struct damos *scheme, *next;
++      int i = 0;
++
++      damon_for_each_scheme_safe(scheme, next, ctx) {
++              if (i < sysfs_schemes->nr)
++                      damon_sysfs_update_scheme(scheme,
++                                      sysfs_schemes->schemes_arr[i]);
++              else
++                      damon_destroy_scheme(scheme);
++              i++;
++      }
+-      for (i = 0; i < sysfs_schemes->nr; i++) {
++      for (; i < sysfs_schemes->nr; i++) {
+               struct damos *scheme, *next;
+               scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
+-- 
+2.35.1
+
diff --git a/queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch b/queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch
new file mode 100644 (file)
index 0000000..f643c02
--- /dev/null
@@ -0,0 +1,104 @@
+From 6b41b416b4dba8fdef2afecfa10eb7c523ccc31f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 16:27:56 -0700
+Subject: nvme: fix SRCU protection of nvme_ns_head list
+
+From: Caleb Sander <csander@purestorage.com>
+
+[ Upstream commit 899d2a05dc14733cfba6224083c6b0dd5a738590 ]
+
+Walking the nvme_ns_head siblings list is protected by the head's srcu
+in nvme_ns_head_submit_bio() but not nvme_mpath_revalidate_paths().
+Removing namespaces from the list also fails to synchronize the srcu.
+Concurrent scan work can therefore cause use-after-frees.
+
+Hold the head's srcu lock in nvme_mpath_revalidate_paths() and
+synchronize with the srcu, not the global RCU, in nvme_ns_remove().
+
+Observed the following panic when making NVMe/RDMA connections
+with native multipath on the Rocky Linux 8.6 kernel
+(it seems the upstream kernel has the same race condition).
+Disassembly shows the faulting instruction is cmp 0x50(%rdx),%rcx;
+computing capacity != get_capacity(ns->disk).
+Address 0x50 is dereferenced because ns->disk is NULL.
+The NULL disk appears to be the result of concurrent scan work
+freeing the namespace (note the log line in the middle of the panic).
+
+[37314.206036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050
+[37314.206036] nvme0n3: detected capacity change from 0 to 11811160064
+[37314.299753] PGD 0 P4D 0
+[37314.299756] Oops: 0000 [#1] SMP PTI
+[37314.299759] CPU: 29 PID: 322046 Comm: kworker/u98:3 Kdump: loaded Tainted: G        W      X --------- -  - 4.18.0-372.32.1.el8test86.x86_64 #1
+[37314.299762] Hardware name: Dell Inc. PowerEdge R720/0JP31P, BIOS 2.7.0 05/23/2018
+[37314.299763] Workqueue: nvme-wq nvme_scan_work [nvme_core]
+[37314.299783] RIP: 0010:nvme_mpath_revalidate_paths+0x26/0xb0 [nvme_core]
+[37314.299790] Code: 1f 44 00 00 66 66 66 66 90 55 53 48 8b 5f 50 48 8b 83 c8 c9 00 00 48 8b 13 48 8b 48 50 48 39 d3 74 20 48 8d 42 d0 48 8b 50 20 <48> 3b 4a 50 74 05 f0 80 60 70 ef 48 8b 50 30 48 8d 42 d0 48 39 d3
+[37315.058803] RSP: 0018:ffffabe28f913d10 EFLAGS: 00010202
+[37315.121316] RAX: ffff927a077da800 RBX: ffff92991dd70000 RCX: 0000000001600000
+[37315.206704] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff92991b719800
+[37315.292106] RBP: ffff929a6b70c000 R08: 000000010234cd4a R09: c0000000ffff7fff
+[37315.377501] R10: 0000000000000001 R11: ffffabe28f913a30 R12: 0000000000000000
+[37315.462889] R13: ffff92992716600c R14: ffff929964e6e030 R15: ffff92991dd70000
+[37315.548286] FS:  0000000000000000(0000) GS:ffff92b87fb80000(0000) knlGS:0000000000000000
+[37315.645111] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[37315.713871] CR2: 0000000000000050 CR3: 0000002208810006 CR4: 00000000000606e0
+[37315.799267] Call Trace:
+[37315.828515]  nvme_update_ns_info+0x1ac/0x250 [nvme_core]
+[37315.892075]  nvme_validate_or_alloc_ns+0x2ff/0xa00 [nvme_core]
+[37315.961871]  ? __blk_mq_free_request+0x6b/0x90
+[37316.015021]  nvme_scan_work+0x151/0x240 [nvme_core]
+[37316.073371]  process_one_work+0x1a7/0x360
+[37316.121318]  ? create_worker+0x1a0/0x1a0
+[37316.168227]  worker_thread+0x30/0x390
+[37316.212024]  ? create_worker+0x1a0/0x1a0
+[37316.258939]  kthread+0x10a/0x120
+[37316.297557]  ? set_kthread_struct+0x50/0x50
+[37316.347590]  ret_from_fork+0x35/0x40
+[37316.390360] Modules linked in: nvme_rdma nvme_tcp(X) nvme_fabrics nvme_core netconsole iscsi_tcp libiscsi_tcp dm_queue_length dm_service_time nf_conntrack_netlink br_netfilter bridge stp llc overlay nft_chain_nat ipt_MASQUERADE nf_nat xt_addrtype xt_CT nft_counter xt_state xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment xt_multiport nft_compat nf_tables libcrc32c nfnetlink dm_multipath tg3 rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm intel_rapl_msr iTCO_wdt iTCO_vendor_support dcdbas intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel ib_uverbs rapl intel_cstate intel_uncore ib_core ipmi_si joydev mei_me pcspkr ipmi_devintf mei lpc_ich wmi ipmi_msghandler acpi_power_meter ext4 mbcache jbd2 sd_mod t10_pi sg mgag200 mlx5_core drm_kms_helper syscopyarea
+[37316.390419]  sysfillrect ahci sysimgblt fb_sys_fops libahci drm crc32c_intel libata mlxfw pci_hyperv_intf tls i2c_algo_bit psample dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: nvme_core]
+[37317.645908] CR2: 0000000000000050
+
+Fixes: e7d65803e2bb ("nvme-multipath: revalidate paths during rescan")
+Signed-off-by: Caleb Sander <csander@purestorage.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/core.c      | 2 +-
+ drivers/nvme/host/multipath.c | 3 +++
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
+index 01c36284e542..f612a0ba64d0 100644
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -4297,7 +4297,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
+       mutex_unlock(&ns->ctrl->subsys->lock);
+       /* guarantee not available in head->list */
+-      synchronize_rcu();
++      synchronize_srcu(&ns->head->srcu);
+       if (!nvme_ns_head_multipath(ns->head))
+               nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index b9cf17cbbbd5..114e2b9359f8 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+       struct nvme_ns_head *head = ns->head;
+       sector_t capacity = get_capacity(head->disk);
+       int node;
++      int srcu_idx;
++      srcu_idx = srcu_read_lock(&head->srcu);
+       list_for_each_entry_rcu(ns, &head->list, siblings) {
+               if (capacity != get_capacity(ns->disk))
+                       clear_bit(NVME_NS_READY, &ns->flags);
+       }
++      srcu_read_unlock(&head->srcu, srcu_idx);
+       for_each_node(node)
+               rcu_assign_pointer(head->current_path[node], NULL);
+-- 
+2.35.1
+
diff --git a/queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch b/queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch
new file mode 100644 (file)
index 0000000..4733da8
--- /dev/null
@@ -0,0 +1,43 @@
+From e88a843fbe8f4c931ad7056d55a8f30d628af911 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 15:30:34 +0300
+Subject: pinctrl: single: Fix potential division by zero
+
+From: Maxim Korotkov <korotkov.maxim.s@gmail.com>
+
+[ Upstream commit 64c150339e7f6c5cbbe8c17a56ef2b3902612798 ]
+
+There is a possibility of dividing by zero due to the pcs->bits_per_pin
+if pcs->fmask() also has a value of zero and called fls
+from asm-generic/bitops/builtin-fls.h or arch/x86/include/asm/bitops.h.
+The function pcs_probe() has the branch that assigned to fmask 0 before
+pcs_allocate_pin_table() was called
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 4e7e8017a80e ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules")
+Signed-off-by: Maxim Korotkov <korotkov.maxim.s@gmail.com>
+Reviewed-by: Tony Lindgren <tony@atomide.com>
+Link: https://lore.kernel.org/r/20221117123034.27383-1-korotkov.maxim.s@gmail.com
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/pinctrl-single.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
+index 67bec7ea0f8b..414ee6bb8ac9 100644
+--- a/drivers/pinctrl/pinctrl-single.c
++++ b/drivers/pinctrl/pinctrl-single.c
+@@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs)
+       mux_bytes = pcs->width / BITS_PER_BYTE;
+-      if (pcs->bits_per_mux) {
++      if (pcs->bits_per_mux && pcs->fmask) {
+               pcs->bits_per_pin = fls(pcs->fmask);
+               nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin;
+       } else {
+-- 
+2.35.1
+
diff --git a/queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch b/queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch
new file mode 100644 (file)
index 0000000..945c117
--- /dev/null
@@ -0,0 +1,107 @@
+From e1afba7fb3f855473577799d525475085df2b466 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 30 Oct 2022 20:45:17 +0800
+Subject: riscv: fix race when vmap stack overflow
+
+From: Jisheng Zhang <jszhang@kernel.org>
+
+[ Upstream commit 7e1864332fbc1b993659eab7974da9fe8bf8c128 ]
+
+Currently, when detecting vmap stack overflow, riscv firstly switches
+to the so called shadow stack, then use this shadow stack to call the
+get_overflow_stack() to get the overflow stack. However, there's
+a race here if two or more harts use the same shadow stack at the same
+time.
+
+To solve this race, we introduce spin_shadow_stack atomic var, which
+will be swap between its own address and 0 in atomic way, when the
+var is set, it means the shadow_stack is being used; when the var
+is cleared, it means the shadow_stack isn't being used.
+
+Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
+Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
+Suggested-by: Guo Ren <guoren@kernel.org>
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20221030124517.2370-1-jszhang@kernel.org
+[Palmer: Add AQ to the swap, and also some comments.]
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/asm.h |  1 +
+ arch/riscv/kernel/entry.S    | 13 +++++++++++++
+ arch/riscv/kernel/traps.c    | 18 ++++++++++++++++++
+ 3 files changed, 32 insertions(+)
+
+diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
+index 1b471ff73178..816e753de636 100644
+--- a/arch/riscv/include/asm/asm.h
++++ b/arch/riscv/include/asm/asm.h
+@@ -23,6 +23,7 @@
+ #define REG_L         __REG_SEL(ld, lw)
+ #define REG_S         __REG_SEL(sd, sw)
+ #define REG_SC                __REG_SEL(sc.d, sc.w)
++#define REG_AMOSWAP_AQ        __REG_SEL(amoswap.d.aq, amoswap.w.aq)
+ #define REG_ASM               __REG_SEL(.dword, .word)
+ #define SZREG         __REG_SEL(8, 4)
+ #define LGREG         __REG_SEL(3, 2)
+diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
+index b9eda3fcbd6d..186abd146eaf 100644
+--- a/arch/riscv/kernel/entry.S
++++ b/arch/riscv/kernel/entry.S
+@@ -404,6 +404,19 @@ handle_syscall_trace_exit:
+ #ifdef CONFIG_VMAP_STACK
+ handle_kernel_stack_overflow:
++      /*
++       * Takes the psuedo-spinlock for the shadow stack, in case multiple
++       * harts are concurrently overflowing their kernel stacks.  We could
++       * store any value here, but since we're overflowing the kernel stack
++       * already we only have SP to use as a scratch register.  So we just
++       * swap in the address of the spinlock, as that's definately non-zero.
++       *
++       * Pairs with a store_release in handle_bad_stack().
++       */
++1:    la sp, spin_shadow_stack
++      REG_AMOSWAP_AQ sp, sp, (sp)
++      bnez sp, 1b
++
+       la sp, shadow_stack
+       addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
+diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
+index 635e6ec26938..6e8822446069 100644
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -218,11 +218,29 @@ asmlinkage unsigned long get_overflow_stack(void)
+               OVERFLOW_STACK_SIZE;
+ }
++/*
++ * A pseudo spinlock to protect the shadow stack from being used by multiple
++ * harts concurrently.  This isn't a real spinlock because the lock side must
++ * be taken without a valid stack and only a single register, it's only taken
++ * while in the process of panicing anyway so the performance and error
++ * checking a proper spinlock gives us doesn't matter.
++ */
++unsigned long spin_shadow_stack;
++
+ asmlinkage void handle_bad_stack(struct pt_regs *regs)
+ {
+       unsigned long tsk_stk = (unsigned long)current->stack;
+       unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
++      /*
++       * We're done with the shadow stack by this point, as we're on the
++       * overflow stack.  Tell any other concurrent overflowing harts that
++       * they can proceed with panicing by releasing the pseudo-spinlock.
++       *
++       * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
++       */
++      smp_store_release(&spin_shadow_stack, 0);
++
+       console_verbose();
+       pr_emerg("Insufficient stack space to handle exception!\n");
+-- 
+2.35.1
+
diff --git a/queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch b/queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch
new file mode 100644 (file)
index 0000000..b3806ee
--- /dev/null
@@ -0,0 +1,314 @@
+From 067905b58e2bdce609e031dc3882039a2482fdac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 10:16:03 -0400
+Subject: riscv: kexec: Fixup crash_smp_send_stop without multi cores
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 9b932aadfc47de5d70b53ea04b0d1b5f6c82945b ]
+
+Current crash_smp_send_stop is the same as the generic one in
+kernel/panic and misses crash_save_cpu in percpu. This patch is inspired
+by 78fd584cdec0 ("arm64: kdump: implement machine_crash_shutdown()")
+and adds the same mechanism for riscv.
+
+Before this patch, test result:
+crash> help -r
+CPU 0: [OFFLINE]
+
+CPU 1:
+epc : ffffffff80009ff0 ra : ffffffff800b789a sp : ff2000001098bb40
+ gp : ffffffff815fca60 tp : ff60000004680000 t0 : 6666666666663c5b
+ t1 : 0000000000000000 t2 : 666666666666663c s0 : ff2000001098bc90
+ s1 : ffffffff81600798 a0 : ff2000001098bb48 a1 : 0000000000000000
+ a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000
+ a5 : ff60000004690800 a6 : 0000000000000000 a7 : 0000000000000000
+ s2 : ff2000001098bb48 s3 : ffffffff81093ec8 s4 : ffffffff816004ac
+ s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80e7f720
+ s8 : 00fffffffffff3f0 s9 : 0000000000000007 s10: 00aaaaaaaab98700
+ s11: 0000000000000001 t3 : ffffffff819a8097 t4 : ffffffff819a8097
+ t5 : ffffffff819a8098 t6 : ff2000001098b9a8
+
+CPU 2: [OFFLINE]
+
+CPU 3: [OFFLINE]
+
+After this patch, test result:
+crash> help -r
+CPU 0:
+epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ffffffff81403eb0
+ gp : ffffffff815fcb48 tp : ffffffff81413400 t0 : 0000000000000000
+ t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffff81403ec0
+ s1 : 0000000000000000 a0 : 0000000000000000 a1 : 0000000000000000
+ a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
+ a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000
+ s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18
+ s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000
+ s8 : 0000000000000000 s9 : 0000000080039eac s10: 0000000000000000
+ s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000
+ t5 : 0000000000000000 t6 : 0000000000000000
+
+CPU 1:
+epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ff2000000068bf30
+ gp : ffffffff815fcb48 tp : ff6000000240d400 t0 : 0000000000000000
+ t1 : 0000000000000000 t2 : 0000000000000000 s0 : ff2000000068bf40
+ s1 : 0000000000000001 a0 : 0000000000000000 a1 : 0000000000000000
+ a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
+ a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000
+ s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18
+ s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000
+ s8 : 0000000000000000 s9 : 0000000080039ea8 s10: 0000000000000000
+ s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000
+ t5 : 0000000000000000 t6 : 0000000000000000
+
+CPU 2:
+epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ff20000000693f30
+ gp : ffffffff815fcb48 tp : ff6000000240e900 t0 : 0000000000000000
+ t1 : 0000000000000000 t2 : 0000000000000000 s0 : ff20000000693f40
+ s1 : 0000000000000002 a0 : 0000000000000000 a1 : 0000000000000000
+ a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000
+ a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000
+ s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18
+ s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000
+ s8 : 0000000000000000 s9 : 0000000080039eb0 s10: 0000000000000000
+ s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000
+ t5 : 0000000000000000 t6 : 0000000000000000
+
+CPU 3:
+epc : ffffffff8000a1e4 ra : ffffffff800b7bba sp : ff200000109bbb40
+ gp : ffffffff815fcb48 tp : ff6000000373aa00 t0 : 6666666666663c5b
+ t1 : 0000000000000000 t2 : 666666666666663c s0 : ff200000109bbc90
+ s1 : ffffffff816007a0 a0 : ff200000109bbb48 a1 : 0000000000000000
+ a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000
+ a5 : ff60000002c61c00 a6 : 0000000000000000 a7 : 0000000000000000
+ s2 : ff200000109bbb48 s3 : ffffffff810941a8 s4 : ffffffff816004b4
+ s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80e7f7a0
+ s8 : 00fffffffffff3f0 s9 : 0000000000000007 s10: 00aaaaaaaab98700
+ s11: 0000000000000001 t3 : ffffffff819a8097 t4 : ffffffff819a8097
+ t5 : ffffffff819a8098 t6 : ff200000109bb9a8
+
+Fixes: ad943893d5f1 ("RISC-V: Fixup schedule out issue in machine_crash_shutdown()")
+Reviewed-by: Xianting Tian <xianting.tian@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: Nick Kossifidis <mick@ics.forth.gr>
+Link: https://lore.kernel.org/r/20221020141603.2856206-3-guoren@kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/smp.h      |  3 +
+ arch/riscv/kernel/machine_kexec.c | 21 ++-----
+ arch/riscv/kernel/smp.c           | 97 ++++++++++++++++++++++++++++++-
+ 3 files changed, 103 insertions(+), 18 deletions(-)
+
+diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
+index d3443be7eedc..3831b638ecab 100644
+--- a/arch/riscv/include/asm/smp.h
++++ b/arch/riscv/include/asm/smp.h
+@@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
+ /* Clear IPI for current CPU */
+ void riscv_clear_ipi(void);
++/* Check other CPUs stop or not */
++bool smp_crash_stop_failed(void);
++
+ /* Secondary hart entry */
+ asmlinkage void smp_callin(void);
+diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
+index db41c676e5a2..2d139b724bc8 100644
+--- a/arch/riscv/kernel/machine_kexec.c
++++ b/arch/riscv/kernel/machine_kexec.c
+@@ -140,22 +140,6 @@ void machine_shutdown(void)
+ #endif
+ }
+-/* Override the weak function in kernel/panic.c */
+-void crash_smp_send_stop(void)
+-{
+-      static int cpus_stopped;
+-
+-      /*
+-       * This function can be called twice in panic path, but obviously
+-       * we execute this only once.
+-       */
+-      if (cpus_stopped)
+-              return;
+-
+-      smp_send_stop();
+-      cpus_stopped = 1;
+-}
+-
+ static void machine_kexec_mask_interrupts(void)
+ {
+       unsigned int i;
+@@ -230,6 +214,11 @@ machine_kexec(struct kimage *image)
+       void *control_code_buffer = page_address(image->control_code_page);
+       riscv_kexec_method kexec_method = NULL;
++#ifdef CONFIG_SMP
++      WARN(smp_crash_stop_failed(),
++              "Some CPUs may be stale, kdump will be unreliable.\n");
++#endif
++
+       if (image->type != KEXEC_TYPE_CRASH)
+               kexec_method = control_code_buffer;
+       else
+diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
+index 760a64518c58..8c3b59f1f9b8 100644
+--- a/arch/riscv/kernel/smp.c
++++ b/arch/riscv/kernel/smp.c
+@@ -12,6 +12,7 @@
+ #include <linux/clockchips.h>
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
++#include <linux/kexec.h>
+ #include <linux/profile.h>
+ #include <linux/smp.h>
+ #include <linux/sched.h>
+@@ -22,11 +23,13 @@
+ #include <asm/sbi.h>
+ #include <asm/tlbflush.h>
+ #include <asm/cacheflush.h>
++#include <asm/cpu_ops.h>
+ enum ipi_message_type {
+       IPI_RESCHEDULE,
+       IPI_CALL_FUNC,
+       IPI_CPU_STOP,
++      IPI_CPU_CRASH_STOP,
+       IPI_IRQ_WORK,
+       IPI_TIMER,
+       IPI_MAX
+@@ -71,6 +74,32 @@ static void ipi_stop(void)
+               wait_for_interrupt();
+ }
++#ifdef CONFIG_KEXEC_CORE
++static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
++
++static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
++{
++      crash_save_cpu(regs, cpu);
++
++      atomic_dec(&waiting_for_crash_ipi);
++
++      local_irq_disable();
++
++#ifdef CONFIG_HOTPLUG_CPU
++      if (cpu_has_hotplug(cpu))
++              cpu_ops[cpu]->cpu_stop();
++#endif
++
++      for(;;)
++              wait_for_interrupt();
++}
++#else
++static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
++{
++      unreachable();
++}
++#endif
++
+ static const struct riscv_ipi_ops *ipi_ops __ro_after_init;
+ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
+@@ -124,8 +153,9 @@ void arch_irq_work_raise(void)
+ void handle_IPI(struct pt_regs *regs)
+ {
+-      unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
+-      unsigned long *stats = ipi_data[smp_processor_id()].stats;
++      unsigned int cpu = smp_processor_id();
++      unsigned long *pending_ipis = &ipi_data[cpu].bits;
++      unsigned long *stats = ipi_data[cpu].stats;
+       riscv_clear_ipi();
+@@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs)
+                       ipi_stop();
+               }
++              if (ops & (1 << IPI_CPU_CRASH_STOP)) {
++                      ipi_cpu_crash_stop(cpu, get_irq_regs());
++              }
++
+               if (ops & (1 << IPI_IRQ_WORK)) {
+                       stats[IPI_IRQ_WORK]++;
+                       irq_work_run();
+@@ -176,6 +210,7 @@ static const char * const ipi_names[] = {
+       [IPI_RESCHEDULE]        = "Rescheduling interrupts",
+       [IPI_CALL_FUNC]         = "Function call interrupts",
+       [IPI_CPU_STOP]          = "CPU stop interrupts",
++      [IPI_CPU_CRASH_STOP]    = "CPU stop (for crash dump) interrupts",
+       [IPI_IRQ_WORK]          = "IRQ work interrupts",
+       [IPI_TIMER]             = "Timer broadcast interrupts",
+ };
+@@ -235,6 +270,64 @@ void smp_send_stop(void)
+                          cpumask_pr_args(cpu_online_mask));
+ }
++#ifdef CONFIG_KEXEC_CORE
++/*
++ * The number of CPUs online, not counting this CPU (which may not be
++ * fully online and so not counted in num_online_cpus()).
++ */
++static inline unsigned int num_other_online_cpus(void)
++{
++      unsigned int this_cpu_online = cpu_online(smp_processor_id());
++
++      return num_online_cpus() - this_cpu_online;
++}
++
++void crash_smp_send_stop(void)
++{
++      static int cpus_stopped;
++      cpumask_t mask;
++      unsigned long timeout;
++
++      /*
++       * This function can be called twice in panic path, but obviously
++       * we execute this only once.
++       */
++      if (cpus_stopped)
++              return;
++
++      cpus_stopped = 1;
++
++      /*
++       * If this cpu is the only one alive at this point in time, online or
++       * not, there are no stop messages to be sent around, so just back out.
++       */
++      if (num_other_online_cpus() == 0)
++              return;
++
++      cpumask_copy(&mask, cpu_online_mask);
++      cpumask_clear_cpu(smp_processor_id(), &mask);
++
++      atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
++
++      pr_crit("SMP: stopping secondary CPUs\n");
++      send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
++
++      /* Wait up to one second for other CPUs to stop */
++      timeout = USEC_PER_SEC;
++      while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
++              udelay(1);
++
++      if (atomic_read(&waiting_for_crash_ipi) > 0)
++              pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
++                      cpumask_pr_args(&mask));
++}
++
++bool smp_crash_stop_failed(void)
++{
++      return (atomic_read(&waiting_for_crash_ipi) > 0);
++}
++#endif
++
+ void smp_send_reschedule(int cpu)
+ {
+       send_ipi_single(cpu, IPI_RESCHEDULE);
+-- 
+2.35.1
+
diff --git a/queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch b/queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch
new file mode 100644 (file)
index 0000000..22492b0
--- /dev/null
@@ -0,0 +1,93 @@
+From d939a47576f17848db80adf2307553fe5a982a6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Oct 2022 10:16:02 -0400
+Subject: riscv: kexec: Fixup irq controller broken in kexec crash path
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit b17d19a5314a37f7197afd1a0200affd21a7227d ]
+
+If a crash happens on cpu3 and all interrupts are binding on cpu0, the
+bad irq routing will cause a crash kernel which can't receive any irq.
+Because crash kernel won't clean up all harts' PLIC enable bits in
+enable registers. This patch is similar to 9141a003a491 ("ARM: 7316/1:
+kexec: EOI active and mask all interrupts in kexec crash path") and
+78fd584cdec0 ("arm64: kdump: implement machine_crash_shutdown()"), and
+PowerPC also has the same mechanism.
+
+Fixes: fba8a8674f68 ("RISC-V: Add kexec support")
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Reviewed-by: Xianting Tian <xianting.tian@linux.alibaba.com>
+Cc: Nick Kossifidis <mick@ics.forth.gr>
+Cc: Palmer Dabbelt <palmer@rivosinc.com>
+Link: https://lore.kernel.org/r/20221020141603.2856206-2-guoren@kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/machine_kexec.c | 35 +++++++++++++++++++++++++++++++
+ 1 file changed, 35 insertions(+)
+
+diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
+index ee79e6839b86..db41c676e5a2 100644
+--- a/arch/riscv/kernel/machine_kexec.c
++++ b/arch/riscv/kernel/machine_kexec.c
+@@ -15,6 +15,8 @@
+ #include <linux/compiler.h>   /* For unreachable() */
+ #include <linux/cpu.h>                /* For cpu_down() */
+ #include <linux/reboot.h>
++#include <linux/interrupt.h>
++#include <linux/irq.h>
+ /*
+  * kexec_image_info - Print received image details
+@@ -154,6 +156,37 @@ void crash_smp_send_stop(void)
+       cpus_stopped = 1;
+ }
++static void machine_kexec_mask_interrupts(void)
++{
++      unsigned int i;
++      struct irq_desc *desc;
++
++      for_each_irq_desc(i, desc) {
++              struct irq_chip *chip;
++              int ret;
++
++              chip = irq_desc_get_chip(desc);
++              if (!chip)
++                      continue;
++
++              /*
++               * First try to remove the active state. If this
++               * fails, try to EOI the interrupt.
++               */
++              ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
++
++              if (ret && irqd_irq_inprogress(&desc->irq_data) &&
++                  chip->irq_eoi)
++                      chip->irq_eoi(&desc->irq_data);
++
++              if (chip->irq_mask)
++                      chip->irq_mask(&desc->irq_data);
++
++              if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
++                      chip->irq_disable(&desc->irq_data);
++      }
++}
++
+ /*
+  * machine_crash_shutdown - Prepare to kexec after a kernel crash
+  *
+@@ -169,6 +202,8 @@ machine_crash_shutdown(struct pt_regs *regs)
+       crash_smp_send_stop();
+       crash_save_cpu(regs, smp_processor_id());
++      machine_kexec_mask_interrupts();
++
+       pr_info("Starting crashdump kernel...\n");
+ }
+-- 
+2.35.1
+
diff --git a/queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch b/queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch
new file mode 100644 (file)
index 0000000..57874e1
--- /dev/null
@@ -0,0 +1,87 @@
+From ad0a0ee0e825007e443bf95519c4c9a22dcd8d89 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Nov 2022 14:33:03 +0100
+Subject: riscv: Sync efi page table's kernel mappings before switching
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit 3f105a742725a1b78766a55169f1d827732e62b8 ]
+
+The EFI page table is initially created as a copy of the kernel page table.
+With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area:
+if the stack is allocated in a new PGD (one that was not present at the
+moment of the efi page table creation or not synced in a previous vmalloc
+fault), the kernel will take a trap when switching to the efi page table
+when the vmalloc kernel stack is accessed, resulting in a kernel panic.
+
+Fix that by updating the efi kernel mappings before switching to the efi
+page table.
+
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services")
+Tested-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
+Reviewed-by: Atish Patra <atishp@rivosinc.com>
+Link: https://lore.kernel.org/r/20221121133303.1782246-1-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/efi.h     |  6 +++++-
+ arch/riscv/include/asm/pgalloc.h | 11 ++++++++---
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
+index f74879a8f1ea..e229d7be4b66 100644
+--- a/arch/riscv/include/asm/efi.h
++++ b/arch/riscv/include/asm/efi.h
+@@ -10,6 +10,7 @@
+ #include <asm/mmu_context.h>
+ #include <asm/ptrace.h>
+ #include <asm/tlbflush.h>
++#include <asm/pgalloc.h>
+ #ifdef CONFIG_EFI
+ extern void efi_init(void);
+@@ -20,7 +21,10 @@ extern void efi_init(void);
+ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+-#define arch_efi_call_virt_setup()      efi_virtmap_load()
++#define arch_efi_call_virt_setup()      ({            \
++              sync_kernel_mappings(efi_mm.pgd);       \
++              efi_virtmap_load();                     \
++      })
+ #define arch_efi_call_virt_teardown()   efi_virtmap_unload()
+ #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
+diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
+index 947f23d7b6af..59dc12b5b7e8 100644
+--- a/arch/riscv/include/asm/pgalloc.h
++++ b/arch/riscv/include/asm/pgalloc.h
+@@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+ #define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
+ #endif /* __PAGETABLE_PMD_FOLDED */
++static inline void sync_kernel_mappings(pgd_t *pgd)
++{
++      memcpy(pgd + USER_PTRS_PER_PGD,
++             init_mm.pgd + USER_PTRS_PER_PGD,
++             (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
++}
++
+ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+       pgd_t *pgd;
+@@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+       if (likely(pgd != NULL)) {
+               memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+               /* Copy kernel mappings */
+-              memcpy(pgd + USER_PTRS_PER_PGD,
+-                      init_mm.pgd + USER_PTRS_PER_PGD,
+-                      (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
++              sync_kernel_mappings(pgd);
+       }
+       return pgd;
+ }
+-- 
+2.35.1
+
index f481cf3a73a82b487fac64ab006703f5286ac98e..2bbe3e5c535e3f827e6013540534c3ddc7a24a60 100644 (file)
@@ -98,3 +98,17 @@ drm-i915-never-return-0-if-not-all-requests-retired.patch
 tracing-osnoise-fix-duration-type.patch
 tracing-fix-race-where-histograms-can-be-called-before-the-event.patch
 tracing-free-buffers-when-a-used-dynamic-event-is-removed.patch
+asoc-ops-fix-bounds-check-for-_sx-controls.patch
+asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch
+pinctrl-single-fix-potential-division-by-zero.patch
+riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch
+riscv-fix-race-when-vmap-stack-overflow.patch
+riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch
+riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch
+nvme-fix-srcu-protection-of-nvme_ns_head-list.patch
+iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch
+iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch
+ipv4-handle-attempt-to-delete-multipath-route-when-f.patch
+ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch
+mm-damon-introduce-struct-damos_access_pattern.patch
+mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch