From 226240c4b86148e29ed05780b3f81abbc1ab7ee5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 3 Dec 2022 21:26:48 -0500 Subject: [PATCH] Fixes for 6.0 Signed-off-by: Sasha Levin --- ...ps-fix-bounds-check-for-_sx-controls.patch | 39 ++ ...xx-fix-build-error-for-implicit-func.patch | 75 ++++ ...ci-device-refcount-leak-in-dmar_dev_.patch | 43 ++ ...ci-device-refcount-leak-in-has_exter.patch | 47 ++ ...eletion-when-nexthop-info-is-not-spe.patch | 118 +++++ ...mpt-to-delete-multipath-route-when-f.patch | 75 ++++ ...ntroduce-struct-damos_access_pattern.patch | 409 ++++++++++++++++++ ...ix-wrong-empty-schemes-assumption-un.patch | 97 +++++ ...srcu-protection-of-nvme_ns_head-list.patch | 104 +++++ ...ingle-fix-potential-division-by-zero.patch | 43 ++ ...cv-fix-race-when-vmap-stack-overflow.patch | 107 +++++ ...p-crash_smp_send_stop-without-multi-.patch | 314 ++++++++++++++ ...p-irq-controller-broken-in-kexec-cra.patch | 93 ++++ ...age-table-s-kernel-mappings-before-s.patch | 87 ++++ queue-6.0/series | 14 + 15 files changed, 1665 insertions(+) create mode 100644 queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch create mode 100644 queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch create mode 100644 queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch create mode 100644 queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch create mode 100644 queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch create mode 100644 queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch create mode 100644 queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch create mode 100644 queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch create mode 100644 queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch create mode 100644 queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch create mode 100644 queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch create mode 
100644 queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch create mode 100644 queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch create mode 100644 queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch diff --git a/queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch b/queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch new file mode 100644 index 00000000000..ccf94b22404 --- /dev/null +++ b/queue-6.0/asoc-ops-fix-bounds-check-for-_sx-controls.patch @@ -0,0 +1,39 @@ +From 6b50f2955b935ca4b4bf670333b1b34fce80dffb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 May 2022 14:41:36 +0100 +Subject: ASoC: ops: Fix bounds check for _sx controls + +From: Mark Brown + +[ Upstream commit 698813ba8c580efb356ace8dbf55f61dac6063a8 ] + +For _sx controls the semantics of the max field is not the usual one, max +is the number of steps rather than the maximum value. This means that our +check in snd_soc_put_volsw_sx() needs to just check against the maximum +value. 
+ +Fixes: 4f1e50d6a9cf9c1b ("ASoC: ops: Reject out of bounds values in snd_soc_put_volsw_sx()") +Signed-off-by: Mark Brown +Link: https://lore.kernel.org/r/20220511134137.169575-1-broonie@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/soc-ops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c +index bd88de056358..47691119306f 100644 +--- a/sound/soc/soc-ops.c ++++ b/sound/soc/soc-ops.c +@@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && val > mc->platform_max) + return -EINVAL; +- if (val > max - min) ++ if (val > max) + return -EINVAL; + val_mask = mask << shift; + val = (val + min) & mask; +-- +2.35.1 + diff --git a/queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch b/queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch new file mode 100644 index 00000000000..5b06bc0c85c --- /dev/null +++ b/queue-6.0/asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch @@ -0,0 +1,75 @@ +From 5bdcaab19aa501176d7ff4c5b00d2c7883c8ed64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 May 2022 15:46:40 +0800 +Subject: ASoC: tlv320adc3xxx: Fix build error for implicit function + declaration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hui Tang + +[ Upstream commit 19c5bda74dc45fee598a57600b550c9ea7662f10 ] + +sound/soc/codecs/tlv320adc3xxx.c: In function ‘adc3xxx_i2c_probe’: +sound/soc/codecs/tlv320adc3xxx.c:1359:21: error: implicit declaration of function ‘devm_gpiod_get’; did you mean ‘devm_gpio_free’? 
[-Werror=implicit-function-declaration] + adc3xxx->rst_pin = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + ^~~~~~~~~~~~~~ + devm_gpio_free + CC [M] drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.o + LD [M] sound/soc/codecs/snd-soc-ak4671.o + LD [M] sound/soc/codecs/snd-soc-arizona.o + LD [M] sound/soc/codecs/snd-soc-cros-ec-codec.o + LD [M] sound/soc/codecs/snd-soc-ak4641.o + LD [M] sound/soc/codecs/snd-soc-alc5632.o +sound/soc/codecs/tlv320adc3xxx.c:1359:50: error: ‘GPIOD_OUT_LOW’ undeclared (first use in this function); did you mean ‘GPIOF_INIT_LOW’? + adc3xxx->rst_pin = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + ^~~~~~~~~~~~~ + GPIOF_INIT_LOW +sound/soc/codecs/tlv320adc3xxx.c:1359:50: note: each undeclared identifier is reported only once for each function it appears in + LD [M] sound/soc/codecs/snd-soc-cs35l32.o +sound/soc/codecs/tlv320adc3xxx.c:1408:2: error: implicit declaration of function ‘gpiod_set_value_cansleep’; did you mean ‘gpio_set_value_cansleep’? [-Werror=implicit-function-declaration] + gpiod_set_value_cansleep(adc3xxx->rst_pin, 1); + ^~~~~~~~~~~~~~~~~~~~~~~~ + gpio_set_value_cansleep + LD [M] sound/soc/codecs/snd-soc-cs35l41-lib.o + LD [M] sound/soc/codecs/snd-soc-cs35l36.o + LD [M] sound/soc/codecs/snd-soc-cs35l34.o + LD [M] sound/soc/codecs/snd-soc-cs35l41.o + CC [M] drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.o +cc1: all warnings being treated as errors + +Fixes: e9a3b57efd28 ("ASoC: codec: tlv320adc3xxx: New codec driver") +Signed-off-by: Hui Tang +Link: https://lore.kernel.org/r/20220512074640.75550-3-tanghui20@huawei.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/tlv320adc3xxx.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c +index 8a0965cd3e66..297c458c4d8b 100644 +--- a/sound/soc/codecs/tlv320adc3xxx.c ++++ b/sound/soc/codecs/tlv320adc3xxx.c +@@ -14,6 +14,7 @@ + + #include + #include ++#include + #include + 
#include + #include +@@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { + + static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) + { ++#ifdef CONFIG_GPIOLIB + gpiochip_remove(&adc3xxx->gpio_chip); ++#endif + } + + static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) +-- +2.35.1 + diff --git a/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch b/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch new file mode 100644 index 00000000000..828faa9f3c3 --- /dev/null +++ b/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch @@ -0,0 +1,43 @@ +From 31a65fdd67f7bc836e005122829218c895845185 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Dec 2022 12:01:27 +0800 +Subject: iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init() + +From: Xiongfeng Wang + +[ Upstream commit 4bedbbd782ebbe7287231fea862c158d4f08a9e3 ] + +for_each_pci_dev() is implemented by pci_get_device(). The comment of +pci_get_device() says that it will increase the reference count for the +returned pci_dev and also decrease the reference count for the input +pci_dev @from if it is not NULL. + +If we break for_each_pci_dev() loop with pdev not NULL, we need to call +pci_dev_put() to decrease the reference count. Add the missing +pci_dev_put() for the error path to avoid reference count leak. 
+ +Fixes: 2e4552893038 ("iommu/vt-d: Unify the way to process DMAR device scope array") +Signed-off-by: Xiongfeng Wang +Link: https://lore.kernel.org/r/20221121113649.190393-3-wangxiongfeng2@huawei.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/dmar.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c +index 5a8f780e7ffd..bc94059a5b87 100644 +--- a/drivers/iommu/intel/dmar.c ++++ b/drivers/iommu/intel/dmar.c +@@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) + info = dmar_alloc_pci_notify_info(dev, + BUS_NOTIFY_ADD_DEVICE); + if (!info) { ++ pci_dev_put(dev); + return dmar_dev_scope_status; + } else { + dmar_pci_bus_add_dev(info); +-- +2.35.1 + diff --git a/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch b/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch new file mode 100644 index 00000000000..9032ae9fb92 --- /dev/null +++ b/queue-6.0/iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch @@ -0,0 +1,47 @@ +From 90897a7c53d24bec9408648da2a1f98d8d5717e1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Dec 2022 12:01:26 +0800 +Subject: iommu/vt-d: Fix PCI device refcount leak in has_external_pci() + +From: Xiongfeng Wang + +[ Upstream commit afca9e19cc720bfafc75dc5ce429c185ca93f31d ] + +for_each_pci_dev() is implemented by pci_get_device(). The comment of +pci_get_device() says that it will increase the reference count for the +returned pci_dev and also decrease the reference count for the input +pci_dev @from if it is not NULL. + +If we break for_each_pci_dev() loop with pdev not NULL, we need to call +pci_dev_put() to decrease the reference count. Add the missing +pci_dev_put() before 'return true' to avoid reference count leak. 
+ +Fixes: 89a6079df791 ("iommu/vt-d: Force IOMMU on for platform opt in hint") +Signed-off-by: Xiongfeng Wang +Link: https://lore.kernel.org/r/20221121113649.190393-2-wangxiongfeng2@huawei.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/iommu.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index e47700674978..412b106d2a39 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -3844,8 +3844,10 @@ static inline bool has_external_pci(void) + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) +- if (pdev->external_facing) ++ if (pdev->external_facing) { ++ pci_dev_put(pdev); + return true; ++ } + + return false; + } +-- +2.35.1 + diff --git a/queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch b/queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch new file mode 100644 index 00000000000..49234af49f8 --- /dev/null +++ b/queue-6.0/ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch @@ -0,0 +1,118 @@ +From 845eb16dd753154d97d904758f7eeae06d8cce74 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Nov 2022 23:09:32 +0200 +Subject: ipv4: Fix route deletion when nexthop info is not specified + +From: Ido Schimmel + +[ Upstream commit d5082d386eee7e8ec46fa8581932c81a4961dcef ] + +When the kernel receives a route deletion request from user space it +tries to delete a route that matches the route attributes specified in +the request. + +If only prefix information is specified in the request, the kernel +should delete the first matching FIB alias regardless of its associated +FIB info. 
However, an error is currently returned when the FIB info is +backed by a nexthop object: + + # ip nexthop add id 1 via 192.0.2.2 dev dummy10 + # ip route add 198.51.100.0/24 nhid 1 + # ip route del 198.51.100.0/24 + RTNETLINK answers: No such process + +Fix by matching on such a FIB info when legacy nexthop attributes are +not specified in the request. An earlier check already covers the case +where a nexthop ID is specified in the request. + +Add tests that cover these flows. Before the fix: + + # ./fib_nexthops.sh -t ipv4_fcnal + ... + TEST: Delete route when not specifying nexthop attributes [FAIL] + + Tests passed: 11 + Tests failed: 1 + +After the fix: + + # ./fib_nexthops.sh -t ipv4_fcnal + ... + TEST: Delete route when not specifying nexthop attributes [ OK ] + + Tests passed: 12 + Tests failed: 0 + +No regressions in other tests: + + # ./fib_nexthops.sh + ... + Tests passed: 228 + Tests failed: 0 + + # ./fib_tests.sh + ... + Tests passed: 186 + Tests failed: 0 + +Cc: stable@vger.kernel.org +Reported-by: Jonas Gorski +Tested-by: Jonas Gorski +Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") +Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning") +Fixes: 61b91eb33a69 ("ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference") +Signed-off-by: Ido Schimmel +Reviewed-by: Nikolay Aleksandrov +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20221124210932.2470010-1-idosch@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/fib_semantics.c | 8 +++++--- + tools/testing/selftests/net/fib_nexthops.sh | 11 +++++++++++ + 2 files changed, 16 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index e9a7f70a54df..cb24260692e1 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, + return 1; + } 
+ +- /* cannot match on nexthop object attributes */ +- if (fi->nh) +- return 1; ++ if (fi->nh) { ++ if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) ++ return 1; ++ return 0; ++ } + + if (cfg->fc_oif || cfg->fc_gw_family) { + struct fib_nh *nh; +diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh +index ee5e98204d3d..a47b26ab48f2 100755 +--- a/tools/testing/selftests/net/fib_nexthops.sh ++++ b/tools/testing/selftests/net/fib_nexthops.sh +@@ -1228,6 +1228,17 @@ ipv4_fcnal() + run_cmd "$IP ro add 172.16.101.0/24 nhid 21" + run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" + log_test $? 2 "Delete multipath route with only nh id based entry" ++ ++ run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" ++ run_cmd "$IP ro add 172.16.102.0/24 nhid 22" ++ run_cmd "$IP ro del 172.16.102.0/24 dev veth1" ++ log_test $? 2 "Delete route when specifying only nexthop device" ++ ++ run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" ++ log_test $? 2 "Delete route when specifying only gateway" ++ ++ run_cmd "$IP ro del 172.16.102.0/24" ++ log_test $? 
0 "Delete route when not specifying nexthop attributes" + } + + ipv4_grp_fcnal() +-- +2.35.1 + diff --git a/queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch b/queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch new file mode 100644 index 00000000000..039a2b28f9a --- /dev/null +++ b/queue-6.0/ipv4-handle-attempt-to-delete-multipath-route-when-f.patch @@ -0,0 +1,75 @@ +From 740188d99924efa8cfa6ef0e0d9e1030f60a482e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Oct 2022 10:48:49 -0600 +Subject: ipv4: Handle attempt to delete multipath route when fib_info contains + an nh reference + +From: David Ahern + +[ Upstream commit 61b91eb33a69c3be11b259c5ea484505cd79f883 ] + +Gwangun Jung reported a slab-out-of-bounds access in fib_nh_match: + fib_nh_match+0xf98/0x1130 linux-6.0-rc7/net/ipv4/fib_semantics.c:961 + fib_table_delete+0x5f3/0xa40 linux-6.0-rc7/net/ipv4/fib_trie.c:1753 + inet_rtm_delroute+0x2b3/0x380 linux-6.0-rc7/net/ipv4/fib_frontend.c:874 + +Separate nexthop objects are mutually exclusive with the legacy +multipath spec. Fix fib_nh_match to return if the config for the +to be deleted route contains a multipath spec while the fib_info +is using a nexthop object. + +Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") +Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning") +Reported-by: Gwangun Jung +Signed-off-by: David Ahern +Reviewed-by: Ido Schimmel +Tested-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Stable-dep-of: d5082d386eee ("ipv4: Fix route deletion when nexthop info is not specified") +Signed-off-by: Sasha Levin +--- + net/ipv4/fib_semantics.c | 8 ++++---- + tools/testing/selftests/net/fib_nexthops.sh | 5 +++++ + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index 2dc97583d279..e9a7f70a54df 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -888,13 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, + return 1; + } + ++ /* cannot match on nexthop object attributes */ ++ if (fi->nh) ++ return 1; ++ + if (cfg->fc_oif || cfg->fc_gw_family) { + struct fib_nh *nh; + +- /* cannot match on nexthop object attributes */ +- if (fi->nh) +- return 1; +- + nh = fib_info_nh(fi, 0); + if (cfg->fc_encap) { + if (fib_encap_match(net, cfg->fc_encap_type, +diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh +index d5a0dd548989..ee5e98204d3d 100755 +--- a/tools/testing/selftests/net/fib_nexthops.sh ++++ b/tools/testing/selftests/net/fib_nexthops.sh +@@ -1223,6 +1223,11 @@ ipv4_fcnal() + log_test $rc 0 "Delete nexthop route warning" + run_cmd "$IP route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP nexthop del id 12" ++ ++ run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1" ++ run_cmd "$IP ro add 172.16.101.0/24 nhid 21" ++ run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" ++ log_test $? 
2 "Delete multipath route with only nh id based entry" + } + + ipv4_grp_fcnal() +-- +2.35.1 + diff --git a/queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch b/queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch new file mode 100644 index 00000000000..c9f3ac38f68 --- /dev/null +++ b/queue-6.0/mm-damon-introduce-struct-damos_access_pattern.patch @@ -0,0 +1,409 @@ +From b5cf5c6a4e8df453e32dafa2bc07fc38593fe43a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Sep 2022 19:14:43 +0000 +Subject: mm/damon: introduce struct damos_access_pattern + +From: Yajun Deng + +[ Upstream commit f5a79d7c0c87c8d88bb5e3f3c898258fdf1b3b05 ] + +damon_new_scheme() has too many parameters, so introduce struct +damos_access_pattern to simplify it. + +In additon, we can't use a bpf trace kprobe that has more than 5 +parameters. + +Link: https://lkml.kernel.org/r/20220908191443.129534-1-sj@kernel.org +Signed-off-by: Yajun Deng +Signed-off-by: SeongJae Park +Reviewed-by: SeongJae Park +Signed-off-by: Andrew Morton +Stable-dep-of: 95bc35f9bee5 ("mm/damon/sysfs: fix wrong empty schemes assumption under online tuning in damon_sysfs_set_schemes()") +Signed-off-by: Sasha Levin +--- + include/linux/damon.h | 37 ++++++++++++++++++---------------- + mm/damon/core.c | 31 ++++++++++++++--------------- + mm/damon/dbgfs.c | 27 +++++++++++++++---------- + mm/damon/lru_sort.c | 46 ++++++++++++++++++++++++++----------------- + mm/damon/reclaim.c | 23 +++++++++++++--------- + mm/damon/sysfs.c | 17 +++++++++++----- + 6 files changed, 106 insertions(+), 75 deletions(-) + +diff --git a/include/linux/damon.h b/include/linux/damon.h +index 7b1f4a488230..98e622c34d44 100644 +--- a/include/linux/damon.h ++++ b/include/linux/damon.h +@@ -216,13 +216,26 @@ struct damos_stat { + }; + + /** +- * struct damos - Represents a Data Access Monitoring-based Operation Scheme. ++ * struct damos_access_pattern - Target access pattern of the given scheme. 
+ * @min_sz_region: Minimum size of target regions. + * @max_sz_region: Maximum size of target regions. + * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. + * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. + * @min_age_region: Minimum age of target regions. + * @max_age_region: Maximum age of target regions. ++ */ ++struct damos_access_pattern { ++ unsigned long min_sz_region; ++ unsigned long max_sz_region; ++ unsigned int min_nr_accesses; ++ unsigned int max_nr_accesses; ++ unsigned int min_age_region; ++ unsigned int max_age_region; ++}; ++ ++/** ++ * struct damos - Represents a Data Access Monitoring-based Operation Scheme. ++ * @pattern: Access pattern of target regions. + * @action: &damo_action to be applied to the target regions. + * @quota: Control the aggressiveness of this scheme. + * @wmarks: Watermarks for automated (in)activation of this scheme. +@@ -230,10 +243,8 @@ struct damos_stat { + * @list: List head for siblings. + * + * For each aggregation interval, DAMON finds regions which fit in the +- * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, +- * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to +- * those. To avoid consuming too much CPU time or IO resources for the +- * &action, &quota is used. ++ * &pattern and applies &action to those. To avoid consuming too much ++ * CPU time or IO resources for the &action, &quota is used. + * + * To do the work only when needed, schemes can be activated for specific + * system situations using &wmarks. If all schemes that registered to the +@@ -248,12 +259,7 @@ struct damos_stat { + * &action is applied. 
+ */ + struct damos { +- unsigned long min_sz_region; +- unsigned long max_sz_region; +- unsigned int min_nr_accesses; +- unsigned int max_nr_accesses; +- unsigned int min_age_region; +- unsigned int max_age_region; ++ struct damos_access_pattern pattern; + enum damos_action action; + struct damos_quota quota; + struct damos_watermarks wmarks; +@@ -501,12 +507,9 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t); + int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, + unsigned int nr_ranges); + +-struct damos *damon_new_scheme( +- unsigned long min_sz_region, unsigned long max_sz_region, +- unsigned int min_nr_accesses, unsigned int max_nr_accesses, +- unsigned int min_age_region, unsigned int max_age_region, +- enum damos_action action, struct damos_quota *quota, +- struct damos_watermarks *wmarks); ++struct damos *damon_new_scheme(struct damos_access_pattern *pattern, ++ enum damos_action action, struct damos_quota *quota, ++ struct damos_watermarks *wmarks); + void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); + void damon_destroy_scheme(struct damos *s); + +diff --git a/mm/damon/core.c b/mm/damon/core.c +index 7d25dc582fe3..7d5a9ae6f4ac 100644 +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -230,24 +230,21 @@ int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, + return 0; + } + +-struct damos *damon_new_scheme( +- unsigned long min_sz_region, unsigned long max_sz_region, +- unsigned int min_nr_accesses, unsigned int max_nr_accesses, +- unsigned int min_age_region, unsigned int max_age_region, +- enum damos_action action, struct damos_quota *quota, +- struct damos_watermarks *wmarks) ++struct damos *damon_new_scheme(struct damos_access_pattern *pattern, ++ enum damos_action action, struct damos_quota *quota, ++ struct damos_watermarks *wmarks) + { + struct damos *scheme; + + scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); + if (!scheme) + return NULL; +- 
scheme->min_sz_region = min_sz_region; +- scheme->max_sz_region = max_sz_region; +- scheme->min_nr_accesses = min_nr_accesses; +- scheme->max_nr_accesses = max_nr_accesses; +- scheme->min_age_region = min_age_region; +- scheme->max_age_region = max_age_region; ++ scheme->pattern.min_sz_region = pattern->min_sz_region; ++ scheme->pattern.max_sz_region = pattern->max_sz_region; ++ scheme->pattern.min_nr_accesses = pattern->min_nr_accesses; ++ scheme->pattern.max_nr_accesses = pattern->max_nr_accesses; ++ scheme->pattern.min_age_region = pattern->min_age_region; ++ scheme->pattern.max_age_region = pattern->max_age_region; + scheme->action = action; + scheme->stat = (struct damos_stat){}; + INIT_LIST_HEAD(&scheme->list); +@@ -667,10 +664,12 @@ static bool __damos_valid_target(struct damon_region *r, struct damos *s) + unsigned long sz; + + sz = r->ar.end - r->ar.start; +- return s->min_sz_region <= sz && sz <= s->max_sz_region && +- s->min_nr_accesses <= r->nr_accesses && +- r->nr_accesses <= s->max_nr_accesses && +- s->min_age_region <= r->age && r->age <= s->max_age_region; ++ return s->pattern.min_sz_region <= sz && ++ sz <= s->pattern.max_sz_region && ++ s->pattern.min_nr_accesses <= r->nr_accesses && ++ r->nr_accesses <= s->pattern.max_nr_accesses && ++ s->pattern.min_age_region <= r->age && ++ r->age <= s->pattern.max_age_region; + } + + static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, +diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c +index dafe7e71329b..61214cb9a5d3 100644 +--- a/mm/damon/dbgfs.c ++++ b/mm/damon/dbgfs.c +@@ -131,9 +131,12 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) + damon_for_each_scheme(s, c) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", +- s->min_sz_region, s->max_sz_region, +- s->min_nr_accesses, s->max_nr_accesses, +- s->min_age_region, s->max_age_region, ++ s->pattern.min_sz_region, ++ 
+ s->pattern.max_sz_region, ++ s->pattern.min_nr_accesses, ++ s->pattern.max_nr_accesses, ++ s->pattern.min_age_region, ++ s->pattern.max_age_region, + damos_action_to_dbgfs_scheme_action(s->action), + s->quota.ms, s->quota.sz, + s->quota.reset_interval, +@@ -221,8 +224,6 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, + struct damos *scheme, **schemes; + const int max_nr_schemes = 256; + int pos = 0, parsed, ret; +- unsigned long min_sz, max_sz; +- unsigned int min_nr_a, max_nr_a, min_age, max_age; + unsigned int action_input; + enum damos_action action; + +@@ -233,13 +234,18 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, + + *nr_schemes = 0; + while (pos < len && *nr_schemes < max_nr_schemes) { ++ struct damos_access_pattern pattern = {}; + struct damos_quota quota = {}; + struct damos_watermarks wmarks; + + ret = sscanf(&str[pos], + "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n", +- &min_sz, &max_sz, &min_nr_a, &max_nr_a, +- &min_age, &max_age, &action_input, &quota.ms, ++ &pattern.min_sz_region, &pattern.max_sz_region, ++ &pattern.min_nr_accesses, ++ &pattern.max_nr_accesses, ++ &pattern.min_age_region, ++ &pattern.max_age_region, ++ &action_input, &quota.ms, + &quota.sz, &quota.reset_interval, + &quota.weight_sz, &quota.weight_nr_accesses, + &quota.weight_age, &wmarks.metric, +@@ -251,7 +257,9 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, + if ((int)action < 0) + goto fail; + +- if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age) ++ if (pattern.min_sz_region > pattern.max_sz_region || ++ pattern.min_nr_accesses > pattern.max_nr_accesses || ++ pattern.min_age_region > pattern.max_age_region) + goto fail; + + if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low || +@@ -259,8 +267,7 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, + goto fail; + + pos += parsed; +- scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, +- min_age, max_age, action, &quota, &wmarks); 
++ scheme = damon_new_scheme(&pattern, action, &quota, &wmarks); + if (!scheme) + goto fail; + +diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c +index 9de6f00a71c5..0184ed4828b7 100644 +--- a/mm/damon/lru_sort.c ++++ b/mm/damon/lru_sort.c +@@ -293,6 +293,17 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end) + /* Create a DAMON-based operation scheme for hot memory regions */ + static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres) + { ++ struct damos_access_pattern pattern = { ++ /* Find regions having PAGE_SIZE or larger size */ ++ .min_sz_region = PAGE_SIZE, ++ .max_sz_region = ULONG_MAX, ++ /* and accessed for more than the threshold */ ++ .min_nr_accesses = hot_thres, ++ .max_nr_accesses = UINT_MAX, ++ /* no matter its age */ ++ .min_age_region = 0, ++ .max_age_region = UINT_MAX, ++ }; + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, +@@ -313,26 +324,31 @@ static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres) + .weight_nr_accesses = 1, + .weight_age = 0, + }; +- struct damos *scheme = damon_new_scheme( +- /* Find regions having PAGE_SIZE or larger size */ +- PAGE_SIZE, ULONG_MAX, +- /* and accessed for more than the threshold */ +- hot_thres, UINT_MAX, +- /* no matter its age */ +- 0, UINT_MAX, ++ ++ return damon_new_scheme( ++ &pattern, + /* prioritize those on LRU lists, as soon as found */ + DAMOS_LRU_PRIO, + /* under the quota. */ + &quota, + /* (De)activate this according to the watermarks. 
*/ + &wmarks); +- +- return scheme; + } + + /* Create a DAMON-based operation scheme for cold memory regions */ + static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) + { ++ struct damos_access_pattern pattern = { ++ /* Find regions having PAGE_SIZE or larger size */ ++ .min_sz_region = PAGE_SIZE, ++ .max_sz_region = ULONG_MAX, ++ /* and not accessed at all */ ++ .min_nr_accesses = 0, ++ .max_nr_accesses = 0, ++ /* for min_age or more micro-seconds */ ++ .min_age_region = cold_thres, ++ .max_age_region = UINT_MAX, ++ }; + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, +@@ -354,21 +370,15 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) + .weight_nr_accesses = 0, + .weight_age = 1, + }; +- struct damos *scheme = damon_new_scheme( +- /* Find regions having PAGE_SIZE or larger size */ +- PAGE_SIZE, ULONG_MAX, +- /* and not accessed at all */ +- 0, 0, +- /* for cold_thres or more micro-seconds, and */ +- cold_thres, UINT_MAX, ++ ++ return damon_new_scheme( ++ &pattern, + /* mark those as not accessed, as soon as found */ + DAMOS_LRU_DEPRIO, + /* under the quota. */ + &quota, + /* (De)activate this according to the watermarks. 
*/ + &wmarks); +- +- return scheme; + } + + static int damon_lru_sort_apply_parameters(void) +diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c +index a7faf51b4bd4..5aeca0b9e88e 100644 +--- a/mm/damon/reclaim.c ++++ b/mm/damon/reclaim.c +@@ -264,6 +264,17 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end) + + static struct damos *damon_reclaim_new_scheme(void) + { ++ struct damos_access_pattern pattern = { ++ /* Find regions having PAGE_SIZE or larger size */ ++ .min_sz_region = PAGE_SIZE, ++ .max_sz_region = ULONG_MAX, ++ /* and not accessed at all */ ++ .min_nr_accesses = 0, ++ .max_nr_accesses = 0, ++ /* for min_age or more micro-seconds */ ++ .min_age_region = min_age / aggr_interval, ++ .max_age_region = UINT_MAX, ++ }; + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, +@@ -284,21 +295,15 @@ static struct damos *damon_reclaim_new_scheme(void) + .weight_nr_accesses = 0, + .weight_age = 1 + }; +- struct damos *scheme = damon_new_scheme( +- /* Find regions having PAGE_SIZE or larger size */ +- PAGE_SIZE, ULONG_MAX, +- /* and not accessed at all */ +- 0, 0, +- /* for min_age or more micro-seconds, and */ +- min_age / aggr_interval, UINT_MAX, ++ ++ return damon_new_scheme( ++ &pattern, + /* page out those, as soon as found */ + DAMOS_PAGEOUT, + /* under the quota. */ + &quota, + /* (De)activate this according to the watermarks. 
*/ + &wmarks); +- +- return scheme; + } + + static int damon_reclaim_apply_parameters(void) +diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c +index b4b9614eecbe..ec88644c51df 100644 +--- a/mm/damon/sysfs.c ++++ b/mm/damon/sysfs.c +@@ -2259,11 +2259,20 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx, + static struct damos *damon_sysfs_mk_scheme( + struct damon_sysfs_scheme *sysfs_scheme) + { +- struct damon_sysfs_access_pattern *pattern = ++ struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; ++ ++ struct damos_access_pattern pattern = { ++ .min_sz_region = access_pattern->sz->min, ++ .max_sz_region = access_pattern->sz->max, ++ .min_nr_accesses = access_pattern->nr_accesses->min, ++ .max_nr_accesses = access_pattern->nr_accesses->max, ++ .min_age_region = access_pattern->age->min, ++ .max_age_region = access_pattern->age->max, ++ }; + struct damos_quota quota = { + .ms = sysfs_quotas->ms, + .sz = sysfs_quotas->sz, +@@ -2280,10 +2289,8 @@ static struct damos *damon_sysfs_mk_scheme( + .low = sysfs_wmarks->low, + }; + +- return damon_new_scheme(pattern->sz->min, pattern->sz->max, +- pattern->nr_accesses->min, pattern->nr_accesses->max, +- pattern->age->min, pattern->age->max, +- sysfs_scheme->action, &quota, &wmarks); ++ return damon_new_scheme(&pattern, sysfs_scheme->action, &quota, ++ &wmarks); + } + + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, +-- +2.35.1 + diff --git a/queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch b/queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch new file mode 100644 index 00000000000..d36df8ece48 --- /dev/null +++ b/queue-6.0/mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch @@ -0,0 +1,97 @@ +From bbe9e58d36e6734175c00c7d6975f8fd71d2dbdf 
Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Nov 2022 19:48:31 +0000 +Subject: mm/damon/sysfs: fix wrong empty schemes assumption under online + tuning in damon_sysfs_set_schemes() + +From: SeongJae Park + +[ Upstream commit 95bc35f9bee5220dad4e8567654ab3288a181639 ] + +Commit da87878010e5 ("mm/damon/sysfs: support online inputs update") made +'damon_sysfs_set_schemes()' to be called for running DAMON context, which +could have schemes. In the case, DAMON sysfs interface is supposed to +update, remove, or add schemes to reflect the sysfs files. However, the +code is assuming the DAMON context wouldn't have schemes at all, and +therefore creates and adds new schemes. As a result, the code doesn't +work as intended for online schemes tuning and could have more than +expected memory footprint. The schemes are all in the DAMON context, so +it doesn't leak the memory, though. + +Remove the wrong assumption (the DAMON context wouldn't have schemes) in +'damon_sysfs_set_schemes()' to fix the bug. 
+ +Link: https://lkml.kernel.org/r/20221122194831.3472-1-sj@kernel.org +Fixes: da87878010e5 ("mm/damon/sysfs: support online inputs update") +Signed-off-by: SeongJae Park +Cc: [5.19+] +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/damon/sysfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 44 insertions(+), 2 deletions(-) + +diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c +index ec88644c51df..1b782ca41396 100644 +--- a/mm/damon/sysfs.c ++++ b/mm/damon/sysfs.c +@@ -2293,12 +2293,54 @@ static struct damos *damon_sysfs_mk_scheme( + &wmarks); + } + ++static void damon_sysfs_update_scheme(struct damos *scheme, ++ struct damon_sysfs_scheme *sysfs_scheme) ++{ ++ struct damon_sysfs_access_pattern *access_pattern = ++ sysfs_scheme->access_pattern; ++ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; ++ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; ++ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; ++ ++ scheme->pattern.min_sz_region = access_pattern->sz->min; ++ scheme->pattern.max_sz_region = access_pattern->sz->max; ++ scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; ++ scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; ++ scheme->pattern.min_age_region = access_pattern->age->min; ++ scheme->pattern.max_age_region = access_pattern->age->max; ++ ++ scheme->action = sysfs_scheme->action; ++ ++ scheme->quota.ms = sysfs_quotas->ms; ++ scheme->quota.sz = sysfs_quotas->sz; ++ scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; ++ scheme->quota.weight_sz = sysfs_weights->sz; ++ scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; ++ scheme->quota.weight_age = sysfs_weights->age; ++ ++ scheme->wmarks.metric = sysfs_wmarks->metric; ++ scheme->wmarks.interval = sysfs_wmarks->interval_us; ++ scheme->wmarks.high = sysfs_wmarks->high; ++ scheme->wmarks.mid = sysfs_wmarks->mid; ++ scheme->wmarks.low = sysfs_wmarks->low; ++} 
++ + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, + struct damon_sysfs_schemes *sysfs_schemes) + { +- int i; ++ struct damos *scheme, *next; ++ int i = 0; ++ ++ damon_for_each_scheme_safe(scheme, next, ctx) { ++ if (i < sysfs_schemes->nr) ++ damon_sysfs_update_scheme(scheme, ++ sysfs_schemes->schemes_arr[i]); ++ else ++ damon_destroy_scheme(scheme); ++ i++; ++ } + +- for (i = 0; i < sysfs_schemes->nr; i++) { ++ for (; i < sysfs_schemes->nr; i++) { + struct damos *scheme, *next; + + scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); +-- +2.35.1 + diff --git a/queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch b/queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch new file mode 100644 index 00000000000..f643c02cc7d --- /dev/null +++ b/queue-6.0/nvme-fix-srcu-protection-of-nvme_ns_head-list.patch @@ -0,0 +1,104 @@ +From 6b41b416b4dba8fdef2afecfa10eb7c523ccc31f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 16:27:56 -0700 +Subject: nvme: fix SRCU protection of nvme_ns_head list + +From: Caleb Sander + +[ Upstream commit 899d2a05dc14733cfba6224083c6b0dd5a738590 ] + +Walking the nvme_ns_head siblings list is protected by the head's srcu +in nvme_ns_head_submit_bio() but not nvme_mpath_revalidate_paths(). +Removing namespaces from the list also fails to synchronize the srcu. +Concurrent scan work can therefore cause use-after-frees. + +Hold the head's srcu lock in nvme_mpath_revalidate_paths() and +synchronize with the srcu, not the global RCU, in nvme_ns_remove(). + +Observed the following panic when making NVMe/RDMA connections +with native multipath on the Rocky Linux 8.6 kernel +(it seems the upstream kernel has the same race condition). +Disassembly shows the faulting instruction is cmp 0x50(%rdx),%rcx; +computing capacity != get_capacity(ns->disk). +Address 0x50 is dereferenced because ns->disk is NULL. 
+The NULL disk appears to be the result of concurrent scan work +freeing the namespace (note the log line in the middle of the panic). + +[37314.206036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 +[37314.206036] nvme0n3: detected capacity change from 0 to 11811160064 +[37314.299753] PGD 0 P4D 0 +[37314.299756] Oops: 0000 [#1] SMP PTI +[37314.299759] CPU: 29 PID: 322046 Comm: kworker/u98:3 Kdump: loaded Tainted: G W X --------- - - 4.18.0-372.32.1.el8test86.x86_64 #1 +[37314.299762] Hardware name: Dell Inc. PowerEdge R720/0JP31P, BIOS 2.7.0 05/23/2018 +[37314.299763] Workqueue: nvme-wq nvme_scan_work [nvme_core] +[37314.299783] RIP: 0010:nvme_mpath_revalidate_paths+0x26/0xb0 [nvme_core] +[37314.299790] Code: 1f 44 00 00 66 66 66 66 90 55 53 48 8b 5f 50 48 8b 83 c8 c9 00 00 48 8b 13 48 8b 48 50 48 39 d3 74 20 48 8d 42 d0 48 8b 50 20 <48> 3b 4a 50 74 05 f0 80 60 70 ef 48 8b 50 30 48 8d 42 d0 48 39 d3 +[37315.058803] RSP: 0018:ffffabe28f913d10 EFLAGS: 00010202 +[37315.121316] RAX: ffff927a077da800 RBX: ffff92991dd70000 RCX: 0000000001600000 +[37315.206704] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff92991b719800 +[37315.292106] RBP: ffff929a6b70c000 R08: 000000010234cd4a R09: c0000000ffff7fff +[37315.377501] R10: 0000000000000001 R11: ffffabe28f913a30 R12: 0000000000000000 +[37315.462889] R13: ffff92992716600c R14: ffff929964e6e030 R15: ffff92991dd70000 +[37315.548286] FS: 0000000000000000(0000) GS:ffff92b87fb80000(0000) knlGS:0000000000000000 +[37315.645111] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[37315.713871] CR2: 0000000000000050 CR3: 0000002208810006 CR4: 00000000000606e0 +[37315.799267] Call Trace: +[37315.828515] nvme_update_ns_info+0x1ac/0x250 [nvme_core] +[37315.892075] nvme_validate_or_alloc_ns+0x2ff/0xa00 [nvme_core] +[37315.961871] ? __blk_mq_free_request+0x6b/0x90 +[37316.015021] nvme_scan_work+0x151/0x240 [nvme_core] +[37316.073371] process_one_work+0x1a7/0x360 +[37316.121318] ? 
create_worker+0x1a0/0x1a0 +[37316.168227] worker_thread+0x30/0x390 +[37316.212024] ? create_worker+0x1a0/0x1a0 +[37316.258939] kthread+0x10a/0x120 +[37316.297557] ? set_kthread_struct+0x50/0x50 +[37316.347590] ret_from_fork+0x35/0x40 +[37316.390360] Modules linked in: nvme_rdma nvme_tcp(X) nvme_fabrics nvme_core netconsole iscsi_tcp libiscsi_tcp dm_queue_length dm_service_time nf_conntrack_netlink br_netfilter bridge stp llc overlay nft_chain_nat ipt_MASQUERADE nf_nat xt_addrtype xt_CT nft_counter xt_state xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment xt_multiport nft_compat nf_tables libcrc32c nfnetlink dm_multipath tg3 rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm intel_rapl_msr iTCO_wdt iTCO_vendor_support dcdbas intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel ib_uverbs rapl intel_cstate intel_uncore ib_core ipmi_si joydev mei_me pcspkr ipmi_devintf mei lpc_ich wmi ipmi_msghandler acpi_power_meter ext4 mbcache jbd2 sd_mod t10_pi sg mgag200 mlx5_core drm_kms_helper syscopyarea +[37316.390419] sysfillrect ahci sysimgblt fb_sys_fops libahci drm crc32c_intel libata mlxfw pci_hyperv_intf tls i2c_algo_bit psample dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: nvme_core] +[37317.645908] CR2: 0000000000000050 + +Fixes: e7d65803e2bb ("nvme-multipath: revalidate paths during rescan") +Signed-off-by: Caleb Sander +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/core.c | 2 +- + drivers/nvme/host/multipath.c | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 01c36284e542..f612a0ba64d0 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -4297,7 +4297,7 @@ static void 
nvme_ns_remove(struct nvme_ns *ns) + mutex_unlock(&ns->ctrl->subsys->lock); + + /* guarantee not available in head->list */ +- synchronize_rcu(); ++ synchronize_srcu(&ns->head->srcu); + + if (!nvme_ns_head_multipath(ns->head)) + nvme_cdev_del(&ns->cdev, &ns->cdev_device); +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index b9cf17cbbbd5..114e2b9359f8 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) + struct nvme_ns_head *head = ns->head; + sector_t capacity = get_capacity(head->disk); + int node; ++ int srcu_idx; + ++ srcu_idx = srcu_read_lock(&head->srcu); + list_for_each_entry_rcu(ns, &head->list, siblings) { + if (capacity != get_capacity(ns->disk)) + clear_bit(NVME_NS_READY, &ns->flags); + } ++ srcu_read_unlock(&head->srcu, srcu_idx); + + for_each_node(node) + rcu_assign_pointer(head->current_path[node], NULL); +-- +2.35.1 + diff --git a/queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch b/queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch new file mode 100644 index 00000000000..4733da89a84 --- /dev/null +++ b/queue-6.0/pinctrl-single-fix-potential-division-by-zero.patch @@ -0,0 +1,43 @@ +From e88a843fbe8f4c931ad7056d55a8f30d628af911 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Nov 2022 15:30:34 +0300 +Subject: pinctrl: single: Fix potential division by zero + +From: Maxim Korotkov + +[ Upstream commit 64c150339e7f6c5cbbe8c17a56ef2b3902612798 ] + +There is a possibility of dividing by zero due to the pcs->bits_per_pin +if pcs->fmask() also has a value of zero and called fls +from asm-generic/bitops/builtin-fls.h or arch/x86/include/asm/bitops.h. +The function pcs_probe() has the branch that assigned to fmask 0 before +pcs_allocate_pin_table() was called + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Fixes: 4e7e8017a80e ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules") +Signed-off-by: Maxim Korotkov +Reviewed-by: Tony Lindgren +Link: https://lore.kernel.org/r/20221117123034.27383-1-korotkov.maxim.s@gmail.com +Signed-off-by: Linus Walleij +Signed-off-by: Sasha Levin +--- + drivers/pinctrl/pinctrl-single.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c +index 67bec7ea0f8b..414ee6bb8ac9 100644 +--- a/drivers/pinctrl/pinctrl-single.c ++++ b/drivers/pinctrl/pinctrl-single.c +@@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) + + mux_bytes = pcs->width / BITS_PER_BYTE; + +- if (pcs->bits_per_mux) { ++ if (pcs->bits_per_mux && pcs->fmask) { + pcs->bits_per_pin = fls(pcs->fmask); + nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; + } else { +-- +2.35.1 + diff --git a/queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch b/queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch new file mode 100644 index 00000000000..945c117198c --- /dev/null +++ b/queue-6.0/riscv-fix-race-when-vmap-stack-overflow.patch @@ -0,0 +1,107 @@ +From e1afba7fb3f855473577799d525475085df2b466 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 30 Oct 2022 20:45:17 +0800 +Subject: riscv: fix race when vmap stack overflow + +From: Jisheng Zhang + +[ Upstream commit 7e1864332fbc1b993659eab7974da9fe8bf8c128 ] + +Currently, when detecting vmap stack overflow, riscv firstly switches +to the so called shadow stack, then use this shadow stack to call the +get_overflow_stack() to get the overflow stack. However, there's +a race here if two or more harts use the same shadow stack at the same +time. 
+ +To solve this race, we introduce spin_shadow_stack atomic var, which +will be swap between its own address and 0 in atomic way, when the +var is set, it means the shadow_stack is being used; when the var +is cleared, it means the shadow_stack isn't being used. + +Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") +Signed-off-by: Jisheng Zhang +Suggested-by: Guo Ren +Reviewed-by: Guo Ren +Link: https://lore.kernel.org/r/20221030124517.2370-1-jszhang@kernel.org +[Palmer: Add AQ to the swap, and also some comments.] +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/asm.h | 1 + + arch/riscv/kernel/entry.S | 13 +++++++++++++ + arch/riscv/kernel/traps.c | 18 ++++++++++++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h +index 1b471ff73178..816e753de636 100644 +--- a/arch/riscv/include/asm/asm.h ++++ b/arch/riscv/include/asm/asm.h +@@ -23,6 +23,7 @@ + #define REG_L __REG_SEL(ld, lw) + #define REG_S __REG_SEL(sd, sw) + #define REG_SC __REG_SEL(sc.d, sc.w) ++#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) + #define REG_ASM __REG_SEL(.dword, .word) + #define SZREG __REG_SEL(8, 4) + #define LGREG __REG_SEL(3, 2) +diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S +index b9eda3fcbd6d..186abd146eaf 100644 +--- a/arch/riscv/kernel/entry.S ++++ b/arch/riscv/kernel/entry.S +@@ -404,6 +404,19 @@ handle_syscall_trace_exit: + + #ifdef CONFIG_VMAP_STACK + handle_kernel_stack_overflow: ++ /* ++ * Takes the pseudo-spinlock for the shadow stack, in case multiple ++ * harts are concurrently overflowing their kernel stacks. We could ++ * store any value here, but since we're overflowing the kernel stack ++ * already we only have SP to use as a scratch register. So we just ++ * swap in the address of the spinlock, as that's definitely non-zero. ++ * ++ * Pairs with a store_release in handle_bad_stack(). 
++ */ ++1: la sp, spin_shadow_stack ++ REG_AMOSWAP_AQ sp, sp, (sp) ++ bnez sp, 1b ++ + la sp, shadow_stack + addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE + +diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c +index 635e6ec26938..6e8822446069 100644 +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -218,11 +218,29 @@ asmlinkage unsigned long get_overflow_stack(void) + OVERFLOW_STACK_SIZE; + } + ++/* ++ * A pseudo spinlock to protect the shadow stack from being used by multiple ++ * harts concurrently. This isn't a real spinlock because the lock side must ++ * be taken without a valid stack and only a single register, it's only taken ++ * while in the process of panicking anyway so the performance and error ++ * checking a proper spinlock gives us doesn't matter. ++ */ ++unsigned long spin_shadow_stack; ++ + asmlinkage void handle_bad_stack(struct pt_regs *regs) + { + unsigned long tsk_stk = (unsigned long)current->stack; + unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + ++ /* ++ * We're done with the shadow stack by this point, as we're on the ++ * overflow stack. Tell any other concurrent overflowing harts that ++ * they can proceed with panicking by releasing the pseudo-spinlock. ++ * ++ * This pairs with an amoswap.aq in handle_kernel_stack_overflow. 
++ */ ++ smp_store_release(&spin_shadow_stack, 0); ++ + console_verbose(); + + pr_emerg("Insufficient stack space to handle exception!\n"); +-- +2.35.1 + diff --git a/queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch b/queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch new file mode 100644 index 00000000000..b3806ee1eba --- /dev/null +++ b/queue-6.0/riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch @@ -0,0 +1,314 @@ +From 067905b58e2bdce609e031dc3882039a2482fdac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 10:16:03 -0400 +Subject: riscv: kexec: Fixup crash_smp_send_stop without multi cores + +From: Guo Ren + +[ Upstream commit 9b932aadfc47de5d70b53ea04b0d1b5f6c82945b ] + +Current crash_smp_send_stop is the same as the generic one in +kernel/panic and misses crash_save_cpu in percpu. This patch is inspired +by 78fd584cdec0 ("arm64: kdump: implement machine_crash_shutdown()") +and adds the same mechanism for riscv. + +Before this patch, test result: +crash> help -r +CPU 0: [OFFLINE] + +CPU 1: +epc : ffffffff80009ff0 ra : ffffffff800b789a sp : ff2000001098bb40 + gp : ffffffff815fca60 tp : ff60000004680000 t0 : 6666666666663c5b + t1 : 0000000000000000 t2 : 666666666666663c s0 : ff2000001098bc90 + s1 : ffffffff81600798 a0 : ff2000001098bb48 a1 : 0000000000000000 + a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 + a5 : ff60000004690800 a6 : 0000000000000000 a7 : 0000000000000000 + s2 : ff2000001098bb48 s3 : ffffffff81093ec8 s4 : ffffffff816004ac + s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80e7f720 + s8 : 00fffffffffff3f0 s9 : 0000000000000007 s10: 00aaaaaaaab98700 + s11: 0000000000000001 t3 : ffffffff819a8097 t4 : ffffffff819a8097 + t5 : ffffffff819a8098 t6 : ff2000001098b9a8 + +CPU 2: [OFFLINE] + +CPU 3: [OFFLINE] + +After this patch, test result: +crash> help -r +CPU 0: +epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ffffffff81403eb0 + gp : ffffffff815fcb48 tp : 
ffffffff81413400 t0 : 0000000000000000 + t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffff81403ec0 + s1 : 0000000000000000 a0 : 0000000000000000 a1 : 0000000000000000 + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 + a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 + s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18 + s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000 + s8 : 0000000000000000 s9 : 0000000080039eac s10: 0000000000000000 + s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000 + t5 : 0000000000000000 t6 : 0000000000000000 + +CPU 1: +epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ff2000000068bf30 + gp : ffffffff815fcb48 tp : ff6000000240d400 t0 : 0000000000000000 + t1 : 0000000000000000 t2 : 0000000000000000 s0 : ff2000000068bf40 + s1 : 0000000000000001 a0 : 0000000000000000 a1 : 0000000000000000 + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 + a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 + s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18 + s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000 + s8 : 0000000000000000 s9 : 0000000080039ea8 s10: 0000000000000000 + s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000 + t5 : 0000000000000000 t6 : 0000000000000000 + +CPU 2: +epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ff20000000693f30 + gp : ffffffff815fcb48 tp : ff6000000240e900 t0 : 0000000000000000 + t1 : 0000000000000000 t2 : 0000000000000000 s0 : ff20000000693f40 + s1 : 0000000000000002 a0 : 0000000000000000 a1 : 0000000000000000 + a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 + a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 + s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18 + s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000 + s8 : 0000000000000000 s9 : 0000000080039eb0 s10: 0000000000000000 + s11: 
0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000 + t5 : 0000000000000000 t6 : 0000000000000000 + +CPU 3: +epc : ffffffff8000a1e4 ra : ffffffff800b7bba sp : ff200000109bbb40 + gp : ffffffff815fcb48 tp : ff6000000373aa00 t0 : 6666666666663c5b + t1 : 0000000000000000 t2 : 666666666666663c s0 : ff200000109bbc90 + s1 : ffffffff816007a0 a0 : ff200000109bbb48 a1 : 0000000000000000 + a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 + a5 : ff60000002c61c00 a6 : 0000000000000000 a7 : 0000000000000000 + s2 : ff200000109bbb48 s3 : ffffffff810941a8 s4 : ffffffff816004b4 + s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80e7f7a0 + s8 : 00fffffffffff3f0 s9 : 0000000000000007 s10: 00aaaaaaaab98700 + s11: 0000000000000001 t3 : ffffffff819a8097 t4 : ffffffff819a8097 + t5 : ffffffff819a8098 t6 : ff200000109bb9a8 + +Fixes: ad943893d5f1 ("RISC-V: Fixup schedule out issue in machine_crash_shutdown()") +Reviewed-by: Xianting Tian +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Cc: Nick Kossifidis +Link: https://lore.kernel.org/r/20221020141603.2856206-3-guoren@kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/smp.h | 3 + + arch/riscv/kernel/machine_kexec.c | 21 ++----- + arch/riscv/kernel/smp.c | 97 ++++++++++++++++++++++++++++++- + 3 files changed, 103 insertions(+), 18 deletions(-) + +diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h +index d3443be7eedc..3831b638ecab 100644 +--- a/arch/riscv/include/asm/smp.h ++++ b/arch/riscv/include/asm/smp.h +@@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); + /* Clear IPI for current CPU */ + void riscv_clear_ipi(void); + ++/* Check other CPUs stop or not */ ++bool smp_crash_stop_failed(void); ++ + /* Secondary hart entry */ + asmlinkage void smp_callin(void); + +diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c +index db41c676e5a2..2d139b724bc8 100644 +--- 
a/arch/riscv/kernel/machine_kexec.c ++++ b/arch/riscv/kernel/machine_kexec.c +@@ -140,22 +140,6 @@ void machine_shutdown(void) + #endif + } + +-/* Override the weak function in kernel/panic.c */ +-void crash_smp_send_stop(void) +-{ +- static int cpus_stopped; +- +- /* +- * This function can be called twice in panic path, but obviously +- * we execute this only once. +- */ +- if (cpus_stopped) +- return; +- +- smp_send_stop(); +- cpus_stopped = 1; +-} +- + static void machine_kexec_mask_interrupts(void) + { + unsigned int i; +@@ -230,6 +214,11 @@ machine_kexec(struct kimage *image) + void *control_code_buffer = page_address(image->control_code_page); + riscv_kexec_method kexec_method = NULL; + ++#ifdef CONFIG_SMP ++ WARN(smp_crash_stop_failed(), ++ "Some CPUs may be stale, kdump will be unreliable.\n"); ++#endif ++ + if (image->type != KEXEC_TYPE_CRASH) + kexec_method = control_code_buffer; + else +diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c +index 760a64518c58..8c3b59f1f9b8 100644 +--- a/arch/riscv/kernel/smp.c ++++ b/arch/riscv/kernel/smp.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -22,11 +23,13 @@ + #include + #include + #include ++#include + + enum ipi_message_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CPU_STOP, ++ IPI_CPU_CRASH_STOP, + IPI_IRQ_WORK, + IPI_TIMER, + IPI_MAX +@@ -71,6 +74,32 @@ static void ipi_stop(void) + wait_for_interrupt(); + } + ++#ifdef CONFIG_KEXEC_CORE ++static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); ++ ++static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) ++{ ++ crash_save_cpu(regs, cpu); ++ ++ atomic_dec(&waiting_for_crash_ipi); ++ ++ local_irq_disable(); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ if (cpu_has_hotplug(cpu)) ++ cpu_ops[cpu]->cpu_stop(); ++#endif ++ ++ for(;;) ++ wait_for_interrupt(); ++} ++#else ++static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) ++{ ++ unreachable(); ++} ++#endif ++ + 
static const struct riscv_ipi_ops *ipi_ops __ro_after_init; + + void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) +@@ -124,8 +153,9 @@ void arch_irq_work_raise(void) + + void handle_IPI(struct pt_regs *regs) + { +- unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; +- unsigned long *stats = ipi_data[smp_processor_id()].stats; ++ unsigned int cpu = smp_processor_id(); ++ unsigned long *pending_ipis = &ipi_data[cpu].bits; ++ unsigned long *stats = ipi_data[cpu].stats; + + riscv_clear_ipi(); + +@@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) + ipi_stop(); + } + ++ if (ops & (1 << IPI_CPU_CRASH_STOP)) { ++ ipi_cpu_crash_stop(cpu, get_irq_regs()); ++ } ++ + if (ops & (1 << IPI_IRQ_WORK)) { + stats[IPI_IRQ_WORK]++; + irq_work_run(); +@@ -176,6 +210,7 @@ static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", ++ [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + }; +@@ -235,6 +270,64 @@ void smp_send_stop(void) + cpumask_pr_args(cpu_online_mask)); + } + ++#ifdef CONFIG_KEXEC_CORE ++/* ++ * The number of CPUs online, not counting this CPU (which may not be ++ * fully online and so not counted in num_online_cpus()). ++ */ ++static inline unsigned int num_other_online_cpus(void) ++{ ++ unsigned int this_cpu_online = cpu_online(smp_processor_id()); ++ ++ return num_online_cpus() - this_cpu_online; ++} ++ ++void crash_smp_send_stop(void) ++{ ++ static int cpus_stopped; ++ cpumask_t mask; ++ unsigned long timeout; ++ ++ /* ++ * This function can be called twice in panic path, but obviously ++ * we execute this only once. 
++ */ ++ if (cpus_stopped) ++ return; ++ ++ cpus_stopped = 1; ++ ++ /* ++ * If this cpu is the only one alive at this point in time, online or ++ * not, there are no stop messages to be sent around, so just back out. ++ */ ++ if (num_other_online_cpus() == 0) ++ return; ++ ++ cpumask_copy(&mask, cpu_online_mask); ++ cpumask_clear_cpu(smp_processor_id(), &mask); ++ ++ atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); ++ ++ pr_crit("SMP: stopping secondary CPUs\n"); ++ send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); ++ ++ /* Wait up to one second for other CPUs to stop */ ++ timeout = USEC_PER_SEC; ++ while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) ++ udelay(1); ++ ++ if (atomic_read(&waiting_for_crash_ipi) > 0) ++ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", ++ cpumask_pr_args(&mask)); ++} ++ ++bool smp_crash_stop_failed(void) ++{ ++ return (atomic_read(&waiting_for_crash_ipi) > 0); ++} ++#endif ++ + void smp_send_reschedule(int cpu) + { + send_ipi_single(cpu, IPI_RESCHEDULE); +-- +2.35.1 + diff --git a/queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch b/queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch new file mode 100644 index 00000000000..22492b0acc5 --- /dev/null +++ b/queue-6.0/riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch @@ -0,0 +1,93 @@ +From d939a47576f17848db80adf2307553fe5a982a6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Oct 2022 10:16:02 -0400 +Subject: riscv: kexec: Fixup irq controller broken in kexec crash path + +From: Guo Ren + +[ Upstream commit b17d19a5314a37f7197afd1a0200affd21a7227d ] + +If a crash happens on cpu3 and all interrupts are binding on cpu0, the +bad irq routing will cause a crash kernel which can't receive any irq. +Because crash kernel won't clean up all harts' PLIC enable bits in +enable registers. 
This patch is similar to 9141a003a491 ("ARM: 7316/1: +kexec: EOI active and mask all interrupts in kexec crash path") and +78fd584cdec0 ("arm64: kdump: implement machine_crash_shutdown()"), and +PowerPC also has the same mechanism. + +Fixes: fba8a8674f68 ("RISC-V: Add kexec support") +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Reviewed-by: Xianting Tian +Cc: Nick Kossifidis +Cc: Palmer Dabbelt +Link: https://lore.kernel.org/r/20221020141603.2856206-2-guoren@kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/machine_kexec.c | 35 +++++++++++++++++++++++++++++++ + 1 file changed, 35 insertions(+) + +diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c +index ee79e6839b86..db41c676e5a2 100644 +--- a/arch/riscv/kernel/machine_kexec.c ++++ b/arch/riscv/kernel/machine_kexec.c +@@ -15,6 +15,8 @@ + #include /* For unreachable() */ + #include /* For cpu_down() */ + #include ++#include ++#include + + /* + * kexec_image_info - Print received image details +@@ -154,6 +156,37 @@ void crash_smp_send_stop(void) + cpus_stopped = 1; + } + ++static void machine_kexec_mask_interrupts(void) ++{ ++ unsigned int i; ++ struct irq_desc *desc; ++ ++ for_each_irq_desc(i, desc) { ++ struct irq_chip *chip; ++ int ret; ++ ++ chip = irq_desc_get_chip(desc); ++ if (!chip) ++ continue; ++ ++ /* ++ * First try to remove the active state. If this ++ * fails, try to EOI the interrupt. 
++ */ ++ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); ++ ++ if (ret && irqd_irq_inprogress(&desc->irq_data) && ++ chip->irq_eoi) ++ chip->irq_eoi(&desc->irq_data); ++ ++ if (chip->irq_mask) ++ chip->irq_mask(&desc->irq_data); ++ ++ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) ++ chip->irq_disable(&desc->irq_data); ++ } ++} ++ + /* + * machine_crash_shutdown - Prepare to kexec after a kernel crash + * +@@ -169,6 +202,8 @@ machine_crash_shutdown(struct pt_regs *regs) + crash_smp_send_stop(); + + crash_save_cpu(regs, smp_processor_id()); ++ machine_kexec_mask_interrupts(); ++ + pr_info("Starting crashdump kernel...\n"); + } + +-- +2.35.1 + diff --git a/queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch b/queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch new file mode 100644 index 00000000000..57874e1adcf --- /dev/null +++ b/queue-6.0/riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch @@ -0,0 +1,87 @@ +From ad0a0ee0e825007e443bf95519c4c9a22dcd8d89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Nov 2022 14:33:03 +0100 +Subject: riscv: Sync efi page table's kernel mappings before switching + +From: Alexandre Ghiti + +[ Upstream commit 3f105a742725a1b78766a55169f1d827732e62b8 ] + +The EFI page table is initially created as a copy of the kernel page table. +With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area: +if the stack is allocated in a new PGD (one that was not present at the +moment of the efi page table creation or not synced in a previous vmalloc +fault), the kernel will take a trap when switching to the efi page table +when the vmalloc kernel stack is accessed, resulting in a kernel panic. + +Fix that by updating the efi kernel mappings before switching to the efi +page table. 
+ +Signed-off-by: Alexandre Ghiti +Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services") +Tested-by: Emil Renner Berthing +Reviewed-by: Atish Patra +Link: https://lore.kernel.org/r/20221121133303.1782246-1-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/efi.h | 6 +++++- + arch/riscv/include/asm/pgalloc.h | 11 ++++++++--- + 2 files changed, 13 insertions(+), 4 deletions(-) + +diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h +index f74879a8f1ea..e229d7be4b66 100644 +--- a/arch/riscv/include/asm/efi.h ++++ b/arch/riscv/include/asm/efi.h +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_EFI + extern void efi_init(void); +@@ -20,7 +21,10 @@ extern void efi_init(void); + int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + +-#define arch_efi_call_virt_setup() efi_virtmap_load() ++#define arch_efi_call_virt_setup() ({ \ ++ sync_kernel_mappings(efi_mm.pgd); \ ++ efi_virtmap_load(); \ ++ }) + #define arch_efi_call_virt_teardown() efi_virtmap_unload() + + #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) +diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h +index 947f23d7b6af..59dc12b5b7e8 100644 +--- a/arch/riscv/include/asm/pgalloc.h ++++ b/arch/riscv/include/asm/pgalloc.h +@@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) + #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) + #endif /* __PAGETABLE_PMD_FOLDED */ + ++static inline void sync_kernel_mappings(pgd_t *pgd) ++{ ++ memcpy(pgd + USER_PTRS_PER_PGD, ++ init_mm.pgd + USER_PTRS_PER_PGD, ++ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); ++} ++ + static inline pgd_t *pgd_alloc(struct mm_struct *mm) + { + pgd_t *pgd; +@@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) + if (likely(pgd != NULL)) { + 
memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + /* Copy kernel mappings */ +- memcpy(pgd + USER_PTRS_PER_PGD, +- init_mm.pgd + USER_PTRS_PER_PGD, +- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); ++ sync_kernel_mappings(pgd); + } + return pgd; + } +-- +2.35.1 + diff --git a/queue-6.0/series b/queue-6.0/series index f481cf3a73a..2bbe3e5c535 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -98,3 +98,17 @@ drm-i915-never-return-0-if-not-all-requests-retired.patch tracing-osnoise-fix-duration-type.patch tracing-fix-race-where-histograms-can-be-called-before-the-event.patch tracing-free-buffers-when-a-used-dynamic-event-is-removed.patch +asoc-ops-fix-bounds-check-for-_sx-controls.patch +asoc-tlv320adc3xxx-fix-build-error-for-implicit-func.patch +pinctrl-single-fix-potential-division-by-zero.patch +riscv-sync-efi-page-table-s-kernel-mappings-before-s.patch +riscv-fix-race-when-vmap-stack-overflow.patch +riscv-kexec-fixup-irq-controller-broken-in-kexec-cra.patch +riscv-kexec-fixup-crash_smp_send_stop-without-multi-.patch +nvme-fix-srcu-protection-of-nvme_ns_head-list.patch +iommu-vt-d-fix-pci-device-refcount-leak-in-has_exter.patch +iommu-vt-d-fix-pci-device-refcount-leak-in-dmar_dev_.patch +ipv4-handle-attempt-to-delete-multipath-route-when-f.patch +ipv4-fix-route-deletion-when-nexthop-info-is-not-spe.patch +mm-damon-introduce-struct-damos_access_pattern.patch +mm-damon-sysfs-fix-wrong-empty-schemes-assumption-un.patch -- 2.47.3