From: Sasha Levin Date: Sun, 17 Sep 2023 02:27:54 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v5.10.195~25 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=375c036fd2ca75cb332f0d0c5b4f430214832624;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/acpi-apei-explicit-init-of-hest-and-ghes-in-apci_ini.patch b/queue-5.10/acpi-apei-explicit-init-of-hest-and-ghes-in-apci_ini.patch new file mode 100644 index 00000000000..8c1a09612ef --- /dev/null +++ b/queue-5.10/acpi-apei-explicit-init-of-hest-and-ghes-in-apci_ini.patch @@ -0,0 +1,225 @@ +From 05c3e0ea49e0511c40319d0e6c83409bedafc1db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 27 Feb 2022 20:25:45 +0800 +Subject: ACPI: APEI: explicit init of HEST and GHES in apci_init() + +From: Shuai Xue + +[ Upstream commit dc4e8c07e9e2f69387579c49caca26ba239f7270 ] + +From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus +memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage +the estatus memory pool. On the other hand, ghes_init() relies on +sdei_init() to detect the SDEI version and (un)register events. The +dependencies are as follows: + + ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init() + ghes_init() => sdei_init() + +HEST is not PCI-specific and initcall ordering is implicit and not +well-defined within a level. + +Based on above, remove acpi_hest_init() from acpi_pci_root_init() and +convert ghes_init() and sdei_init() from initcalls to explicit calls in the +following order: + + acpi_hest_init() + ghes_init() + sdei_init() + +Signed-off-by: Shuai Xue +Signed-off-by: Rafael J. 
Wysocki +Stable-dep-of: 5cd474e57368 ("arm64: sdei: abort running SDEI handlers during crash") +Signed-off-by: Sasha Levin +--- + drivers/acpi/apei/ghes.c | 19 ++++++++----------- + drivers/acpi/bus.c | 2 ++ + drivers/acpi/pci_root.c | 3 --- + drivers/firmware/Kconfig | 1 + + drivers/firmware/arm_sdei.c | 13 ++----------- + include/acpi/apei.h | 4 +++- + include/linux/arm_sdei.h | 2 ++ + 7 files changed, 18 insertions(+), 26 deletions(-) + +diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c +index 9bdb5bd5fda63..8678e162181f4 100644 +--- a/drivers/acpi/apei/ghes.c ++++ b/drivers/acpi/apei/ghes.c +@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = { + .remove = ghes_remove, + }; + +-static int __init ghes_init(void) ++void __init ghes_init(void) + { + int rc; + ++ sdei_init(); ++ + if (acpi_disabled) +- return -ENODEV; ++ return; + + switch (hest_disable) { + case HEST_NOT_FOUND: +- return -ENODEV; ++ return; + case HEST_DISABLED: + pr_info(GHES_PFX "HEST is not enabled!\n"); +- return -EINVAL; ++ return; + default: + break; + } + + if (ghes_disable) { + pr_info(GHES_PFX "GHES is not enabled!\n"); +- return -EINVAL; ++ return; + } + + ghes_nmi_init_cxt(); + + rc = platform_driver_register(&ghes_platform_driver); + if (rc) +- goto err; ++ return; + + rc = apei_osc_setup(); + if (rc == 0 && osc_sb_apei_support_acked) +@@ -1494,9 +1496,4 @@ static int __init ghes_init(void) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); + else + pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); +- +- return 0; +-err: +- return rc; + } +-device_initcall(ghes_init); +diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c +index 5e14288fcabe9..60dfe63301d00 100644 +--- a/drivers/acpi/bus.c ++++ b/drivers/acpi/bus.c +@@ -1252,6 +1252,8 @@ static int __init acpi_init(void) + + pci_mmcfg_late_init(); + acpi_iort_init(); ++ acpi_hest_init(); ++ ghes_init(); + acpi_scan_init(); + acpi_ec_init(); + 
acpi_debugfs_init(); +diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c +index c12b5fb3e8fba..d972ea057a035 100644 +--- a/drivers/acpi/pci_root.c ++++ b/drivers/acpi/pci_root.c +@@ -20,8 +20,6 @@ + #include <linux/slab.h> + #include <linux/dmi.h> + #include <linux/platform_data/x86/apple.h> +-#include <acpi/apei.h> /* for acpi_hest_init() */ +- + #include "internal.h" + + #define ACPI_PCI_ROOT_CLASS "pci_bridge" +@@ -950,7 +948,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, + + void __init acpi_pci_root_init(void) + { +- acpi_hest_init(); + if (acpi_pci_disabled) + return; + +diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig +index c08968c5ddf8c..807c5320dc0ff 100644 +--- a/drivers/firmware/Kconfig ++++ b/drivers/firmware/Kconfig +@@ -72,6 +72,7 @@ config ARM_SCPI_POWER_DOMAIN + config ARM_SDE_INTERFACE + bool "ARM Software Delegated Exception Interface (SDEI)" + depends on ARM64 ++ depends on ACPI_APEI_GHES + help + The Software Delegated Exception Interface (SDEI) is an ARM + standard for registering callbacks from the platform firmware +diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c +index 5a877d76078f7..9c4ebaf1f0c8f 100644 +--- a/drivers/firmware/arm_sdei.c ++++ b/drivers/firmware/arm_sdei.c +@@ -1063,14 +1063,14 @@ static bool __init sdei_present_acpi(void) + return true; + } + +-static int __init sdei_init(void) ++void __init sdei_init(void) + { + struct platform_device *pdev; + int ret; + + ret = platform_driver_register(&sdei_driver); + if (ret || !sdei_present_acpi()) +- return ret; ++ return; + + pdev = platform_device_register_simple(sdei_driver.driver.name, + 0, NULL, 0); +@@ -1080,17 +1080,8 @@ static int __init sdei_init(void) + pr_info("Failed to register ACPI:SDEI platform device %d\n", + ret); + } +- +- return ret; + } + +-/* +- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register +- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised +- * by device_initcall(). We want to be called in the middle. 
+- */ +-subsys_initcall_sync(sdei_init); +- + int sdei_event_handler(struct pt_regs *regs, + struct sdei_registered_event *arg) + { +diff --git a/include/acpi/apei.h b/include/acpi/apei.h +index 680f80960c3dc..a6ac2e8b72da8 100644 +--- a/include/acpi/apei.h ++++ b/include/acpi/apei.h +@@ -27,14 +27,16 @@ extern int hest_disable; + extern int erst_disable; + #ifdef CONFIG_ACPI_APEI_GHES + extern bool ghes_disable; ++void __init ghes_init(void); + #else + #define ghes_disable 1 ++static inline void ghes_init(void) { } + #endif + + #ifdef CONFIG_ACPI_APEI + void __init acpi_hest_init(void); + #else +-static inline void acpi_hest_init(void) { return; } ++static inline void acpi_hest_init(void) { } + #endif + + typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); +diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h +index 0a241c5c911d8..14dc461b0e829 100644 +--- a/include/linux/arm_sdei.h ++++ b/include/linux/arm_sdei.h +@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes); + /* For use by arch code when CPU hotplug notifiers are not appropriate. 
*/ + int sdei_mask_local_cpu(void); + int sdei_unmask_local_cpu(void); ++void __init sdei_init(void); + #else + static inline int sdei_mask_local_cpu(void) { return 0; } + static inline int sdei_unmask_local_cpu(void) { return 0; } ++static inline void sdei_init(void) { } + #endif /* CONFIG_ARM_SDE_INTERFACE */ + + +-- +2.40.1 + diff --git a/queue-5.10/arm-dts-bcm5301x-extend-ram-to-full-256mb-for-linksy.patch b/queue-5.10/arm-dts-bcm5301x-extend-ram-to-full-256mb-for-linksy.patch new file mode 100644 index 00000000000..0a6b6e1d2e1 --- /dev/null +++ b/queue-5.10/arm-dts-bcm5301x-extend-ram-to-full-256mb-for-linksy.patch @@ -0,0 +1,44 @@ +From ee5f21b187ca56827f8c3294e3dde1b269703319 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jul 2023 03:40:17 +0200 +Subject: ARM: dts: BCM5301X: Extend RAM to full 256MB for Linksys EA6500 V2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Aleksey Nasibulin + +[ Upstream commit 91994e59079dcb455783d3f9ea338eea6f671af3 ] + +Linksys ea6500-v2 have 256MB of ram. Currently we only use 128MB. +Expand the definition to use all the available RAM. 
+ +Fixes: 03e96644d7a8 ("ARM: dts: BCM5301X: Add basic DT for Linksys EA6500 V2") +Signed-off-by: Aleksey Nasibulin +Signed-off-by: Christian Marangi +Cc: stable@vger.kernel.org +Acked-by: Rafał Miłecki +Link: https://lore.kernel.org/r/20230712014017.28123-1-ansuelsmth@gmail.com +Signed-off-by: Florian Fainelli +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts b/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts +index cd797b4202ad8..01c48faabfade 100644 +--- a/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts ++++ b/arch/arm/boot/dts/bcm4708-linksys-ea6500-v2.dts +@@ -19,7 +19,8 @@ + + memory@0 { + device_type = "memory"; +- reg = <0x00000000 0x08000000>; ++ reg = <0x00000000 0x08000000>, ++ <0x88000000 0x08000000>; + }; + + gpio-keys { +-- +2.40.1 + diff --git a/queue-5.10/arm-dts-samsung-exynos4210-i9100-fix-lcd-screen-s-ph.patch b/queue-5.10/arm-dts-samsung-exynos4210-i9100-fix-lcd-screen-s-ph.patch new file mode 100644 index 00000000000..a0fc5a40158 --- /dev/null +++ b/queue-5.10/arm-dts-samsung-exynos4210-i9100-fix-lcd-screen-s-ph.patch @@ -0,0 +1,44 @@ +From 26e312e27ba26c86c8dea4b507fc910f13ba5d02 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jul 2023 17:37:20 +0200 +Subject: ARM: dts: samsung: exynos4210-i9100: Fix LCD screen's physical size + +From: Paul Cercueil + +[ Upstream commit b3f3fc32e5ff1e848555af8616318cc667457f90 ] + +The previous values were completely bogus, and resulted in the computed +DPI ratio being much lower than reality, causing applications and UIs to +misbehave. + +The new values were measured by myself with a ruler. 
+ +Signed-off-by: Paul Cercueil +Acked-by: Sam Ravnborg +Fixes: 8620cc2f99b7 ("ARM: dts: exynos: Add devicetree file for the Galaxy S2") +Cc: <stable@vger.kernel.org> # v5.8+ +Link: https://lore.kernel.org/r/20230714153720.336990-1-paul@crapouillou.net +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/exynos4210-i9100.dts | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/arm/boot/dts/exynos4210-i9100.dts b/arch/arm/boot/dts/exynos4210-i9100.dts +index ecc9d4dc707e4..d186b93144e38 100644 +--- a/arch/arm/boot/dts/exynos4210-i9100.dts ++++ b/arch/arm/boot/dts/exynos4210-i9100.dts +@@ -170,8 +170,8 @@ + power-on-delay = <10>; + reset-delay = <10>; + +- panel-width-mm = <90>; +- panel-height-mm = <154>; ++ panel-width-mm = <56>; ++ panel-height-mm = <93>; + + display-timings { + timing { +-- +2.40.1 + diff --git a/queue-5.10/arm64-sdei-abort-running-sdei-handlers-during-crash.patch b/queue-5.10/arm64-sdei-abort-running-sdei-handlers-during-crash.patch new file mode 100644 index 00000000000..6c63147f3e3 --- /dev/null +++ b/queue-5.10/arm64-sdei-abort-running-sdei-handlers-during-crash.patch @@ -0,0 +1,198 @@ +From b776f44f007fe07f29bfc53dcbc4b17fc7850f29 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Jun 2023 17:29:39 -0700 +Subject: arm64: sdei: abort running SDEI handlers during crash + +From: D Scott Phillips + +[ Upstream commit 5cd474e57368f0957c343bb21e309cf82826b1ef ] + +Interrupts are blocked in SDEI context, per the SDEI spec: "The client +interrupts cannot preempt the event handler." If we crashed in the SDEI +handler-running context (as with ACPI's AGDI) then we need to clean up the +SDEI state before proceeding to the crash kernel so that the crash kernel +can have working interrupts. + +Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME +the handler, discarding the interrupted context. 
+ +Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking") +Signed-off-by: D Scott Phillips +Cc: stable@vger.kernel.org +Reviewed-by: James Morse +Tested-by: Mihai Carabas +Link: https://lore.kernel.org/r/20230627002939.2758-1-scott@os.amperecomputing.com +Signed-off-by: Will Deacon +Signed-off-by: Sasha Levin +--- + arch/arm64/include/asm/sdei.h | 6 ++++++ + arch/arm64/kernel/entry.S | 27 +++++++++++++++++++++++++-- + arch/arm64/kernel/sdei.c | 3 +++ + arch/arm64/kernel/smp.c | 8 ++++---- + drivers/firmware/arm_sdei.c | 19 +++++++++++++++++++ + include/linux/arm_sdei.h | 2 ++ + 6 files changed, 59 insertions(+), 6 deletions(-) + +diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h +index 63e0b92a5fbb0..5882c0e29331e 100644 +--- a/arch/arm64/include/asm/sdei.h ++++ b/arch/arm64/include/asm/sdei.h +@@ -17,6 +17,9 @@ + + #include <asm/virt.h> + ++DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); ++DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); ++ + extern unsigned long sdei_exit_mode; + + /* Software Delegated Exception entry point from firmware*/ +@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num, + unsigned long pc, + unsigned long pstate); + ++/* Abort a running handler. Context is discarded. */ ++void __sdei_handler_abort(void); ++ + /* + * The above entry point does the minimum to call C code. This function does + * anything else, before calling the driver. 
+diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S +index 55e477f73158d..a94acea770c7c 100644 +--- a/arch/arm64/kernel/entry.S ++++ b/arch/arm64/kernel/entry.S +@@ -1137,9 +1137,13 @@ SYM_CODE_START(__sdei_asm_handler) + + mov x19, x1 + +-#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) ++ /* Store the registered-event for crash_smp_send_stop() */ + ldrb w4, [x19, #SDEI_EVENT_PRIORITY] +-#endif ++ cbnz w4, 1f ++ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 ++ b 2f ++1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 ++2: str x19, [x5] + + #ifdef CONFIG_VMAP_STACK + /* +@@ -1204,6 +1208,14 @@ SYM_CODE_START(__sdei_asm_handler) + + ldr_l x2, sdei_exit_mode + ++ /* Clear the registered-event seen by crash_smp_send_stop() */ ++ ldrb w3, [x4, #SDEI_EVENT_PRIORITY] ++ cbnz w3, 1f ++ adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 ++ b 2f ++1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 ++2: str xzr, [x5] ++ + alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + sdei_handler_exit exit_mode=x2 + alternative_else_nop_endif +@@ -1214,4 +1226,15 @@ alternative_else_nop_endif + #endif + SYM_CODE_END(__sdei_asm_handler) + NOKPROBE(__sdei_asm_handler) ++ ++SYM_CODE_START(__sdei_handler_abort) ++ mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME ++ adr x1, 1f ++ ldr_l x2, sdei_exit_mode ++ sdei_handler_exit exit_mode=x2 ++ // exit the handler and jump to the next instruction. ++ // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx. 
++1: ret ++SYM_CODE_END(__sdei_handler_abort) ++NOKPROBE(__sdei_handler_abort) + #endif /* CONFIG_ARM_SDE_INTERFACE */ +diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c +index 793c46d6a4479..0083f5afa51db 100644 +--- a/arch/arm64/kernel/sdei.c ++++ b/arch/arm64/kernel/sdei.c +@@ -38,6 +38,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); + DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); + #endif + ++DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); ++DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); ++ + static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) + { + unsigned long *p; +diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c +index feee5a3cd1288..ae0977b632a18 100644 +--- a/arch/arm64/kernel/smp.c ++++ b/arch/arm64/kernel/smp.c +@@ -1072,10 +1072,8 @@ void crash_smp_send_stop(void) + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. 
+ */ +- if (num_other_online_cpus() == 0) { +- sdei_mask_local_cpu(); +- return; +- } ++ if (num_other_online_cpus() == 0) ++ goto skip_ipi; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); +@@ -1094,7 +1092,9 @@ void crash_smp_send_stop(void) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); + ++skip_ipi: + sdei_mask_local_cpu(); ++ sdei_handler_abort(); + } + + bool smp_crash_stop_failed(void) +diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c +index 9c4ebaf1f0c8f..68e55ca7491e5 100644 +--- a/drivers/firmware/arm_sdei.c ++++ b/drivers/firmware/arm_sdei.c +@@ -1109,3 +1109,22 @@ int sdei_event_handler(struct pt_regs *regs, + return err; + } + NOKPROBE_SYMBOL(sdei_event_handler); ++ ++void sdei_handler_abort(void) ++{ ++ /* ++ * If the crash happened in an SDEI event handler then we need to ++ * finish the handler with the firmware so that we can have working ++ * interrupts in the crash kernel. 
++ */ ++ if (__this_cpu_read(sdei_active_critical_event)) { ++ pr_warn("still in SDEI critical event context, attempting to finish handler.\n"); ++ __sdei_handler_abort(); ++ __this_cpu_write(sdei_active_critical_event, NULL); ++ } ++ if (__this_cpu_read(sdei_active_normal_event)) { ++ pr_warn("still in SDEI normal event context, attempting to finish handler.\n"); ++ __sdei_handler_abort(); ++ __this_cpu_write(sdei_active_normal_event, NULL); ++ } ++} +diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h +index 14dc461b0e829..255701e1251b4 100644 +--- a/include/linux/arm_sdei.h ++++ b/include/linux/arm_sdei.h +@@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes); + int sdei_mask_local_cpu(void); + int sdei_unmask_local_cpu(void); + void __init sdei_init(void); ++void sdei_handler_abort(void); + #else + static inline int sdei_mask_local_cpu(void) { return 0; } + static inline int sdei_unmask_local_cpu(void) { return 0; } + static inline void sdei_init(void) { } ++static inline void sdei_handler_abort(void) { } + #endif /* CONFIG_ARM_SDE_INTERFACE */ + + +-- +2.40.1 + diff --git a/queue-5.10/bus-mhi-host-skip-mhi-reset-if-device-is-in-rddm.patch b/queue-5.10/bus-mhi-host-skip-mhi-reset-if-device-is-in-rddm.patch new file mode 100644 index 00000000000..9f7c307a893 --- /dev/null +++ b/queue-5.10/bus-mhi-host-skip-mhi-reset-if-device-is-in-rddm.patch @@ -0,0 +1,52 @@ +From 3c50c33cace00ac611278a4432e736a7b9e7633f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 18 May 2023 14:22:39 +0800 +Subject: bus: mhi: host: Skip MHI reset if device is in RDDM + +From: Qiang Yu + +[ Upstream commit cabce92dd805945a090dc6fc73b001bb35ed083a ] + +In RDDM EE, device can not process MHI reset issued by host. In case of MHI +power off, host is issuing MHI reset and polls for it to get cleared until +it times out. Since this timeout can not be avoided in case of RDDM, skip +the MHI reset in this scenarios. 
+ +Cc: <stable@vger.kernel.org> +Fixes: a6e2e3522f29 ("bus: mhi: core: Add support for PM state transitions") +Signed-off-by: Qiang Yu +Reviewed-by: Jeffrey Hugo +Reviewed-by: Manivannan Sadhasivam +Link: https://lore.kernel.org/r/1684390959-17836-1-git-send-email-quic_qianyu@quicinc.com +Signed-off-by: Manivannan Sadhasivam +Signed-off-by: Sasha Levin +--- + drivers/bus/mhi/host/pm.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c +index 7d69b740b9f93..fe8ecd6eaa4d1 100644 +--- a/drivers/bus/mhi/host/pm.c ++++ b/drivers/bus/mhi/host/pm.c +@@ -490,6 +490,10 @@ static void mhi_pm_disable_transition(struct mhi_controller *mhi_cntrl, + u32 in_reset = -1; + unsigned long timeout = msecs_to_jiffies(mhi_cntrl->timeout_ms); + ++ /* Skip MHI RESET if in RDDM state */ ++ if (mhi_cntrl->rddm_image && mhi_get_exec_env(mhi_cntrl) == MHI_EE_RDDM) ++ goto skip_mhi_reset; ++ + dev_dbg(dev, "Triggering MHI Reset in device\n"); + mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET); + +@@ -515,6 +519,7 @@ static void mhi_pm_disable_transition(struct mhi_controller *mhi_cntrl, + mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0); + } + ++skip_mhi_reset: + dev_dbg(dev, + "Waiting for all pending event ring processing to complete\n"); + mhi_event = mhi_cntrl->mhi_event; +-- +2.40.1 + diff --git a/queue-5.10/hsr-fix-uninit-value-access-in-fill_frame_info.patch b/queue-5.10/hsr-fix-uninit-value-access-in-fill_frame_info.patch new file mode 100644 index 00000000000..b1bb70237a5 --- /dev/null +++ b/queue-5.10/hsr-fix-uninit-value-access-in-fill_frame_info.patch @@ -0,0 +1,90 @@ +From 1f4259fb925bc62193fdd715fa6f28ad38368d71 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 18:17:52 +0800 +Subject: hsr: Fix uninit-value access in fill_frame_info() + +From: Ziyang Xuan + +[ Upstream commit 484b4833c604c0adcf19eac1ca14b60b757355b5 ] + +Syzbot reports the following uninit-value access problem. 
+ +===================================================== +BUG: KMSAN: uninit-value in fill_frame_info net/hsr/hsr_forward.c:601 [inline] +BUG: KMSAN: uninit-value in hsr_forward_skb+0x9bd/0x30f0 net/hsr/hsr_forward.c:616 + fill_frame_info net/hsr/hsr_forward.c:601 [inline] + hsr_forward_skb+0x9bd/0x30f0 net/hsr/hsr_forward.c:616 + hsr_dev_xmit+0x192/0x330 net/hsr/hsr_device.c:223 + __netdev_start_xmit include/linux/netdevice.h:4889 [inline] + netdev_start_xmit include/linux/netdevice.h:4903 [inline] + xmit_one net/core/dev.c:3544 [inline] + dev_hard_start_xmit+0x247/0xa10 net/core/dev.c:3560 + __dev_queue_xmit+0x34d0/0x52a0 net/core/dev.c:4340 + dev_queue_xmit include/linux/netdevice.h:3082 [inline] + packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 + packet_snd net/packet/af_packet.c:3087 [inline] + packet_sendmsg+0x8b1d/0x9f30 net/packet/af_packet.c:3119 + sock_sendmsg_nosec net/socket.c:730 [inline] + sock_sendmsg net/socket.c:753 [inline] + __sys_sendto+0x781/0xa30 net/socket.c:2176 + __do_sys_sendto net/socket.c:2188 [inline] + __se_sys_sendto net/socket.c:2184 [inline] + __ia32_sys_sendto+0x11f/0x1c0 net/socket.c:2184 + do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] + __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 + do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 + entry_SYSENTER_compat_after_hwframe+0x70/0x82 + +Uninit was created at: + slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767 + slab_alloc_node mm/slub.c:3478 [inline] + kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523 + kmalloc_reserve+0x148/0x470 net/core/skbuff.c:559 + __alloc_skb+0x318/0x740 net/core/skbuff.c:644 + alloc_skb include/linux/skbuff.h:1286 [inline] + alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6299 + sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2794 + packet_alloc_skb net/packet/af_packet.c:2936 [inline] + packet_snd net/packet/af_packet.c:3030 [inline] + packet_sendmsg+0x70e8/0x9f30 
net/packet/af_packet.c:3119 + sock_sendmsg_nosec net/socket.c:730 [inline] + sock_sendmsg net/socket.c:753 [inline] + __sys_sendto+0x781/0xa30 net/socket.c:2176 + __do_sys_sendto net/socket.c:2188 [inline] + __se_sys_sendto net/socket.c:2184 [inline] + __ia32_sys_sendto+0x11f/0x1c0 net/socket.c:2184 + do_syscall_32_irqs_on arch/x86/entry/common.c:112 [inline] + __do_fast_syscall_32+0xa2/0x100 arch/x86/entry/common.c:178 + do_fast_syscall_32+0x37/0x80 arch/x86/entry/common.c:203 + do_SYSENTER_32+0x1f/0x30 arch/x86/entry/common.c:246 + entry_SYSENTER_compat_after_hwframe+0x70/0x82 + +It is because VLAN not yet supported in hsr driver. Return error +when protocol is ETH_P_8021Q in fill_frame_info() now to fix it. + +Fixes: 451d8123f897 ("net: prp: add packet handling support") +Reported-by: syzbot+bf7e6250c7ce248f3ec9@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=bf7e6250c7ce248f3ec9 +Signed-off-by: Ziyang Xuan +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/hsr/hsr_forward.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c +index aec48e670fb69..2a02cb2edec2f 100644 +--- a/net/hsr/hsr_forward.c ++++ b/net/hsr/hsr_forward.c +@@ -531,6 +531,7 @@ static int fill_frame_info(struct hsr_frame_info *frame, + proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; + /* FIXME: */ + netdev_warn_once(skb->dev, "VLAN not yet supported"); ++ return -EINVAL; + } + + frame->is_from_san = false; +-- +2.40.1 + diff --git a/queue-5.10/ipv6-fix-ip6_sock_set_addr_preferences-typo.patch b/queue-5.10/ipv6-fix-ip6_sock_set_addr_preferences-typo.patch new file mode 100644 index 00000000000..22f593beda3 --- /dev/null +++ b/queue-5.10/ipv6-fix-ip6_sock_set_addr_preferences-typo.patch @@ -0,0 +1,42 @@ +From daae23b478bf7bf409ba0ccd15203327344cb32d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 15:42:13 +0000 +Subject: ipv6: fix ip6_sock_set_addr_preferences() typo + +From: 
Eric Dumazet + +[ Upstream commit 8cdd9f1aaedf823006449faa4e540026c692ac43 ] + +ip6_sock_set_addr_preferences() second argument should be an integer. + +SUNRPC attempts to set IPV6_PREFER_SRC_PUBLIC were +translated to IPV6_PREFER_SRC_TMP + +Fixes: 18d5ad623275 ("ipv6: add ip6_sock_set_addr_preferences") +Signed-off-by: Eric Dumazet +Cc: Christoph Hellwig +Cc: Chuck Lever +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230911154213.713941-1-edumazet@google.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/ipv6.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/net/ipv6.h b/include/net/ipv6.h +index 4c8f97a6da5a7..47d644de0e47c 100644 +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -1249,7 +1249,7 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) + return 0; + } + +-static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val) ++static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val) + { + int ret; + +-- +2.40.1 + diff --git a/queue-5.10/ixgbe-fix-timestamp-configuration-code.patch b/queue-5.10/ixgbe-fix-timestamp-configuration-code.patch new file mode 100644 index 00000000000..02ad55c43f1 --- /dev/null +++ b/queue-5.10/ixgbe-fix-timestamp-configuration-code.patch @@ -0,0 +1,149 @@ +From 143afa3ab6d7c1f3945b0ef2c7f597e5038bb124 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 13:28:14 -0700 +Subject: ixgbe: fix timestamp configuration code + +From: Vadim Fedorenko + +[ Upstream commit 3c44191dd76cf9c0cc49adaf34384cbd42ef8ad2 ] + +The commit in fixes introduced flags to control the status of hardware +configuration while processing packets. At the same time another structure +is used to provide configuration of timestamper to user-space applications. +The way it was coded makes this structures go out of sync easily. 
The +repro is easy for 82599 chips: + +[root@hostname ~]# hwstamp_ctl -i eth0 -r 12 -t 1 +current settings: +tx_type 0 +rx_filter 0 +new settings: +tx_type 1 +rx_filter 12 + +The eth0 device is properly configured to timestamp any PTPv2 events. + +[root@hostname ~]# hwstamp_ctl -i eth0 -r 1 -t 1 +current settings: +tx_type 1 +rx_filter 12 +SIOCSHWTSTAMP failed: Numerical result out of range +The requested time stamping mode is not supported by the hardware. + +The error is properly returned because HW doesn't support all packets +timestamping. But the adapter->flags is cleared of timestamp flags +even though no HW configuration was done. From that point no RX timestamps +are received by user-space application. But configuration shows good +values: + +[root@hostname ~]# hwstamp_ctl -i eth0 +current settings: +tx_type 1 +rx_filter 12 + +Fix the issue by applying new flags only when the HW was actually +configured. + +Fixes: a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x devices") +Signed-off-by: Vadim Fedorenko +Reviewed-by: Simon Horman +Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 28 +++++++++++--------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +index 8b7f300355710..3eb2c05361e80 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +@@ -989,6 +989,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED; + u32 tsync_rx_mtrl = PTP_EV_PORT << 16; ++ u32 aflags = adapter->flags; + bool is_l2 = false; + u32 regval; + +@@ -1009,20 +1010,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + tsync_rx_mtrl = 0; +- adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | +- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); ++ aflags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | ++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; +- adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | +- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); ++ aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | ++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; +- adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | +- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); ++ aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | ++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: +@@ -1036,8 +1037,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; + is_l2 = true; + config->rx_filter = 
HWTSTAMP_FILTER_PTP_V2_EVENT; +- adapter->flags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | +- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); ++ aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED | ++ IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_NTP_ALL: +@@ -1048,7 +1049,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + if (hw->mac.type >= ixgbe_mac_X550) { + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; +- adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; ++ aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + break; + } + fallthrough; +@@ -1059,8 +1060,6 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + * Delay_Req messages and hardware does not support + * timestamping all packets => return error + */ +- adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | +- IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); + config->rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } +@@ -1092,8 +1091,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + IXGBE_TSYNCRXCTL_TYPE_ALL | + IXGBE_TSYNCRXCTL_TSIP_UT_EN; + config->rx_filter = HWTSTAMP_FILTER_ALL; +- adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; +- adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; ++ aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; ++ aflags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; + is_l2 = true; + break; + default: +@@ -1126,6 +1125,9 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, + + IXGBE_WRITE_FLUSH(hw); + ++ /* configure adapter flags only when HW is actually configured */ ++ adapter->flags = aflags; ++ + /* clear TX/RX time stamp registers, just to be sure */ + ixgbe_ptp_clear_tx_timestamp(adapter); + IXGBE_READ_REG(hw, IXGBE_RXSTMPH); +-- +2.40.1 + diff --git a/queue-5.10/kcm-fix-error-handling-for-sock_dgram-in-kcm_sendmsg.patch b/queue-5.10/kcm-fix-error-handling-for-sock_dgram-in-kcm_sendmsg.patch new file mode 100644 index 
00000000000..113cdfca290 --- /dev/null +++ b/queue-5.10/kcm-fix-error-handling-for-sock_dgram-in-kcm_sendmsg.patch @@ -0,0 +1,70 @@ +From 1e8fe7f5dfe7767a7055ff0e0fbc278910c2a3df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Sep 2023 19:27:53 -0700 +Subject: kcm: Fix error handling for SOCK_DGRAM in kcm_sendmsg(). + +From: Kuniyuki Iwashima + +[ Upstream commit a22730b1b4bf437c6bbfdeff5feddf54be4aeada ] + +syzkaller found a memory leak in kcm_sendmsg(), and commit c821a88bd720 +("kcm: Fix memory leak in error path of kcm_sendmsg()") suppressed it by +updating kcm_tx_msg(head)->last_skb if partial data is copied so that the +following sendmsg() will resume from the skb. + +However, we cannot know how many bytes were copied when we get the error. +Thus, we could mess up the MSG_MORE queue. + +When kcm_sendmsg() fails for SOCK_DGRAM, we should purge the queue as we +do so for UDP by udp_flush_pending_frames(). + +Even without this change, when the error occurred, the following sendmsg() +resumed from a wrong skb and the queue was messed up. However, we have +yet to get such a report, and only syzkaller stumbled on it. So, this +can be changed safely. + +Note this does not change SOCK_SEQPACKET behaviour. 
+ +Fixes: c821a88bd720 ("kcm: Fix memory leak in error path of kcm_sendmsg()") +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Signed-off-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230912022753.33327-1-kuniyu@amazon.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/kcm/kcmsock.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c +index fb025406ea567..39b3c7fbf9f66 100644 +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -1064,17 +1064,18 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + out_error: + kcm_push(kcm); + +- if (copied && sock->type == SOCK_SEQPACKET) { ++ if (sock->type == SOCK_SEQPACKET) { + /* Wrote some bytes before encountering an + * error, return partial success. + */ +- goto partial_message; +- } +- +- if (head != kcm->seq_skb) ++ if (copied) ++ goto partial_message; ++ if (head != kcm->seq_skb) ++ kfree_skb(head); ++ } else { + kfree_skb(head); +- else if (copied) +- kcm_tx_msg(head)->last_skb = skb; ++ kcm->seq_skb = NULL; ++ } + + err = sk_stream_error(sk, msg->msg_flags, err); + +-- +2.40.1 + diff --git a/queue-5.10/kcm-fix-memory-leak-in-error-path-of-kcm_sendmsg.patch b/queue-5.10/kcm-fix-memory-leak-in-error-path-of-kcm_sendmsg.patch new file mode 100644 index 00000000000..d18757350bd --- /dev/null +++ b/queue-5.10/kcm-fix-memory-leak-in-error-path-of-kcm_sendmsg.patch @@ -0,0 +1,65 @@ +From 48a87cd04ed06eb4cc98388b5b9ef426fc515def Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 10 Sep 2023 02:03:10 +0900 +Subject: kcm: Fix memory leak in error path of kcm_sendmsg() + +From: Shigeru Yoshida + +[ Upstream commit c821a88bd720b0046433173185fd841a100d44ad ] + +syzbot reported a memory leak like below: + +BUG: memory leak +unreferenced object 0xffff88810b088c00 (size 240): + comm "syz-executor186", pid 5012, jiffies 4294943306 (age 13.680s) + hex dump (first 32 bytes): + 00 
89 08 0b 81 88 ff ff 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [] __alloc_skb+0x1ef/0x230 net/core/skbuff.c:634 + [] alloc_skb include/linux/skbuff.h:1289 [inline] + [] kcm_sendmsg+0x269/0x1050 net/kcm/kcmsock.c:815 + [] sock_sendmsg_nosec net/socket.c:725 [inline] + [] sock_sendmsg+0x56/0xb0 net/socket.c:748 + [] ____sys_sendmsg+0x365/0x470 net/socket.c:2494 + [] ___sys_sendmsg+0xc9/0x130 net/socket.c:2548 + [] __sys_sendmsg+0xa6/0x120 net/socket.c:2577 + [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] + [] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 + [] entry_SYSCALL_64_after_hwframe+0x63/0xcd + +In kcm_sendmsg(), kcm_tx_msg(head)->last_skb is used as a cursor to append +newly allocated skbs to 'head'. If some bytes are copied, an error occurred, +and jumped to out_error label, 'last_skb' is left unmodified. A later +kcm_sendmsg() will use an obsoleted 'last_skb' reference, corrupting the +'head' frag_list and causing the leak. + +This patch fixes this issue by properly updating the last allocated skb in +'last_skb'. + +Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") +Reported-and-tested-by: syzbot+6f98de741f7dbbfc4ccb@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=6f98de741f7dbbfc4ccb +Signed-off-by: Shigeru Yoshida +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/kcm/kcmsock.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c +index 71608a6def988..fb025406ea567 100644 +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -1073,6 +1073,8 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + + if (head != kcm->seq_skb) + kfree_skb(head); ++ else if (copied) ++ kcm_tx_msg(head)->last_skb = skb; + + err = sk_stream_error(sk, msg->msg_flags, err); + +-- +2.40.1 + diff --git a/queue-5.10/kselftest-runner.sh-propagate-sigterm-to-runner-chil.patch b/queue-5.10/kselftest-runner.sh-propagate-sigterm-to-runner-chil.patch new file mode 100644 index 00000000000..c8663f4220b --- /dev/null +++ b/queue-5.10/kselftest-runner.sh-propagate-sigterm-to-runner-chil.patch @@ -0,0 +1,54 @@ +From e7a2cea85b76cfa701325ec4b0b5792969b52a22 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 13:53:17 +0200 +Subject: kselftest/runner.sh: Propagate SIGTERM to runner child +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +[ Upstream commit 9616cb34b08ec86642b162eae75c5a7ca8debe3c ] + +Timeouts in kselftest are done using the "timeout" command with the +"--foreground" option. Without the "foreground" option, it is not +possible for a user to cancel the runner using SIGINT, because the +signal is not propagated to timeout which is running in a different +process group. The "foreground" option places the timeout in the same +process group as its parent, but only sends the SIGTERM (on timeout) +signal to the forked process. Unfortunately, this does not play nice +with all kselftests, e.g. "net:fcnal-test.sh", where the child +processes will linger because timeout does not send SIGTERM to the +group. + +Some users have noted these hangs [1]. + +Fix this by nesting the timeout with an additional timeout without the +foreground option. 
+ +Link: https://lore.kernel.org/all/7650b2eb-0aee-a2b0-2e64-c9bc63210f67@alu.unizg.hr/ # [1] +Fixes: 651e0d881461 ("kselftest/runner: allow to properly deliver signals to tests") +Signed-off-by: Björn Töpel +Signed-off-by: Shuah Khan +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/kselftest/runner.sh | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh +index a9ba782d8ca0f..83616f0779a7e 100644 +--- a/tools/testing/selftests/kselftest/runner.sh ++++ b/tools/testing/selftests/kselftest/runner.sh +@@ -33,7 +33,8 @@ tap_timeout() + { + # Make sure tests will time out if utility is available. + if [ -x /usr/bin/timeout ] ; then +- /usr/bin/timeout --foreground "$kselftest_timeout" $1 ++ /usr/bin/timeout --foreground "$kselftest_timeout" \ ++ /usr/bin/timeout "$kselftest_timeout" $1 + else + $1 + fi +-- +2.40.1 + diff --git a/queue-5.10/net-ethernet-mtk_eth_soc-fix-possible-null-pointer-d.patch b/queue-5.10/net-ethernet-mtk_eth_soc-fix-possible-null-pointer-d.patch new file mode 100644 index 00000000000..6b7f915e703 --- /dev/null +++ b/queue-5.10/net-ethernet-mtk_eth_soc-fix-possible-null-pointer-d.patch @@ -0,0 +1,40 @@ +From 18e4ad966cc27a5b846cee92af33a56ddb5792d0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 14:19:50 +0800 +Subject: net: ethernet: mtk_eth_soc: fix possible NULL pointer dereference in + mtk_hwlro_get_fdir_all() + +From: Hangyu Hua + +[ Upstream commit e4c79810755f66c9a933ca810da2724133b1165a ] + +rule_locs is allocated in ethtool_get_rxnfc and the size is determined by +rule_cnt from user space. So rule_cnt needs to be checked before using +rule_locs to avoid a NULL pointer dereference. + +Fixes: 7aab747e5563 ("net: ethernet: mediatek: add ethtool functions to configure RX flows of HW LRO") +Signed-off-by: Hangyu Hua +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index a8319295f1ab2..aa9e616cc1d59 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -2013,6 +2013,9 @@ static int mtk_hwlro_get_fdir_all(struct net_device *dev, + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { ++ if (cnt == cmd->rule_cnt) ++ return -EMSGSIZE; ++ + if (mac->hwlro_ip[i]) { + rule_locs[cnt] = i; + cnt++; +-- +2.40.1 + diff --git a/queue-5.10/net-ethernet-mvpp2_main-fix-possible-oob-write-in-mv.patch b/queue-5.10/net-ethernet-mvpp2_main-fix-possible-oob-write-in-mv.patch new file mode 100644 index 00000000000..ed08cb92caf --- /dev/null +++ b/queue-5.10/net-ethernet-mvpp2_main-fix-possible-oob-write-in-mv.patch @@ -0,0 +1,43 @@ +From ed8ce97c74e16ab367e20cc418ecdb79709b25d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 14:19:49 +0800 +Subject: net: ethernet: mvpp2_main: fix possible OOB write in + mvpp2_ethtool_get_rxnfc() + +From: Hangyu Hua + +[ Upstream commit 51fe0a470543f345e3c62b6798929de3ddcedc1d ] + +rules is allocated in ethtool_get_rxnfc and the size is determined by +rule_cnt from user space. So rule_cnt needs to be checked before using +rules to avoid an OOB write or a NULL pointer dereference. + +Fixes: 90b509b39ac9 ("net: mvpp2: cls: Add Classification offload support") +Signed-off-by: Hangyu Hua +Reviewed-by: Marcin Wojtas +Reviewed-by: Russell King (Oracle) +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +index 68c5ed8716c84..e0e6275b3e20c 100644 +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +@@ -5201,6 +5201,11 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev, + break; + case ETHTOOL_GRXCLSRLALL: + for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) { ++ if (loc == info->rule_cnt) { ++ ret = -EMSGSIZE; ++ break; ++ } ++ + if (port->rfs_rules[i]) + rules[loc++] = i; + } +-- +2.40.1 + diff --git a/queue-5.10/net-ipv4-fix-one-memleak-in-__inet_del_ifa.patch b/queue-5.10/net-ipv4-fix-one-memleak-in-__inet_del_ifa.patch new file mode 100644 index 00000000000..5875a14da34 --- /dev/null +++ b/queue-5.10/net-ipv4-fix-one-memleak-in-__inet_del_ifa.patch @@ -0,0 +1,85 @@ +From 8440aace7f7e855f73b0fd19d9391d17b306ccfc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Sep 2023 10:57:09 +0800 +Subject: net: ipv4: fix one memleak in __inet_del_ifa() + +From: Liu Jian + +[ Upstream commit ac28b1ec6135649b5d78b028e47264cb3ebca5ea ] + +I got the below warning when doing a fuzzing test: +unregister_netdevice: waiting for bond0 to become free. Usage count = 2 + +It can be reproduced via: + +ip link add bond0 type bond +sysctl -w net.ipv4.conf.bond0.promote_secondaries=1 +ip addr add 4.117.174.103/0 scope 0x40 dev bond0 +ip addr add 192.168.100.111/255.255.255.254 scope 0 dev bond0 +ip addr add 0.0.0.4/0 scope 0x40 secondary dev bond0 +ip addr del 4.117.174.103/0 scope 0x40 dev bond0 +ip link delete bond0 type bond + +In this reproduction test case, an incorrect 'last_prim' is found in +__inet_del_ifa(); as a result, the secondary address (0.0.0.4/0 scope 0x40) +is lost. 
The memory of the secondary address is leaked and the reference of +in_device and net_device is leaked. + +Fix this problem: +Look for 'last_prim' starting at location of the deleted IP and inserting +the promoted IP into the location of 'last_prim'. + +Fixes: 0ff60a45678e ("[IPV4]: Fix secondary IP addresses after promotion") +Signed-off-by: Liu Jian +Signed-off-by: Julian Anastasov +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/devinet.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c +index 88b6120878cd9..da1ca8081c035 100644 +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -351,14 +351,14 @@ static void __inet_del_ifa(struct in_device *in_dev, + { + struct in_ifaddr *promote = NULL; + struct in_ifaddr *ifa, *ifa1; +- struct in_ifaddr *last_prim; ++ struct in_ifaddr __rcu **last_prim; + struct in_ifaddr *prev_prom = NULL; + int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); + + ASSERT_RTNL(); + + ifa1 = rtnl_dereference(*ifap); +- last_prim = rtnl_dereference(in_dev->ifa_list); ++ last_prim = ifap; + if (in_dev->dead) + goto no_promotions; + +@@ -372,7 +372,7 @@ static void __inet_del_ifa(struct in_device *in_dev, + while ((ifa = rtnl_dereference(*ifap1)) != NULL) { + if (!(ifa->ifa_flags & IFA_F_SECONDARY) && + ifa1->ifa_scope <= ifa->ifa_scope) +- last_prim = ifa; ++ last_prim = &ifa->ifa_next; + + if (!(ifa->ifa_flags & IFA_F_SECONDARY) || + ifa1->ifa_mask != ifa->ifa_mask || +@@ -436,9 +436,9 @@ static void __inet_del_ifa(struct in_device *in_dev, + + rcu_assign_pointer(prev_prom->ifa_next, next_sec); + +- last_sec = rtnl_dereference(last_prim->ifa_next); ++ last_sec = rtnl_dereference(*last_prim); + rcu_assign_pointer(promote->ifa_next, last_sec); +- rcu_assign_pointer(last_prim->ifa_next, promote); ++ rcu_assign_pointer(*last_prim, promote); + } + + promote->ifa_flags &= ~IFA_F_SECONDARY; +-- +2.40.1 + diff --git 
a/queue-5.10/net-smc-use-smc_lgr_list.lock-to-protect-smc_lgr_lis.patch b/queue-5.10/net-smc-use-smc_lgr_list.lock-to-protect-smc_lgr_lis.patch new file mode 100644 index 00000000000..29064bff19b --- /dev/null +++ b/queue-5.10/net-smc-use-smc_lgr_list.lock-to-protect-smc_lgr_lis.patch @@ -0,0 +1,73 @@ +From dcd5bdd811ffe6bfbcb1cda0513c8e59567faeab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 11:31:43 +0800 +Subject: net/smc: use smc_lgr_list.lock to protect smc_lgr_list.list iterate + in smcr_port_add + +From: Guangguan Wang + +[ Upstream commit f5146e3ef0a9eea405874b36178c19a4863b8989 ] + +While doing smcr_port_add, there maybe linkgroup add into or delete +from smc_lgr_list.list at the same time, which may result kernel crash. +So, use smc_lgr_list.lock to protect smc_lgr_list.list iterate in +smcr_port_add. + +The crash calltrace show below: +BUG: kernel NULL pointer dereference, address: 0000000000000000 +PGD 0 P4D 0 +Oops: 0000 [#1] SMP NOPTI +CPU: 0 PID: 559726 Comm: kworker/0:92 Kdump: loaded Tainted: G +Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 449e491 04/01/2014 +Workqueue: events smc_ib_port_event_work [smc] +RIP: 0010:smcr_port_add+0xa6/0xf0 [smc] +RSP: 0000:ffffa5a2c8f67de0 EFLAGS: 00010297 +RAX: 0000000000000001 RBX: ffff9935e0650000 RCX: 0000000000000000 +RDX: 0000000000000010 RSI: ffff9935e0654290 RDI: ffff9935c8560000 +RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9934c0401918 +R10: 0000000000000000 R11: ffffffffb4a5c278 R12: ffff99364029aae4 +R13: ffff99364029aa00 R14: 00000000ffffffed R15: ffff99364029ab08 +FS: 0000000000000000(0000) GS:ffff994380600000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000000 CR3: 0000000f06a10003 CR4: 0000000002770ef0 +PKRU: 55555554 +Call Trace: + smc_ib_port_event_work+0x18f/0x380 [smc] + process_one_work+0x19b/0x340 + worker_thread+0x30/0x370 + ? process_one_work+0x340/0x340 + kthread+0x114/0x130 + ? 
__kthread_cancel_work+0x50/0x50 + ret_from_fork+0x1f/0x30 + +Fixes: 1f90a05d9ff9 ("net/smc: add smcr_port_add() and smcr_link_up() processing") +Signed-off-by: Guangguan Wang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/smc/smc_core.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c +index e84241ff4ac4f..ab9ecdd1af0ac 100644 +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -1101,6 +1101,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) + { + struct smc_link_group *lgr, *n; + ++ spin_lock_bh(&smc_lgr_list.lock); + list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { + struct smc_link *link; + +@@ -1115,6 +1116,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) + if (link) + smc_llc_add_link_local(link); + } ++ spin_unlock_bh(&smc_lgr_list.lock); + } + + /* link is down - switch connections to alternate link, +-- +2.40.1 + diff --git a/queue-5.10/net-tls-do-not-free-tls_rec-on-async-operation-in-bp.patch b/queue-5.10/net-tls-do-not-free-tls_rec-on-async-operation-in-bp.patch new file mode 100644 index 00000000000..b7052a70af8 --- /dev/null +++ b/queue-5.10/net-tls-do-not-free-tls_rec-on-async-operation-in-bp.patch @@ -0,0 +1,85 @@ +From 1284e6eaf9e8131a5a39fde864c794d73d0a6c83 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 9 Sep 2023 16:14:34 +0800 +Subject: net/tls: do not free tls_rec on async operation in + bpf_exec_tx_verdict() + +From: Liu Jian + +[ Upstream commit cfaa80c91f6f99b9342b6557f0f0e1143e434066 ] + +I got the below warning when do fuzzing test: +BUG: KASAN: null-ptr-deref in scatterwalk_copychunks+0x320/0x470 +Read of size 4 at addr 0000000000000008 by task kworker/u8:1/9 + +CPU: 0 PID: 9 Comm: kworker/u8:1 Tainted: G OE +Hardware name: linux,dummy-virt (DT) +Workqueue: pencrypt_parallel padata_parallel_worker +Call trace: + dump_backtrace+0x0/0x420 + show_stack+0x34/0x44 + dump_stack+0x1d0/0x248 + 
__kasan_report+0x138/0x140 + kasan_report+0x44/0x6c + __asan_load4+0x94/0xd0 + scatterwalk_copychunks+0x320/0x470 + skcipher_next_slow+0x14c/0x290 + skcipher_walk_next+0x2fc/0x480 + skcipher_walk_first+0x9c/0x110 + skcipher_walk_aead_common+0x380/0x440 + skcipher_walk_aead_encrypt+0x54/0x70 + ccm_encrypt+0x13c/0x4d0 + crypto_aead_encrypt+0x7c/0xfc + pcrypt_aead_enc+0x28/0x84 + padata_parallel_worker+0xd0/0x2dc + process_one_work+0x49c/0xbdc + worker_thread+0x124/0x880 + kthread+0x210/0x260 + ret_from_fork+0x10/0x18 + +This is because the value of rec_seq of tls_crypto_info configured by the +user program is too large, for example, 0xffffffffffffff. In addition, TLS +is asynchronously accelerated. When tls_do_encryption() returns +-EINPROGRESS and sk->sk_err is set to EBADMSG due to rec_seq overflow, +skmsg is released before the asynchronous encryption process ends. As a +result, the UAF problem occurs during the asynchronous processing of the +encryption module. + +If the operation is asynchronous and the encryption module returns +EINPROGRESS, do not free the record information. 
+ +Fixes: 635d93981786 ("net/tls: free record only on encryption error") +Signed-off-by: Liu Jian +Reviewed-by: Sabrina Dubroca +Link: https://lore.kernel.org/r/20230909081434.2324940-1-liujian56@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/tls/tls_sw.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index ac7feadb43904..50eae668578a7 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -810,7 +810,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, + psock = sk_psock_get(sk); + if (!psock || !policy) { + err = tls_push_record(sk, flags, record_type); +- if (err && sk->sk_err == EBADMSG) { ++ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { + *copied -= sk_msg_free(sk, msg); + tls_free_open_rec(sk); + err = -sk->sk_err; +@@ -839,7 +839,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, + switch (psock->eval) { + case __SK_PASS: + err = tls_push_record(sk, flags, record_type); +- if (err && sk->sk_err == EBADMSG) { ++ if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { + *copied -= sk_msg_free(sk, msg); + tls_free_open_rec(sk); + err = -sk->sk_err; +-- +2.40.1 + diff --git a/queue-5.10/platform-mellanox-mlxbf-tmfifo-drop-jumbo-frames.patch b/queue-5.10/platform-mellanox-mlxbf-tmfifo-drop-jumbo-frames.patch new file mode 100644 index 00000000000..8060c3c234e --- /dev/null +++ b/queue-5.10/platform-mellanox-mlxbf-tmfifo-drop-jumbo-frames.patch @@ -0,0 +1,103 @@ +From 6e991db5266d56b16e8ac45e48773d0ccd53c265 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Aug 2023 13:43:00 -0400 +Subject: platform/mellanox: mlxbf-tmfifo: Drop jumbo frames + +From: Liming Sun + +[ Upstream commit fc4c655821546239abb3cf4274d66b9747aa87dd ] + +This commit drops over-sized network packets to avoid tmfifo +queue stuck. 
+ +Fixes: 1357dfd7261f ("platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc") +Signed-off-by: Liming Sun +Reviewed-by: Vadim Pasternak +Reviewed-by: David Thompson +Link: https://lore.kernel.org/r/9318936c2447f76db475c985ca6d91f057efcd41.1693322547.git.limings@nvidia.com +Signed-off-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/mellanox/mlxbf-tmfifo.c | 24 +++++++++++++++++------- + 1 file changed, 17 insertions(+), 7 deletions(-) + +diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c +index 42fcccf06157f..194f3205e5597 100644 +--- a/drivers/platform/mellanox/mlxbf-tmfifo.c ++++ b/drivers/platform/mellanox/mlxbf-tmfifo.c +@@ -205,7 +205,7 @@ static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = { + static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr"; + + /* Maximum L2 header length. */ +-#define MLXBF_TMFIFO_NET_L2_OVERHEAD 36 ++#define MLXBF_TMFIFO_NET_L2_OVERHEAD (ETH_HLEN + VLAN_HLEN) + + /* Supported virtio-net features. */ + #define MLXBF_TMFIFO_NET_FEATURES \ +@@ -623,13 +623,14 @@ static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring, + * flag is set. + */ + static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring, +- struct vring_desc *desc, ++ struct vring_desc **desc, + bool is_rx, bool *vring_change) + { + struct mlxbf_tmfifo *fifo = vring->fifo; + struct virtio_net_config *config; + struct mlxbf_tmfifo_msg_hdr hdr; + int vdev_id, hdr_len; ++ bool drop_rx = false; + + /* Read/Write packet header. 
*/ + if (is_rx) { +@@ -649,8 +650,8 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring, + if (ntohs(hdr.len) > + __virtio16_to_cpu(virtio_legacy_is_little_endian(), + config->mtu) + +- MLXBF_TMFIFO_NET_L2_OVERHEAD) +- return; ++ MLXBF_TMFIFO_NET_L2_OVERHEAD) ++ drop_rx = true; + } else { + vdev_id = VIRTIO_ID_CONSOLE; + hdr_len = 0; +@@ -665,16 +666,25 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring, + + if (!tm_dev2) + return; +- vring->desc = desc; ++ vring->desc = *desc; + vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX]; + *vring_change = true; + } ++ ++ if (drop_rx && !IS_VRING_DROP(vring)) { ++ if (vring->desc_head) ++ mlxbf_tmfifo_release_pkt(vring); ++ *desc = &vring->drop_desc; ++ vring->desc_head = *desc; ++ vring->desc = *desc; ++ } ++ + vring->pkt_len = ntohs(hdr.len) + hdr_len; + } else { + /* Network virtio has an extra header. */ + hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ? + sizeof(struct virtio_net_hdr) : 0; +- vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc); ++ vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, *desc); + hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ? + VIRTIO_ID_NET : VIRTIO_ID_CONSOLE; + hdr.len = htons(vring->pkt_len - hdr_len); +@@ -723,7 +733,7 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring, + + /* Beginning of a packet. Start to Rx/Tx packet header. */ + if (vring->pkt_len == 0) { +- mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change); ++ mlxbf_tmfifo_rxtx_header(vring, &desc, is_rx, &vring_change); + (*avail)--; + + /* Return if new packet is for another ring. 
*/ +-- +2.40.1 + diff --git a/queue-5.10/platform-mellanox-mlxbf-tmfifo-drop-the-rx-packet-if.patch b/queue-5.10/platform-mellanox-mlxbf-tmfifo-drop-the-rx-packet-if.patch new file mode 100644 index 00000000000..9b20dbe2959 --- /dev/null +++ b/queue-5.10/platform-mellanox-mlxbf-tmfifo-drop-the-rx-packet-if.patch @@ -0,0 +1,176 @@ +From cf37b8902f029527087f2da005f2bb51bb1e7764 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Aug 2023 13:42:59 -0400 +Subject: platform/mellanox: mlxbf-tmfifo: Drop the Rx packet if no more + descriptors + +From: Liming Sun + +[ Upstream commit 78034cbece79c2d730ad0770b3b7f23eedbbecf5 ] + +This commit fixes the tmfifo console stuck issue when the virtual +networking interface is in down state. In such a case, the network +Rx descriptors run out and cause the Rx network packet to stay +at the head of the tmfifo, thus blocking the console packets. The +fix is to drop the Rx network packet when there are no more Rx descriptors. +Function name mlxbf_tmfifo_release_pending_pkt() is also renamed +to mlxbf_tmfifo_release_pkt() to be more appropriate. 
+ +Fixes: 1357dfd7261f ("platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc") +Signed-off-by: Liming Sun +Reviewed-by: Vadim Pasternak +Reviewed-by: David Thompson +Link: https://lore.kernel.org/r/8c0177dc938ae03f52ff7e0b62dbeee74b7bec09.1693322547.git.limings@nvidia.com +Signed-off-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/mellanox/mlxbf-tmfifo.c | 66 ++++++++++++++++++------ + 1 file changed, 49 insertions(+), 17 deletions(-) + +diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c +index 64d22ecf3cddd..42fcccf06157f 100644 +--- a/drivers/platform/mellanox/mlxbf-tmfifo.c ++++ b/drivers/platform/mellanox/mlxbf-tmfifo.c +@@ -56,6 +56,7 @@ struct mlxbf_tmfifo; + * @vq: pointer to the virtio virtqueue + * @desc: current descriptor of the pending packet + * @desc_head: head descriptor of the pending packet ++ * @drop_desc: dummy desc for packet dropping + * @cur_len: processed length of the current descriptor + * @rem_len: remaining length of the pending packet + * @pkt_len: total length of the pending packet +@@ -72,6 +73,7 @@ struct mlxbf_tmfifo_vring { + struct virtqueue *vq; + struct vring_desc *desc; + struct vring_desc *desc_head; ++ struct vring_desc drop_desc; + int cur_len; + int rem_len; + u32 pkt_len; +@@ -83,6 +85,14 @@ struct mlxbf_tmfifo_vring { + struct mlxbf_tmfifo *fifo; + }; + ++/* Check whether vring is in drop mode. */ ++#define IS_VRING_DROP(_r) ({ \ ++ typeof(_r) (r) = (_r); \ ++ (r->desc_head == &r->drop_desc ? true : false); }) ++ ++/* A stub length to drop maximum length packet. */ ++#define VRING_DROP_DESC_MAX_LEN GENMASK(15, 0) ++ + /* Interrupt types. 
*/ + enum { + MLXBF_TM_RX_LWM_IRQ, +@@ -243,6 +253,7 @@ static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo, + vring->align = SMP_CACHE_BYTES; + vring->index = i; + vring->vdev_id = tm_vdev->vdev.id.device; ++ vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN; + dev = &tm_vdev->vdev.dev; + + size = vring_size(vring->num, vring->align); +@@ -348,7 +359,7 @@ static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring, + return len; + } + +-static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring) ++static void mlxbf_tmfifo_release_pkt(struct mlxbf_tmfifo_vring *vring) + { + struct vring_desc *desc_head; + u32 len = 0; +@@ -577,19 +588,25 @@ static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring, + + if (vring->cur_len + sizeof(u64) <= len) { + /* The whole word. */ +- if (is_rx) +- memcpy(addr + vring->cur_len, &data, sizeof(u64)); +- else +- memcpy(&data, addr + vring->cur_len, sizeof(u64)); ++ if (!IS_VRING_DROP(vring)) { ++ if (is_rx) ++ memcpy(addr + vring->cur_len, &data, ++ sizeof(u64)); ++ else ++ memcpy(&data, addr + vring->cur_len, ++ sizeof(u64)); ++ } + vring->cur_len += sizeof(u64); + } else { + /* Leftover bytes. */ +- if (is_rx) +- memcpy(addr + vring->cur_len, &data, +- len - vring->cur_len); +- else +- memcpy(&data, addr + vring->cur_len, +- len - vring->cur_len); ++ if (!IS_VRING_DROP(vring)) { ++ if (is_rx) ++ memcpy(addr + vring->cur_len, &data, ++ len - vring->cur_len); ++ else ++ memcpy(&data, addr + vring->cur_len, ++ len - vring->cur_len); ++ } + vring->cur_len = len; + } + +@@ -690,8 +707,16 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring, + /* Get the descriptor of the next packet. */ + if (!vring->desc) { + desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx); +- if (!desc) +- return false; ++ if (!desc) { ++ /* Drop next Rx packet to avoid stuck. 
*/ ++ if (is_rx) { ++ desc = &vring->drop_desc; ++ vring->desc_head = desc; ++ vring->desc = desc; ++ } else { ++ return false; ++ } ++ } + } else { + desc = vring->desc; + } +@@ -724,17 +749,24 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring, + vring->rem_len -= len; + + /* Get the next desc on the chain. */ +- if (vring->rem_len > 0 && ++ if (!IS_VRING_DROP(vring) && vring->rem_len > 0 && + (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) { + idx = virtio16_to_cpu(vdev, desc->next); + desc = &vr->desc[idx]; + goto mlxbf_tmfifo_desc_done; + } + +- /* Done and release the pending packet. */ +- mlxbf_tmfifo_release_pending_pkt(vring); ++ /* Done and release the packet. */ + desc = NULL; + fifo->vring[is_rx] = NULL; ++ if (!IS_VRING_DROP(vring)) { ++ mlxbf_tmfifo_release_pkt(vring); ++ } else { ++ vring->pkt_len = 0; ++ vring->desc_head = NULL; ++ vring->desc = NULL; ++ return false; ++ } + + /* + * Make sure the load/store are in order before +@@ -914,7 +946,7 @@ static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev) + + /* Release the pending packet. */ + if (vring->desc) +- mlxbf_tmfifo_release_pending_pkt(vring); ++ mlxbf_tmfifo_release_pkt(vring); + vq = vring->vq; + if (vq) { + vring->vq = NULL; +-- +2.40.1 + diff --git a/queue-5.10/r8152-check-budget-for-r8152_poll.patch b/queue-5.10/r8152-check-budget-for-r8152_poll.patch new file mode 100644 index 00000000000..3b7a62a7ec1 --- /dev/null +++ b/queue-5.10/r8152-check-budget-for-r8152_poll.patch @@ -0,0 +1,38 @@ +From a0741f54e7077889957ccd87d18cd72cc7322120 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Sep 2023 15:01:52 +0800 +Subject: r8152: check budget for r8152_poll() + +From: Hayes Wang + +[ Upstream commit a7b8d60b37237680009dd0b025fe8c067aba0ee3 ] + +According to the document of napi, there is no rx process when the +budget is 0. Therefore, r8152_poll() has to return 0 directly when the +budget is equal to 0. 
+ +Fixes: d2187f8e4454 ("r8152: divide the tx and rx bottom functions") +Signed-off-by: Hayes Wang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/r8152.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c +index f9a79d67d6d4f..cc7c86debfa27 100644 +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -2439,6 +2439,9 @@ static int r8152_poll(struct napi_struct *napi, int budget) + struct r8152 *tp = container_of(napi, struct r8152, napi); + int work_done; + ++ if (!budget) ++ return 0; ++ + work_done = rx_bottom(tp, budget); + + if (work_done < budget) { +-- +2.40.1 + diff --git a/queue-5.10/scsi-qla2xxx-consolidate-zio-threshold-setting-for-b.patch b/queue-5.10/scsi-qla2xxx-consolidate-zio-threshold-setting-for-b.patch new file mode 100644 index 00000000000..996984a59b5 --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-consolidate-zio-threshold-setting-for-b.patch @@ -0,0 +1,101 @@ +From e6b0872865c53481420a69d0503b2331207716c6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Mar 2021 01:52:21 -0700 +Subject: scsi: qla2xxx: Consolidate zio threshold setting for both FCP & NVMe + +From: Quinn Tran + +[ Upstream commit 5777fef788a59f5ac9ab6661988a95a045fc0574 ] + +Consolidate zio threshold setting for both FCP & NVMe to prevent one +protocol from clobbering the setting of the other protocol. + +Link: https://lore.kernel.org/r/20210329085229.4367-5-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Stable-dep-of: 6d0b65569c0a ("scsi: qla2xxx: Flush mailbox commands on chip reset") +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_def.h | 1 - + drivers/scsi/qla2xxx/qla_os.c | 34 ++++++++++++++-------------------- + 2 files changed, 14 insertions(+), 21 deletions(-) + +diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h +index 06b0ad2b51bb4..676e50142baaf 100644 +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -4706,7 +4706,6 @@ typedef struct scsi_qla_host { + #define FX00_CRITEMP_RECOVERY 25 + #define FX00_HOST_INFO_RESEND 26 + #define QPAIR_ONLINE_CHECK_NEEDED 27 +-#define SET_NVME_ZIO_THRESHOLD_NEEDED 28 + #define DETECT_SFP_CHANGE 29 + #define N2N_LOGIN_NEEDED 30 + #define IOCB_WORK_ACTIVE 31 +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index 78a335f862cee..bf40b293dcea6 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -6973,28 +6973,23 @@ qla2x00_do_dpc(void *data) + mutex_unlock(&ha->mq_lock); + } + +- if (test_and_clear_bit(SET_NVME_ZIO_THRESHOLD_NEEDED, +- &base_vha->dpc_flags)) { ++ if (test_and_clear_bit(SET_ZIO_THRESHOLD_NEEDED, ++ &base_vha->dpc_flags)) { ++ u16 threshold = ha->nvme_last_rptd_aen + ha->last_zio_threshold; ++ ++ if (threshold > ha->orig_fw_xcb_count) ++ threshold = ha->orig_fw_xcb_count; ++ + ql_log(ql_log_info, base_vha, 0xffffff, +- "nvme: SET ZIO Activity exchange threshold to %d.\n", +- ha->nvme_last_rptd_aen); +- if (qla27xx_set_zio_threshold(base_vha, +- ha->nvme_last_rptd_aen)) { ++ "SET ZIO Activity exchange threshold to %d.\n", ++ threshold); ++ if (qla27xx_set_zio_threshold(base_vha, threshold)) { + ql_log(ql_log_info, base_vha, 0xffffff, +- "nvme: Unable to SET ZIO Activity exchange threshold to %d.\n", +- ha->nvme_last_rptd_aen); ++ "Unable to SET ZIO Activity exchange threshold to %d.\n", ++ threshold); + } + } + +- if (test_and_clear_bit(SET_ZIO_THRESHOLD_NEEDED, +- 
&base_vha->dpc_flags)) { +- ql_log(ql_log_info, base_vha, 0xffffff, +- "SET ZIO Activity exchange threshold to %d.\n", +- ha->last_zio_threshold); +- qla27xx_set_zio_threshold(base_vha, +- ha->last_zio_threshold); +- } +- + if (!IS_QLAFX00(ha)) + qla2x00_do_dpc_all_vps(base_vha); + +@@ -7210,14 +7205,13 @@ qla2x00_timer(struct timer_list *t) + index = atomic_read(&ha->nvme_active_aen_cnt); + if (!vha->vp_idx && + (index != ha->nvme_last_rptd_aen) && +- (index >= DEFAULT_ZIO_THRESHOLD) && + ha->zio_mode == QLA_ZIO_MODE_6 && + !ha->flags.host_shutting_down) { ++ ha->nvme_last_rptd_aen = atomic_read(&ha->nvme_active_aen_cnt); + ql_log(ql_log_info, vha, 0x3002, + "nvme: Sched: Set ZIO exchange threshold to %d.\n", + ha->nvme_last_rptd_aen); +- ha->nvme_last_rptd_aen = atomic_read(&ha->nvme_active_aen_cnt); +- set_bit(SET_NVME_ZIO_THRESHOLD_NEEDED, &vha->dpc_flags); ++ set_bit(SET_ZIO_THRESHOLD_NEEDED, &vha->dpc_flags); + start_dpc++; + } + +-- +2.40.1 + diff --git a/queue-5.10/scsi-qla2xxx-fix-crash-in-pcie-error-handling.patch b/queue-5.10/scsi-qla2xxx-fix-crash-in-pcie-error-handling.patch new file mode 100644 index 00000000000..b56a4df29e8 --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-fix-crash-in-pcie-error-handling.patch @@ -0,0 +1,837 @@ +From ed81ee3c26c4abe8653c1e065a0b72787364c6fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Mar 2021 01:52:25 -0700 +Subject: scsi: qla2xxx: Fix crash in PCIe error handling + +From: Quinn Tran + +[ Upstream commit f7a0ed479e66ab177801301a1a72c37775c40450 ] + +BUG: unable to handle kernel NULL pointer dereference at (null) +IP: qla2x00_abort_isp+0x21/0x6b0 [qla2xxx] PGD 0 P4D 0 +Oops: 0000 [#1] SMP PTI +CPU: 0 PID: 1715 Comm: kworker/0:2 +Tainted: GOE 4.12.14-122.37-default #1 SLE12-SP5 +Hardware name: HPE Superdome Flex/Superdome Flex, BIOS +Bundle:3.30.100 SFW:IP147.007.004.017.000.2009211957 09/21/2020 +Workqueue: events aer_recover_work_func +task: ffff9e399c14ca80 task.stack: ffffc1c58e4ac000 +RIP: 
0010:qla2x00_abort_isp+0x21/0x6b0 [qla2xxx] +RSP: 0018:ffffc1c58e4afd50 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: ffff9e419cdef480 RCX: 0000000000000000 +RDX: ffff9e399c14ca80 RSI: 0000000000000246 RDI: ffff9e419bbc27b8 +RBP: ffff9e419bbc27b8 R08: 0000000000000004 R09: 00000000a0440000 +R10: 0000000000000000 R11: ffff9e399416d1a0 R12: ffff9e419cdef000 +R13: ffff9e3a7cfae800 R14: ffff9e3a7cfae800 R15: 00000000000000c0 +FS: 0000000000000000(0000) GS:ffff9e39a0000000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000000 CR3: 00000006cd00a005 CR4: 00000000007606f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + qla2xxx_pci_slot_reset+0x141/0x160 [qla2xxx] + report_slot_reset+0x41/0x80 + ? merge_result.part.4+0x30/0x30 + pci_walk_bus+0x70/0x90 + pcie_do_recovery+0x1db/0x2e0 + aer_recover_work_func+0xc2/0xf0 + process_one_work+0x14c/0x390 + +Disable board_disable logic where driver resources are freed while OS is in +the process of recovering the adapter. + +Link: https://lore.kernel.org/r/20210329085229.4367-9-njavali@marvell.com +Tested-by: Laurence Oberman +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Stable-dep-of: 6d0b65569c0a ("scsi: qla2xxx: Flush mailbox commands on chip reset") +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_dbg.c | 16 ++- + drivers/scsi/qla2xxx/qla_def.h | 10 ++ + drivers/scsi/qla2xxx/qla_gbl.h | 3 + + drivers/scsi/qla2xxx/qla_init.c | 40 ++++--- + drivers/scsi/qla2xxx/qla_inline.h | 46 ++++++++ + drivers/scsi/qla2xxx/qla_iocb.c | 60 +++++++++-- + drivers/scsi/qla2xxx/qla_isr.c | 9 +- + drivers/scsi/qla2xxx/qla_mbx.c | 3 +- + drivers/scsi/qla2xxx/qla_nvme.c | 10 +- + drivers/scsi/qla2xxx/qla_os.c | 173 ++++++++++++++++++------------ + 10 files changed, 265 insertions(+), 105 deletions(-) + +diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c +index 3e618d777082b..8e9ffbec6643f 100644 +--- a/drivers/scsi/qla2xxx/qla_dbg.c ++++ b/drivers/scsi/qla2xxx/qla_dbg.c +@@ -112,8 +112,13 @@ qla27xx_dump_mpi_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, + uint32_t stat; + ulong i, j, timer = 6000000; + int rval = QLA_FUNCTION_FAILED; ++ scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); + + clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); ++ ++ if (qla_pci_disconnected(vha, reg)) ++ return rval; ++ + for (i = 0; i < ram_dwords; i += dwords, addr += dwords) { + if (i + dwords > ram_dwords) + dwords = ram_dwords - i; +@@ -137,6 +142,9 @@ qla27xx_dump_mpi_ram(struct qla_hw_data *ha, uint32_t addr, uint32_t *ram, + while (timer--) { + udelay(5); + ++ if (qla_pci_disconnected(vha, reg)) ++ return rval; ++ + stat = rd_reg_dword(&reg->host_status); + /* Check for pending interrupts.
*/ + if (!(stat & HSRX_RISC_INT)) +@@ -191,9 +199,13 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, __be32 *ram, + uint32_t dwords = qla2x00_gid_list_size(ha) / 4; + uint32_t stat; + ulong i, j, timer = 6000000; ++ scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); + + clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); + ++ if (qla_pci_disconnected(vha, reg)) ++ return rval; ++ + for (i = 0; i < ram_dwords; i += dwords, addr += dwords) { + if (i + dwords > ram_dwords) + dwords = ram_dwords - i; +@@ -215,8 +227,10 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, __be32 *ram, + ha->flags.mbox_int = 0; + while (timer--) { + udelay(5); +- stat = rd_reg_dword(&reg->host_status); ++ if (qla_pci_disconnected(vha, reg)) ++ return rval; + ++ stat = rd_reg_dword(&reg->host_status); + /* Check for pending interrupts. */ + if (!(stat & HSRX_RISC_INT)) + continue; +diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h +index 676e50142baaf..9a09b36fd573c 100644 +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -396,6 +396,7 @@ typedef union { + } b; + } port_id_t; + #define INVALID_PORT_ID 0xFFFFFF ++#define ISP_REG16_DISCONNECT 0xFFFF + + static inline le_id_t be_id_to_le(be_id_t id) + { +@@ -3848,6 +3849,13 @@ struct qla_hw_data_stat { + u32 num_mpi_reset; + }; + ++/* refer to pcie_do_recovery reference */ ++typedef enum { ++ QLA_PCI_RESUME, ++ QLA_PCI_ERR_DETECTED, ++ QLA_PCI_MMIO_ENABLED, ++ QLA_PCI_SLOT_RESET, ++} pci_error_state_t; + /* + * Qlogic host adapter specific data structure.
+ */ +@@ -4586,6 +4594,7 @@ struct qla_hw_data { + #define DEFAULT_ZIO_THRESHOLD 5 + + struct qla_hw_data_stat stat; ++ pci_error_state_t pci_error_state; + }; + + struct active_regions { +@@ -4706,6 +4715,7 @@ typedef struct scsi_qla_host { + #define FX00_CRITEMP_RECOVERY 25 + #define FX00_HOST_INFO_RESEND 26 + #define QPAIR_ONLINE_CHECK_NEEDED 27 ++#define DO_EEH_RECOVERY 28 + #define DETECT_SFP_CHANGE 29 + #define N2N_LOGIN_NEEDED 30 + #define IOCB_WORK_ACTIVE 31 +diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h +index 7e5ee31581d61..8ef2de6822de9 100644 +--- a/drivers/scsi/qla2xxx/qla_gbl.h ++++ b/drivers/scsi/qla2xxx/qla_gbl.h +@@ -222,6 +222,7 @@ extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32); + + extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32); + extern void qla2x00_disable_board_on_pci_error(struct work_struct *); ++extern void qla_eeh_work(struct work_struct *); + extern void qla2x00_sp_compl(srb_t *sp, int); + extern void qla2xxx_qpair_sp_free_dma(srb_t *sp); + extern void qla2xxx_qpair_sp_compl(srb_t *sp, int); +@@ -233,6 +234,8 @@ int qla24xx_post_relogin_work(struct scsi_qla_host *vha); + void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *); + void qla24xx_process_purex_rdp(struct scsi_qla_host *vha, + struct purex_item *pkt); ++void qla_pci_set_eeh_busy(struct scsi_qla_host *); ++void qla_schedule_eeh_work(struct scsi_qla_host *); + + /* + * Global Functions in qla_mid.c source file. 
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c +index 3c24a52d5a909..f24f087c733b1 100644 +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -6982,22 +6982,18 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + +- if (!ha->flags.eeh_busy) { +- /* Make sure for ISP 82XX IO DMA is complete */ +- if (IS_P3P_TYPE(ha)) { +- qla82xx_chip_reset_cleanup(vha); +- ql_log(ql_log_info, vha, 0x00b4, +- "Done chip reset cleanup.\n"); +- +- /* Done waiting for pending commands. +- * Reset the online flag. +- */ +- vha->flags.online = 0; +- } ++ /* Make sure for ISP 82XX IO DMA is complete */ ++ if (IS_P3P_TYPE(ha)) { ++ qla82xx_chip_reset_cleanup(vha); ++ ql_log(ql_log_info, vha, 0x00b4, ++ "Done chip reset cleanup.\n"); + +- /* Requeue all commands in outstanding command list. */ +- qla2x00_abort_all_cmds(vha, DID_RESET << 16); ++ /* Done waiting for pending commands. Reset online flag */ ++ vha->flags.online = 0; + } ++ ++ /* Requeue all commands in outstanding command list. 
*/ ++ qla2x00_abort_all_cmds(vha, DID_RESET << 16); + /* memory barrier */ + wmb(); + } +@@ -7025,6 +7021,12 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) + if (vha->flags.online) { + qla2x00_abort_isp_cleanup(vha); + ++ if (qla2x00_isp_reg_stat(ha)) { ++ ql_log(ql_log_info, vha, 0x803f, ++ "ISP Abort - ISP reg disconnect, exiting.\n"); ++ return status; ++ } ++ + if (test_and_clear_bit(ISP_ABORT_TO_ROM, &vha->dpc_flags)) { + ha->flags.chip_reset_done = 1; + vha->flags.online = 1; +@@ -7065,8 +7067,18 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) + + ha->isp_ops->get_flash_version(vha, req->ring); + ++ if (qla2x00_isp_reg_stat(ha)) { ++ ql_log(ql_log_info, vha, 0x803f, ++ "ISP Abort - ISP reg disconnect pre nvram config, exiting.\n"); ++ return status; ++ } + ha->isp_ops->nvram_config(vha); + ++ if (qla2x00_isp_reg_stat(ha)) { ++ ql_log(ql_log_info, vha, 0x803f, ++ "ISP Abort - ISP reg disconnect post nvmram config, exiting.\n"); ++ return status; ++ } + if (!qla2x00_restart_isp(vha)) { + clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags); + +diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h +index 7e8b59a0954bb..47ee5b9f2a55c 100644 +--- a/drivers/scsi/qla2xxx/qla_inline.h ++++ b/drivers/scsi/qla2xxx/qla_inline.h +@@ -435,3 +435,49 @@ qla_put_iocbs(struct qla_qpair *qp, struct iocb_resource *iores) + } + iores->res_type = RESOURCE_NONE; + } ++ ++#define ISP_REG_DISCONNECT 0xffffffffU ++/************************************************************************** ++ * qla2x00_isp_reg_stat ++ * ++ * Description: ++ * Read the host status register of ISP before aborting the command. ++ * ++ * Input: ++ * ha = pointer to host adapter structure. ++ * ++ * ++ * Returns: ++ * Either true or false. ++ * ++ * Note: Return true if there is register disconnect. 
++ **************************************************************************/ ++static inline ++uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha) ++{ ++ struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; ++ struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82; ++ ++ if (IS_P3P_TYPE(ha)) ++ return ((rd_reg_dword(&reg82->host_int)) == ISP_REG_DISCONNECT); ++ else ++ return ((rd_reg_dword(&reg->host_status)) == ++ ISP_REG_DISCONNECT); ++} ++ ++static inline ++bool qla_pci_disconnected(struct scsi_qla_host *vha, ++ struct device_reg_24xx __iomem *reg) ++{ ++ uint32_t stat; ++ bool ret = false; ++ ++ stat = rd_reg_dword(&reg->host_status); ++ if (stat == 0xffffffff) { ++ ql_log(ql_log_info, vha, 0x8041, ++ "detected PCI disconnect.\n"); ++ qla_schedule_eeh_work(vha); ++ ret = true; ++ } ++ return ret; ++} +diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c +index 54fc0afbc02ac..1752a62031710 100644 +--- a/drivers/scsi/qla2xxx/qla_iocb.c ++++ b/drivers/scsi/qla2xxx/qla_iocb.c +@@ -1644,8 +1644,14 @@ qla24xx_start_scsi(srb_t *sp) + goto queuing_error; + + if (req->cnt < (req_cnt + 2)) { +- cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : +- rd_reg_dword_relaxed(req->req_q_out); ++ if (IS_SHADOW_REG_CAPABLE(ha)) { ++ cnt = *req->out_ptr; ++ } else { ++ cnt = rd_reg_dword_relaxed(req->req_q_out); ++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) ++ goto queuing_error; ++ } ++ + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else +@@ -1836,8 +1842,13 @@ qla24xx_dif_start_scsi(srb_t *sp) + goto queuing_error; + + if (req->cnt < (req_cnt + 2)) { +- cnt = IS_SHADOW_REG_CAPABLE(ha) ?
*req->out_ptr : +- rd_reg_dword_relaxed(req->req_q_out); ++ if (IS_SHADOW_REG_CAPABLE(ha)) { ++ cnt = *req->out_ptr; ++ } else { ++ cnt = rd_reg_dword_relaxed(req->req_q_out); ++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) ++ goto queuing_error; ++ } + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else +@@ -1911,6 +1922,7 @@ qla24xx_dif_start_scsi(srb_t *sp) + + qla_put_iocbs(sp->qpair, &sp->iores); + spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ + return QLA_FUNCTION_FAILED; + } + +@@ -1978,8 +1990,14 @@ qla2xxx_start_scsi_mq(srb_t *sp) + goto queuing_error; + + if (req->cnt < (req_cnt + 2)) { +- cnt = IS_SHADOW_REG_CAPABLE(ha) ? *req->out_ptr : +- rd_reg_dword_relaxed(req->req_q_out); ++ if (IS_SHADOW_REG_CAPABLE(ha)) { ++ cnt = *req->out_ptr; ++ } else { ++ cnt = rd_reg_dword_relaxed(req->req_q_out); ++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) ++ goto queuing_error; ++ } ++ + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else +@@ -2185,8 +2203,14 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp) + goto queuing_error; + + if (req->cnt < (req_cnt + 2)) { +- cnt = IS_SHADOW_REG_CAPABLE(ha) ? 
*req->out_ptr : +- rd_reg_dword_relaxed(req->req_q_out); ++ if (IS_SHADOW_REG_CAPABLE(ha)) { ++ cnt = *req->out_ptr; ++ } else { ++ cnt = rd_reg_dword_relaxed(req->req_q_out); ++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) ++ goto queuing_error; ++ } ++ + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else +@@ -2263,6 +2287,7 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp) + + qla_put_iocbs(sp->qpair, &sp->iores); + spin_unlock_irqrestore(&qpair->qp_lock, flags); ++ + return QLA_FUNCTION_FAILED; + } + +@@ -2307,6 +2332,11 @@ __qla2x00_alloc_iocbs(struct qla_qpair *qpair, srb_t *sp) + cnt = qla2x00_debounce_register( + ISP_REQ_Q_OUT(ha, &reg->isp)); + ++ if (!qpair->use_shadow_reg && cnt == ISP_REG16_DISCONNECT) { ++ qla_schedule_eeh_work(vha); ++ return NULL; ++ } ++ + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else +@@ -3711,6 +3741,9 @@ qla2x00_start_sp(srb_t *sp) + void *pkt; + unsigned long flags; + ++ if (vha->hw->flags.eeh_busy) ++ return -EIO; ++ + spin_lock_irqsave(qp->qp_lock_ptr, flags); + pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); + if (!pkt) { +@@ -3928,8 +3961,14 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds) + + /* Check for room on request queue. */ + if (req->cnt < req_cnt + 2) { +- cnt = IS_SHADOW_REG_CAPABLE(ha) ?
*req->out_ptr : +- rd_reg_dword_relaxed(req->req_q_out); ++ if (IS_SHADOW_REG_CAPABLE(ha)) { ++ cnt = *req->out_ptr; ++ } else { ++ cnt = rd_reg_dword_relaxed(req->req_q_out); ++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) ++ goto queuing_error; ++ } ++ + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else +@@ -3968,5 +4007,6 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds) + qla2x00_start_iocbs(vha, req); + queuing_error: + spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ + return rval; + } +diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c +index 5678cf23c44bc..fd0beb194e351 100644 +--- a/drivers/scsi/qla2xxx/qla_isr.c ++++ b/drivers/scsi/qla2xxx/qla_isr.c +@@ -269,12 +269,7 @@ qla2x00_check_reg32_for_disconnect(scsi_qla_host_t *vha, uint32_t reg) + if (!test_and_set_bit(PFLG_DISCONNECTED, &vha->pci_flags) && + !test_bit(PFLG_DRIVER_REMOVING, &vha->pci_flags) && + !test_bit(PFLG_DRIVER_PROBING, &vha->pci_flags)) { +- /* +- * Schedule this (only once) on the default system +- * workqueue so that all the adapter workqueues and the +- * DPC thread can be shutdown cleanly. 
+- */ +- schedule_work(&vha->hw->board_disable); ++ qla_schedule_eeh_work(vha); + } + return true; + } else +@@ -1643,8 +1638,6 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) + case MBA_TEMPERATURE_ALERT: + ql_dbg(ql_dbg_async, vha, 0x505e, + "TEMPERATURE ALERT: %04x %04x %04x\n", mb[1], mb[2], mb[3]); +- if (mb[1] == 0x12) +- schedule_work(&ha->board_disable); + break; + + case MBA_TRANS_INSERT: +diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c +index 6ff720d8961d0..8b65e8a82ec99 100644 +--- a/drivers/scsi/qla2xxx/qla_mbx.c ++++ b/drivers/scsi/qla2xxx/qla_mbx.c +@@ -167,7 +167,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) + /* check if ISP abort is active and return cmd with timeout */ + if ((test_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags) || + test_bit(ISP_ABORT_RETRY, &base_vha->dpc_flags) || +- test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags)) && ++ test_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags) || ++ ha->flags.eeh_busy) && + !is_rom_cmd(mcp->mb[0])) { + ql_log(ql_log_info, vha, 0x1005, + "Cmd 0x%x aborted with timeout since ISP Abort is pending\n", +diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c +index 840dc1e10a233..6dad7787f20de 100644 +--- a/drivers/scsi/qla2xxx/qla_nvme.c ++++ b/drivers/scsi/qla2xxx/qla_nvme.c +@@ -397,8 +397,13 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp) + } + req_cnt = qla24xx_calc_iocbs(vha, tot_dsds); + if (req->cnt < (req_cnt + 2)) { +- cnt = IS_SHADOW_REG_CAPABLE(ha) ? 
*req->out_ptr : +- rd_reg_dword_relaxed(req->req_q_out); ++ if (IS_SHADOW_REG_CAPABLE(ha)) { ++ cnt = *req->out_ptr; ++ } else { ++ cnt = rd_reg_dword_relaxed(req->req_q_out); ++ if (qla2x00_check_reg16_for_disconnect(vha, cnt)) ++ goto queuing_error; ++ } + + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; +@@ -535,6 +540,7 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp) + + queuing_error: + spin_unlock_irqrestore(&qpair->qp_lock, flags); ++ + return rval; + } + +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index bf40b293dcea6..5a4df566afd16 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -961,6 +961,13 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, + goto qc24_fail_command; + } + ++ if (!qpair->online) { ++ ql_dbg(ql_dbg_io, vha, 0x3077, ++ "qpair not online. eeh_busy=%d.\n", ha->flags.eeh_busy); ++ cmd->result = DID_NO_CONNECT << 16; ++ goto qc24_fail_command; ++ } ++ + if (!fcport || fcport->deleted) { + cmd->result = DID_IMM_RETRY << 16; + goto qc24_fail_command; +@@ -1190,35 +1197,6 @@ qla2x00_wait_for_chip_reset(scsi_qla_host_t *vha) + return return_status; + } + +-#define ISP_REG_DISCONNECT 0xffffffffU +-/************************************************************************** +-* qla2x00_isp_reg_stat +-* +-* Description: +-* Read the host status register of ISP before aborting the command. +-* +-* Input: +-* ha = pointer to host adapter structure. +-* +-* +-* Returns: +-* Either true or false. +-* +-* Note: Return true if there is register disconnect. 
+-**************************************************************************/ +-static inline +-uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha) +-{ +- struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; +- struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82; +- +- if (IS_P3P_TYPE(ha)) +- return ((rd_reg_dword(&reg82->host_int)) == ISP_REG_DISCONNECT); +- else +- return ((rd_reg_dword(&reg->host_status)) == +- ISP_REG_DISCONNECT); +-} +- + /************************************************************************** + * qla2xxx_eh_abort + * +@@ -1253,6 +1231,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, vha, 0x8042, + "PCI/Register disconnect, exiting.\n"); ++ qla_pci_set_eeh_busy(vha); + return FAILED; + } + +@@ -1444,6 +1423,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd) + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, vha, 0x803e, + "PCI/Register disconnect, exiting.\n"); ++ qla_pci_set_eeh_busy(vha); + return FAILED; + } + +@@ -1460,6 +1440,7 @@ qla2xxx_eh_target_reset(struct scsi_cmnd *cmd) + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, vha, 0x803f, + "PCI/Register disconnect, exiting.\n"); ++ qla_pci_set_eeh_busy(vha); + return FAILED; + } + +@@ -1495,6 +1476,7 @@ qla2xxx_eh_bus_reset(struct scsi_cmnd *cmd) + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, vha, 0x8040, + "PCI/Register disconnect, exiting.\n"); ++ qla_pci_set_eeh_busy(vha); + return FAILED; + } + +@@ -1572,7 +1554,7 @@ qla2xxx_eh_host_reset(struct scsi_cmnd *cmd) + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, vha, 0x8041, + "PCI/Register disconnect, exiting.\n"); +- schedule_work(&ha->board_disable); ++ qla_pci_set_eeh_busy(vha); + return SUCCESS; + } + +@@ -6677,6 +6659,9 @@ qla2x00_do_dpc(void *data) + + schedule(); + ++ if (test_and_clear_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags)) ++ qla_pci_set_eeh_busy(base_vha); ++ + if (!base_vha->flags.init_done || ha->flags.mbox_busy) + goto end_loop;
+ +@@ -7384,6 +7369,8 @@ static void qla_pci_error_cleanup(scsi_qla_host_t *vha) + int i; + unsigned long flags; + ++ ql_dbg(ql_dbg_aer, vha, 0x9000, ++ "%s\n", __func__); + ha->chip_reset++; + + ha->base_qpair->chip_reset = ha->chip_reset; +@@ -7393,28 +7380,16 @@ static void qla_pci_error_cleanup(scsi_qla_host_t *vha) + ha->base_qpair->chip_reset; + } + +- /* purge MBox commands */ +- if (atomic_read(&ha->num_pend_mbx_stage3)) { +- clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); +- complete(&ha->mbx_intr_comp); +- } +- +- i = 0; +- +- while (atomic_read(&ha->num_pend_mbx_stage3) || +- atomic_read(&ha->num_pend_mbx_stage2) || +- atomic_read(&ha->num_pend_mbx_stage1)) { +- msleep(20); +- i++; +- if (i > 50) +- break; +- } +- +- ha->flags.purge_mbox = 0; ++ /* ++ * purge mailbox might take a while. Slot Reset/chip reset ++ * will take care of the purge ++ */ + + mutex_lock(&ha->mq_lock); ++ ha->base_qpair->online = 0; + list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem) + qpair->online = 0; ++ wmb(); + mutex_unlock(&ha->mq_lock); + + qla2x00_mark_all_devices_lost(vha); +@@ -7451,14 +7426,17 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) + { + scsi_qla_host_t *vha = pci_get_drvdata(pdev); + struct qla_hw_data *ha = vha->hw; ++ pci_ers_result_t ret = PCI_ERS_RESULT_NEED_RESET; + +- ql_dbg(ql_dbg_aer, vha, 0x9000, +- "PCI error detected, state %x.\n", state); ++ ql_log(ql_log_warn, vha, 0x9000, ++ "PCI error detected, state %x.\n", state); ++ ha->pci_error_state = QLA_PCI_ERR_DETECTED; + + if (!atomic_read(&pdev->enable_cnt)) { + ql_log(ql_log_info, vha, 0xffff, + "PCI device is disabled,state %x\n", state); +- return PCI_ERS_RESULT_NEED_RESET; ++ ret = PCI_ERS_RESULT_NEED_RESET; ++ goto out; + } + + switch (state) { +@@ -7468,11 +7446,12 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) + set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } +- return 
PCI_ERS_RESULT_CAN_RECOVER; ++ ret = PCI_ERS_RESULT_CAN_RECOVER; ++ break; + case pci_channel_io_frozen: +- ha->flags.eeh_busy = 1; +- qla_pci_error_cleanup(vha); +- return PCI_ERS_RESULT_NEED_RESET; ++ qla_pci_set_eeh_busy(vha); ++ ret = PCI_ERS_RESULT_NEED_RESET; ++ break; + case pci_channel_io_perm_failure: + ha->flags.pci_channel_io_perm_failure = 1; + qla2x00_abort_all_cmds(vha, DID_NO_CONNECT << 16); +@@ -7480,9 +7459,12 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) + set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } +- return PCI_ERS_RESULT_DISCONNECT; ++ ret = PCI_ERS_RESULT_DISCONNECT; + } +- return PCI_ERS_RESULT_NEED_RESET; ++out: ++ ql_dbg(ql_dbg_aer, vha, 0x600d, ++ "PCI error detected returning [%x].\n", ret); ++ return ret; + } + + static pci_ers_result_t +@@ -7496,6 +7478,10 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) + struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; + struct device_reg_24xx __iomem *reg24 = &ha->iobase->isp24; + ++ ql_log(ql_log_warn, base_vha, 0x9000, ++ "mmio enabled\n"); ++ ++ ha->pci_error_state = QLA_PCI_MMIO_ENABLED; + if (IS_QLA82XX(ha)) + return PCI_ERS_RESULT_RECOVERED; + +@@ -7519,10 +7505,11 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev) + ql_log(ql_log_info, base_vha, 0x9003, + "RISC paused -- mmio_enabled, Dumping firmware.\n"); + qla2xxx_dump_fw(base_vha); +- +- return PCI_ERS_RESULT_NEED_RESET; +- } else +- return PCI_ERS_RESULT_RECOVERED; ++ } ++ /* set PCI_ERS_RESULT_NEED_RESET to trigger call to qla2xxx_pci_slot_reset */ ++ ql_dbg(ql_dbg_aer, base_vha, 0x600d, ++ "mmio enabled returning.\n"); ++ return PCI_ERS_RESULT_NEED_RESET; + } + + static pci_ers_result_t +@@ -7534,9 +7521,10 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev) + int rc; + struct qla_qpair *qpair = NULL; + +- ql_dbg(ql_dbg_aer, base_vha, 0x9004, +- "Slot Reset.\n"); ++ ql_log(ql_log_warn, base_vha, 0x9004, ++ "Slot Reset.\n"); + ++ ha->pci_error_state = 
QLA_PCI_SLOT_RESET; + /* Workaround: qla2xxx driver which access hardware earlier + * needs error state to be pci_channel_io_online. + * Otherwise mailbox command timesout. +@@ -7570,16 +7558,24 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev) + qpair->online = 1; + mutex_unlock(&ha->mq_lock); + ++ ha->flags.eeh_busy = 0; + base_vha->flags.online = 1; + set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); +- if (ha->isp_ops->abort_isp(base_vha) == QLA_SUCCESS) +- ret = PCI_ERS_RESULT_RECOVERED; ++ ha->isp_ops->abort_isp(base_vha); + clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); + ++ if (qla2x00_isp_reg_stat(ha)) { ++ ha->flags.eeh_busy = 1; ++ qla_pci_error_cleanup(base_vha); ++ ql_log(ql_log_warn, base_vha, 0x9005, ++ "Device unable to recover from PCI error.\n"); ++ } else { ++ ret = PCI_ERS_RESULT_RECOVERED; ++ } + + exit_slot_reset: + ql_dbg(ql_dbg_aer, base_vha, 0x900e, +- "slot_reset return %x.\n", ret); ++ "Slot Reset returning %x.\n", ret); + + return ret; + } +@@ -7591,16 +7587,55 @@ qla2xxx_pci_resume(struct pci_dev *pdev) + struct qla_hw_data *ha = base_vha->hw; + int ret; + +- ql_dbg(ql_dbg_aer, base_vha, 0x900f, +- "pci_resume.\n"); ++ ql_log(ql_log_warn, base_vha, 0x900f, ++ "Pci Resume.\n"); + +- ha->flags.eeh_busy = 0; + + ret = qla2x00_wait_for_hba_online(base_vha); + if (ret != QLA_SUCCESS) { + ql_log(ql_log_fatal, base_vha, 0x9002, + "The device failed to resume I/O from slot/link_reset.\n"); + } ++ ha->pci_error_state = QLA_PCI_RESUME; ++ ql_dbg(ql_dbg_aer, base_vha, 0x600d, ++ "Pci Resume returning.\n"); ++} ++ ++void qla_pci_set_eeh_busy(struct scsi_qla_host *vha) ++{ ++ struct qla_hw_data *ha = vha->hw; ++ struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); ++ bool do_cleanup = false; ++ unsigned long flags; ++ ++ if (ha->flags.eeh_busy) ++ return; ++ ++ spin_lock_irqsave(&base_vha->work_lock, flags); ++ if (!ha->flags.eeh_busy) { ++ ha->flags.eeh_busy = 1; ++ do_cleanup = true; ++ } ++ spin_unlock_irqrestore(&base_vha->work_lock, 
flags); ++ ++ if (do_cleanup) ++ qla_pci_error_cleanup(base_vha); ++} ++ ++/* ++ * this routine will schedule a task to pause IO from interrupt context ++ * if caller sees a PCIE error event (register read = 0xf's) ++ */ ++void qla_schedule_eeh_work(struct scsi_qla_host *vha) ++{ ++ struct qla_hw_data *ha = vha->hw; ++ struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); ++ ++ if (ha->flags.eeh_busy) ++ return; ++ ++ set_bit(DO_EEH_RECOVERY, &base_vha->dpc_flags); ++ qla2xxx_wake_dpc(base_vha); + } + + static void +-- +2.40.1 + diff --git a/queue-5.10/scsi-qla2xxx-flush-mailbox-commands-on-chip-reset.patch b/queue-5.10/scsi-qla2xxx-flush-mailbox-commands-on-chip-reset.patch new file mode 100644 index 00000000000..b8feab96cb3 --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-flush-mailbox-commands-on-chip-reset.patch @@ -0,0 +1,115 @@ +From 38b64e6901cb5af9c257b50cc6b3cf593d77792a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Aug 2023 18:30:38 +0530 +Subject: scsi: qla2xxx: Flush mailbox commands on chip reset + +From: Quinn Tran + +[ Upstream commit 6d0b65569c0a10b27c49bacd8d25bcd406003533 ] + +Fix race condition between Interrupt thread and Chip reset thread in trying +to flush the same mailbox. With the race condition, the "ha->mbx_intr_comp" +will get an extra complete() call. The extra complete call create erroneous +mailbox timeout condition when the next mailbox is sent where the mailbox +call does not wait for interrupt to arrive. Instead, it advances without +waiting. + +Add lock protection around the check for mailbox completion. + +Cc: stable@vger.kernel.org +Fixes: b2000805a975 ("scsi: qla2xxx: Flush mailbox commands on chip reset") +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20230821130045.34850-3-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_def.h | 1 - + drivers/scsi/qla2xxx/qla_init.c | 7 ++++--- + drivers/scsi/qla2xxx/qla_mbx.c | 4 ---- + drivers/scsi/qla2xxx/qla_os.c | 1 - + 4 files changed, 4 insertions(+), 9 deletions(-) + +diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h +index 9a09b36fd573c..6645b69fc2a0f 100644 +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -4200,7 +4200,6 @@ struct qla_hw_data { + uint8_t aen_mbx_count; + atomic_t num_pend_mbx_stage1; + atomic_t num_pend_mbx_stage2; +- atomic_t num_pend_mbx_stage3; + uint16_t frame_payload_size; + + uint32_t login_retry_count; +diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c +index f24f087c733b1..a8d2c06285c24 100644 +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -6926,14 +6926,15 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) + } + + /* purge MBox commands */ +- if (atomic_read(&ha->num_pend_mbx_stage3)) { ++ spin_lock_irqsave(&ha->hardware_lock, flags); ++ if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags)) { + clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); + complete(&ha->mbx_intr_comp); + } ++ spin_unlock_irqrestore(&ha->hardware_lock, flags); + + i = 0; +- while (atomic_read(&ha->num_pend_mbx_stage3) || +- atomic_read(&ha->num_pend_mbx_stage2) || ++ while (atomic_read(&ha->num_pend_mbx_stage2) || + atomic_read(&ha->num_pend_mbx_stage1)) { + msleep(20); + i++; +diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c +index 8b65e8a82ec99..21ba7100ff676 100644 +--- a/drivers/scsi/qla2xxx/qla_mbx.c ++++ b/drivers/scsi/qla2xxx/qla_mbx.c +@@ -269,7 +269,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + wait_time = jiffies; +- atomic_inc(&ha->num_pend_mbx_stage3); + if (!wait_for_completion_timeout(&ha->mbx_intr_comp, + mcp->tov * HZ)) { + ql_dbg(ql_dbg_mbx, 
vha, 0x117a, +@@ -284,7 +283,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) + spin_unlock_irqrestore(&ha->hardware_lock, + flags); + atomic_dec(&ha->num_pend_mbx_stage2); +- atomic_dec(&ha->num_pend_mbx_stage3); + rval = QLA_ABORTED; + goto premature_exit; + } +@@ -294,11 +292,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) + ha->flags.mbox_busy = 0; + spin_unlock_irqrestore(&ha->hardware_lock, flags); + atomic_dec(&ha->num_pend_mbx_stage2); +- atomic_dec(&ha->num_pend_mbx_stage3); + rval = QLA_ABORTED; + goto premature_exit; + } +- atomic_dec(&ha->num_pend_mbx_stage3); + + if (time_after(jiffies, wait_time + 5 * HZ)) + ql_log(ql_log_warn, vha, 0x1015, "cmd=0x%x, waited %d msecs\n", +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index 5a4df566afd16..8d199deaf3b12 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -2848,7 +2848,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) + ha->max_exchg = FW_MAX_EXCHANGES_CNT; + atomic_set(&ha->num_pend_mbx_stage1, 0); + atomic_set(&ha->num_pend_mbx_stage2, 0); +- atomic_set(&ha->num_pend_mbx_stage3, 0); + atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD); + ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD; + +-- +2.40.1 + diff --git a/queue-5.10/scsi-qla2xxx-if-fcport-is-undergoing-deletion-comple.patch b/queue-5.10/scsi-qla2xxx-if-fcport-is-undergoing-deletion-comple.patch new file mode 100644 index 00000000000..5a39b3e5142 --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-if-fcport-is-undergoing-deletion-comple.patch @@ -0,0 +1,83 @@ +From c8bcc6b7bc9ce22fcdf3a9a2657a36ea104010de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Dec 2020 05:23:10 -0800 +Subject: scsi: qla2xxx: If fcport is undergoing deletion complete I/O with + retry + +From: Saurav Kashyap + +[ Upstream commit 707531bc2626c1959a03b93566ebb4e629c99276 ] + +Driver unload with I/Os in flight causes server to crash. 
Complete I/O +with DID_IMM_RETRY if fcport undergoing deletion. + +CPU: 44 PID: 35008 Comm: qla2xxx_4_dpc Kdump: loaded Tainted: G +OE X 5.3.18-22-default #1 SLE15-SP2 (unreleased) +Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 07/16/2020 +RIP: 0010:dma_direct_unmap_sg+0x24/0x60 +Code: 4c 8b 04 24 eb b9 0f 1f 44 00 00 85 d2 7e 4e 41 57 + 4d 89 c7 41 56 41 89 ce 41 55 49 89 fd 41 54 41 89 d4 55 31 ed 53 48 89 + f3 <8b> 53 18 48 8b 73 10 4d 89 f8 44 89 f1 4c 89 ef 83 c5 01 e8 44 ff +RSP: 0018:ffffc0c661037d88 EFLAGS: 00010046 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000002 +RDX: 000000000000001d RSI: 0000000000000000 RDI: ffff9a51ee53b0b0 +RBP: 0000000000000000 R08: 0000000000000000 R09: ffff9a51ee53b0b0 +R10: ffffc0c646463dc8 R11: ffff9a4a067087c8 R12: 000000000000001d +R13: ffff9a51ee53b0b0 R14: 0000000000000002 R15: 0000000000000000 +FS: 0000000000000000(0000) GS:ffff9a523f800000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000018 CR3: 000000043740a004 CR4: 00000000007606e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: +qla2xxx_qpair_sp_free_dma+0x20d/0x3c0 [qla2xxx] +qla2xxx_qpair_sp_compl+0x35/0x90 [qla2xxx] +__qla2x00_abort_all_cmds+0x180/0x390 [qla2xxx] +? qla24xx_process_purex_list+0x100/0x100 [qla2xxx] +qla2x00_abort_all_cmds+0x5e/0x80 [qla2xxx] +qla2x00_do_dpc+0x317/0xa30 [qla2xxx] +kthread+0x10d/0x130 +? kthread_park+0xa0/0xa0 +ret_from_fork+0x35/0x40 + +Link: https://lore.kernel.org/r/20201202132312.19966-14-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Saurav Kashyap +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Stable-dep-of: 6d0b65569c0a ("scsi: qla2xxx: Flush mailbox commands on chip reset") +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_os.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index f1e7868787d4a..78a335f862cee 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -879,8 +879,8 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) + goto qc24_fail_command; + } + +- if (!fcport) { +- cmd->result = DID_NO_CONNECT << 16; ++ if (!fcport || fcport->deleted) { ++ cmd->result = DID_IMM_RETRY << 16; + goto qc24_fail_command; + } + +@@ -961,8 +961,8 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, + goto qc24_fail_command; + } + +- if (!fcport) { +- cmd->result = DID_NO_CONNECT << 16; ++ if (!fcport || fcport->deleted) { ++ cmd->result = DID_IMM_RETRY << 16; + goto qc24_fail_command; + } + +-- +2.40.1 + diff --git a/queue-5.10/selftests-kselftest-runner-run_one-allow-running-non.patch b/queue-5.10/selftests-kselftest-runner-run_one-allow-running-non.patch new file mode 100644 index 00000000000..48162ae7692 --- /dev/null +++ b/queue-5.10/selftests-kselftest-runner-run_one-allow-running-non.patch @@ -0,0 +1,89 @@ +From 97a2898a9fa6fb3fda920c313a4305e83bb50122 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Nov 2021 18:35:56 -0800 +Subject: selftests/kselftest/runner/run_one(): allow running non-executable + files + +From: SeongJae Park + +[ Upstream commit 303f8e2d02002dbe331cab7813ee091aead3cd39 ] + +When running a test program, 'run_one()' checks if the program has the +execution permission and fails if it doesn't. However, it's easy to +mistakenly lose the permissions, as some common tools like 'diff' don't +support the permission change well[1]. 
Compared to that, making mistakes +in the test program's path would only rare, as those are explicitly listed +in 'TEST_PROGS'. Therefore, it might make more sense to resolve the +situation on our own and run the program. + +For this reason, this commit makes the test program runner function still +print the warning message but to try parsing the interpreter of the +program and to explicitly run it with the interpreter, in this case. + +[1] https://lore.kernel.org/mm-commits/YRJisBs9AunccCD4@kroah.com/ + +Link: https://lkml.kernel.org/r/20210810164534.25902-1-sj38.park@gmail.com +Signed-off-by: SeongJae Park +Suggested-by: Greg Kroah-Hartman +Cc: Shuah Khan +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 9616cb34b08e ("kselftest/runner.sh: Propagate SIGTERM to runner child") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/kselftest/runner.sh | 28 +++++++++++++-------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh +index cc9c846585f05..a9ba782d8ca0f 100644 +--- a/tools/testing/selftests/kselftest/runner.sh ++++ b/tools/testing/selftests/kselftest/runner.sh +@@ -33,9 +33,9 @@ tap_timeout() + { + # Make sure tests will time out if utility is available. + if [ -x /usr/bin/timeout ] ; then +- /usr/bin/timeout --foreground "$kselftest_timeout" "$1" ++ /usr/bin/timeout --foreground "$kselftest_timeout" $1 + else +- "$1" ++ $1 + fi + } + +@@ -65,17 +65,25 @@ run_one() + + TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" + echo "# $TEST_HDR_MSG" +- if [ ! -x "$TEST" ]; then +- echo -n "# Warning: file $TEST is " +- if [ ! -e "$TEST" ]; then +- echo "missing!" +- else +- echo "not executable, correct this." +- fi ++ if [ ! -e "$TEST" ]; then ++ echo "# Warning: file $TEST is missing!" + echo "not ok $test_num $TEST_HDR_MSG" + else ++ cmd="./$BASENAME_TEST" ++ if [ ! 
-x "$TEST" ]; then ++ echo "# Warning: file $TEST is not executable" ++ ++ if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] ++ then ++ interpreter=$(head -n 1 "$TEST" | cut -c 3-) ++ cmd="$interpreter ./$BASENAME_TEST" ++ else ++ echo "not ok $test_num $TEST_HDR_MSG" ++ return ++ fi ++ fi + cd `dirname $TEST` > /dev/null +- ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | ++ ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) | + tap_prefix >&4) 3>&1) | + (read xs; exit $xs)) 4>>"$logfile" && + echo "ok $test_num $TEST_HDR_MSG") || +-- +2.40.1 + diff --git a/queue-5.10/series b/queue-5.10/series index 21988ef74c9..236290b9cf9 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -378,3 +378,27 @@ drm-amd-display-prevent-potential-division-by-zero-errors.patch perf-hists-browser-fix-hierarchy-mode-header.patch perf-tools-handle-old-data-in-perf_record_attr.patch perf-hists-browser-fix-the-number-of-entries-for-e-key.patch +acpi-apei-explicit-init-of-hest-and-ghes-in-apci_ini.patch +arm64-sdei-abort-running-sdei-handlers-during-crash.patch +scsi-qla2xxx-if-fcport-is-undergoing-deletion-comple.patch +scsi-qla2xxx-consolidate-zio-threshold-setting-for-b.patch +scsi-qla2xxx-fix-crash-in-pcie-error-handling.patch +scsi-qla2xxx-flush-mailbox-commands-on-chip-reset.patch +arm-dts-samsung-exynos4210-i9100-fix-lcd-screen-s-ph.patch +arm-dts-bcm5301x-extend-ram-to-full-256mb-for-linksy.patch +bus-mhi-host-skip-mhi-reset-if-device-is-in-rddm.patch +net-ipv4-fix-one-memleak-in-__inet_del_ifa.patch +selftests-kselftest-runner-run_one-allow-running-non.patch +kselftest-runner.sh-propagate-sigterm-to-runner-chil.patch +net-smc-use-smc_lgr_list.lock-to-protect-smc_lgr_lis.patch +net-ethernet-mvpp2_main-fix-possible-oob-write-in-mv.patch +net-ethernet-mtk_eth_soc-fix-possible-null-pointer-d.patch +hsr-fix-uninit-value-access-in-fill_frame_info.patch +r8152-check-budget-for-r8152_poll.patch +kcm-fix-memory-leak-in-error-path-of-kcm_sendmsg.patch 
+platform-mellanox-mlxbf-tmfifo-drop-the-rx-packet-if.patch +platform-mellanox-mlxbf-tmfifo-drop-jumbo-frames.patch +net-tls-do-not-free-tls_rec-on-async-operation-in-bp.patch +ipv6-fix-ip6_sock_set_addr_preferences-typo.patch +ixgbe-fix-timestamp-configuration-code.patch +kcm-fix-error-handling-for-sock_dgram-in-kcm_sendmsg.patch