From: Greg Kroah-Hartman Date: Mon, 16 Oct 2023 07:34:12 +0000 (+0200) Subject: 6.5-stable patches X-Git-Tag: v5.15.136~25 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=76791fd7732eaaa28eb4f946d5b535a14b05eb95;p=thirdparty%2Fkernel%2Fstable-queue.git 6.5-stable patches added patches: revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch --- diff --git a/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch new file mode 100644 index 00000000000..88144750a3e --- /dev/null +++ b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch @@ -0,0 +1,182 @@ +From fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Sun, 15 Oct 2023 12:02:02 -0700 +Subject: Revert "x86/smp: Put CPUs into INIT on shutdown if possible" + +From: Linus Torvalds + +commit fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 upstream. + +This reverts commit 45e34c8af58f23db4474e2bfe79183efec09a18b, and the +two subsequent fixes to it: + + 3f874c9b2aae ("x86/smp: Don't send INIT to non-present and non-booted CPUs") + b1472a60a584 ("x86/smp: Don't send INIT to boot CPU") + +because it seems to result in hung machines at shutdown. Particularly +some Dell machines, but Thomas says + + "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs - + at least that's what I could figure out from the various bug reports. + + I don't know which CPUs the DELL machines have, so I can't say it's a + pattern. + + I agree with the revert for now" + +Ashok Raj chimes in: + + "There was a report (probably this same one), and it turns out it was a + bug in the BIOS SMI handler. + + The client BIOS's were waiting for the lowest APICID to be the SMI + rendevous master. If this is MeteorLake, the BSP wasn't the one with + the lowest APIC and it triped here. + + The BIOS change is also being pushed to others for assimilation :) + + Server BIOS's had this correctly for a while now" + +and it does look likely to be some bad interaction between SMI and the +non-BSP cores having put into INIT (and thus unresponsive until reset). + +Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429 +Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/ +Link: https://forum.artixlinux.org/index.php/topic,5997.0.html +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279 +Acked-by: Thomas Gleixner +Cc: Ashok Raj +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/smp.h | 2 -- + arch/x86/kernel/smp.c | 39 +++++++-------------------------------- + arch/x86/kernel/smpboot.c | 27 --------------------------- + 3 files changed, 7 insertions(+), 61 deletions(-) + +--- a/arch/x86/include/asm/smp.h ++++ b/arch/x86/include/asm/smp.h +@@ -134,8 +134,6 @@ void native_send_call_func_ipi(const str + void native_send_call_func_single_ipi(int cpu); + void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); + +-bool smp_park_other_cpus_in_init(void); +- + void smp_store_boot_cpu_info(void); + void smp_store_cpu_info(int id); + +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigne + } + + /* +- * Disable virtualization, APIC etc. and park the CPU in a HLT loop ++ * this function calls the 'stop' function on all other CPUs in the system. + */ + DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) + { +@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int w + * 2) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * +- * 3) If the system uses INIT/STARTUP for CPU bringup, then +- * send all present CPUs an INIT vector, which brings them +- * completely out of the way. ++ * 3) If #2 timed out send an NMI to the CPUs which did not ++ * yet report + * +- * 4) If #3 is not possible and #2 timed out send an NMI to the +- * CPUs which did not yet report +- * +- * 5) Wait for all other CPUs to report that they reached the ++ * 4) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * +- * #4 can obviously race against a CPU reaching the HLT loop late. ++ * #3 can obviously race against a CPU reaching the HLT loop late. + * That CPU will have reported already and the "have all CPUs + * reached HLT" condition will be true despite the fact that the + * other CPU is still handling the NMI. Again, there is no +@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int w + /* + * Don't wait longer than a second for IPI completion. The + * wait request is not checked here because that would +- * prevent an NMI/INIT shutdown in case that not all ++ * prevent an NMI shutdown attempt in case that not all + * CPUs reach shutdown state. + */ + timeout = USEC_PER_SEC; +@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int w + udelay(1); + } + +- /* +- * Park all other CPUs in INIT including "offline" CPUs, if +- * possible. That's a safe place where they can't resume execution +- * of HLT and then execute the HLT loop from overwritten text or +- * page tables. +- * +- * The only downside is a broadcast MCE, but up to the point where +- * the kexec() kernel brought all APs online again an MCE will just +- * make HLT resume and handle the MCE. The machine crashes and burns +- * due to overwritten text, page tables and data. So there is a +- * choice between fire and frying pan. The result is pretty much +- * the same. Chose frying pan until x86 provides a sane mechanism +- * to park a CPU. +- */ +- if (smp_park_other_cpus_in_init()) +- goto done; +- +- /* +- * If park with INIT was not possible and the REBOOT_VECTOR didn't +- * take all secondary CPUs offline, try with the NMI. +- */ ++ /* if the REBOOT_VECTOR didn't work, try with the NMI */ + if (!cpumask_empty(&cpus_stop_mask)) { + /* + * If NMI IPI is enabled, try to register the stop handler +@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int w + udelay(1); + } + +-done: + local_irq_save(flags); + disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -1346,33 +1346,6 @@ void arch_thaw_secondary_cpus_end(void) + cache_aps_init(); + } + +-bool smp_park_other_cpus_in_init(void) +-{ +- unsigned int cpu, this_cpu = smp_processor_id(); +- unsigned int apicid; +- +- if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) +- return false; +- +- /* +- * If this is a crash stop which does not execute on the boot CPU, +- * then this cannot use the INIT mechanism because INIT to the boot +- * CPU will reset the machine. +- */ +- if (this_cpu) +- return false; +- +- for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) { +- if (cpu == this_cpu) +- continue; +- apicid = apic->cpu_present_to_apicid(cpu); +- if (apicid == BAD_APICID) +- continue; +- send_init_sequence(apicid); +- } +- return true; +-} +- + /* + * Early setup to make printk work. + */ diff --git a/queue-6.5/series b/queue-6.5/series index 6cc0771e37e..b23ad5ce467 100644 --- a/queue-6.5/series +++ b/queue-6.5/series @@ -178,3 +178,4 @@ usb-gadget-ncm-handle-decoding-of-multiple-ntb-s-in-unwrap-call.patch usb-cdnsp-fixes-issue-with-dequeuing-not-queued-requests.patch usb-typec-qcom-update-the-logic-of-regulator-enable-and-disable.patch usb-misc-onboard_hub-add-support-for-microchip-usb2412-usb-2.0-hub.patch +revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch