]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.5-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 16 Oct 2023 07:34:12 +0000 (09:34 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 16 Oct 2023 07:34:12 +0000 (09:34 +0200)
added patches:
revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch

queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch [new file with mode: 0644]
queue-6.5/series

diff --git a/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch b/queue-6.5/revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch
new file mode 100644 (file)
index 0000000..8814475
--- /dev/null
@@ -0,0 +1,182 @@
+From fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 15 Oct 2023 12:02:02 -0700
+Subject: Revert "x86/smp: Put CPUs into INIT on shutdown if possible"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit fbe1bf1e5ff1e3b298420d7a8434983ef8d72bd1 upstream.
+
+This reverts commit 45e34c8af58f23db4474e2bfe79183efec09a18b, and the
+two subsequent fixes to it:
+
+  3f874c9b2aae ("x86/smp: Don't send INIT to non-present and non-booted CPUs")
+  b1472a60a584 ("x86/smp: Don't send INIT to boot CPU")
+
+because it seems to result in hung machines at shutdown.  Particularly
+some Dell machines, but Thomas says
+
+ "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs -
+  at least that's what I could figure out from the various bug reports.
+
+  I don't know which CPUs the DELL machines have, so I can't say it's a
+  pattern.
+
+  I agree with the revert for now"
+
+Ashok Raj chimes in:
+
+ "There was a report (probably this same one), and it turns out it was a
+  bug in the BIOS SMI handler.
+
+  The client BIOS's were waiting for the lowest APICID to be the SMI
+  rendevous master. If this is MeteorLake, the BSP wasn't the one with
+  the lowest APIC and it triped here.
+
+  The BIOS change is also being pushed to others for assimilation :)
+
+  Server BIOS's had this correctly for a while now"
+
+and it does look likely to be some bad interaction between SMI and the
+non-BSP cores having put into INIT (and thus unresponsive until reset).
+
+Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429
+Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/
+Link: https://forum.artixlinux.org/index.php/topic,5997.0.html
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/smp.h |    2 --
+ arch/x86/kernel/smp.c      |   39 +++++++--------------------------------
+ arch/x86/kernel/smpboot.c  |   27 ---------------------------
+ 3 files changed, 7 insertions(+), 61 deletions(-)
+
+--- a/arch/x86/include/asm/smp.h
++++ b/arch/x86/include/asm/smp.h
+@@ -134,8 +134,6 @@ void native_send_call_func_ipi(const str
+ void native_send_call_func_single_ipi(int cpu);
+ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
+-bool smp_park_other_cpus_in_init(void);
+-
+ void smp_store_boot_cpu_info(void);
+ void smp_store_cpu_info(int id);
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigne
+ }
+ /*
+- * Disable virtualization, APIC etc. and park the CPU in a HLT loop
++ * this function calls the 'stop' function on all other CPUs in the system.
+  */
+ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
+ {
+@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int w
+        * 2) Wait for all other CPUs to report that they reached the
+        *    HLT loop in stop_this_cpu()
+        *
+-       * 3) If the system uses INIT/STARTUP for CPU bringup, then
+-       *    send all present CPUs an INIT vector, which brings them
+-       *    completely out of the way.
++       * 3) If #2 timed out send an NMI to the CPUs which did not
++       *    yet report
+        *
+-       * 4) If #3 is not possible and #2 timed out send an NMI to the
+-       *    CPUs which did not yet report
+-       *
+-       * 5) Wait for all other CPUs to report that they reached the
++       * 4) Wait for all other CPUs to report that they reached the
+        *    HLT loop in stop_this_cpu()
+        *
+-       * #4 can obviously race against a CPU reaching the HLT loop late.
++       * #3 can obviously race against a CPU reaching the HLT loop late.
+        * That CPU will have reported already and the "have all CPUs
+        * reached HLT" condition will be true despite the fact that the
+        * other CPU is still handling the NMI. Again, there is no
+@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int w
+               /*
+                * Don't wait longer than a second for IPI completion. The
+                * wait request is not checked here because that would
+-               * prevent an NMI/INIT shutdown in case that not all
++               * prevent an NMI shutdown attempt in case that not all
+                * CPUs reach shutdown state.
+                */
+               timeout = USEC_PER_SEC;
+@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int w
+                       udelay(1);
+       }
+-      /*
+-       * Park all other CPUs in INIT including "offline" CPUs, if
+-       * possible. That's a safe place where they can't resume execution
+-       * of HLT and then execute the HLT loop from overwritten text or
+-       * page tables.
+-       *
+-       * The only downside is a broadcast MCE, but up to the point where
+-       * the kexec() kernel brought all APs online again an MCE will just
+-       * make HLT resume and handle the MCE. The machine crashes and burns
+-       * due to overwritten text, page tables and data. So there is a
+-       * choice between fire and frying pan. The result is pretty much
+-       * the same. Chose frying pan until x86 provides a sane mechanism
+-       * to park a CPU.
+-       */
+-      if (smp_park_other_cpus_in_init())
+-              goto done;
+-
+-      /*
+-       * If park with INIT was not possible and the REBOOT_VECTOR didn't
+-       * take all secondary CPUs offline, try with the NMI.
+-       */
++      /* if the REBOOT_VECTOR didn't work, try with the NMI */
+       if (!cpumask_empty(&cpus_stop_mask)) {
+               /*
+                * If NMI IPI is enabled, try to register the stop handler
+@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int w
+                       udelay(1);
+       }
+-done:
+       local_irq_save(flags);
+       disable_local_APIC();
+       mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -1346,33 +1346,6 @@ void arch_thaw_secondary_cpus_end(void)
+       cache_aps_init();
+ }
+-bool smp_park_other_cpus_in_init(void)
+-{
+-      unsigned int cpu, this_cpu = smp_processor_id();
+-      unsigned int apicid;
+-
+-      if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
+-              return false;
+-
+-      /*
+-       * If this is a crash stop which does not execute on the boot CPU,
+-       * then this cannot use the INIT mechanism because INIT to the boot
+-       * CPU will reset the machine.
+-       */
+-      if (this_cpu)
+-              return false;
+-
+-      for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) {
+-              if (cpu == this_cpu)
+-                      continue;
+-              apicid = apic->cpu_present_to_apicid(cpu);
+-              if (apicid == BAD_APICID)
+-                      continue;
+-              send_init_sequence(apicid);
+-      }
+-      return true;
+-}
+-
+ /*
+  * Early setup to make printk work.
+  */
index 6cc0771e37eedabe8c49d09cc1bd87a869836632..b23ad5ce467d3bf5bb917664e9ffe4db266e489b 100644 (file)
@@ -178,3 +178,4 @@ usb-gadget-ncm-handle-decoding-of-multiple-ntb-s-in-unwrap-call.patch
 usb-cdnsp-fixes-issue-with-dequeuing-not-queued-requests.patch
 usb-typec-qcom-update-the-logic-of-regulator-enable-and-disable.patch
 usb-misc-onboard_hub-add-support-for-microchip-usb2412-usb-2.0-hub.patch
+revert-x86-smp-put-cpus-into-init-on-shutdown-if-possible.patch