From: Greg Kroah-Hartman Date: Wed, 28 Jun 2023 19:35:51 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.4.1~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1335bf02b6eaa9cd376499454aeadd556f78becb;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: x86-microcode-amd-load-late-on-both-threads-too.patch x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch x86-smp-dont-access-non-existing-cpuid-leaf.patch x86-smp-make-stop_other_cpus-more-robust.patch x86-smp-remove-pointless-wmb-s-from-native_stop_other_cpus.patch x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch --- diff --git a/queue-6.1/series b/queue-6.1/series index 64f8a0aa3ae..22138203592 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -3,3 +3,9 @@ mm-mmap-fix-error-return-in-do_vmi_align_munmap.patch mptcp-ensure-listener-is-unhashed-before-updating-the-sk-status.patch mm-hwpoison-try-to-recover-from-copy-on-write-faults.patch mm-hwpoison-when-copy-on-write-hits-poison-take-page-offline.patch +x86-microcode-amd-load-late-on-both-threads-too.patch +x86-smp-make-stop_other_cpus-more-robust.patch +x86-smp-dont-access-non-existing-cpuid-leaf.patch +x86-smp-remove-pointless-wmb-s-from-native_stop_other_cpus.patch +x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch +x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch diff --git a/queue-6.1/x86-microcode-amd-load-late-on-both-threads-too.patch b/queue-6.1/x86-microcode-amd-load-late-on-both-threads-too.patch new file mode 100644 index 00000000000..8d1c3b2de4b --- /dev/null +++ b/queue-6.1/x86-microcode-amd-load-late-on-both-threads-too.patch @@ -0,0 +1,30 @@ +From a32b0f0db3f396f1c9be2fe621e77c09ec3d8e7d Mon Sep 17 00:00:00 2001 +From: "Borislav Petkov (AMD)" +Date: Tue, 2 May 2023 19:53:50 +0200 +Subject: x86/microcode/AMD: Load late on both threads too + +From: Borislav Petkov (AMD) + +commit a32b0f0db3f396f1c9be2fe621e77c09ec3d8e7d upstream. + +Do the same as early loading - load on both threads. + +Signed-off-by: Borislav Petkov (AMD) +Cc: +Link: https://lore.kernel.org/r/20230605141332.25948-1-bp@alien8.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/microcode/amd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/microcode/amd.c ++++ b/arch/x86/kernel/cpu/microcode/amd.c +@@ -705,7 +705,7 @@ static enum ucode_state apply_microcode_ + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + + /* need to apply patch? */ +- if (rev >= mc_amd->hdr.patch_id) { ++ if (rev > mc_amd->hdr.patch_id) { + ret = UCODE_OK; + goto out; + } diff --git a/queue-6.1/x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch b/queue-6.1/x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch new file mode 100644 index 00000000000..caf7215abf7 --- /dev/null +++ b/queue-6.1/x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch @@ -0,0 +1,179 @@ +From d7893093a7417527c0d73c9832244e65c9d0114f Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 15 Jun 2023 22:33:57 +0200 +Subject: x86/smp: Cure kexec() vs. mwait_play_dead() breakage + +From: Thomas Gleixner + +commit d7893093a7417527c0d73c9832244e65c9d0114f upstream. + +TLDR: It's a mess. + +When kexec() is executed on a system with offline CPUs, which are parked in +mwait_play_dead() it can end up in a triple fault during the bootup of the +kexec kernel or cause hard to diagnose data corruption. + +The reason is that kexec() eventually overwrites the previous kernel's text, +page tables, data and stack. 
If it writes to the cache line which is +monitored by a previously offlined CPU, MWAIT resumes execution and ends +up executing the wrong text, dereferencing overwritten page tables or +corrupting the kexec kernels data. + +Cure this by bringing the offlined CPUs out of MWAIT into HLT. + +Write to the monitored cache line of each offline CPU, which makes MWAIT +resume execution. The written control word tells the offlined CPUs to issue +HLT, which does not have the MWAIT problem. + +That does not help, if a stray NMI, MCE or SMI hits the offlined CPUs as +those make it come out of HLT. + +A follow up change will put them into INIT, which protects at least against +NMI and SMI. + +Fixes: ea53069231f9 ("x86, hotplug: Use mwait to offline a processor, fix the legacy case") +Reported-by: Ashok Raj +Signed-off-by: Thomas Gleixner +Tested-by: Ashok Raj +Reviewed-by: Ashok Raj +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230615193330.492257119@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/smp.h | 2 + + arch/x86/kernel/smp.c | 5 +++ + arch/x86/kernel/smpboot.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 66 insertions(+) + +--- a/arch/x86/include/asm/smp.h ++++ b/arch/x86/include/asm/smp.h +@@ -132,6 +132,8 @@ void wbinvd_on_cpu(int cpu); + int wbinvd_on_all_cpus(void); + void cond_wakeup_cpu0(void); + ++void smp_kick_mwait_play_dead(void); ++ + void native_smp_send_reschedule(int cpu); + void native_send_call_func_ipi(const struct cpumask *mask); + void native_send_call_func_single_ipi(int cpu); +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -157,6 +158,10 @@ static void native_stop_other_cpus(int w + if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) + return; + ++ /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ ++ if (kexec_in_progress) ++ smp_kick_mwait_play_dead(); ++ + /* + * 1) Send an IPI on the reboot vector to all other CPUs. + * +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -104,6 +105,9 @@ struct mwait_cpu_dead { + unsigned int status; + }; + ++#define CPUDEAD_MWAIT_WAIT 0xDEADBEEF ++#define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD ++ + /* + * Cache line aligned data for mwait_play_dead(). Separate on purpose so + * that it's unlikely to be touched by other CPUs. +@@ -166,6 +170,10 @@ static void smp_callin(void) + { + int cpuid; + ++ /* Mop up eventual mwait_play_dead() wreckage */ ++ this_cpu_write(mwait_cpu_dead.status, 0); ++ this_cpu_write(mwait_cpu_dead.control, 0); ++ + /* + * If waken up by an INIT in an 82489DX configuration + * cpu_callout_mask guarantees we don't get here before +@@ -1795,6 +1803,10 @@ static inline void mwait_play_dead(void) + (highest_subcstate - 1); + } + ++ /* Set up state for the kexec() hack below */ ++ md->status = CPUDEAD_MWAIT_WAIT; ++ md->control = CPUDEAD_MWAIT_WAIT; ++ + wbinvd(); + + while (1) { +@@ -1812,10 +1824,57 @@ static inline void mwait_play_dead(void) + mb(); + __mwait(eax, 0); + ++ if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { ++ /* ++ * Kexec is about to happen. Don't go back into mwait() as ++ * the kexec kernel might overwrite text and data including ++ * page tables and stack. 
So mwait() would resume when the ++ * monitor cache line is written to and then the CPU goes ++ * south due to overwritten text, page tables and stack. ++ * ++ * Note: This does _NOT_ protect against a stray MCE, NMI, ++ * SMI. They will resume execution at the instruction ++ * following the HLT instruction and run into the problem ++ * which this is trying to prevent. ++ */ ++ WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT); ++ while(1) ++ native_halt(); ++ } ++ + cond_wakeup_cpu0(); + } + } + ++/* ++ * Kick all "offline" CPUs out of mwait on kexec(). See comment in ++ * mwait_play_dead(). ++ */ ++void smp_kick_mwait_play_dead(void) ++{ ++ u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT; ++ struct mwait_cpu_dead *md; ++ unsigned int cpu, i; ++ ++ for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) { ++ md = per_cpu_ptr(&mwait_cpu_dead, cpu); ++ ++ /* Does it sit in mwait_play_dead() ? */ ++ if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT) ++ continue; ++ ++ /* Wait up to 5ms */ ++ for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { ++ /* Bring it out of mwait */ ++ WRITE_ONCE(md->control, newstate); ++ udelay(5); ++ } ++ ++ if (READ_ONCE(md->status) != newstate) ++ pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu); ++ } ++} ++ + void hlt_play_dead(void) + { + if (__this_cpu_read(cpu_info.x86) >= 4) diff --git a/queue-6.1/x86-smp-dont-access-non-existing-cpuid-leaf.patch b/queue-6.1/x86-smp-dont-access-non-existing-cpuid-leaf.patch new file mode 100644 index 00000000000..4de86a072f6 --- /dev/null +++ b/queue-6.1/x86-smp-dont-access-non-existing-cpuid-leaf.patch @@ -0,0 +1,63 @@ +From 9b040453d4440659f33dc6f0aa26af418ebfe70b Mon Sep 17 00:00:00 2001 +From: Tony Battersby +Date: Thu, 15 Jun 2023 22:33:52 +0200 +Subject: x86/smp: Dont access non-existing CPUID leaf + +From: Tony Battersby + +commit 9b040453d4440659f33dc6f0aa26af418ebfe70b upstream. + +stop_this_cpu() tests CPUID leaf 0x8000001f::EAX unconditionally. Intel +CPUs return the content of the highest supported leaf when a non-existing +leaf is read, while AMD CPUs return all zeros for unsupported leafs. + +So the result of the test on Intel CPUs is lottery. + +While harmless it's incorrect and causes the conditional wbinvd() to be +issued where not required. + +Check whether the leaf is supported before reading it. + +[ tglx: Adjusted changelog ] + +Fixes: 08f253ec3767 ("x86/cpu: Clear SME feature flag when not in use") +Signed-off-by: Tony Battersby +Signed-off-by: Thomas Gleixner +Reviewed-by: Mario Limonciello +Reviewed-by: Borislav Petkov (AMD) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/3817d810-e0f1-8ef8-0bbd-663b919ca49b@cybernetics.com +Link: https://lore.kernel.org/r/20230615193330.322186388@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/process.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -748,6 +748,7 @@ struct cpumask cpus_stop_mask; + + void __noreturn stop_this_cpu(void *dummy) + { ++ struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + unsigned int cpu = smp_processor_id(); + + local_irq_disable(); +@@ -762,7 +763,7 @@ void __noreturn stop_this_cpu(void *dumm + */ + set_cpu_online(cpu, false); + disable_local_APIC(); +- mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); ++ mcheck_cpu_clear(c); + + /* + * Use wbinvd on processors that support SME. 
This provides support +@@ -776,7 +777,7 @@ void __noreturn stop_this_cpu(void *dumm + * Test the CPUID bit directly because the machine might've cleared + * X86_FEATURE_SME due to cmdline options. + */ +- if (cpuid_eax(0x8000001f) & BIT(0)) ++ if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) + native_wbinvd(); + + /* diff --git a/queue-6.1/x86-smp-make-stop_other_cpus-more-robust.patch b/queue-6.1/x86-smp-make-stop_other_cpus-more-robust.patch new file mode 100644 index 00000000000..d1dc3baacf1 --- /dev/null +++ b/queue-6.1/x86-smp-make-stop_other_cpus-more-robust.patch @@ -0,0 +1,235 @@ +From 1f5e7eb7868e42227ac426c96d437117e6e06e8e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Wed, 26 Apr 2023 18:37:00 +0200 +Subject: x86/smp: Make stop_other_cpus() more robust + +From: Thomas Gleixner + +commit 1f5e7eb7868e42227ac426c96d437117e6e06e8e upstream. + +Tony reported intermittent lockups on poweroff. His analysis identified the +wbinvd() in stop_this_cpu() as the culprit. This was added to ensure that +on SME enabled machines a kexec() does not leave any stale data in the +caches when switching from encrypted to non-encrypted mode or vice versa. + +That wbinvd() is conditional on the SME feature bit which is read directly +from CPUID. But that readout does not check whether the CPUID leaf is +available or not. If it's not available the CPU will return the value of +the highest supported leaf instead. Depending on the content the "SME" bit +might be set or not. + +That's incorrect but harmless. Making the CPUID readout conditional makes +the observed hangs go away, but it does not fix the underlying problem: + +CPU0 CPU1 + + stop_other_cpus() + send_IPIs(REBOOT); stop_this_cpu() + while (num_online_cpus() > 1); set_online(false); + proceed... -> hang + wbinvd() + +WBINVD is an expensive operation and if multiple CPUs issue it at the same +time the resulting delays are even larger. + +But CPU0 already observed num_online_cpus() going down to 1 and proceeds +which causes the system to hang. + +This issue exists independent of WBINVD, but the delays caused by WBINVD +make it more prominent. + +Make this more robust by adding a cpumask which is initialized to the +online CPU mask before sending the IPIs and CPUs clear their bit in +stop_this_cpu() after the WBINVD completed. Check for that cpumask to +become empty in stop_other_cpus() instead of watching num_online_cpus(). + +The cpumask cannot plug all holes either, but it's better than a raw +counter and allows to restrict the NMI fallback IPI to be sent only the +CPUs which have not reported within the timeout window. 
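
A loose userspace sketch of that handshake, assuming plain C11 atomics and
pthreads (all names below are invented; this is an analogy, not the kernel
code): each worker clears its own bit in a shared mask as the very last step
of its shutdown work, while the initiating thread polls the mask with a
timeout and escalates only to the stragglers whose bits are still set.

/* Analogy for the cpus_stop_mask scheme described above -- not kernel code */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NWORKERS 8

static atomic_uint stop_mask;      /* bit n set => worker n not stopped yet */
static atomic_int  stop_requested; /* stands in for the REBOOT vector IPI */

static void *worker(void *arg)
{
	unsigned int bit = 1u << (unsigned long)arg;

	while (!atomic_load(&stop_requested))
		usleep(100);                 /* "normal work" */

	usleep(1000);                        /* "shutdown work", e.g. disabling the APIC */
	atomic_fetch_and(&stop_mask, ~bit);  /* report completion as the last step */
	return NULL;
}

int main(void)
{
	pthread_t tid[NWORKERS];
	unsigned int remaining;
	int i, timeout;

	atomic_store(&stop_mask, (1u << NWORKERS) - 1);
	for (i = 0; i < NWORKERS; i++)
		pthread_create(&tid[i], NULL, worker, (void *)(unsigned long)i);

	atomic_store(&stop_requested, 1);    /* "send the IPIs" */

	/* Wait up to ~100ms for every worker to clear its bit */
	for (timeout = 1000; atomic_load(&stop_mask) && timeout--; )
		usleep(100);

	/* Escalate only to the workers which did not report */
	remaining = atomic_load(&stop_mask);
	for (i = 0; i < NWORKERS; i++)
		if (remaining & (1u << i))
			fprintf(stderr, "worker %d did not stop in time\n", i);

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

The patch below does the same with cpus_stop_mask, except that the
escalation step is an NMI sent to each CPU whose bit is still set.
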
+ +Fixes: 08f253ec3767 ("x86/cpu: Clear SME feature flag when not in use") +Reported-by: Tony Battersby +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov (AMD) +Reviewed-by: Ashok Raj +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/3817d810-e0f1-8ef8-0bbd-663b919ca49b@cybernetics.com +Link: https://lore.kernel.org/r/87h6r770bv.ffs@tglx +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpu.h | 2 + + arch/x86/kernel/process.c | 23 +++++++++++++++- + arch/x86/kernel/smp.c | 62 +++++++++++++++++++++++++++++---------------- + 3 files changed, 64 insertions(+), 23 deletions(-) + +--- a/arch/x86/include/asm/cpu.h ++++ b/arch/x86/include/asm/cpu.h +@@ -96,4 +96,6 @@ static inline bool intel_cpu_signatures_ + + extern u64 x86_read_arch_cap_msr(void); + ++extern struct cpumask cpus_stop_mask; ++ + #endif /* _ASM_X86_CPU_H */ +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -744,13 +744,23 @@ bool xen_set_default_idle(void) + } + #endif + ++struct cpumask cpus_stop_mask; ++ + void __noreturn stop_this_cpu(void *dummy) + { ++ unsigned int cpu = smp_processor_id(); ++ + local_irq_disable(); ++ + /* +- * Remove this CPU: ++ * Remove this CPU from the online mask and disable it ++ * unconditionally. This might be redundant in case that the reboot ++ * vector was handled late and stop_other_cpus() sent an NMI. ++ * ++ * According to SDM and APM NMIs can be accepted even after soft ++ * disabling the local APIC. + */ +- set_cpu_online(smp_processor_id(), false); ++ set_cpu_online(cpu, false); + disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + +@@ -768,6 +778,15 @@ void __noreturn stop_this_cpu(void *dumm + */ + if (cpuid_eax(0x8000001f) & BIT(0)) + native_wbinvd(); ++ ++ /* ++ * This brings a cache line back and dirties it, but ++ * native_stop_other_cpus() will overwrite cpus_stop_mask after it ++ * observed that all CPUs reported stop. This write will invalidate ++ * the related cache line on this CPU. ++ */ ++ cpumask_clear_cpu(cpu, &cpus_stop_mask); ++ + for (;;) { + /* + * Use native_halt() so that memory contents don't change +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -146,31 +147,43 @@ static int register_stop_handler(void) + + static void native_stop_other_cpus(int wait) + { +- unsigned long flags; +- unsigned long timeout; ++ unsigned int cpu = smp_processor_id(); ++ unsigned long flags, timeout; + + if (reboot_force) + return; + +- /* +- * Use an own vector here because smp_call_function +- * does lots of things not suitable in a panic situation. +- */ ++ /* Only proceed if this is the first CPU to reach this code */ ++ if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) ++ return; + + /* +- * We start by using the REBOOT_VECTOR irq. +- * The irq is treated as a sync point to allow critical +- * regions of code on other cpus to release their spin locks +- * and re-enable irqs. Jumping straight to an NMI might +- * accidentally cause deadlocks with further shutdown/panic +- * code. By syncing, we give the cpus up to one second to +- * finish their work before we force them off with the NMI. ++ * 1) Send an IPI on the reboot vector to all other CPUs. ++ * ++ * The other CPUs should react on it after leaving critical ++ * sections and re-enabling interrupts. They might still hold ++ * locks, but there is nothing which can be done about that. 
++ * ++ * 2) Wait for all other CPUs to report that they reached the ++ * HLT loop in stop_this_cpu() ++ * ++ * 3) If #2 timed out send an NMI to the CPUs which did not ++ * yet report ++ * ++ * 4) Wait for all other CPUs to report that they reached the ++ * HLT loop in stop_this_cpu() ++ * ++ * #3 can obviously race against a CPU reaching the HLT loop late. ++ * That CPU will have reported already and the "have all CPUs ++ * reached HLT" condition will be true despite the fact that the ++ * other CPU is still handling the NMI. Again, there is no ++ * protection against that as "disabled" APICs still respond to ++ * NMIs. + */ +- if (num_online_cpus() > 1) { +- /* did someone beat us here? */ +- if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) +- return; ++ cpumask_copy(&cpus_stop_mask, cpu_online_mask); ++ cpumask_clear_cpu(cpu, &cpus_stop_mask); + ++ if (!cpumask_empty(&cpus_stop_mask)) { + /* sync above data before sending IRQ */ + wmb(); + +@@ -183,12 +196,12 @@ static void native_stop_other_cpus(int w + * CPUs reach shutdown state. + */ + timeout = USEC_PER_SEC; +- while (num_online_cpus() > 1 && timeout--) ++ while (!cpumask_empty(&cpus_stop_mask) && timeout--) + udelay(1); + } + + /* if the REBOOT_VECTOR didn't work, try with the NMI */ +- if (num_online_cpus() > 1) { ++ if (!cpumask_empty(&cpus_stop_mask)) { + /* + * If NMI IPI is enabled, try to register the stop handler + * and send the IPI. In any case try to wait for the other +@@ -200,7 +213,8 @@ static void native_stop_other_cpus(int w + + pr_emerg("Shutting down cpus with NMI\n"); + +- apic_send_IPI_allbutself(NMI_VECTOR); ++ for_each_cpu(cpu, &cpus_stop_mask) ++ apic->send_IPI(cpu, NMI_VECTOR); + } + /* + * Don't wait longer than 10 ms if the caller didn't +@@ -208,7 +222,7 @@ static void native_stop_other_cpus(int w + * one or more CPUs do not reach shutdown state. + */ + timeout = USEC_PER_MSEC * 10; +- while (num_online_cpus() > 1 && (wait || timeout--)) ++ while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--)) + udelay(1); + } + +@@ -216,6 +230,12 @@ static void native_stop_other_cpus(int w + disable_local_APIC(); + mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + local_irq_restore(flags); ++ ++ /* ++ * Ensure that the cpus_stop_mask cache lines are invalidated on ++ * the other CPUs. See comment vs. SME in stop_this_cpu(). ++ */ ++ cpumask_clear(&cpus_stop_mask); + } + + /* diff --git a/queue-6.1/x86-smp-remove-pointless-wmb-s-from-native_stop_other_cpus.patch b/queue-6.1/x86-smp-remove-pointless-wmb-s-from-native_stop_other_cpus.patch new file mode 100644 index 00000000000..2e370a2cdac --- /dev/null +++ b/queue-6.1/x86-smp-remove-pointless-wmb-s-from-native_stop_other_cpus.patch @@ -0,0 +1,48 @@ +From 2affa6d6db28855e6340b060b809c23477aa546e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 15 Jun 2023 22:33:54 +0200 +Subject: x86/smp: Remove pointless wmb()s from native_stop_other_cpus() + +From: Thomas Gleixner + +commit 2affa6d6db28855e6340b060b809c23477aa546e upstream. + +The wmb()s before sending the IPIs are not synchronizing anything. + +If at all then the apic IPI functions have to provide or act as appropriate +barriers. + +Remove these cargo cult barriers which have no explanation of what they are +synchronizing. 
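
A minimal C11 analogy of that ordering argument (invented names, not kernel
code): a writer-side barrier on its own synchronizes with nothing; the reader
only gets ordering from the notification it actually pairs with, which in the
kernel case is whatever the IPI delivery path provides.

/* Analogy only -- standard C11 atomics, not the kernel memory model */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int payload;            /* plays the role of cpus_stop_mask etc. */
static atomic_int notify;      /* plays the role of the IPI */

static void *consumer(void *arg)
{
	/* The acquire load pairs with the producer's release store */
	while (!atomic_load_explicit(&notify, memory_order_acquire))
		;
	printf("consumer saw payload = %d\n", payload); /* guaranteed 42 */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, consumer, NULL);

	payload = 42;
	/*
	 * A release fence here, by itself, would establish nothing: it only
	 * takes effect together with the store below, which is what the
	 * consumer actually synchronizes with.
	 */
	atomic_store_explicit(&notify, 1, memory_order_release);

	pthread_join(t, NULL);
	return 0;
}
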
+ +Signed-off-by: Thomas Gleixner +Reviewed-by: Borislav Petkov (AMD) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230615193330.378358382@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/smp.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/arch/x86/kernel/smp.c ++++ b/arch/x86/kernel/smp.c +@@ -184,9 +184,6 @@ static void native_stop_other_cpus(int w + cpumask_clear_cpu(cpu, &cpus_stop_mask); + + if (!cpumask_empty(&cpus_stop_mask)) { +- /* sync above data before sending IRQ */ +- wmb(); +- + apic_send_IPI_allbutself(REBOOT_VECTOR); + + /* +@@ -208,9 +205,6 @@ static void native_stop_other_cpus(int w + * CPUs to stop. + */ + if (!smp_no_nmi_ipi && !register_stop_handler()) { +- /* Sync above data before sending IRQ */ +- wmb(); +- + pr_emerg("Shutting down cpus with NMI\n"); + + for_each_cpu(cpu, &cpus_stop_mask) diff --git a/queue-6.1/x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch b/queue-6.1/x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch new file mode 100644 index 00000000000..0b486e74b1e --- /dev/null +++ b/queue-6.1/x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch @@ -0,0 +1,91 @@ +From f9c9987bf52f4e42e940ae217333ebb5a4c3b506 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Thu, 15 Jun 2023 22:33:55 +0200 +Subject: x86/smp: Use dedicated cache-line for mwait_play_dead() + +From: Thomas Gleixner + +commit f9c9987bf52f4e42e940ae217333ebb5a4c3b506 upstream. + +Monitoring idletask::thread_info::flags in mwait_play_dead() has been an +obvious choice as all what is needed is a cache line which is not written +by other CPUs. + +But there is a use case where a "dead" CPU needs to be brought out of +MWAIT: kexec(). + +This is required as kexec() can overwrite text, pagetables, stacks and the +monitored cacheline of the original kernel. The latter causes MWAIT to +resume execution which obviously causes havoc on the kexec kernel which +results usually in triple faults. + +Use a dedicated per CPU storage to prepare for that. + +Signed-off-by: Thomas Gleixner +Reviewed-by: Ashok Raj +Reviewed-by: Borislav Petkov (AMD) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230615193330.434553750@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/smpboot.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +--- a/arch/x86/kernel/smpboot.c ++++ b/arch/x86/kernel/smpboot.c +@@ -99,6 +99,17 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); + DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); + EXPORT_PER_CPU_SYMBOL(cpu_info); + ++struct mwait_cpu_dead { ++ unsigned int control; ++ unsigned int status; ++}; ++ ++/* ++ * Cache line aligned data for mwait_play_dead(). Separate on purpose so ++ * that it's unlikely to be touched by other CPUs. ++ */ ++static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); ++ + /* Logical package management. 
We might want to allocate that dynamically */ + unsigned int __max_logical_packages __read_mostly; + EXPORT_SYMBOL(__max_logical_packages); +@@ -1746,10 +1757,10 @@ EXPORT_SYMBOL_GPL(cond_wakeup_cpu0); + */ + static inline void mwait_play_dead(void) + { ++ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; +- void *mwait_ptr; + int i; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +@@ -1784,13 +1795,6 @@ static inline void mwait_play_dead(void) + (highest_subcstate - 1); + } + +- /* +- * This should be a memory location in a cache line which is +- * unlikely to be touched by other processors. The actual +- * content is immaterial as it is not actually modified in any way. +- */ +- mwait_ptr = ¤t_thread_info()->flags; +- + wbinvd(); + + while (1) { +@@ -1802,9 +1806,9 @@ static inline void mwait_play_dead(void) + * case where we return around the loop. + */ + mb(); +- clflush(mwait_ptr); ++ clflush(md); + mb(); +- __monitor(mwait_ptr, 0, 0); ++ __monitor(md, 0, 0); + mb(); + __mwait(eax, 0); +