From: Greg Kroah-Hartman
Date: Wed, 28 Jun 2023 19:32:37 +0000 (+0200)
Subject: 4.19-stable patches
X-Git-Tag: v6.4.1~44
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=31f208b19ac10eb48986c5d3617424bba3d7f44b;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
x86-microcode-amd-load-late-on-both-threads-too.patch
x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch
x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch
---

diff --git a/queue-4.19/series b/queue-4.19/series
index 2a26bd910b0..7d4ec1e9440 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -1 +1,4 @@
 gfs2-don-t-deref-jdesc-in-evict.patch
+x86-microcode-amd-load-late-on-both-threads-too.patch
+x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch
+x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch
diff --git a/queue-4.19/x86-microcode-amd-load-late-on-both-threads-too.patch b/queue-4.19/x86-microcode-amd-load-late-on-both-threads-too.patch
new file mode 100644
index 00000000000..e53a68b05d3
--- /dev/null
+++ b/queue-4.19/x86-microcode-amd-load-late-on-both-threads-too.patch
@@ -0,0 +1,30 @@
+From a32b0f0db3f396f1c9be2fe621e77c09ec3d8e7d Mon Sep 17 00:00:00 2001
+From: "Borislav Petkov (AMD)"
+Date: Tue, 2 May 2023 19:53:50 +0200
+Subject: x86/microcode/AMD: Load late on both threads too
+
+From: Borislav Petkov (AMD)
+
+commit a32b0f0db3f396f1c9be2fe621e77c09ec3d8e7d upstream.
+
+Do the same as early loading - load on both threads.
+
+Signed-off-by: Borislav Petkov (AMD)
+Cc:
+Link: https://lore.kernel.org/r/20230605141332.25948-1-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/cpu/microcode/amd.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/microcode/amd.c
++++ b/arch/x86/kernel/cpu/microcode/amd.c
+@@ -532,7 +532,7 @@ static enum ucode_state apply_microcode_
+ 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+ 
+ 	/* need to apply patch? */
+-	if (rev >= mc_amd->hdr.patch_id) {
++	if (rev > mc_amd->hdr.patch_id) {
+ 		ret = UCODE_OK;
+ 		goto out;
+ 	}
diff --git a/queue-4.19/x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch b/queue-4.19/x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch
new file mode 100644
index 00000000000..44f17b89a5f
--- /dev/null
+++ b/queue-4.19/x86-smp-cure-kexec-vs.-mwait_play_dead-breakage.patch
@@ -0,0 +1,182 @@
+From d7893093a7417527c0d73c9832244e65c9d0114f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Thu, 15 Jun 2023 22:33:57 +0200
+Subject: x86/smp: Cure kexec() vs. mwait_play_dead() breakage
+
+From: Thomas Gleixner
+
+commit d7893093a7417527c0d73c9832244e65c9d0114f upstream.
+
+TLDR: It's a mess.
+
+When kexec() is executed on a system with offline CPUs, which are parked in
+mwait_play_dead(), it can end up in a triple fault during the bootup of the
+kexec kernel or cause hard-to-diagnose data corruption.
+
+The reason is that kexec() eventually overwrites the previous kernel's text,
+page tables, data and stack. If it writes to the cache line which is
+monitored by a previously offlined CPU, MWAIT resumes execution and ends
+up executing the wrong text, dereferencing overwritten page tables or
+corrupting the kexec kernel's data.
+
+Cure this by bringing the offlined CPUs out of MWAIT into HLT.
+
+Write to the monitored cache line of each offline CPU, which makes MWAIT
+resume execution. The written control word tells the offlined CPUs to issue
+HLT, which does not have the MWAIT problem.
+
+That does not help if a stray NMI, MCE or SMI hits the offlined CPUs, as
+those make them come out of HLT.
+
+A follow-up change will put them into INIT, which protects at least against
+NMI and SMI.
+
+Fixes: ea53069231f9 ("x86, hotplug: Use mwait to offline a processor, fix the legacy case")
+Reported-by: Ashok Raj
+Signed-off-by: Thomas Gleixner
+Tested-by: Ashok Raj
+Reviewed-by: Ashok Raj
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230615193330.492257119@linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/smp.h |    2 +
+ arch/x86/kernel/smp.c      |    5 +++
+ arch/x86/kernel/smpboot.c  |   59 +++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 66 insertions(+)
+
+--- a/arch/x86/include/asm/smp.h
++++ b/arch/x86/include/asm/smp.h
+@@ -142,6 +142,8 @@ void play_dead_common(void);
+ void wbinvd_on_cpu(int cpu);
+ int wbinvd_on_all_cpus(void);
+ 
++void smp_kick_mwait_play_dead(void);
++
+ void native_send_call_func_ipi(const struct cpumask *mask);
+ void native_send_call_func_single_ipi(int cpu);
+ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -23,6 +23,7 @@
+ #include 
+ #include 
+ #include 
++#include <linux/kexec.h>
+ 
+ #include 
+ #include 
+@@ -200,6 +201,10 @@ static void native_stop_other_cpus(int w
+ 	 * does lots of things not suitable in a panic situation.
+ 	 */
+ 
++	/* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
++	if (kexec_in_progress)
++		smp_kick_mwait_play_dead();
++
+ 	/*
+ 	 * We start by using the REBOOT_VECTOR irq.
+ 	 * The irq is treated as a sync point to allow critical
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -56,6 +56,7 @@
+ #include 
+ #include 
+ #include 
++#include <linux/kexec.h>
+ 
+ #include 
+ #include 
+@@ -101,6 +102,9 @@ struct mwait_cpu_dead {
+ 	unsigned int status;
+ };
+ 
++#define CPUDEAD_MWAIT_WAIT		0xDEADBEEF
++#define CPUDEAD_MWAIT_KEXEC_HLT		0x4A17DEAD
++
+ /*
+  * Cache line aligned data for mwait_play_dead(). Separate on purpose so
+  * that it's unlikely to be touched by other CPUs.
+@@ -162,6 +166,10 @@ static void smp_callin(void)
+ {
+ 	int cpuid, phys_id;
+ 
++	/* Mop up eventual mwait_play_dead() wreckage */
++	this_cpu_write(mwait_cpu_dead.status, 0);
++	this_cpu_write(mwait_cpu_dead.control, 0);
++
+ 	/*
+ 	 * If waken up by an INIT in an 82489DX configuration
+ 	 * cpu_callout_mask guarantees we don't get here before
+@@ -1642,6 +1650,10 @@ static inline void mwait_play_dead(void)
+ 			(highest_subcstate - 1);
+ 	}
+ 
++	/* Set up state for the kexec() hack below */
++	md->status = CPUDEAD_MWAIT_WAIT;
++	md->control = CPUDEAD_MWAIT_WAIT;
++
+ 	wbinvd();
+ 
+ 	while (1) {
+@@ -1658,6 +1670,24 @@ static inline void mwait_play_dead(void)
+ 		__monitor(md, 0, 0);
+ 		mb();
+ 		__mwait(eax, 0);
++
++		if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
++			/*
++			 * Kexec is about to happen. Don't go back into mwait() as
++			 * the kexec kernel might overwrite text and data including
++			 * page tables and stack. So mwait() would resume when the
++			 * monitor cache line is written to and then the CPU goes
++			 * south due to overwritten text, page tables and stack.
++			 *
++			 * Note: This does _NOT_ protect against a stray MCE, NMI,
++			 * SMI. They will resume execution at the instruction
++			 * following the HLT instruction and run into the problem
++			 * which this is trying to prevent.
++			 */
++			WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
++			while(1)
++				native_halt();
++		}
+ 		/*
+ 		 * If NMI wants to wake up CPU0, start CPU0.
+ 		 */
+@@ -1666,6 +1696,35 @@ static inline void mwait_play_dead(void)
+ 	}
+ }
+ 
++/*
++ * Kick all "offline" CPUs out of mwait on kexec(). See comment in
++ * mwait_play_dead().
++ */
++void smp_kick_mwait_play_dead(void)
++{
++	u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT;
++	struct mwait_cpu_dead *md;
++	unsigned int cpu, i;
++
++	for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) {
++		md = per_cpu_ptr(&mwait_cpu_dead, cpu);
++
++		/* Does it sit in mwait_play_dead() ? */
++		if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT)
++			continue;
++
++		/* Wait up to 5ms */
++		for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) {
++			/* Bring it out of mwait */
++			WRITE_ONCE(md->control, newstate);
++			udelay(5);
++		}
++
++		if (READ_ONCE(md->status) != newstate)
++			pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu);
++	}
++}
++
+ void hlt_play_dead(void)
+ {
+ 	if (__this_cpu_read(cpu_info.x86) >= 4)
diff --git a/queue-4.19/x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch b/queue-4.19/x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch
new file mode 100644
index 00000000000..631b2772a18
--- /dev/null
+++ b/queue-4.19/x86-smp-use-dedicated-cache-line-for-mwait_play_dead.patch
@@ -0,0 +1,91 @@
+From f9c9987bf52f4e42e940ae217333ebb5a4c3b506 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Thu, 15 Jun 2023 22:33:55 +0200
+Subject: x86/smp: Use dedicated cache-line for mwait_play_dead()
+
+From: Thomas Gleixner
+
+commit f9c9987bf52f4e42e940ae217333ebb5a4c3b506 upstream.
+
+Monitoring idletask::thread_info::flags in mwait_play_dead() has been an
+obvious choice as all that is needed is a cache line which is not written
+by other CPUs.
+
+But there is a use case where a "dead" CPU needs to be brought out of
+MWAIT: kexec().
+
+This is required as kexec() can overwrite text, page tables, stacks and the
+monitored cache line of the original kernel. The latter causes MWAIT to
+resume execution, which obviously causes havoc on the kexec kernel and
+usually results in triple faults.
+
+Use dedicated per-CPU storage to prepare for that.
+
+Signed-off-by: Thomas Gleixner
+Reviewed-by: Ashok Raj
+Reviewed-by: Borislav Petkov (AMD)
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230615193330.434553750@linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/smpboot.c |   24 ++++++++++++++----------
+ 1 file changed, 14 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -96,6 +96,17 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t
+ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
+ EXPORT_PER_CPU_SYMBOL(cpu_info);
+ 
++struct mwait_cpu_dead {
++	unsigned int control;
++	unsigned int status;
++};
++
++/*
++ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
++ * that it's unlikely to be touched by other CPUs.
++ */
++static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
++
+ /* Logical package management. We might want to allocate that dynamically */
+ unsigned int __max_logical_packages __read_mostly;
+ EXPORT_SYMBOL(__max_logical_packages);
+@@ -1594,10 +1605,10 @@ static bool wakeup_cpu0(void)
+  */
+ static inline void mwait_play_dead(void)
+ {
++	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
+ 	unsigned int eax, ebx, ecx, edx;
+ 	unsigned int highest_cstate = 0;
+ 	unsigned int highest_subcstate = 0;
+-	void *mwait_ptr;
+ 	int i;
+ 
+ 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+@@ -1631,13 +1642,6 @@ static inline void mwait_play_dead(void)
+ 			(highest_subcstate - 1);
+ 	}
+ 
+-	/*
+-	 * This should be a memory location in a cache line which is
+-	 * unlikely to be touched by other processors. The actual
+-	 * content is immaterial as it is not actually modified in any way.
+-	 */
+-	mwait_ptr = &current_thread_info()->flags;
+-
+ 	wbinvd();
+ 
+ 	while (1) {
+@@ -1649,9 +1653,9 @@ static inline void mwait_play_dead(void)
+ 		 * case where we return around the loop.
+ 		 */
+ 		mb();
+-		clflush(mwait_ptr);
++		clflush(md);
+ 		mb();
+-		__monitor(mwait_ptr, 0, 0);
++		__monitor(md, 0, 0);
+ 		mb();
+ 		__mwait(eax, 0);
+ 		/*
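
The two x86/smp patches above implement a small two-word handshake: a parked
CPU publishes CPUDEAD_MWAIT_WAIT in its per-CPU mwait_cpu_dead line, kexec()
writes CPUDEAD_MWAIT_KEXEC_HLT into the monitored control word to break the
CPU out of MWAIT, and the CPU acknowledges through the status word before
switching to HLT. Below is a minimal user-space model of that protocol for
readers tracing the logic, assuming only C11 atomics and POSIX threads: the
privileged MONITOR/MWAIT and HLT instructions are stood in for by a polling
loop and a thread exit, and the helper names dead_cpu() and
kick_mwait_play_dead() are invented for illustration. Only struct
mwait_cpu_dead and the two CPUDEAD_* constants come from the patches; this
is a sketch of the idea, not kernel code. Build with: cc -pthread kick_model.c

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

/* The constants and the struct mirror the patches; the rest is a model. */
#define CPUDEAD_MWAIT_WAIT	0xDEADBEEFU
#define CPUDEAD_MWAIT_KEXEC_HLT	0x4A17DEADU

struct mwait_cpu_dead {
	_Atomic unsigned int control;
	_Atomic unsigned int status;
};

static struct mwait_cpu_dead md;	/* one "offline CPU" in this model */

/* Models mwait_play_dead(): park until the control word flips. */
static void *dead_cpu(void *unused)
{
	(void)unused;
	atomic_store(&md.status, CPUDEAD_MWAIT_WAIT);
	atomic_store(&md.control, CPUDEAD_MWAIT_WAIT);

	/* Stand-in for the clflush()/__monitor()/__mwait() wait loop. */
	while (atomic_load(&md.control) != CPUDEAD_MWAIT_KEXEC_HLT)
		usleep(100);

	/* Acknowledge, then "halt" - here the thread simply ends. */
	atomic_store(&md.status, CPUDEAD_MWAIT_KEXEC_HLT);
	return NULL;
}

/* Models smp_kick_mwait_play_dead() for the single modeled CPU. */
static void kick_mwait_play_dead(void)
{
	unsigned int newstate = CPUDEAD_MWAIT_KEXEC_HLT;
	int i;

	/* Does it sit in the mwait wait loop at all? */
	if (atomic_load(&md.status) != CPUDEAD_MWAIT_WAIT)
		return;

	/* Rewrite the monitored word and poll status, bounded to ~5ms. */
	for (i = 0; atomic_load(&md.status) != newstate && i < 1000; i++) {
		atomic_store(&md.control, newstate);
		usleep(5);
	}

	if (atomic_load(&md.status) != newstate)
		fprintf(stderr, "CPU is stuck in mwait_play_dead()\n");
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, dead_cpu, NULL);
	usleep(1000);			/* let the "CPU" park itself */
	kick_mwait_play_dead();
	pthread_join(t, NULL);
	puts("parked CPU moved from MWAIT wait to HLT");
	return 0;
}

Note how the kicker keeps rewriting the monitored word instead of writing it
once: a single write that lands before __monitor() has re-armed the cache
line would wake nothing, so smp_kick_mwait_play_dead() retries every 5us and
gives up after about 5ms with pr_err_once() rather than letting a wedged CPU
stall kexec() forever.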