--- /dev/null
+From stable+bounces-25869-greg=kroah.com@vger.kernel.org Mon Mar 4 10:27:48 2024
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Mar 2024 01:24:24 -0800
+Subject: KVM/VMX: Move VERW closer to VMentry for MDS mitigation
+To: stable@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>, Sean Christopherson <seanjc@google.com>
+Message-ID: <20240304-delay-verw-backport-6-1-y-v2-6-bf4bce517d60@linux.intel.com>
+Content-Disposition: inline
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 43fb862de8f628c5db5e96831c915b9aebf62d33 upstream.
+
+During VMentry, VERW is executed to mitigate MDS. After VERW, any memory
+access such as a register push onto the stack may put host data into
+MDS-affected CPU buffers. A guest can then use MDS to sample host data.
+
+Although the likelihood of secrets surviving in registers at the
+current VERW callsite is low, it can't be ruled out. Harden the MDS
+mitigation by moving VERW later in the VMentry path.
+
+Note that VERW for the MMIO Stale Data mitigation is unchanged because
+of the complexity of per-guest conditional VERW, which is not easy to
+handle that late in asm with no GPRs available. If the CPU is also
+affected by MDS, VERW is executed unconditionally late in asm regardless
+of the guest having MMIO access.
+
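+For reference, the combined C+asm flow after this change can be modeled
+roughly as below (an illustrative userspace sketch with made-up stub
+names, not the kernel code; the real logic is in the hunks that follow):
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    static bool l1d_should_flush, mmio_clear, has_assigned_device;
+    static bool clear_cpu_buf;    /* X86_FEATURE_CLEAR_CPU_BUF set */
+
+    static void vcpu_enter_exit(void)
+    {
+            if (l1d_should_flush)
+                    puts("L1D flush (also clears CPU buffers)");
+            else if (mmio_clear && has_assigned_device)
+                    puts("VERW in C for MMIO Stale Data only");
+
+            /* guest registers are loaded in asm, then: */
+            if (clear_cpu_buf)
+                    puts("CLEAR_CPU_BUFFERS (VERW) just before VMLAUNCH/VMRESUME");
+            puts("VMLAUNCH/VMRESUME");
+    }
+
+    int main(void)
+    {
+            clear_cpu_buf = true;
+            vcpu_enter_exit();
+            return 0;
+    }
+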
+ [ pawan: conflict resolved in backport ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Sean Christopherson <seanjc@google.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmenter.S | 3 +++
+ arch/x86/kvm/vmx/vmx.c | 12 ++++++++----
+ 2 files changed, 11 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -128,6 +128,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ /* Load guest RAX. This kills the @regs pointer! */
+ mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
++ /* Clobbers EFLAGS.ZF */
++ CLEAR_CPU_BUFFERS
++
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -407,7 +407,8 @@ static __always_inline void vmx_enable_f
+
+ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+ {
+- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
++ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
++ vmx_fb_clear_ctrl_available;
+
+ /*
+ * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
+@@ -7120,11 +7121,14 @@ static noinstr void vmx_vcpu_enter_exit(
+ {
+ guest_state_enter_irqoff();
+
+- /* L1D Flush includes CPU buffer clear to mitigate MDS */
++ /*
++ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
++ * mitigation for MDS is done late in VMentry and is still
++ * executed in spite of L1D Flush. This is because an extra VERW
++ * should not matter much after the big hammer L1D Flush.
++ */
+ if (static_branch_unlikely(&vmx_l1d_should_flush))
+ vmx_l1d_flush(vcpu);
+- else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF))
+- mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
+ mds_clear_cpu_buffers();
--- /dev/null
+From stable+bounces-25868-greg=kroah.com@vger.kernel.org Mon Mar 4 10:27:18 2024
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Mar 2024 01:24:18 -0800
+Subject: KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
+To: stable@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>, Sean Christopherson <seanjc@google.com>, Nikolay Borisov <nik.borisov@suse.com>
+Message-ID: <20240304-delay-verw-backport-6-1-y-v2-5-bf4bce517d60@linux.intel.com>
+Content-Disposition: inline
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 706a189dcf74d3b3f955e9384785e726ed6c7c80 upstream.
+
+Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus
+VMLAUNCH. Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF,
+for MDS mitigations as late as possible without needing to duplicate VERW
+for both paths.
+
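+As an illustration of why this helps (a userspace sketch, not the kernel
+code; it assumes GCC/Clang x86 flag-output constraints), BT reports the
+tested bit through CF, so a VERW placed between the bit test and the
+branch cannot disturb the result:
+
+    /* Returns bit 0 (VMX_RUN_VMRESUME) of 'flags' as read back from
+     * EFLAGS.CF after BT.  A VERW executed in between would clobber
+     * only ZF, leaving CF (and thus the later JNC) intact. */
+    static inline int vmresume_via_cf(unsigned long flags)
+    {
+            unsigned char cf;
+
+            asm("bt $0, %1" : "=@ccc"(cf) : "r"(flags));
+            return cf;
+    }
+
+    int main(void)
+    {
+            return !(vmresume_via_cf(1) == 1 && vmresume_via_cf(2) == 0);
+    }
+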
+ [ pawan: resolved merge conflict in __vmx_vcpu_run in backport. ]
+
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-5-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/run_flags.h | 7 +++++--
+ arch/x86/kvm/vmx/vmenter.S | 6 +++---
+ 2 files changed, 8 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/vmx/run_flags.h
++++ b/arch/x86/kvm/vmx/run_flags.h
+@@ -2,7 +2,10 @@
+ #ifndef __KVM_X86_VMX_RUN_FLAGS_H
+ #define __KVM_X86_VMX_RUN_FLAGS_H
+
+-#define VMX_RUN_VMRESUME (1 << 0)
+-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
++#define VMX_RUN_VMRESUME_SHIFT 0
++#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
++
++#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
++#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
+
+ #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
+--- a/arch/x86/kvm/vmx/vmenter.S
++++ b/arch/x86/kvm/vmx/vmenter.S
+@@ -106,7 +106,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ mov (%_ASM_SP), %_ASM_AX
+
+ /* Check if vmlaunch or vmresume is needed */
+- testb $VMX_RUN_VMRESUME, %bl
++ bt $VMX_RUN_VMRESUME_SHIFT, %bx
+
+ /* Load guest registers. Don't clobber flags. */
+ mov VCPU_RCX(%_ASM_AX), %_ASM_CX
+@@ -128,8 +128,8 @@ SYM_FUNC_START(__vmx_vcpu_run)
+ /* Load guest RAX. This kills the @regs pointer! */
+ mov VCPU_RAX(%_ASM_AX), %_ASM_AX
+
+- /* Check EFLAGS.ZF from 'testb' above */
+- jz .Lvmlaunch
++ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
++ jnc .Lvmlaunch
+
+ /*
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
block-define-bvec_iter-as-__packed-__aligned-4.patch
revert-interconnect-fix-locking-for-runpm-vs-reclaim.patch
revert-interconnect-teach-lockdep-about-icc_bw_lock-order.patch
+x86-bugs-add-asm-helpers-for-executing-verw.patch
+x86-entry_64-add-verw-just-before-userspace-transition.patch
+x86-entry_32-add-verw-just-before-userspace-transition.patch
+x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch
+kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch
+kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch
--- /dev/null
+From stable+bounces-25864-greg=kroah.com@vger.kernel.org Mon Mar 4 10:25:18 2024
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Mar 2024 01:23:53 -0800
+Subject: x86/bugs: Add asm helpers for executing VERW
+To: stable@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>, Alyssa Milburn <alyssa.milburn@intel.com>, Andrew Cooper <andrew.cooper3@citrix.com>, Peter Zijlstra <peterz@infradead.org>
+Message-ID: <20240304-delay-verw-backport-6-1-y-v2-1-bf4bce517d60@linux.intel.com>
+Content-Disposition: inline
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit baf8361e54550a48a7087b603313ad013cc13386 upstream.
+
+MDS mitigation requires clearing the CPU buffers before returning to
+userspace. This needs to be done late in the exit-to-user path. The
+current location of VERW leaves a possibility of kernel data ending up
+in CPU buffers via memory accesses done after VERW, such as:
+
+ 1. Kernel data accessed by an NMI between VERW and return-to-user can
+    remain in CPU buffers since an NMI returning to kernel does not
+    execute VERW to clear CPU buffers.
+ 2. Alyssa reported that after VERW is executed,
+    CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
+    call. Memory accesses during stack scrubbing can move kernel stack
+    contents into CPU buffers.
+ 3. When caller-saved registers are restored after a return from the
+    function executing VERW, data from the kernel stack accesses can
+    remain in CPU buffers (since they occur after VERW).
+
+To fix this, VERW needs to be moved very late in the exit-to-user path.
+
+In preparation for moving VERW to entry/exit asm code, create macros
+that can be used in asm. Also make VERW patching depend on a new feature
+flag X86_FEATURE_CLEAR_CPU_BUF.
+
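+For context, the memory-operand form of VERW that CLEAR_CPU_BUFFERS
+wraps can also be exercised from C.  A minimal userspace sketch
+(illustrative only; the 0x18 selector value is made up, whereas the
+kernel points VERW at __KERNEL_DS via the mds_verw_sel symbol below):
+
+    #include <stdint.h>
+
+    /* Only the memory-operand form of VERW gets the microcode-added
+     * buffer-clearing behavior; the register form does not. */
+    static const uint16_t verw_operand = 0x18;
+
+    static inline void clear_cpu_buffers(void)
+    {
+            /* Clobbers only EFLAGS.ZF ("cc"); no GPRs are touched. */
+            asm volatile("verw %[sel]" : : [sel] "m"(verw_operand) : "cc");
+    }
+
+    int main(void)
+    {
+            clear_cpu_buffers();
+            return 0;
+    }
+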
+ [pawan: - Runtime patch jmp instead of verw in macro CLEAR_CPU_BUFFERS
+ due to lack of relative addressing support for relocations
+ in kernels < v6.5.
+ - Add UNWIND_HINT_EMPTY to avoid warning:
+ arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction]
+
+Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry.S | 23 +++++++++++++++++++++++
+ arch/x86/include/asm/cpufeatures.h | 2 +-
+ arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++
+ 3 files changed, 39 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/entry/entry.S
++++ b/arch/x86/entry/entry.S
+@@ -6,6 +6,9 @@
+ #include <linux/linkage.h>
+ #include <asm/export.h>
+ #include <asm/msr-index.h>
++#include <asm/unwind_hints.h>
++#include <asm/segment.h>
++#include <asm/cache.h>
+
+ .pushsection .noinstr.text, "ax"
+
+@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
+ EXPORT_SYMBOL_GPL(entry_ibpb);
+
+ .popsection
++
++/*
++ * Define the VERW operand that is disguised as entry code so that
++ * it can be referenced with KPTI enabled. This ensures VERW can be
++ * used late in exit-to-user path after page tables are switched.
++ */
++.pushsection .entry.text, "ax"
++
++.align L1_CACHE_BYTES, 0xcc
++SYM_CODE_START_NOALIGN(mds_verw_sel)
++ UNWIND_HINT_EMPTY
++ ANNOTATE_NOENDBR
++ .word __KERNEL_DS
++.align L1_CACHE_BYTES, 0xcc
++SYM_CODE_END(mds_verw_sel);
++/* For KVM */
++EXPORT_SYMBOL_GPL(mds_verw_sel);
++
++.popsection
++
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -304,7 +304,7 @@
+ #define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
+ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+-
++#define X86_FEATURE_CLEAR_CPU_BUF (11*32+18) /* "" Clear CPU buffers using VERW */
+
+ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
+
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -194,6 +194,19 @@
+ #endif
+ .endm
+
++/*
++ * Macro to execute VERW instruction that mitigates transient data sampling
++ * attacks such as MDS. On affected systems a microcode update overloaded VERW
++ * instruction to also clear the CPU buffers. VERW clobbers EFLAGS.ZF.
++ *
++ * Note: Only the memory operand variant of VERW clears the CPU buffers.
++ */
++.macro CLEAR_CPU_BUFFERS
++ ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
++ verw _ASM_RIP(mds_verw_sel)
++.Lskip_verw_\@:
++.endm
++
+ #else /* __ASSEMBLY__ */
+
+ #define ANNOTATE_RETPOLINE_SAFE \
+@@ -375,6 +388,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_
+
+ DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
++extern u16 mds_verw_sel;
++
+ #include <asm/segment.h>
+
+ /**
--- /dev/null
+From stable+bounces-25867-greg=kroah.com@vger.kernel.org Mon Mar 4 10:26:56 2024
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Mar 2024 01:24:11 -0800
+Subject: x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
+To: stable@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Message-ID: <20240304-delay-verw-backport-6-1-y-v2-4-bf4bce517d60@linux.intel.com>
+Content-Disposition: inline
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 6613d82e617dd7eb8b0c40b2fe3acea655b1d611 upstream.
+
+The VERW mitigation at exit-to-user is enabled via the static branch
+mds_user_clear. This static branch is never toggled after boot, and can
+be safely replaced with an ALTERNATIVE(), which is convenient to use in
+asm.
+
+Switch to ALTERNATIVE() to use the VERW mitigation late in the
+exit-to-user path. Also remove the now-redundant VERW in exc_nmi() and
+arch_exit_to_user_mode().
+
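+As a rough userspace model (made-up names, not the kernel
+implementation) of the difference: a static key is a branch that stays
+patchable at runtime, while an alternative is resolved once at boot by
+rewriting the code, which is what makes it more convenient to use from
+plain asm such as the CLEAR_CPU_BUFFERS macro:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    static void verw_stub(void) { puts("VERW: CPU buffers cleared"); }
+    static void skip_stub(void) { /* feature not set: patched out */ }
+
+    /* Stands in for the "jmp .Lskip_verw" that ALTERNATIVE() emits by
+     * default and patches away when the feature bit is set. */
+    static void (*clear_cpu_buffers)(void) = skip_stub;
+
+    static void apply_alternatives(bool clear_cpu_buf_feature)
+    {
+            /* One-shot, boot-time decision. */
+            if (clear_cpu_buf_feature)
+                    clear_cpu_buffers = verw_stub;
+    }
+
+    static void exit_to_user(void)
+    {
+            /* ...user registers restored... */
+            clear_cpu_buffers();
+            /* sysret/iret */
+    }
+
+    int main(void)
+    {
+            apply_alternatives(true);
+            exit_to_user();
+            return 0;
+    }
+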
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/x86/mds.rst | 36 +++++++++++++++++++++++++----------
+ arch/x86/include/asm/entry-common.h | 1
+ arch/x86/include/asm/nospec-branch.h | 12 -----------
+ arch/x86/kernel/cpu/bugs.c | 15 +++++---------
+ arch/x86/kernel/nmi.c | 3 --
+ arch/x86/kvm/vmx/vmx.c | 2 -
+ 6 files changed, 33 insertions(+), 36 deletions(-)
+
+--- a/Documentation/x86/mds.rst
++++ b/Documentation/x86/mds.rst
+@@ -95,6 +95,9 @@ The kernel provides a function to invoke
+
+ mds_clear_cpu_buffers()
+
++Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path.
++Other than EFLAGS.ZF, this macro doesn't clobber any registers.
++
+ The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
+ (idle) transitions.
+
+@@ -138,17 +141,30 @@ Mitigation points
+
+ When transitioning from kernel to user space the CPU buffers are flushed
+ on affected CPUs when the mitigation is not disabled on the kernel
+- command line. The migitation is enabled through the static key
+- mds_user_clear.
++ command line. The mitigation is enabled through the feature flag
++ X86_FEATURE_CLEAR_CPU_BUF.
+
+- The mitigation is invoked in prepare_exit_to_usermode() which covers
+- all but one of the kernel to user space transitions. The exception
+- is when we return from a Non Maskable Interrupt (NMI), which is
+- handled directly in do_nmi().
+-
+- (The reason that NMI is special is that prepare_exit_to_usermode() can
+- enable IRQs. In NMI context, NMIs are blocked, and we don't want to
+- enable IRQs with NMIs blocked.)
++ The mitigation is invoked just before transitioning to userspace after
++ user registers are restored. This is done to minimize the window in
++ which kernel data could be accessed after VERW, e.g. via an NMI that
++ arrives after VERW.
++
++ **Corner case not handled**
++ Interrupts returning to kernel don't clear CPU buffers since the
++ exit-to-user path is expected to do that anyways. But, there could be
++ a case when an NMI is generated in kernel after the exit-to-user path
++ has cleared the buffers. This case is not handled and NMIs returning to
++ kernel don't clear CPU buffers because:
++
++ 1. It is rare to get an NMI after VERW, but before returning to userspace.
++ 2. For an unprivileged user, there is no known way to make that NMI
++ less rare or target it.
++ 3. It would take a large number of these precisely-timed NMIs to mount
++ an actual attack. There's presumably not enough bandwidth.
++ 4. The NMI in question occurs after a VERW, i.e. when user state is
++    restored and most interesting data is already scrubbed. What's left
++ is only the data that NMI touches, and that may or may not be of
++ any interest.
+
+
+ 2. C-State transition
+--- a/arch/x86/include/asm/entry-common.h
++++ b/arch/x86/include/asm/entry-common.h
+@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mod
+
+ static __always_inline void arch_exit_to_user_mode(void)
+ {
+- mds_user_clear_cpu_buffers();
+ amd_clear_divider();
+ }
+ #define arch_exit_to_user_mode arch_exit_to_user_mode
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -381,7 +381,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_
+ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
+-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
+ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+
+ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+@@ -416,17 +415,6 @@ static __always_inline void mds_clear_cp
+ }
+
+ /**
+- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+- *
+- * Clear CPU buffers if the corresponding static key is enabled
+- */
+-static __always_inline void mds_user_clear_cpu_buffers(void)
+-{
+- if (static_branch_likely(&mds_user_clear))
+- mds_clear_cpu_buffers();
+-}
+-
+-/**
+ * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ *
+ * Clear CPU buffers if the corresponding static key is enabled
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -110,9 +110,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_i
+ /* Control unconditional IBPB in switch_mm() */
+ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+
+-/* Control MDS CPU buffer clear before returning to user space */
+-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
+-EXPORT_SYMBOL_GPL(mds_user_clear);
+ /* Control MDS CPU buffer clear before idling (halt, mwait) */
+ DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
+ EXPORT_SYMBOL_GPL(mds_idle_clear);
+@@ -251,7 +248,7 @@ static void __init mds_select_mitigation
+ if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ mds_mitigation = MDS_MITIGATION_VMWERV;
+
+- static_branch_enable(&mds_user_clear);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
+ (mds_nosmt || cpu_mitigations_auto_nosmt()))
+@@ -355,7 +352,7 @@ static void __init taa_select_mitigation
+ * For guests that can't determine whether the correct microcode is
+ * present on host, enable the mitigation for UCODE_NEEDED as well.
+ */
+- static_branch_enable(&mds_user_clear);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+@@ -423,7 +420,7 @@ static void __init mmio_select_mitigatio
+ */
+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
+ boot_cpu_has(X86_FEATURE_RTM)))
+- static_branch_enable(&mds_user_clear);
++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ else
+ static_branch_enable(&mmio_stale_data_clear);
+
+@@ -483,12 +480,12 @@ static void __init md_clear_update_mitig
+ if (cpu_mitigations_off())
+ return;
+
+- if (!static_key_enabled(&mds_user_clear))
++ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
+ goto out;
+
+ /*
+- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
+- * mitigation, if necessary.
++ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
++ * Stale Data mitigation, if necessary.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+--- a/arch/x86/kernel/nmi.c
++++ b/arch/x86/kernel/nmi.c
+@@ -522,9 +522,6 @@ nmi_restart:
+ write_cr2(this_cpu_read(nmi_cr2));
+ if (this_cpu_dec_return(nmi_state))
+ goto nmi_restart;
+-
+- if (user_mode(regs))
+- mds_user_clear_cpu_buffers();
+ }
+
+ #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -7123,7 +7123,7 @@ static noinstr void vmx_vcpu_enter_exit(
+ /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ if (static_branch_unlikely(&vmx_l1d_should_flush))
+ vmx_l1d_flush(vcpu);
+- else if (static_branch_unlikely(&mds_user_clear))
++ else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF))
+ mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
--- /dev/null
+From stable+bounces-25866-greg=kroah.com@vger.kernel.org Mon Mar 4 10:26:09 2024
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Mar 2024 01:24:05 -0800
+Subject: x86/entry_32: Add VERW just before userspace transition
+To: stable@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Message-ID: <20240304-delay-verw-backport-6-1-y-v2-3-bf4bce517d60@linux.intel.com>
+Content-Disposition: inline
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit a0e2dab44d22b913b4c228c8b52b2a104434b0b3 upstream.
+
+As done for entry_64, add support for executing VERW late in the
+exit-to-user path for 32-bit mode.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_32.S | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -912,6 +912,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
+ BUG_IF_WRONG_CR3 no_user_check=1
+ popfl
+ popl %eax
++ CLEAR_CPU_BUFFERS
+
+ /*
+ * Return back to the vDSO, which will pop ecx and edx.
+@@ -981,6 +982,7 @@ restore_all_switch_stack:
+
+ /* Restore user state */
+ RESTORE_REGS pop=4 # skip orig_eax/error_code
++ CLEAR_CPU_BUFFERS
+ .Lirq_return:
+ /*
+ * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
+@@ -1173,6 +1175,7 @@ SYM_CODE_START(asm_exc_nmi)
+
+ /* Not on SYSENTER stack. */
+ call exc_nmi
++ CLEAR_CPU_BUFFERS
+ jmp .Lnmi_return
+
+ .Lnmi_from_sysenter_stack:
--- /dev/null
+From stable+bounces-25865-greg=kroah.com@vger.kernel.org Mon Mar 4 10:25:47 2024
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Mar 2024 01:23:59 -0800
+Subject: x86/entry_64: Add VERW just before userspace transition
+To: stable@vger.kernel.org
+Cc: Dave Hansen <dave.hansen@linux.intel.com>, Dave Hansen <dave.hansen@intel.com>
+Message-ID: <20240304-delay-verw-backport-6-1-y-v2-2-bf4bce517d60@linux.intel.com>
+Content-Disposition: inline
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a upstream.
+
+The mitigation for MDS is to use the VERW instruction to clear any
+secrets in CPU buffers. Data from memory accesses done after VERW can
+still end up in CPU buffers, so it is safer to execute VERW late in the
+return-to-user path to minimize the window in which kernel data can end
+up in CPU buffers. There are not many kernel secrets to be had after
+SWITCH_TO_USER_CR3.
+
+Add support for deploying the VERW mitigation after user register state
+is restored. This helps minimize the chances of kernel data ending up
+in CPU buffers after executing VERW.
+
+Note that the mitigation at the new location is not yet enabled.
+
+ Corner case not handled
+ =======================
+ Interrupts returning to kernel don't clear CPU buffers since the
+ exit-to-user path is expected to do that anyways. But, there could be
+ a case when an NMI is generated in kernel after the exit-to-user path
+ has cleared the buffers. This case is not handled and NMIs returning to
+ kernel don't clear CPU buffers because:
+
+ 1. It is rare to get an NMI after VERW, but before returning to user.
+ 2. For an unprivileged user, there is no known way to make that NMI
+ less rare or target it.
+ 3. It would take a large number of these precisely-timed NMIs to mount
+ an actual attack. There's presumably not enough bandwidth.
+ 4. The NMI in question occurs after a VERW, i.e. when user state is
+    restored and most interesting data is already scrubbed. What's left
+ is only the data that NMI touches, and that may or may not be of
+ any interest.
+
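+To illustrate the placement argument with a toy userspace model (made-up
+names; the flag below just stands in for "CPU buffers hold kernel
+data"): any load issued after the clear, such as popping saved registers
+off the kernel stack, can repopulate the buffers, so the clear has to be
+the very last step before the ring transition:
+
+    #include <stdio.h>
+
+    static int buffers_hold_kernel_data;
+
+    static void verw(void)              { buffers_hold_kernel_data = 0; }
+    static void restore_user_regs(void) { buffers_hold_kernel_data = 1; }
+
+    static void exit_early_verw(void)
+    {
+            verw();
+            restore_user_regs();    /* refills buffers after the clear */
+            printf("early VERW: dirty=%d\n", buffers_hold_kernel_data);
+    }
+
+    static void exit_late_verw(void)
+    {
+            restore_user_regs();
+            verw();                 /* nothing touches memory after this */
+            printf("late VERW:  dirty=%d\n", buffers_hold_kernel_data);
+    }
+
+    int main(void)
+    {
+            exit_early_verw();
+            exit_late_verw();
+            return 0;
+    }
+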
+ [ pawan: resolved conflict for hunk swapgs_restore_regs_and_return_to_usermode in backport ]
+
+Suggested-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/entry_64.S | 11 +++++++++++
+ arch/x86/entry/entry_64_compat.S | 1 +
+ 2 files changed, 12 insertions(+)
+
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -223,6 +223,7 @@ syscall_return_via_sysret:
+ SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+ swapgs
++ CLEAR_CPU_BUFFERS
+ sysretq
+ SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR
+@@ -656,6 +657,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_
+ /* Restore RDI. */
+ popq %rdi
+ swapgs
++ CLEAR_CPU_BUFFERS
+ jmp .Lnative_iret
+
+
+@@ -767,6 +769,8 @@ native_irq_return_ldt:
+ */
+ popq %rax /* Restore user RAX */
+
++ CLEAR_CPU_BUFFERS
++
+ /*
+ * RSP now points to an ordinary IRET frame, except that the page
+ * is read-only and RSP[31:16] are preloaded with the userspace
+@@ -1494,6 +1498,12 @@ nmi_restore:
+ movq $0, 5*8(%rsp) /* clear "NMI executing" */
+
+ /*
++ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
++ * NMI in kernel after user state is restored. For an unprivileged user
++ * these conditions are hard to meet.
++ */
++
++ /*
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+ * cannot result in a fault. Similarly, we don't need to worry
+@@ -1511,6 +1521,7 @@ SYM_CODE_START(ignore_sysret)
+ UNWIND_HINT_EMPTY
+ ENDBR
+ mov $-ENOSYS, %eax
++ CLEAR_CPU_BUFFERS
+ sysretl
+ SYM_CODE_END(ignore_sysret)
+ #endif
+--- a/arch/x86/entry/entry_64_compat.S
++++ b/arch/x86/entry/entry_64_compat.S
+@@ -272,6 +272,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_uns
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
+ swapgs
++ CLEAR_CPU_BUFFERS
+ sysretl
+ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
+ ANNOTATE_NOENDBR