From: Greg Kroah-Hartman Date: Mon, 4 Mar 2024 13:29:12 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v4.19.309~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=abb9624273bf0d2ae321c22698bb5e7d623e68e3;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch x86-entry_32-add-verw-just-before-userspace-transition.patch x86-entry_64-add-verw-just-before-userspace-transition.patch --- diff --git a/queue-6.6/kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch b/queue-6.6/kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch new file mode 100644 index 00000000000..a3335dce142 --- /dev/null +++ b/queue-6.6/kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch @@ -0,0 +1,87 @@ +From stable+bounces-25813-greg=kroah.com@vger.kernel.org Mon Mar 4 06:09:04 2024 +From: Pawan Gupta +Date: Sun, 3 Mar 2024 21:08:54 -0800 +Subject: KVM/VMX: Move VERW closer to VMentry for MDS mitigation +To: stable@vger.kernel.org +Cc: Dave Hansen , Sean Christopherson +Message-ID: <20240303-delay-verw-backport-6-6-y-v2-5-40ce56b521a5@linux.intel.com> +Content-Disposition: inline + +From: Pawan Gupta + +commit 43fb862de8f628c5db5e96831c915b9aebf62d33 upstream. + +During VMentry VERW is executed to mitigate MDS. After VERW, any memory +access like register push onto stack may put host data in MDS affected +CPU buffers. A guest can then use MDS to sample host data. + +Although likelihood of secrets surviving in registers at current VERW +callsite is less, but it can't be ruled out. Harden the MDS mitigation +by moving the VERW mitigation late in VMentry path. + +Note that VERW for MMIO Stale Data mitigation is unchanged because of +the complexity of per-guest conditional VERW which is not easy to handle +that late in asm with no GPRs available. 
If the CPU is also affected by +MDS, VERW is unconditionally executed late in asm regardless of guest +having MMIO access. + +Signed-off-by: Pawan Gupta +Signed-off-by: Dave Hansen +Acked-by: Sean Christopherson +Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmenter.S | 3 +++ + arch/x86/kvm/vmx/vmx.c | 20 ++++++++++++++++---- + 2 files changed, 19 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -161,6 +161,9 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + ++ /* Clobbers EFLAGS.ZF */ ++ CLEAR_CPU_BUFFERS ++ + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -387,7 +387,16 @@ static __always_inline void vmx_enable_f + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) + { +- vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && ++ /* ++ * Disable VERW's behavior of clearing CPU buffers for the guest if the ++ * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled ++ * the mitigation. Disabling the clearing behavior provides a ++ * performance boost for guests that aren't aware that manually clearing ++ * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry ++ * and VM-Exit. 
++ */ ++ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && ++ (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA); + +@@ -7226,11 +7235,14 @@ static noinstr void vmx_vcpu_enter_exit( + + guest_state_enter_irqoff(); + +- /* L1D Flush includes CPU buffer clear to mitigate MDS */ ++ /* ++ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW ++ * mitigation for MDS is done late in VMentry and is still ++ * executed in spite of L1D Flush. This is because an extra VERW ++ * should not matter much after the big hammer L1D Flush. ++ */ + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); +- else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) +- mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); diff --git a/queue-6.6/kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch b/queue-6.6/kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch new file mode 100644 index 00000000000..03194ffd6cf --- /dev/null +++ b/queue-6.6/kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch @@ -0,0 +1,68 @@ +From stable+bounces-25812-greg=kroah.com@vger.kernel.org Mon Mar 4 06:08:54 2024 +From: Pawan Gupta +Date: Sun, 3 Mar 2024 21:08:49 -0800 +Subject: KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH +To: stable@vger.kernel.org +Cc: Dave Hansen , Sean Christopherson , Nikolay Borisov +Message-ID: <20240303-delay-verw-backport-6-6-y-v2-4-40ce56b521a5@linux.intel.com> +Content-Disposition: inline + +From: Pawan Gupta + +From: Sean Christopherson + +commit 706a189dcf74d3b3f955e9384785e726ed6c7c80 upstream. + +Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus +VMLAUNCH. 
Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF, +for MDS mitigations as late as possible without needing to duplicate VERW +for both paths. + +Signed-off-by: Sean Christopherson +Signed-off-by: Pawan Gupta +Signed-off-by: Dave Hansen +Reviewed-by: Nikolay Borisov +Link: https://lore.kernel.org/all/20240213-delay-verw-v8-5-a6216d83edb7%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/run_flags.h | 7 +++++-- + arch/x86/kvm/vmx/vmenter.S | 6 +++--- + 2 files changed, 8 insertions(+), 5 deletions(-) + +--- a/arch/x86/kvm/vmx/run_flags.h ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -2,7 +2,10 @@ + #ifndef __KVM_X86_VMX_RUN_FLAGS_H + #define __KVM_X86_VMX_RUN_FLAGS_H + +-#define VMX_RUN_VMRESUME (1 << 0) +-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) ++#define VMX_RUN_VMRESUME_SHIFT 0 ++#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 ++ ++#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) ++#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) + + #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- test $VMX_RUN_VMRESUME, %ebx ++ bt $VMX_RUN_VMRESUME_SHIFT, %ebx + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX +@@ -161,8 +161,8 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + +- /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */ +- jz .Lvmlaunch ++ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. 
*/ ++ jnc .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" diff --git a/queue-6.6/series b/queue-6.6/series index 9d311e484a0..6dc1955f6cd 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -131,3 +131,8 @@ gpio-74x164-enable-output-pins-after-registers-are-r.patch gpiolib-fix-the-error-path-order-in-gpiochip_add_dat.patch gpio-fix-resource-unwinding-order-in-error-path.patch block-define-bvec_iter-as-__packed-__aligned-4.patch +x86-entry_64-add-verw-just-before-userspace-transition.patch +x86-entry_32-add-verw-just-before-userspace-transition.patch +x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch +kvm-vmx-use-bt-jnc-i.e.-eflags.cf-to-select-vmresume-vs.-vmlaunch.patch +kvm-vmx-move-verw-closer-to-vmentry-for-mds-mitigation.patch diff --git a/queue-6.6/x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch b/queue-6.6/x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch new file mode 100644 index 00000000000..9728197a797 --- /dev/null +++ b/queue-6.6/x86-bugs-use-alternative-instead-of-mds_user_clear-static-key.patch @@ -0,0 +1,204 @@ +From stable+bounces-25811-greg=kroah.com@vger.kernel.org Mon Mar 4 06:08:53 2024 +From: Pawan Gupta +Date: Sun, 3 Mar 2024 21:08:42 -0800 +Subject: x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key +To: stable@vger.kernel.org +Cc: Dave Hansen +Message-ID: <20240303-delay-verw-backport-6-6-y-v2-3-40ce56b521a5@linux.intel.com> +Content-Disposition: inline + +From: Pawan Gupta + +commit 6613d82e617dd7eb8b0c40b2fe3acea655b1d611 upstream. + +The VERW mitigation at exit-to-user is enabled via a static branch +mds_user_clear. This static branch is never toggled after boot, and can +be safely replaced with an ALTERNATIVE() which is convenient to use in +asm. + +Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user +path. Also remove the now redundant VERW in exc_nmi() and +arch_exit_to_user_mode(). 
+ +Signed-off-by: Pawan Gupta +Signed-off-by: Dave Hansen +Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/arch/x86/mds.rst | 36 +++++++++++++++++++++++++---------- + arch/x86/include/asm/entry-common.h | 1 + arch/x86/include/asm/nospec-branch.h | 12 ----------- + arch/x86/kernel/cpu/bugs.c | 15 +++++--------- + arch/x86/kernel/nmi.c | 3 -- + arch/x86/kvm/vmx/vmx.c | 2 - + 6 files changed, 33 insertions(+), 36 deletions(-) + +--- a/Documentation/arch/x86/mds.rst ++++ b/Documentation/arch/x86/mds.rst +@@ -95,6 +95,9 @@ The kernel provides a function to invoke + + mds_clear_cpu_buffers() + ++Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. ++Other than EFLAGS.ZF, this macro doesn't clobber any registers. ++ + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state + (idle) transitions. + +@@ -138,17 +141,30 @@ Mitigation points + + When transitioning from kernel to user space the CPU buffers are flushed + on affected CPUs when the mitigation is not disabled on the kernel +- command line. The migitation is enabled through the static key +- mds_user_clear. ++ command line. The mitigation is enabled through the feature flag ++ X86_FEATURE_CLEAR_CPU_BUF. + +- The mitigation is invoked in prepare_exit_to_usermode() which covers +- all but one of the kernel to user space transitions. The exception +- is when we return from a Non Maskable Interrupt (NMI), which is +- handled directly in do_nmi(). +- +- (The reason that NMI is special is that prepare_exit_to_usermode() can +- enable IRQs. In NMI context, NMIs are blocked, and we don't want to +- enable IRQs with NMIs blocked.) ++ The mitigation is invoked just before transitioning to userspace after ++ user registers are restored. This is done to minimize the window in ++ which kernel data could be accessed after VERW e.g. via an NMI after ++ VERW. 
++ ++ **Corner case not handled** ++ Interrupts returning to kernel don't clear CPU buffers since the ++ exit-to-user path is expected to do that anyways. But, there could be ++ a case when an NMI is generated in kernel after the exit-to-user path ++ has cleared the buffers. This case is not handled and NMIs returning to ++ kernel don't clear CPU buffers because: ++ ++ 1. It is rare to get an NMI after VERW, but before returning to userspace. ++ 2. For an unprivileged user, there is no known way to make that NMI ++ less rare or target it. ++ 3. It would take a large number of these precisely-timed NMIs to mount ++ an actual attack. There's presumably not enough bandwidth. ++ 4. The NMI in question occurs after a VERW, i.e. when user state is ++ restored and most interesting data is already scrubbed. What's left ++ is only the data that the NMI touches, and that may or may not be of ++ any interest. + + + 2. C-State transition +--- a/arch/x86/include/asm/entry-common.h ++++ b/arch/x86/include/asm/entry-common.h +@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mod + + static __always_inline void arch_exit_to_user_mode(void) + { +- mds_user_clear_cpu_buffers(); + amd_clear_divider(); + } + #define arch_exit_to_user_mode arch_exit_to_user_mode +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -549,7 +549,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_ + DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); + DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +-DECLARE_STATIC_KEY_FALSE(mds_user_clear); + DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + + DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +@@ -584,17 +583,6 @@ static __always_inline void mds_clear_cp + } + + /** +- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability +- * +- * Clear CPU buffers if the corresponding static key is enabled +- */ +-static __always_inline void mds_user_clear_cpu_buffers(void) +-{ +- if (static_branch_likely(&mds_user_clear)) 
+- mds_clear_cpu_buffers(); +-} +- +-/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_i + /* Control unconditional IBPB in switch_mm() */ + DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); + +-/* Control MDS CPU buffer clear before returning to user space */ +-DEFINE_STATIC_KEY_FALSE(mds_user_clear); +-EXPORT_SYMBOL_GPL(mds_user_clear); + /* Control MDS CPU buffer clear before idling (halt, mwait) */ + DEFINE_STATIC_KEY_FALSE(mds_idle_clear); + EXPORT_SYMBOL_GPL(mds_idle_clear); +@@ -252,7 +249,7 @@ static void __init mds_select_mitigation + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) +@@ -356,7 +353,7 @@ static void __init taa_select_mitigation + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +@@ -424,7 +421,7 @@ static void __init mmio_select_mitigatio + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) +- static_branch_enable(&mds_user_clear); ++ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + else + static_branch_enable(&mmio_stale_data_clear); + +@@ -484,12 +481,12 @@ static void __init md_clear_update_mitig + if (cpu_mitigations_off()) + return; + +- if (!static_key_enabled(&mds_user_clear)) ++ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) + goto out; + + /* +- * mds_user_clear is now enabled. 
Update MDS, TAA and MMIO Stale Data +- * mitigation, if necessary. ++ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO ++ * Stale Data mitigation, if necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { +--- a/arch/x86/kernel/nmi.c ++++ b/arch/x86/kernel/nmi.c +@@ -556,9 +556,6 @@ nmi_restart: + } + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; +- +- if (user_mode(regs)) +- mds_user_clear_cpu_buffers(); + } + + #if IS_ENABLED(CONFIG_KVM_INTEL) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -7229,7 +7229,7 @@ static noinstr void vmx_vcpu_enter_exit( + /* L1D Flush includes CPU buffer clear to mitigate MDS */ + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); +- else if (static_branch_unlikely(&mds_user_clear)) ++ else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) + mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) diff --git a/queue-6.6/x86-entry_32-add-verw-just-before-userspace-transition.patch b/queue-6.6/x86-entry_32-add-verw-just-before-userspace-transition.patch new file mode 100644 index 00000000000..797b5d5f840 --- /dev/null +++ b/queue-6.6/x86-entry_32-add-verw-just-before-userspace-transition.patch @@ -0,0 +1,50 @@ +From stable+bounces-25810-greg=kroah.com@vger.kernel.org Mon Mar 4 06:08:42 2024 +From: Pawan Gupta +Date: Sun, 3 Mar 2024 21:08:36 -0800 +Subject: x86/entry_32: Add VERW just before userspace transition +To: stable@vger.kernel.org +Cc: Dave Hansen +Message-ID: <20240303-delay-verw-backport-6-6-y-v2-2-40ce56b521a5@linux.intel.com> +Content-Disposition: inline + +From: Pawan Gupta + +commit a0e2dab44d22b913b4c228c8b52b2a104434b0b3 upstream. + +As done for entry_64, add support for executing VERW late in exit to +user path for 32-bit mode. 
+ +Signed-off-by: Pawan Gupta +Signed-off-by: Dave Hansen +Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_32.S | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32) + BUG_IF_WRONG_CR3 no_user_check=1 + popfl + popl %eax ++ CLEAR_CPU_BUFFERS + + /* + * Return back to the vDSO, which will pop ecx and edx. +@@ -954,6 +955,7 @@ restore_all_switch_stack: + + /* Restore user state */ + RESTORE_REGS pop=4 # skip orig_eax/error_code ++ CLEAR_CPU_BUFFERS + .Lirq_return: + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization +@@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi) + + /* Not on SYSENTER stack. */ + call exc_nmi ++ CLEAR_CPU_BUFFERS + jmp .Lnmi_return + + .Lnmi_from_sysenter_stack: diff --git a/queue-6.6/x86-entry_64-add-verw-just-before-userspace-transition.patch b/queue-6.6/x86-entry_64-add-verw-just-before-userspace-transition.patch new file mode 100644 index 00000000000..eb81dba504b --- /dev/null +++ b/queue-6.6/x86-entry_64-add-verw-just-before-userspace-transition.patch @@ -0,0 +1,113 @@ +From stable+bounces-25809-greg=kroah.com@vger.kernel.org Mon Mar 4 06:08:40 2024 +From: Pawan Gupta +Date: Sun, 3 Mar 2024 21:08:31 -0800 +Subject: x86/entry_64: Add VERW just before userspace transition +To: stable@vger.kernel.org +Cc: Dave Hansen , Dave Hansen +Message-ID: <20240303-delay-verw-backport-6-6-y-v2-1-40ce56b521a5@linux.intel.com> +Content-Disposition: inline + +From: Pawan Gupta + +commit 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a upstream. + +Mitigation for MDS is to use VERW instruction to clear any secrets in +CPU Buffers. Any memory accesses after VERW execution can still remain +in CPU buffers. It is safer to execute VERW late in return to user path +to minimize the window in which kernel data can end up in CPU buffers. 
+There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. + +Add support for deploying VERW mitigation after user register state is +restored. This helps minimize the chances of kernel data ending up in +CPU buffers after executing VERW. + +Note that the mitigation at the new location is not yet enabled. + + Corner case not handled + ======================= + Interrupts returning to kernel don't clear CPU buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMIs returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. What's left + is only the data that the NMI touches, and that may or may not be of + any interest. 
+ + [ pawan: resolved conflict for hunk swapgs_restore_regs_and_return_to_usermode in backport ] + +Suggested-by: Dave Hansen +Signed-off-by: Pawan Gupta +Signed-off-by: Dave Hansen +Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/entry/entry_64.S | 11 +++++++++++ + arch/x86/entry/entry_64_compat.S | 1 + + 2 files changed, 12 insertions(+) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -223,6 +223,7 @@ syscall_return_via_sysret: + SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + swapgs ++ CLEAR_CPU_BUFFERS + sysretq + SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR +@@ -663,6 +664,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_ + /* Restore RDI. */ + popq %rdi + swapgs ++ CLEAR_CPU_BUFFERS + jmp .Lnative_iret + + +@@ -774,6 +776,8 @@ native_irq_return_ldt: + */ + popq %rax /* Restore user RAX */ + ++ CLEAR_CPU_BUFFERS ++ + /* + * RSP now points to an ordinary IRET frame, except that the page + * is read-only and RSP[31:16] are preloaded with the userspace +@@ -1503,6 +1507,12 @@ nmi_restore: + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* ++ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like ++ * NMI in kernel after user state is restored. For an unprivileged user ++ * these conditions are hard to meet. ++ */ ++ ++ /* + * iretq reads the "iret" frame and exits the NMI stack in a + * single instruction. We are returning to kernel mode, so this + * cannot result in a fault. 
Similarly, we don't need to worry +@@ -1520,6 +1530,7 @@ SYM_CODE_START(ignore_sysret) + UNWIND_HINT_END_OF_STACK + ENDBR + mov $-ENOSYS, %eax ++ CLEAR_CPU_BUFFERS + sysretl + SYM_CODE_END(ignore_sysret) + #endif +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -271,6 +271,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_uns + xorl %r9d, %r9d + xorl %r10d, %r10d + swapgs ++ CLEAR_CPU_BUFFERS + sysretl + SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR