From: Paolo Bonzini Date: Wed, 8 Apr 2026 08:39:20 +0000 (-0400) Subject: KVM: VMX: replace vmx_spec_ctrl_restore_host with RESTORE_HOST_SPEC_CTRL_BODY X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=344ebd21f2c9d59fb6d7a409eedf8b31d8b0f17e;p=thirdparty%2Fkernel%2Flinux.git KVM: VMX: replace vmx_spec_ctrl_restore_host with RESTORE_HOST_SPEC_CTRL_BODY Reuse the same assembly as SVM, just with alternatives instead of cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS). Note that Intel does need an LFENCE with eIBRS, unlike AMD's AutoIBRS. However, the LFENCE is not needed for X86_FEATURE_KERNEL_IBRS because there are no conditional branches between FILL_RETURN_BUFFER and ret in that case (the 'je' is patched out). Signed-off-by: Paolo Bonzini --- diff --git a/arch/x86/kvm/vmenter.h b/arch/x86/kvm/vmenter.h index 73f3adc301d9..ba3f71449c62 100644 --- a/arch/x86/kvm/vmenter.h +++ b/arch/x86/kvm/vmenter.h @@ -55,7 +55,12 @@ #ifdef CONFIG_X86_64 mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx cmp \guest_spec_ctrl, %rdx - je \label + /* + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. + */ + ALTERNATIVE __stringify(je \label), "", X86_FEATURE_KERNEL_IBRS movl %edx, %eax shr $32, %rdx #else @@ -66,7 +71,7 @@ mov 4 + \guest_spec_ctrl, %edi xor %edx, %edi or %edi, %esi - je \label + ALTERNATIVE __stringify(je \label), "", X86_FEATURE_KERNEL_IBRS #endif wrmsr .endm diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 2dd49080630d..7e4dc17fc0b8 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -105,9 +105,9 @@ SYM_FUNC_START(__vmx_vcpu_run) * Unlike AMD there's no V_SPEC_CTRL here, so do not leave the body * out of line. Clobbers RAX, RCX, RDX, RSI. 
*/ - ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL - RESTORE_GUEST_SPEC_CTRL_BODY VMX_spec_ctrl(%_ASM_DI), .Lspec_ctrl_done -.Lspec_ctrl_done: + ALTERNATIVE "jmp .Lspec_ctrl_guest_done", "", X86_FEATURE_MSR_SPEC_CTRL + RESTORE_GUEST_SPEC_CTRL_BODY VMX_spec_ctrl(%_ASM_DI), .Lspec_ctrl_guest_done +.Lspec_ctrl_guest_done: /* * Since vmentry is serializing on affected CPUs, there's no need for @@ -252,16 +252,32 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ X86_FEATURE_RSB_VMEXIT_LITE - pop %_ASM_ARG2 /* @flags */ - pop %_ASM_ARG1 /* @vmx */ + /* Clobbers RAX, RCX, RDX, RSI. */ + ALTERNATIVE "jmp .Lspec_ctrl_host_done", "", X86_FEATURE_MSR_SPEC_CTRL + mov WORD_SIZE(%_ASM_SP), %_ASM_DI + RESTORE_HOST_SPEC_CTRL_BODY VMX_spec_ctrl(%_ASM_DI), (%_ASM_SP), .Lspec_ctrl_host_done +.Lspec_ctrl_host_done: - call vmx_spec_ctrl_restore_host + /* + * Halt speculation past a conditional wrmsr. Intel's eIBRS + * guarantees that the guest cannot control the RSB "once IBRS is + * set", but in the eIBRS case speculative execution past the 'je' + * can go all the way to the RET below while MSR_IA32_SPEC_CTRL + * still holds the guest value. + */ + ALTERNATIVE_2 "", "lfence", X86_FEATURE_MSR_SPEC_CTRL, \ + "", X86_FEATURE_KERNEL_IBRS CLEAR_BRANCH_HISTORY_VMEXIT /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX + /* Pop our saved arguments from the stack */ + pop %_ASM_BX + pop %_ASM_BX + + /* ... 
and then the callee-save registers */ pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a8039b0f9392..b033f611fa04 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7388,31 +7388,6 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } -void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, - unsigned int flags) -{ - u64 hostval = this_cpu_read(x86_spec_ctrl_current); - - if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) - return; - - if (flags & KVM_ENTER_SAVE_SPEC_CTRL) - vmx->spec_ctrl = native_rdmsrq(MSR_IA32_SPEC_CTRL); - - /* - * If the guest/host SPEC_CTRL values differ, restore the host value. - * - * For legacy IBRS, the IBRS bit always needs to be written after - * transitioning from a less privileged predictor mode, regardless of - * whether the guest/host values differ. - */ - if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || - vmx->spec_ctrl != hostval) - native_wrmsrq(MSR_IA32_SPEC_CTRL, hostval); - - barrier_nospec(); -} - static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, bool force_immediate_exit) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index be6a1dc2f69f..f62007a5c2a7 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -367,7 +367,6 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); unsigned int __vmx_vcpu_enter_flags(struct vcpu_vmx *vmx); bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned int flags); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);