git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
x86/kvm/vmx: Move IRQ/NMI dispatch from KVM into x86 core
author Peter Zijlstra <peterz@infradead.org>
Fri, 8 May 2026 09:18:29 +0000 (11:18 +0200)
committer Thomas Gleixner <tglx@kernel.org>
Tue, 19 May 2026 18:25:51 +0000 (20:25 +0200)
Move the VMX interrupt dispatch magic into the x86 core code. This
isolates KVM from the FRED/IDT decisions and reduces the amount of
EXPORT_SYMBOL_FOR_KVM().

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: "Verma, Vishal L" <vishal.l.verma@intel.com>
Tested-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Acked-by: Sean Christopherson <seanjc@google.com>
Link: https://patch.msgid.link/20260508091829.GO3126523@noisy.programming.kicks-ass.net
12 files changed:
arch/x86/entry/Makefile
arch/x86/entry/common.c [new file with mode: 0644]
arch/x86/entry/entry.S
arch/x86/entry/entry_64_fred.S
arch/x86/include/asm/desc.h
arch/x86/include/asm/desc_defs.h
arch/x86/include/asm/entry-common.h
arch/x86/include/asm/fred.h
arch/x86/kernel/idt.c
arch/x86/kernel/nmi.c
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/vmx/vmx.c

index 72cae8e0ce856412c02a8009aabebb9c3037e6d9..83b4762d6ecba4893f7885ec785a2f8a67a337c2 100644 (file)
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_syscall_64.o    = $(CC_FLAGS_FTRACE)
 CFLAGS_syscall_32.o            += -fno-stack-protector
 CFLAGS_syscall_64.o            += -fno-stack-protector
 
-obj-y                          := entry.o entry_$(BITS).o syscall_$(BITS).o
+obj-y                          := entry.o entry_$(BITS).o syscall_$(BITS).o common.o
 
 obj-y                          += vdso/
 obj-y                          += vsyscall/
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
new file mode 100644 (file)
index 0000000..b62ac82
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/entry-common.h>
+#include <linux/kvm_types.h>
+#include <asm/fred.h>
+#include <asm/desc.h>
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * On VMX, NMIs and IRQs (as configured by KVM) are acknowledged by hardware as
+ * part of the VM-Exit, i.e. the event itself is consumed as part of the
+ * VM-Exit.  x86_entry_from_kvm() is invoked by KVM to effectively forward NMIs
+ * and IRQs to the kernel for servicing.  On SVM, a.k.a. AMD, the NMI/IRQ
+ * VM-Exit is purely a signal that an NMI/IRQ is pending, i.e. the event that
+ * triggered the VM-Exit is held pending until it's unblocked in the host.
+ */
+noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector)
+{
+       if (event_type == EVENT_TYPE_EXTINT) {
+#ifdef CONFIG_X86_64
+               /*
+                * Use FRED dispatch, even when running IDT. The dispatch
+                * tables are kept in sync between FRED and IDT, and the FRED
+                * dispatch works well with CFI.
+                */
+               fred_entry_from_kvm(event_type, vector);
+#else
+               idt_entry_from_kvm(vector);
+#endif
+               return;
+       }
+
+       WARN_ON_ONCE(event_type != EVENT_TYPE_NMI);
+
+#ifdef CONFIG_X86_64
+       if (cpu_feature_enabled(X86_FEATURE_FRED))
+               return fred_entry_from_kvm(event_type, vector);
+#endif
+
+       /*
+        * Notably, we must use IDT dispatch for NMI when running in IDT mode.
+        * The FRED NMI context is significantly different and will not work
+        * right (specifically FRED fixed the NMI recursion issue).
+        */
+       idt_entry_from_kvm(vector);
+}
+EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm);
+#endif
index 6ba2b3adcef0f06b448868b0c46bff119b839f33..a56e043b266dcebf2dae452d1e2b5d551a5b430a 100644 (file)
@@ -75,3 +75,49 @@ THUNK warn_thunk_thunk, __warn_thunk
 #if defined(CONFIG_STACKPROTECTOR) && defined(CONFIG_SMP)
 EXPORT_SYMBOL(__ref_stack_chk_guard);
 #endif
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+.macro IDT_DO_EVENT_IRQOFF call_insn call_target
+       /*
+        * Unconditionally create a stack frame, getting the correct RSP on the
+        * stack (for x86-64) would take two instructions anyways, and RBP can
+        * be used to restore RSP to make objtool happy (see below).
+        */
+       push %_ASM_BP
+       mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+       /*
+        * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+        * creating the synthetic interrupt stack frame for the IRQ/NMI.
+        */
+       and  $-16, %rsp
+       push $__KERNEL_DS
+       push %rbp
+#endif
+       pushf
+       push $__KERNEL_CS
+       \call_insn \call_target
+
+       /*
+        * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+        * the correct value.  objtool doesn't know the callee will IRET and,
+        * without the explicit restore, thinks the stack is getting walloped.
+        * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+        */
+       leave
+       RET
+.endm
+
+.pushsection .text, "ax"
+SYM_FUNC_START(idt_do_interrupt_irqoff)
+       IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(idt_do_interrupt_irqoff)
+.popsection
+
+.pushsection .noinstr.text, "ax"
+SYM_FUNC_START(idt_do_nmi_irqoff)
+       IDT_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(idt_do_nmi_irqoff)
+.popsection
+#endif
index 894f7f16eb80a29ce26c176e4494f1878fad03b2..0d2768ab836c46cee3557f83e54c2fd3f33970a0 100644 (file)
@@ -147,5 +147,4 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
        RET
 
 SYM_FUNC_END(asm_fred_entry_from_kvm)
-EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm);
 #endif
index ec95fe44fa3a03a1708bc0d8474595258db6027f..00aeae84352979aec8538eb5120973d0bf7d40d2 100644 (file)
@@ -438,6 +438,10 @@ extern void idt_setup_traps(void);
 extern void idt_setup_apic_and_irq_gates(void);
 extern bool idt_is_f00f_address(unsigned long address);
 
+extern void idt_do_interrupt_irqoff(unsigned long address);
+extern void idt_do_nmi_irqoff(void);
+extern void idt_entry_from_kvm(unsigned int vector);
+
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
 #else
index 7e6b9314758a19eaa86906cdea4588a326120099..2f2ce8aadf07639ba5aed89c587bc56d2b343a43 100644 (file)
@@ -145,7 +145,7 @@ struct gate_struct {
 typedef struct gate_struct gate_desc;
 
 #ifndef _SETUP
-static inline unsigned long gate_offset(const gate_desc *g)
+static __always_inline unsigned long gate_offset(const gate_desc *g)
 {
 #ifdef CONFIG_X86_64
        return g->offset_low | ((unsigned long)g->offset_middle << 16) |
index 7535131c711bb628f7fd81863f9e30921180e7d4..eca24b5e07f4c6f8b25788a5f992824c54de259f 100644 (file)
@@ -97,4 +97,6 @@ static __always_inline void arch_exit_to_user_mode(void)
 }
 #define arch_exit_to_user_mode arch_exit_to_user_mode
 
+extern void x86_entry_from_kvm(unsigned int event_type, unsigned int vector);
+
 #endif
index 2bb65677c079b23cc178fe6e2df3deeb65cd1a31..18a2f811c35802497213f11398771561284d5daa 100644 (file)
@@ -110,7 +110,6 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { ret
 static inline void cpu_init_fred_exceptions(void) { }
 static inline void cpu_init_fred_rsps(void) { }
 static inline void fred_complete_exception_setup(void) { }
-static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
 static inline void fred_sync_rsp0(unsigned long rsp0) { }
 static inline void fred_update_rsp0(void) { }
 #endif /* CONFIG_X86_FRED */
index 2604565887564a3207f3a6513a974ac36391eac9..7bcf1decc0349136affaabd6e6b6ba1282cecdd0 100644 (file)
@@ -268,6 +268,21 @@ void __init idt_setup_early_pf(void)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+noinstr void idt_entry_from_kvm(unsigned int vector)
+{
+       if (vector == NMI_VECTOR)
+               return idt_do_nmi_irqoff();
+
+       /*
+        * Only the NMI path requires noinstr; IRQ dispatch may be instrumented.
+        */
+       instrumentation_begin();
+       idt_do_interrupt_irqoff(gate_offset(idt_table + vector));
+       instrumentation_end();
+}
+#endif
+
 static void __init idt_map_in_cea(void)
 {
        /*
index 3d239ed12744173df01d3b14a9f810c0309a4f5e..52a3afb1b79e93a9d0fbd982265afb74ce0361a2 100644 (file)
@@ -614,7 +614,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
 {
        exc_nmi(regs);
 }
-EXPORT_SYMBOL_FOR_KVM(asm_exc_nmi_kvm_vmx);
 #endif
 
 #ifdef CONFIG_NMI_CHECK_CPU
index 8a481dae9cae2738c1331901da512bf0e691f527..ff1f254a0ef4e2ad1c2378d1a9fd588849d6a474 100644 (file)
 #define VCPU_R15       __VCPU_REGS_R15 * WORD_SIZE
 #endif
 
-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
-       /*
-        * Unconditionally create a stack frame, getting the correct RSP on the
-        * stack (for x86-64) would take two instructions anyways, and RBP can
-        * be used to restore RSP to make objtool happy (see below).
-        */
-       push %_ASM_BP
-       mov %_ASM_SP, %_ASM_BP
-
-#ifdef CONFIG_X86_64
-       /*
-        * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
-        * creating the synthetic interrupt stack frame for the IRQ/NMI.
-        */
-       and  $-16, %rsp
-       push $__KERNEL_DS
-       push %rbp
-#endif
-       pushf
-       push $__KERNEL_CS
-       \call_insn \call_target
-
-       /*
-        * "Restore" RSP from RBP, even though IRET has already unwound RSP to
-        * the correct value.  objtool doesn't know the callee will IRET and,
-        * without the explicit restore, thinks the stack is getting walloped.
-        * Using an unwind hint is problematic due to x86-64's dynamic alignment.
-        */
-       leave
-       RET
-.endm
-
 .section .noinstr.text, "ax"
 
 /**
@@ -320,10 +288,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
 
 SYM_FUNC_END(__vmx_vcpu_run)
 
-SYM_FUNC_START(vmx_do_nmi_irqoff)
-       VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
 #ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
 
 /**
@@ -375,13 +339,3 @@ SYM_FUNC_START(vmread_error_trampoline)
        RET
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
-
-.section .text, "ax"
-
-#ifndef CONFIG_X86_FRED
-
-SYM_FUNC_START(vmx_do_interrupt_irqoff)
-       VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
-SYM_FUNC_END(vmx_do_interrupt_irqoff)
-
-#endif
index 49feecb286b23cc7213dd2e78eb0f80b1365f03c..b9103de01428eb95a4fa786a8bcd5b9aa9d4322e 100644 (file)
@@ -7117,9 +7117,6 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
        vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
 }
 
-void vmx_do_interrupt_irqoff(unsigned long entry);
-void vmx_do_nmi_irqoff(void);
-
 static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 {
        /*
@@ -7161,17 +7158,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
            "unexpected VM-Exit interrupt info: 0x%x", intr_info))
                return;
 
-       /*
-        * Invoke the kernel's IRQ handler for the vector.  Use the FRED path
-        * when it's available even if FRED isn't fully enabled, e.g. even if
-        * FRED isn't supported in hardware, in order to avoid the indirect
-        * CALL in the non-FRED path.
-        */
        kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
-       if (IS_ENABLED(CONFIG_X86_FRED))
-               fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
-       else
-               vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
+       x86_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
        kvm_after_interrupt(vcpu);
 
        vcpu->arch.at_instruction_boundary = true;
@@ -7481,10 +7469,7 @@ noinstr void vmx_handle_nmi(struct kvm_vcpu *vcpu)
                return;
 
        kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
-       if (cpu_feature_enabled(X86_FEATURE_FRED))
-               fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
-       else
-               vmx_do_nmi_irqoff();
+       x86_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
        kvm_after_interrupt(vcpu);
 }