From: Sean Christopherson Date: Wed, 11 Jun 2025 21:35:53 +0000 (-0700) Subject: KVM: x86: Add CONFIG_KVM_IOAPIC to allow disabling in-kernel I/O APIC X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=628a27731e3f36de7ddce226f7e09ee70e40ed66;p=thirdparty%2Flinux.git KVM: x86: Add CONFIG_KVM_IOAPIC to allow disabling in-kernel I/O APIC Add a Kconfig to allow building KVM without support for emulating a I/O APIC, PIC, and PIT, which is desirable for deployments that effectively don't support a fully in-kernel IRQ chip, i.e. never expect any VMM to create an in-kernel I/O APIC. E.g. compiling out support eliminates a few thousand lines of guest-facing code and gives security folks warm fuzzies. As a bonus, wrapping relevant paths with CONFIG_KVM_IOAPIC #ifdefs makes it much easier for readers to understand which bits and pieces exist specifically for fully in-kernel IRQ chips. Opportunistically convert all two in-kernel uses of __KVM_HAVE_IOAPIC to CONFIG_KVM_IOAPIC, e.g. rather than add a second #ifdef to generate a stub for kvm_arch_post_irq_routing_update(). Acked-by: Kai Huang Link: https://lore.kernel.org/r/20250611213557.294358-15-seanjc@google.com Signed-off-by: Sean Christopherson --- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a4649a234f05b..8d511eb039337 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1375,9 +1375,11 @@ struct kvm_arch { atomic_t noncoherent_dma_count; #define __KVM_HAVE_ARCH_ASSIGNED_DEVICE atomic_t assigned_device_count; +#ifdef CONFIG_KVM_IOAPIC struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; +#endif atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map __rcu *apic_map; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 2eeffcec53828..2c86673155c9a 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -166,6 +166,16 @@ config KVM_AMD_SEV Encrypted State (SEV-ES), and Secure Encrypted Virtualization with Secure Nested Paging (SEV-SNP) technologies on AMD processors. +config KVM_IOAPIC + bool "I/O APIC, PIC, and PIT emulation" + default y + depends on KVM + help + Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e. + for full in-kernel APIC emulation. + + If unsure, say Y. + config KVM_SMM bool "System Management Mode emulation" default y diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a5d362c7b5040..92c737257789a 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -5,12 +5,13 @@ ccflags-$(CONFIG_KVM_WERROR) += -Werror include $(srctree)/virt/kvm/Makefile.kvm -kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ - i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ +kvm-y += x86.o emulate.o irq.o lapic.o \ + irq_comm.o cpuid.o pmu.o mtrr.o \ debugfs.o mmu/mmu.o mmu/page_track.o \ mmu/spte.o kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o +kvm-$(CONFIG_KVM_IOAPIC) += i8259.o i8254.o ioapic.o kvm-$(CONFIG_KVM_HYPERV) += hyperv.o kvm-$(CONFIG_KVM_XEN) += xen.o kvm-$(CONFIG_KVM_SMM) += smm.o diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index b9c1feb379a7a..e8bd59ad8a7c4 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -8,6 +8,7 @@ #include +#ifdef CONFIG_KVM_IOAPIC struct kvm_kpit_channel_state { u32 count; /* can be 65536 */ u16 latched_count; @@ -64,5 +65,6 @@ int kvm_vm_ioctl_reinject(struct kvm *kvm, struct kvm_reinject_control *control) struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags); void kvm_free_pit(struct kvm *kvm); +#endif /* CONFIG_KVM_IOAPIC */ #endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index fb3bad0f49657..4c219e9f52b0e 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -76,8 +76,10 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v) if (!kvm_apic_accept_pic_intr(v)) return 0; +#ifdef CONFIG_KVM_IOAPIC if (pic_in_kernel(v->kvm)) return v->kvm->arch.vpic->output; +#endif WARN_ON_ONCE(!irqchip_split(v->kvm)); return pending_userspace_extint(v); @@ -136,8 +138,10 @@ int kvm_cpu_get_extint(struct kvm_vcpu *v) return v->kvm->arch.xen.upcall_vector; #endif +#ifdef CONFIG_KVM_IOAPIC if (pic_in_kernel(v->kvm)) return kvm_pic_read_irq(v->kvm); /* PIC */ +#endif WARN_ON_ONCE(!irqchip_split(v->kvm)); return get_userspace_extint(v); @@ -171,7 +175,9 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); +#ifdef CONFIG_KVM_IOAPIC __kvm_migrate_pit_timer(vcpu); +#endif kvm_x86_call(migrate_timers)(vcpu); } @@ -187,6 +193,7 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } +#ifdef CONFIG_KVM_IOAPIC #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } @@ -273,3 +280,4 @@ int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) kvm_pic_update_irq(pic); return r; } +#endif diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7b8b54462f955..5e62c1f79ce65 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -18,6 +18,8 @@ #include #include "lapic.h" +#ifdef CONFIG_KVM_IOAPIC + #define PIC_NUM_PINS 16 #define SELECT_PIC(irq) \ ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE) @@ -79,12 +81,19 @@ static inline int irqchip_full(struct kvm *kvm) smp_rmb(); return mode == KVM_IRQCHIP_KERNEL; } +#else /* CONFIG_KVM_IOAPIC */ +static __always_inline int irqchip_full(struct kvm *kvm) +{ + return false; +} +#endif static inline int pic_in_kernel(struct kvm *kvm) { return irqchip_full(kvm); } + static inline int irqchip_split(struct kvm *kvm) { int mode = kvm->arch.irqchip_mode; diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 13d84c25e5035..14fc8db0206c1 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -208,6 +208,7 @@ int kvm_set_routing_entry(struct kvm *kvm, * check kvm_arch_can_set_irq_routing() before calling this function. */ switch (ue->type) { +#ifdef CONFIG_KVM_IOAPIC case KVM_IRQ_ROUTING_IRQCHIP: if (irqchip_split(kvm)) return -EINVAL; @@ -231,6 +232,7 @@ int kvm_set_routing_entry(struct kvm *kvm, } e->irqchip.irqchip = ue->u.irqchip.irqchip; break; +#endif case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; e->msi.address_lo = ue->u.msi.address_lo; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 73418dc0ebb22..4cf8c1f753d3a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1455,7 +1455,7 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - int trigger_mode; + int __maybe_unused trigger_mode; /* Eoi the ioapic only if the ioapic doesn't own the vector. */ if (!kvm_ioapic_handles_vector(apic, vector)) @@ -1476,12 +1476,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) return; } +#ifdef CONFIG_KVM_IOAPIC if (apic_test_vector(vector, apic->regs + APIC_TMR)) trigger_mode = IOAPIC_LEVEL_TRIG; else trigger_mode = IOAPIC_EDGE_TRIG; kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); +#endif } static int apic_set_eoi(struct kvm_lapic *apic) @@ -3146,8 +3148,11 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); } kvm_make_request(KVM_REQ_EVENT, vcpu); + +#ifdef CONFIG_KVM_IOAPIC if (ioapic_in_kernel(vcpu->kvm)) kvm_rtc_eoi_tracking_restore_one(vcpu); +#endif vcpu->arch.apic_arb_prio = 0; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4ef17990574d0..ababdba2c186b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -270,6 +270,7 @@ TRACE_EVENT(kvm_cpuid, {0x6, "SIPI"}, \ {0x7, "ExtINT"} +#ifdef CONFIG_KVM_IOAPIC TRACE_EVENT(kvm_ioapic_set_irq, TP_PROTO(__u64 e, int pin, bool coalesced), TP_ARGS(e, pin, coalesced), @@ -314,6 +315,7 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, (__entry->e & (1<<15)) ? "level" : "edge", (__entry->e & (1<<16)) ? "|masked" : "") ); +#endif TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d744730985ef..78dfa6c1cb012 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4632,17 +4632,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_EXT_CPUID: case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: +#ifdef CONFIG_KVM_IOAPIC case KVM_CAP_PIT: + case KVM_CAP_PIT2: + case KVM_CAP_PIT_STATE2: + case KVM_CAP_REINJECT_CONTROL: +#endif case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: case KVM_CAP_SYNC_MMU: case KVM_CAP_USER_NMI: - case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_IOEVENTFD: case KVM_CAP_IOEVENTFD_NO_LENGTH: - case KVM_CAP_PIT2: - case KVM_CAP_PIT_STATE2: + case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_VCPU_EVENTS: #ifdef CONFIG_KVM_HYPERV @@ -6937,9 +6940,11 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; int r = -ENOTTY; + +#ifdef CONFIG_KVM_IOAPIC /* * This union makes it completely explicit to gcc-3.x - * that these two variables' stack usage should be + * that these three variables' stack usage should be * combined, not added together. */ union { @@ -6947,6 +6952,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) struct kvm_pit_state2 ps2; struct kvm_pit_config pit_config; } u; +#endif switch (ioctl) { case KVM_SET_TSS_ADDR: @@ -6970,6 +6976,7 @@ set_identity_unlock: case KVM_SET_NR_MMU_PAGES: r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); break; +#ifdef CONFIG_KVM_IOAPIC case KVM_CREATE_IRQCHIP: { mutex_lock(&kvm->lock); @@ -7136,6 +7143,7 @@ set_pit2_out: r = kvm_vm_ioctl_reinject(kvm, &control); break; } +#endif case KVM_SET_BOOT_CPU_ID: r = 0; mutex_lock(&kvm->lock); @@ -10595,8 +10603,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); +#ifdef CONFIG_KVM_IOAPIC else if (ioapic_in_kernel(vcpu->kvm)) kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); +#endif if (is_guest_mode(vcpu)) vcpu->arch.load_eoi_exitmap_pending = true; @@ -12799,7 +12809,9 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm) cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); +#ifdef CONFIG_KVM_IOAPIC kvm_free_pit(kvm); +#endif kvm_mmu_pre_destroy_vm(kvm); static_call_cond(kvm_x86_vm_pre_destroy)(kvm); @@ -12823,8 +12835,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } kvm_destroy_vcpus(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); +#ifdef CONFIG_KVM_IOAPIC kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); +#endif kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ff5ea29e3439..3b5575d0b5744 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1023,7 +1023,7 @@ void kvm_unlock_all_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_KVM_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 96e581900c8e3..1065a81ca57f0 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -84,14 +84,14 @@ TRACE_EVENT(kvm_set_irq, ); #endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ -#if defined(__KVM_HAVE_IOAPIC) +#ifdef CONFIG_KVM_IOAPIC #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ {KVM_IRQCHIP_IOAPIC, "IOAPIC"} -#endif /* defined(__KVM_HAVE_IOAPIC) */ +#endif /* CONFIG_KVM_IOAPIC */ #if defined(CONFIG_HAVE_KVM_IRQCHIP)