git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
KVM: TDX: Reject fully in-kernel irqchip if EOIs are protected, i.e. for TDX VMs
author Sagi Shahar <sagis@google.com>
Wed, 27 Aug 2025 01:17:26 +0000 (18:17 -0700)
committer Sean Christopherson <seanjc@google.com>
Tue, 16 Sep 2025 19:54:15 +0000 (12:54 -0700)
Reject KVM_CREATE_IRQCHIP if the VM type has protected EOIs, i.e. if KVM
can't intercept EOI and thus can't faithfully emulate level-triggered
interrupts that are routed through the I/O APIC.  For TDX VMs, the
TDX-Module owns the VMX EOI-bitmap and configures all IRQ vectors to have
the CPU accelerate EOIs, i.e. doesn't allow KVM to intercept any EOIs.

KVM already requires a split irqchip[1], but does so during vCPU creation,
which is both too late to allow userspace to fall back to a split irqchip
and a less-than-stellar experience for userspace since an -EINVAL on
KVM_CREATE_VCPU is far harder to debug/triage than failure exactly on
KVM_CREATE_IRQCHIP.  And of course, allowing an action that ultimately
fails is arguably a bug regardless of the impact on userspace.

Link: https://lore.kernel.org/lkml/20250222014757.897978-11-binbin.wu@linux.intel.com [1]
Link: https://lore.kernel.org/lkml/aK3vZ5HuKKeFuuM4@google.com
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Acked-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/r/20250827011726.2451115-1-sagis@google.com
[sean: massage shortlog+changelog, relocate setting has_protected_eoi]
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/vmx/tdx.c
arch/x86/kvm/x86.c

index b1f87b5750f8c511a7d3dbe9a948a347c4d62450..990fbbb96ebfddbf7168c1fbf7ca5b2f38166e50 100644 (file)
@@ -1362,6 +1362,7 @@ struct kvm_arch {
        u8 vm_type;
        bool has_private_mem;
        bool has_protected_state;
+       bool has_protected_eoi;
        bool pre_fault_allowed;
        struct hlist_head *mmu_page_hash;
        struct list_head active_mmu_pages;
index 66744f5768c8eb8d439680fe7ddaabf8e5ca9171..17559f3ffbd51faf027bfaba2223604a32c8490d 100644 (file)
@@ -629,6 +629,11 @@ int tdx_vm_init(struct kvm *kvm)
        struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 
        kvm->arch.has_protected_state = true;
+       /*
+        * TDX Module doesn't allow the hypervisor to modify the EOI-bitmap,
+        * i.e. all EOIs are accelerated and never trigger exits.
+        */
+       kvm->arch.has_protected_eoi = true;
        kvm->arch.has_private_mem = true;
        kvm->arch.disabled_quirks |= KVM_X86_QUIRK_IGNORE_GUEST_PAT;
 
index bf386f2ebba3ef5fbea02e3fb0046dc922e48b6e..99f2a150ca78dfb618186729b9b664208d980e59 100644 (file)
@@ -6989,6 +6989,15 @@ set_identity_unlock:
                if (irqchip_in_kernel(kvm))
                        goto create_irqchip_unlock;
 
+               /*
+                * Disallow an in-kernel I/O APIC if the VM has protected EOIs,
+                * i.e. if KVM can't intercept EOIs and thus can't properly
+                * emulate level-triggered interrupts.
+                */
+               r = -ENOTTY;
+               if (kvm->arch.has_protected_eoi)
+                       goto create_irqchip_unlock;
+
                r = -EINVAL;
                if (kvm->created_vcpus)
                        goto create_irqchip_unlock;