git.ipfire.org Git - thirdparty/linux.git/commitdiff
KVM: SVM: Fix IRQ window inhibit handling across multiple vCPUs
author: Sean Christopherson <seanjc@google.com>
Fri, 23 Jan 2026 22:45:12 +0000 (14:45 -0800)
committer: Sean Christopherson <seanjc@google.com>
Mon, 2 Mar 2026 22:51:36 +0000 (14:51 -0800)
IRQ window inhibits can be requested by multiple vCPUs at the same time
for injecting interrupts meant for different vCPUs. However, AVIC
inhibition is VM-wide and hence it is possible for the inhibition to be
cleared prematurely by the first vCPU that obtains the IRQ window even
though a second vCPU is still waiting for its IRQ window. This is likely
not a functional issue since the other vCPU will again see that
interrupts are pending to be injected (due to KVM_REQ_EVENT), and will
again request an IRQ window inhibition. However, this can result in
AVIC being rapidly toggled, resulting in high contention on
apicv_update_lock and degraded guest performance.

Address this by maintaining a VM-wide count of the number of vCPUs that
have requested an IRQ window. Set/clear the inhibit reason when the
count transitions between 0 and 1. This ensures that the inhibit reason
is not cleared as long as there are some vCPUs still waiting for an IRQ
window.

Co-developed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Co-developed-by: Naveen N Rao (AMD) <naveen@kernel.org>
Signed-off-by: Naveen N Rao (AMD) <naveen@kernel.org>
Tested-by: Naveen N Rao (AMD) <naveen@kernel.org>
Link: https://patch.msgid.link/20260123224514.2509129-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/x86.c

index ff07c45e3c731a2833b472faca4262ac4af19a5b..68db00dc09a02c949c761691a0c04f74083060f5 100644 (file)
@@ -1433,6 +1433,7 @@ struct kvm_arch {
        struct kvm_pit *vpit;
 #endif
        atomic_t vapics_in_nmi_mode;
+
        struct mutex apic_map_lock;
        struct kvm_apic_map __rcu *apic_map;
        atomic_t apic_map_dirty;
@@ -1440,9 +1441,13 @@ struct kvm_arch {
        bool apic_access_memslot_enabled;
        bool apic_access_memslot_inhibited;
 
-       /* Protects apicv_inhibit_reasons */
+       /*
+        * Protects apicv_inhibit_reasons and apicv_nr_irq_window_req (with an
+        * asterisk, see kvm_inc_or_dec_irq_window_inhibit() for details).
+        */
        struct rw_semaphore apicv_update_lock;
        unsigned long apicv_inhibit_reasons;
+       atomic_t apicv_nr_irq_window_req;
 
        gpa_t wall_clock;
 
@@ -2316,6 +2321,18 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
        kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
 }
 
+void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc);
+
+static inline void kvm_inc_apicv_irq_window_req(struct kvm *kvm)
+{
+       kvm_inc_or_dec_irq_window_inhibit(kvm, true);
+}
+
+static inline void kvm_dec_apicv_irq_window_req(struct kvm *kvm)
+{
+       kvm_inc_or_dec_irq_window_inhibit(kvm, false);
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
                       void *insn, int insn_len);
 void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
index 8766fd5f6d2b7a64b21887ea9289497861b9502f..e0da247ee594278857e618107e8a8d9fc91c79bc 100644 (file)
@@ -3729,8 +3729,11 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
         * the case in which the interrupt window was requested while L1 was
         * active (the vCPU was not running nested).
         */
-       if (!kvm_cpu_has_injectable_intr(vcpu) || is_guest_mode(vcpu))
-               kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+       if (svm->avic_irq_window &&
+           (!kvm_cpu_has_injectable_intr(vcpu) || is_guest_mode(vcpu))) {
+               svm->avic_irq_window = false;
+               kvm_dec_apicv_irq_window_req(svm->vcpu.kvm);
+       }
 
        trace_kvm_inj_virq(intr->nr, intr->soft, reinjected);
        ++vcpu->stat.irq_injections;
@@ -3932,17 +3935,28 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
         */
        if (vgif || gif_set(svm)) {
                /*
-                * IRQ window is not needed when AVIC is enabled,
-                * unless we have pending ExtINT since it cannot be injected
-                * via AVIC. In such case, KVM needs to temporarily disable AVIC,
-                * and fallback to injecting IRQ via V_IRQ.
+                * KVM only enables IRQ windows when AVIC is enabled if there's
+                * pending ExtINT since it cannot be injected via AVIC (ExtINT
+                * bypasses the local APIC).  V_IRQ is ignored by hardware when
+                * AVIC is enabled, and so KVM needs to temporarily disable
+                * AVIC in order to detect when it's ok to inject the ExtINT.
+                *
+                * If running nested, AVIC is already locally inhibited on this
+                * vCPU (L2 vCPUs use a different MMU that never maps the AVIC
+                * backing page), therefore there is no need to increment the
+                * VM-wide AVIC inhibit.  KVM will re-evaluate events when the
+                * vCPU exits to L1 and enable an IRQ window if the ExtINT is
+                * still pending.
                 *
-                * If running nested, AVIC is already locally inhibited
-                * on this vCPU, therefore there is no need to request
-                * the VM wide AVIC inhibition.
+                * Note, the IRQ window inhibit needs to be updated even if
+                * AVIC is inhibited for a different reason, as KVM needs to
+                * keep AVIC inhibited if the other reason is cleared and there
+                * is still an injectable interrupt pending.
                 */
-               if (!is_guest_mode(vcpu))
-                       kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN);
+               if (enable_apicv && !svm->avic_irq_window && !is_guest_mode(vcpu)) {
+                       svm->avic_irq_window = true;
+                       kvm_inc_apicv_irq_window_req(vcpu->kvm);
+               }
 
                svm_set_vintr(svm);
        }
index ebd7b36b1ceb9b1ba1c6f7ef5622f6fc1ff8a232..68675b25ef8e970dfac6d02baf69bea5e3ae3671 100644 (file)
@@ -333,6 +333,7 @@ struct vcpu_svm {
 
        bool guest_state_loaded;
 
+       bool avic_irq_window;
        bool x2avic_msrs_intercepted;
        bool lbr_msrs_intercepted;
 
index a03530795707797b73c701b0c952d46d27389e8d..db25938b6b50a735b3c506eedbd7482980611847 100644 (file)
@@ -11014,6 +11014,25 @@ void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_or_clear_apicv_inhibit);
 
+void kvm_inc_or_dec_irq_window_inhibit(struct kvm *kvm, bool inc)
+{
+       int add = inc ? 1 : -1;
+
+       if (!enable_apicv)
+               return;
+
+       /*
+        * Strictly speaking, the lock is only needed if going 0->1 or 1->0,
+        * a la atomic_dec_and_mutex_lock.  However, ExtINTs are rare and
+        * only target a single CPU, so that is the common case; do not
+        * bother eliding the down_write()/up_write() pair.
+        */
+       guard(rwsem_write)(&kvm->arch.apicv_update_lock);
+       if (atomic_add_return(add, &kvm->arch.apicv_nr_irq_window_req) == inc)
+               __kvm_set_or_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_IRQWIN, inc);
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_inc_or_dec_irq_window_inhibit);
+
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
        if (!kvm_apic_present(vcpu))