KVM: SVM: Track per-vCPU IRTEs using kvm_kernel_irqfd structure

author Sean Christopherson <seanjc@google.com>

Wed, 11 Jun 2025 22:45:07 +0000 (15:45 -0700)

committer Sean Christopherson <seanjc@google.com>

Fri, 20 Jun 2025 20:52:54 +0000 (13:52 -0700)
author Sean Christopherson <seanjc@google.com>
Wed, 11 Jun 2025 22:45:07 +0000 (15:45 -0700)
committer Sean Christopherson <seanjc@google.com>
Fri, 20 Jun 2025 20:52:54 +0000 (13:52 -0700)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c

index 49b73907de92c0b303393464415e8c646d89e2c8..accc36958a75a05f8ca151e2bbea6be20d72186b 100644 (file)
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -76,14 +76,6 @@ static bool next_vm_id_wrapped = 0;
  static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
  bool x2avic_enabled;
  
-/*
- * This is a wrapper of struct amd_iommu_ir_data.
- */
-struct amd_svm_iommu_ir {
-       struct list_head node;  /* Used by SVM for per-vcpu ir_list */
-       void *data;             /* Storing pointer to struct amd_ir_data */
-};
-
  static void avic_activate_vmcb(struct vcpu_svm *svm)
  {
         struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -747,8 +739,8 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
  {
         int ret = 0;
         unsigned long flags;
-       struct amd_svm_iommu_ir *ir;
         struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_kernel_irqfd *irqfd;
  
         if (!kvm_arch_has_assigned_device(vcpu->kvm))
                 return 0;
@@ -762,11 +754,11 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
         if (list_empty(&svm->ir_list))
                 goto out;
  
-       list_for_each_entry(ir, &svm->ir_list, node) {
+       list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
                 if (activate)
-                       ret = amd_iommu_activate_guest_mode(ir->data);
+                       ret = amd_iommu_activate_guest_mode(irqfd->irq_bypass_data);
                 else
-                       ret = amd_iommu_deactivate_guest_mode(ir->data);
+                       ret = amd_iommu_deactivate_guest_mode(irqfd->irq_bypass_data);
                 if (ret)
                         break;
         }
@@ -775,27 +767,30 @@ out:
         return ret;
  }
  
-static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+static void svm_ir_list_del(struct vcpu_svm *svm,
+                           struct kvm_kernel_irqfd *irqfd,
+                           struct amd_iommu_pi_data *pi)
  {
         unsigned long flags;
-       struct amd_svm_iommu_ir *cur;
+       struct kvm_kernel_irqfd *cur;
  
         spin_lock_irqsave(&svm->ir_list_lock, flags);
-       list_for_each_entry(cur, &svm->ir_list, node) {
-               if (cur->data != pi->ir_data)
+       list_for_each_entry(cur, &svm->ir_list, vcpu_list) {
+               if (cur->irq_bypass_data != pi->ir_data)
+                       continue;
+               if (WARN_ON_ONCE(cur != irqfd))
                         continue;
-               list_del(&cur->node);
-               kfree(cur);
+               list_del(&irqfd->vcpu_list);
                 break;
         }
         spin_unlock_irqrestore(&svm->ir_list_lock, flags);
  }
  
-static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+static int svm_ir_list_add(struct vcpu_svm *svm,
+                          struct kvm_kernel_irqfd *irqfd,
+                          struct amd_iommu_pi_data *pi)
  {
-       int ret = 0;
         unsigned long flags;
-       struct amd_svm_iommu_ir *ir;
         u64 entry;
  
         if (WARN_ON_ONCE(!pi->ir_data))
@@ -812,25 +807,14 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
                 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
                 struct vcpu_svm *prev_svm;
  
-               if (!prev_vcpu) {
-                       ret = -EINVAL;
-                       goto out;
-               }
+               if (!prev_vcpu)
+                       return -EINVAL;
  
                 prev_svm = to_svm(prev_vcpu);
-               svm_ir_list_del(prev_svm, pi);
+               svm_ir_list_del(prev_svm, irqfd, pi);
         }
  
-       /**
-        * Allocating new amd_iommu_pi_data, which will get
-        * add to the per-vcpu ir_list.
-        */
-       ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
-       if (!ir) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       ir->data = pi->ir_data;
+       irqfd->irq_bypass_data = pi->ir_data;
  
         spin_lock_irqsave(&svm->ir_list_lock, flags);
  
@@ -845,10 +829,9 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
                 amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
                                     true, pi->ir_data);
  
-       list_add(&ir->node, &svm->ir_list);
+       list_add(&irqfd->vcpu_list, &svm->ir_list);
         spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-out:
-       return ret;
+       return 0;
  }
  
  /*
@@ -952,7 +935,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
                          * scheduling information in IOMMU irte.
                          */
                         if (!ret && pi.is_guest_mode)
-                               svm_ir_list_add(svm, &pi);
+                               svm_ir_list_add(svm, irqfd, &pi);
                 }
  
                 if (!ret && svm) {
@@ -993,7 +976,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
  
                         vcpu = kvm_get_vcpu_by_id(kvm, id);
                         if (vcpu)
-                               svm_ir_list_del(to_svm(vcpu), &pi);
+                               svm_ir_list_del(to_svm(vcpu), irqfd, &pi);
                 }
         }
  out:
@@ -1005,8 +988,8 @@ static inline int
  avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
  {
         int ret = 0;
-       struct amd_svm_iommu_ir *ir;
         struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_kernel_irqfd *irqfd;
  
         lockdep_assert_held(&svm->ir_list_lock);
  
@@ -1020,8 +1003,8 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
         if (list_empty(&svm->ir_list))
                 return 0;
  
-       list_for_each_entry(ir, &svm->ir_list, node) {
-               ret = amd_iommu_update_ga(cpu, r, ir->data);
+       list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
+               ret = amd_iommu_update_ga(cpu, r, irqfd->irq_bypass_data);
                 if (ret)
                         return ret;
         }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h

index b35fce30d92314cbbb2d2ffe485093df4652bd7c..cc27877d69ae10d6154a1b75e5e1a8bb6961d6a6 100644 (file)
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -310,10 +310,12 @@ struct vcpu_svm {
         u64 *avic_physical_id_cache;
  
         /*
-        * Per-vcpu list of struct amd_svm_iommu_ir:
-        * This is used mainly to store interrupt remapping information used
-        * when update the vcpu affinity. This avoids the need to scan for
-        * IRTE and try to match ga_tag in the IOMMU driver.
+        * Per-vCPU list of irqfds that are eligible to post IRQs directly to
+        * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass).  The list
+        * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the
+        * target pCPU), when AVIC is toggled on/off (to (de)activate bypass),
+        * and if the irqfd becomes ineligible for posting (to put the IRTE
+        * back into remapped mode).
          */
         struct list_head ir_list;
         spinlock_t ir_list_lock;
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h

index 8ad43692e3bbb459a75c3e33fd17fbd22b3d03e1..6510a48e62aa6bdf74b89bb81566213c2afb7293 100644 (file)
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -59,6 +59,9 @@ struct kvm_kernel_irqfd {
         struct work_struct shutdown;
         struct irq_bypass_consumer consumer;
         struct irq_bypass_producer *producer;
+
+       struct list_head vcpu_list;
+       void *irq_bypass_data;
  };
  
  #endif /* __LINUX_KVM_IRQFD_H */
author	Sean Christopherson <seanjc@google.com>
	Wed, 11 Jun 2025 22:45:07 +0000 (15:45 -0700)
committer	Sean Christopherson <seanjc@google.com>
	Fri, 20 Jun 2025 20:52:54 +0000 (13:52 -0700)
arch/x86/kvm/svm/avic.c		patch | blob | blame | history
arch/x86/kvm/svm/svm.h		patch | blob | blame | history
include/linux/kvm_irqfd.h		patch | blob | blame | history