git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
KVM: SVM: Track per-vCPU IRTEs using kvm_kernel_irqfd structure
author Sean Christopherson <seanjc@google.com>
Wed, 11 Jun 2025 22:45:07 +0000 (15:45 -0700)
committer Sean Christopherson <seanjc@google.com>
Fri, 20 Jun 2025 20:52:54 +0000 (13:52 -0700)
Track the IRTEs that are posting to an SVM vCPU via the associated irqfd
structure and GSI routing instead of dynamically allocating a separate
data structure.  In addition to eliminating an atomic allocation, this
will allow hoisting much of the IRTE update logic to common x86.

Cc: Sairaj Kodilkar <sarunkod@amd.com>
Link: https://lore.kernel.org/r/20250611224604.313496-6-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/svm.h
include/linux/kvm_irqfd.h

index 49b73907de92c0b303393464415e8c646d89e2c8..accc36958a75a05f8ca151e2bbea6be20d72186b 100644 (file)
@@ -76,14 +76,6 @@ static bool next_vm_id_wrapped = 0;
 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
 bool x2avic_enabled;
 
-/*
- * This is a wrapper of struct amd_iommu_ir_data.
- */
-struct amd_svm_iommu_ir {
-       struct list_head node;  /* Used by SVM for per-vcpu ir_list */
-       void *data;             /* Storing pointer to struct amd_ir_data */
-};
-
 static void avic_activate_vmcb(struct vcpu_svm *svm)
 {
        struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -747,8 +739,8 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
 {
        int ret = 0;
        unsigned long flags;
-       struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_kernel_irqfd *irqfd;
 
        if (!kvm_arch_has_assigned_device(vcpu->kvm))
                return 0;
@@ -762,11 +754,11 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
        if (list_empty(&svm->ir_list))
                goto out;
 
-       list_for_each_entry(ir, &svm->ir_list, node) {
+       list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
                if (activate)
-                       ret = amd_iommu_activate_guest_mode(ir->data);
+                       ret = amd_iommu_activate_guest_mode(irqfd->irq_bypass_data);
                else
-                       ret = amd_iommu_deactivate_guest_mode(ir->data);
+                       ret = amd_iommu_deactivate_guest_mode(irqfd->irq_bypass_data);
                if (ret)
                        break;
        }
@@ -775,27 +767,30 @@ out:
        return ret;
 }
 
-static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+static void svm_ir_list_del(struct vcpu_svm *svm,
+                           struct kvm_kernel_irqfd *irqfd,
+                           struct amd_iommu_pi_data *pi)
 {
        unsigned long flags;
-       struct amd_svm_iommu_ir *cur;
+       struct kvm_kernel_irqfd *cur;
 
        spin_lock_irqsave(&svm->ir_list_lock, flags);
-       list_for_each_entry(cur, &svm->ir_list, node) {
-               if (cur->data != pi->ir_data)
+       list_for_each_entry(cur, &svm->ir_list, vcpu_list) {
+               if (cur->irq_bypass_data != pi->ir_data)
+                       continue;
+               if (WARN_ON_ONCE(cur != irqfd))
                        continue;
-               list_del(&cur->node);
-               kfree(cur);
+               list_del(&irqfd->vcpu_list);
                break;
        }
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
 
-static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+static int svm_ir_list_add(struct vcpu_svm *svm,
+                          struct kvm_kernel_irqfd *irqfd,
+                          struct amd_iommu_pi_data *pi)
 {
-       int ret = 0;
        unsigned long flags;
-       struct amd_svm_iommu_ir *ir;
        u64 entry;
 
        if (WARN_ON_ONCE(!pi->ir_data))
@@ -812,25 +807,14 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
                struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
                struct vcpu_svm *prev_svm;
 
-               if (!prev_vcpu) {
-                       ret = -EINVAL;
-                       goto out;
-               }
+               if (!prev_vcpu)
+                       return -EINVAL;
 
                prev_svm = to_svm(prev_vcpu);
-               svm_ir_list_del(prev_svm, pi);
+               svm_ir_list_del(prev_svm, irqfd, pi);
        }
 
-       /**
-        * Allocating new amd_iommu_pi_data, which will get
-        * add to the per-vcpu ir_list.
-        */
-       ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
-       if (!ir) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       ir->data = pi->ir_data;
+       irqfd->irq_bypass_data = pi->ir_data;
 
        spin_lock_irqsave(&svm->ir_list_lock, flags);
 
@@ -845,10 +829,9 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
                amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
                                    true, pi->ir_data);
 
-       list_add(&ir->node, &svm->ir_list);
+       list_add(&irqfd->vcpu_list, &svm->ir_list);
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-out:
-       return ret;
+       return 0;
 }
 
 /*
@@ -952,7 +935,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
                         * scheduling information in IOMMU irte.
                         */
                        if (!ret && pi.is_guest_mode)
-                               svm_ir_list_add(svm, &pi);
+                               svm_ir_list_add(svm, irqfd, &pi);
                }
 
                if (!ret && svm) {
@@ -993,7 +976,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
 
                        vcpu = kvm_get_vcpu_by_id(kvm, id);
                        if (vcpu)
-                               svm_ir_list_del(to_svm(vcpu), &pi);
+                               svm_ir_list_del(to_svm(vcpu), irqfd, &pi);
                }
        }
 out:
@@ -1005,8 +988,8 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
        int ret = 0;
-       struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_kernel_irqfd *irqfd;
 
        lockdep_assert_held(&svm->ir_list_lock);
 
@@ -1020,8 +1003,8 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
        if (list_empty(&svm->ir_list))
                return 0;
 
-       list_for_each_entry(ir, &svm->ir_list, node) {
-               ret = amd_iommu_update_ga(cpu, r, ir->data);
+       list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
+               ret = amd_iommu_update_ga(cpu, r, irqfd->irq_bypass_data);
                if (ret)
                        return ret;
        }
index b35fce30d92314cbbb2d2ffe485093df4652bd7c..cc27877d69ae10d6154a1b75e5e1a8bb6961d6a6 100644 (file)
@@ -310,10 +310,12 @@ struct vcpu_svm {
        u64 *avic_physical_id_cache;
 
        /*
-        * Per-vcpu list of struct amd_svm_iommu_ir:
-        * This is used mainly to store interrupt remapping information used
-        * when update the vcpu affinity. This avoids the need to scan for
-        * IRTE and try to match ga_tag in the IOMMU driver.
+        * Per-vCPU list of irqfds that are eligible to post IRQs directly to
+        * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass).  The list
+        * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the
+        * target pCPU), when AVIC is toggled on/off (to (de)activate bypass),
+        * and if the irqfd becomes ineligible for posting (to put the IRTE
+        * back into remapped mode).
         */
        struct list_head ir_list;
        spinlock_t ir_list_lock;
index 8ad43692e3bbb459a75c3e33fd17fbd22b3d03e1..6510a48e62aa6bdf74b89bb81566213c2afb7293 100644 (file)
@@ -59,6 +59,9 @@ struct kvm_kernel_irqfd {
        struct work_struct shutdown;
        struct irq_bypass_consumer consumer;
        struct irq_bypass_producer *producer;
+
+       struct list_head vcpu_list;
+       void *irq_bypass_data;
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */