From 0a917e9d4b7070fafcbf7a8ec32d2aa444b4e757 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:08 -0700 Subject: [PATCH] KVM: SVM: Delete IRTE link from previous vCPU before setting new IRTE Delete the previous per-vCPU IRTE link prior to modifying the IRTE. If forcing the IRTE back to remapped mode fails, the IRQ is already broken; keeping stale metadata won't change that, and the IOMMU should be sufficiently paranoid to sanitize the IRTE when the IRQ is freed and reallocated. This will allow hoisting the vCPU tracking to common x86, which in turn will allow most of the IRTE update code to be deduplicated. Tested-by: Sairaj Kodilkar Link: https://lore.kernel.org/r/20250611224604.313496-7-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 60 +++++++++------------------------------ include/linux/kvm_irqfd.h | 1 + 2 files changed, 14 insertions(+), 47 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index accc36958a75a..7f7af8ff627d1 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -767,23 +767,19 @@ out: return ret; } -static void svm_ir_list_del(struct vcpu_svm *svm, - struct kvm_kernel_irqfd *irqfd, - struct amd_iommu_pi_data *pi) +static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd) { + struct kvm_vcpu *vcpu = irqfd->irq_bypass_vcpu; unsigned long flags; - struct kvm_kernel_irqfd *cur; - spin_lock_irqsave(&svm->ir_list_lock, flags); - list_for_each_entry(cur, &svm->ir_list, vcpu_list) { - if (cur->irq_bypass_data != pi->ir_data) - continue; - if (WARN_ON_ONCE(cur != irqfd)) - continue; - list_del(&irqfd->vcpu_list); - break; - } - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + if (!vcpu) + return; + + spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); + list_del(&irqfd->vcpu_list); + spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); + + irqfd->irq_bypass_vcpu = NULL; } static int svm_ir_list_add(struct vcpu_svm *svm, @@ -796,24 +792,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, if (WARN_ON_ONCE(!pi->ir_data)) return -EINVAL; - /** - * In some cases, the existing irte is updated and re-set, - * so we need to check here if it's already been * added - * to the ir_list. - */ - if (pi->prev_ga_tag) { - struct kvm *kvm = svm->vcpu.kvm; - u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); - struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); - struct vcpu_svm *prev_svm; - - if (!prev_vcpu) - return -EINVAL; - - prev_svm = to_svm(prev_vcpu); - svm_ir_list_del(prev_svm, irqfd, pi); - } - + irqfd->irq_bypass_vcpu = &svm->vcpu; irqfd->irq_bypass_data = pi->ir_data; spin_lock_irqsave(&svm->ir_list_lock, flags); @@ -905,6 +884,8 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new))); + svm_ir_list_del(irqfd); + /** * Here, we setup with legacy mode in the following cases: * 1. When cannot target interrupt to a specific vcpu. @@ -963,21 +944,6 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, pi.prev_ga_tag = 0; pi.is_guest_mode = false; ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), irqfd, &pi); - } } out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index 6510a48e62aa6..361c07f4466d1 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -60,6 +60,7 @@ struct kvm_kernel_irqfd { struct irq_bypass_consumer consumer; struct irq_bypass_producer *producer; + struct kvm_vcpu *irq_bypass_vcpu; struct list_head vcpu_list; void *irq_bypass_data; }; -- 2.47.2