From: Sean Christopherson Date: Wed, 11 Jun 2025 22:45:45 +0000 (-0700) Subject: iommu/amd: KVM: SVM: Set pCPU info in IRTE when setting vCPU affinity X-Git-Tag: v6.17-rc1~111^2~11^2~36 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f965255dc5033387ac7858787c6c792fd789ac34;p=thirdparty%2Fkernel%2Flinux.git iommu/amd: KVM: SVM: Set pCPU info in IRTE when setting vCPU affinity Now that setting vCPU affinity is guarded with ir_list_lock, i.e. now that avic_physical_id_entry can be safely accessed, set the pCPU info straight-away when setting vCPU affinity. Putting the IRTE into posted mode, and then immediately updating the IRTE a second time if the target vCPU is running is wasteful and confusing. This also fixes a flaw where a posted IRQ that arrives between putting the IRTE into guest_mode and setting the correct destination could cause the IOMMU to ring the doorbell on the wrong pCPU. Link: https://lore.kernel.org/r/20250611224604.313496-44-seanjc@google.com Signed-off-by: Sean Christopherson --- diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 2dbc9cb61c2fe..4c75a17632f60 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -35,6 +35,7 @@ struct amd_iommu_pi_data { u64 vapic_addr; /* Physical address of the vCPU's vAPIC. */ u32 ga_tag; u32 vector; /* Guest vector of the interrupt */ + int cpu; bool is_guest_mode; void *ir_data; }; diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1960bb06c4b94..35c5fb831c289 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -727,6 +727,7 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate) { + int apic_id = kvm_cpu_get_apicid(vcpu->cpu); int ret = 0; unsigned long flags; struct vcpu_svm *svm = to_svm(vcpu); @@ -746,7 +747,7 @@ static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate) list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) { if (activate) - ret = amd_iommu_activate_guest_mode(irqfd->irq_bypass_data); + ret = amd_iommu_activate_guest_mode(irqfd->irq_bypass_data, apic_id); else ret = amd_iommu_deactivate_guest_mode(irqfd->irq_bypass_data); if (ret) @@ -810,6 +811,18 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, */ guard(spinlock_irqsave)(&svm->ir_list_lock); + /* + * Update the target pCPU for IOMMU doorbells if the vCPU is + * running. If the vCPU is NOT running, i.e. is blocking or + * scheduled out, KVM will update the pCPU info when the vCPU + * is awakened and/or scheduled in. See also avic_vcpu_load(). + */ + entry = svm->avic_physical_id_entry; + if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) + pi_data.cpu = entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; + else + pi_data.cpu = -1; + ret = irq_set_vcpu_affinity(host_irq, &pi_data); if (ret) return ret; @@ -824,17 +837,6 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, return -EIO; } - /* - * Update the target pCPU for IOMMU doorbells if the vCPU is - * running. If the vCPU is NOT running, i.e. is blocking or - * scheduled out, KVM will update the pCPU info when the vCPU - * is awakened and/or scheduled in. See also avic_vcpu_load(). - */ - entry = svm->avic_physical_id_entry; - if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) - amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK, - pi_data.ir_data); - irqfd->irq_bypass_data = pi_data.ir_data; list_add(&irqfd->vcpu_list, &svm->ir_list); return 0; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 2eaba64be68ab..6352930ad0115 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3850,7 +3850,7 @@ int amd_iommu_update_ga(int cpu, void *data) } EXPORT_SYMBOL(amd_iommu_update_ga); -int amd_iommu_activate_guest_mode(void *data) +int amd_iommu_activate_guest_mode(void *data, int cpu) { struct amd_ir_data *ir_data = (struct amd_ir_data *)data; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; @@ -3871,6 +3871,8 @@ int amd_iommu_activate_guest_mode(void *data) entry->hi.fields.vector = ir_data->ga_vector; entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; + __amd_iommu_update_ga(entry, cpu); + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry); } @@ -3937,7 +3939,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *info) ir_data->ga_root_ptr = (pi_data->vapic_addr >> 12); ir_data->ga_vector = pi_data->vector; ir_data->ga_tag = pi_data->ga_tag; - ret = amd_iommu_activate_guest_mode(ir_data); + ret = amd_iommu_activate_guest_mode(ir_data, pi_data->cpu); } else { ret = amd_iommu_deactivate_guest_mode(ir_data); } diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index fe0e16ffe0e5c..c9f2df0c4596c 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -32,7 +32,7 @@ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); extern int amd_iommu_update_ga(int cpu, void *data); -extern int amd_iommu_activate_guest_mode(void *data); +extern int amd_iommu_activate_guest_mode(void *data, int cpu); extern int amd_iommu_deactivate_guest_mode(void *data); #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ @@ -48,7 +48,7 @@ static inline int amd_iommu_update_ga(int cpu, void *data) return 0; } -static inline int amd_iommu_activate_guest_mode(void *data) +static inline int amd_iommu_activate_guest_mode(void *data, int cpu) { return 0; }