From: Sean Christopherson Date: Wed, 11 Jun 2025 22:45:06 +0000 (-0700) Subject: KVM: Pass new routing entries and irqfd when updating IRTEs X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cb210737675ef4c1ad88721e84558eeb2f199312;p=thirdparty%2Fkernel%2Fstable.git KVM: Pass new routing entries and irqfd when updating IRTEs When updating IRTEs in response to a GSI routing or IRQ bypass change, pass the new/current routing information along with the associated irqfd. This will allow KVM x86 to harden, simplify, and deduplicate its code. Since adding/removing a bypass producer is now conveniently protected with irqfds.lock, i.e. can't run concurrently with kvm_irq_routing_update(), use the routing information cached in the irqfd instead of looking up the information in the current GSI routing tables. Opportunistically convert an existing printk() to pr_info() and put its string onto a single line (old code that strictly adhered to 80 chars). Link: https://lore.kernel.org/r/20250611224604.313496-5-seanjc@google.com Signed-off-by: Sean Christopherson --- diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 38a91bb5d4c75..a9a39e0375f7a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2771,8 +2771,9 @@ bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, return memcmp(&old->msi, &new->msi, sizeof(new->msi)); } -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new) { /* * Remapping the vLPI requires taking the its_lock mutex to resolve @@ -2781,7 +2782,7 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, * * Unmap the vLPI and fall back to software LPI injection. */ - return kvm_vgic_v4_unset_forwarding(kvm, host_irq); + return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq); } void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 44fd9ccc06248..2a14564dd08a5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -297,6 +297,7 @@ enum x86_intercept_stage; */ #define KVM_APIC_PV_EOI_PENDING 1 +struct kvm_kernel_irqfd; struct kvm_kernel_irq_routing_entry; /* @@ -1845,8 +1846,9 @@ struct kvm_x86_ops { void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); - int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); + int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_kernel_irq_routing_entry *new); void (*pi_start_assignment)(struct kvm *kvm); void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 067f8e3f5a0dc..49b73907de92c 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -886,21 +887,14 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, return 0; } -/* - * avic_pi_update_irte - set IRTE for Posted-Interrupts - * - * @kvm: kvm - * @host_irq: host irq of the interrupt - * @guest_irq: gsi of the interrupt - * @set: set or unset PI - * returns 0 on success, < 0 on failure - */ -int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) +int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_kernel_irq_routing_entry *new) { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; bool enable_remapped_mode = true; + bool set = !!new; int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) @@ -926,6 +920,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, if (e->type != KVM_IRQ_ROUTING_MSI) continue; + WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new))); + /** * Here, we setup with legacy mode in the following cases: * 1. When cannot target interrupt to a specific vcpu. diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e6f3c6a153a0c..b35fce30d9231 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -736,8 +736,9 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void avic_vcpu_put(struct kvm_vcpu *vcpu); void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); -int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); +int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_kernel_irq_routing_entry *new); void avic_vcpu_blocking(struct kvm_vcpu *vcpu); void avic_vcpu_unblocking(struct kvm_vcpu *vcpu); void avic_ring_doorbell(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 5c615e5845bf0..110fb19848ab4 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -2,6 +2,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include @@ -294,17 +295,9 @@ void vmx_pi_start_assignment(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK); } -/* - * vmx_pi_update_irte - set IRTE for Posted-Interrupts - * - * @kvm: kvm - * @host_irq: host irq of the interrupt - * @guest_irq: gsi of the interrupt - * @set: set or unset PI - * returns 0 on success, < 0 on failure - */ -int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) +int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_kernel_irq_routing_entry *new) { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; @@ -312,6 +305,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; + bool set = !!new; int idx, ret = 0; if (!vmx_can_use_vtd_pi(kvm)) @@ -329,6 +323,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { if (e->type != KVM_IRQ_ROUTING_MSI) continue; + + WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new))); + /* * VT-d PI cannot support posting multicast/broadcast * interrupts to a vCPU, we still use interrupt remapping diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 80499ea0e6743..a94afcb55f7ff 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -3,6 +3,9 @@ #define __KVM_X86_VMX_POSTED_INTR_H #include +#include +#include + #include void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); @@ -11,8 +14,9 @@ void pi_wakeup_handler(void); void __init pi_init_cpu(int cpu); void pi_apicv_pre_state_restore(struct kvm_vcpu *vcpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); -int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); +int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, + unsigned int host_irq, uint32_t guest_irq, + struct kvm_kernel_irq_routing_entry *new); void vmx_pi_start_assignment(struct kvm *kvm); static inline int pi_find_highest_vector(struct pi_desc *pi_desc) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02237dbb7f322..e1304b002062c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13507,31 +13507,31 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); struct kvm *kvm = irqfd->kvm; - int ret; + int ret = 0; kvm_arch_start_assignment(irqfd->kvm); spin_lock_irq(&kvm->irqfds.lock); irqfd->producer = prod; - ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, - prod->irq, irqfd->gsi, 1); - if (ret) - kvm_arch_end_assignment(irqfd->kvm); - + if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) { + ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq, + irqfd->gsi, &irqfd->irq_entry); + if (ret) + kvm_arch_end_assignment(irqfd->kvm); + } spin_unlock_irq(&kvm->irqfds.lock); - return ret; } void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { - int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); struct kvm *kvm = irqfd->kvm; + int ret; WARN_ON(irqfd->producer != prod); @@ -13544,11 +13544,13 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, spin_lock_irq(&kvm->irqfds.lock); irqfd->producer = NULL; - ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, - prod->irq, irqfd->gsi, 0); - if (ret) - printk(KERN_INFO "irq bypass consumer (eventfd %p) unregistration" - " fails: %d\n", irqfd->consumer.eventfd, ret); + if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) { + ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq, + irqfd->gsi, NULL); + if (ret) + pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n", + irqfd->consumer.eventfd, ret); + } spin_unlock_irq(&kvm->irqfds.lock); @@ -13556,10 +13558,12 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, kvm_arch_end_assignment(irqfd->kvm); } -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new) { - return kvm_x86_call(pi_update_irte)(kvm, host_irq, guest_irq, set); + return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, new); } bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b5575d0b5744..a4160c1c0c6bb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2401,6 +2401,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) +struct kvm_kernel_irqfd; + bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); @@ -2408,8 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new); bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, struct kvm_kernel_irq_routing_entry *); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 5bc6abe307488..bd1766da68958 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -285,9 +285,9 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start( { } -int __attribute__((weak)) kvm_arch_update_irqfd_routing( - struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) +int __weak kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd, + struct kvm_kernel_irq_routing_entry *old, + struct kvm_kernel_irq_routing_entry *new) { return 0; } @@ -618,9 +618,8 @@ void kvm_irq_routing_update(struct kvm *kvm) #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) if (irqfd->producer && kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) { - int ret = kvm_arch_update_irqfd_routing( - irqfd->kvm, irqfd->producer->irq, - irqfd->gsi, 1); + int ret = kvm_arch_update_irqfd_routing(irqfd, &old, &irqfd->irq_entry); + WARN_ON(ret); } #endif