From: Greg Kroah-Hartman Date: Sat, 7 Feb 2026 15:17:17 +0000 (+0100) Subject: 6.6-stable patches X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=39d1582785676e3c1dba68e3a5d104594d78b4d0;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch --- diff --git a/queue-6.6/kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch b/queue-6.6/kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch new file mode 100644 index 0000000000..13dd152b0d --- /dev/null +++ b/queue-6.6/kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch @@ -0,0 +1,203 @@ +From b4d37cdb77a0015f51fee083598fa227cc07aaf1 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 13 Jan 2026 09:46:05 -0800 +Subject: KVM: Don't clobber irqfd routing type when deassigning irqfd + +From: Sean Christopherson + +commit b4d37cdb77a0015f51fee083598fa227cc07aaf1 upstream. + +When deassigning a KVM_IRQFD, don't clobber the irqfd's copy of the IRQ's +routing entry as doing so breaks kvm_arch_irq_bypass_del_producer() on x86 +and arm64, which explicitly look for KVM_IRQ_ROUTING_MSI. Instead, to +handle a concurrent routing update, verify that the irqfd is still active +before consuming the routing information. As evidenced by the x86 and +arm64 bugs, and another bug in kvm_arch_update_irqfd_routing() (see below), +clobbering the entry type without notifying arch code is surprising and +error prone. + +As a bonus, checking that the irqfd is active provides a convenient +location for documenting _why_ KVM must not consume the routing entry for +an irqfd that is in the process of being deassigned: once the irqfd is +deleted from the list (which happens *before* the eventfd is detached), it +will no longer receive updates via kvm_irq_routing_update(), and so KVM +could deliver an event using stale routing information (relative to +KVM_SET_GSI_ROUTING returning to userspace). + +As an even better bonus, explicitly checking for the irqfd being active +fixes a similar bug to the one the clobbering is trying to prevent: if an +irqfd is deactivated, and then its routing is changed, +kvm_irq_routing_update() won't invoke kvm_arch_update_irqfd_routing() +(because the irqfd isn't in the list). And so if the irqfd is in bypass +mode, IRQs will continue to be posted using the old routing information. + +As for kvm_arch_irq_bypass_del_producer(), clobbering the routing type +results in KVM incorrectly keeping the IRQ in bypass mode, which is +especially problematic on AMD as KVM tracks IRQs that are being posted to +a vCPU in a list whose lifetime is tied to the irqfd. + +Without the help of KASAN to detect use-after-free, the most common +sympton on AMD is a NULL pointer deref in amd_iommu_update_ga() due to +the memory for irqfd structure being re-allocated and zeroed, resulting +in irqfd->irq_bypass_data being NULL when read by +avic_update_iommu_vcpu_affinity(): + + BUG: kernel NULL pointer dereference, address: 0000000000000018 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 40cf2b9067 P4D 40cf2b9067 PUD 408362a067 PMD 0 + Oops: Oops: 0000 [#1] SMP + CPU: 6 UID: 0 PID: 40383 Comm: vfio_irq_test + Tainted: G U W O 6.19.0-smp--5dddc257e6b2-irqfd #31 NONE + Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE + Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.78.2-0 09/05/2025 + RIP: 0010:amd_iommu_update_ga+0x19/0xe0 + Call Trace: + + avic_update_iommu_vcpu_affinity+0x3d/0x90 [kvm_amd] + __avic_vcpu_load+0xf4/0x130 [kvm_amd] + kvm_arch_vcpu_load+0x89/0x210 [kvm] + vcpu_load+0x30/0x40 [kvm] + kvm_arch_vcpu_ioctl_run+0x45/0x620 [kvm] + kvm_vcpu_ioctl+0x571/0x6a0 [kvm] + __se_sys_ioctl+0x6d/0xb0 + do_syscall_64+0x6f/0x9d0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + RIP: 0033:0x46893b + + ---[ end trace 0000000000000000 ]--- + +If AVIC is inhibited when the irfd is deassigned, the bug will manifest as +list corruption, e.g. on the next irqfd assignment. + + list_add corruption. next->prev should be prev (ffff8d474d5cd588), + but was 0000000000000000. (next=ffff8d8658f86530). + ------------[ cut here ]------------ + kernel BUG at lib/list_debug.c:31! + Oops: invalid opcode: 0000 [#1] SMP + CPU: 128 UID: 0 PID: 80818 Comm: vfio_irq_test + Tainted: G U W O 6.19.0-smp--f19dc4d680ba-irqfd #28 NONE + Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE + Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.78.2-0 09/05/2025 + RIP: 0010:__list_add_valid_or_report+0x97/0xc0 + Call Trace: + + avic_pi_update_irte+0x28e/0x2b0 [kvm_amd] + kvm_pi_update_irte+0xbf/0x190 [kvm] + kvm_arch_irq_bypass_add_producer+0x72/0x90 [kvm] + irq_bypass_register_consumer+0xcd/0x170 [irqbypass] + kvm_irqfd+0x4c6/0x540 [kvm] + kvm_vm_ioctl+0x118/0x5d0 [kvm] + __se_sys_ioctl+0x6d/0xb0 + do_syscall_64+0x6f/0x9d0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + + ---[ end trace 0000000000000000 ]--- + +On Intel and arm64, the bug is less noisy, as the end result is that the +device keeps posting IRQs to the vCPU even after it's been deassigned. + +Note, the worst of the breakage can be traced back to commit cb210737675e +("KVM: Pass new routing entries and irqfd when updating IRTEs"), as before +that commit KVM would pull the routing information from the per-VM routing +table. But as above, similar bugs have existed since support for IRQ +bypass was added. E.g. if a routing change finished before irq_shutdown() +invoked kvm_arch_irq_bypass_del_producer(), VMX and SVM would see stale +routing information and potentially leave the irqfd in bypass mode. + +Alternatively, x86 could be fixed by explicitly checking irq_bypass_vcpu +instead of irq_entry.type in kvm_arch_irq_bypass_del_producer(), and arm64 +could be modified to utilize irq_bypass_vcpu in a similar manner. But (a) +that wouldn't fix the routing updates bug, and (b) fixing core code doesn't +preclude x86 (or arm64) from adding such code as a sanity check (spoiler +alert). + +Fixes: f70c20aaf141 ("KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'") +Fixes: cb210737675e ("KVM: Pass new routing entries and irqfd when updating IRTEs") +Fixes: a0d7e2fc61ab ("KVM: arm64: vgic-v4: Only attempt vLPI mapping for actual MSIs") +Cc: stable@vger.kernel.org +Cc: Marc Zyngier +Cc: Oliver Upton +Link: https://patch.msgid.link/20260113174606.104978-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/eventfd.c | 44 ++++++++++++++++++++++++-------------------- + 1 file changed, 24 insertions(+), 20 deletions(-) + +--- a/virt/kvm/eventfd.c ++++ b/virt/kvm/eventfd.c +@@ -156,21 +156,28 @@ irqfd_shutdown(struct work_struct *work) + } + + +-/* assumes kvm->irqfds.lock is held */ +-static bool +-irqfd_is_active(struct kvm_kernel_irqfd *irqfd) ++static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd) + { ++ /* ++ * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must ++ * be held to prevent false positives (on the irqfd being active), and ++ * while false negatives are impossible as irqfds are never added back ++ * to the list once they're deactivated, the caller must at least hold ++ * SRCU to guard against routing changes if the irqfd is deactivated. ++ */ ++ lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) || ++ srcu_read_lock_held(&irqfd->kvm->irq_srcu)); ++ + return list_empty(&irqfd->list) ? false : true; + } + + /* + * Mark the irqfd as inactive and schedule it for removal +- * +- * assumes kvm->irqfds.lock is held + */ +-static void +-irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) ++static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) + { ++ lockdep_assert_held(&irqfd->kvm->irqfds.lock); ++ + BUG_ON(!irqfd_is_active(irqfd)); + + list_del_init(&irqfd->list); +@@ -211,8 +218,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, u + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); +- /* An event has been signaled, inject an interrupt */ +- if (kvm_arch_set_irq_inatomic(&irq, kvm, ++ ++ /* ++ * An event has been signaled, inject an interrupt unless the ++ * irqfd is being deassigned (isn't active), in which case the ++ * routing information may be stale (once the irqfd is removed ++ * from the list, it will stop receiving routing updates). ++ */ ++ if (unlikely(!irqfd_is_active(irqfd)) || ++ kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) + schedule_work(&irqfd->inject); +@@ -557,18 +571,8 @@ kvm_irqfd_deassign(struct kvm *kvm, stru + spin_lock_irq(&kvm->irqfds.lock); + + list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { +- if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { +- /* +- * This clearing of irq_entry.type is needed for when +- * another thread calls kvm_irq_routing_update before +- * we flush workqueue below (we synchronize with +- * kvm_irq_routing_update using irqfds.lock). +- */ +- write_seqcount_begin(&irqfd->irq_entry_sc); +- irqfd->irq_entry.type = 0; +- write_seqcount_end(&irqfd->irq_entry_sc); ++ if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) + irqfd_deactivate(irqfd); +- } + } + + spin_unlock_irq(&kvm->irqfds.lock); diff --git a/queue-6.6/series b/queue-6.6/series index e9b2965a4e..a5128ece6e 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -9,3 +9,4 @@ pmdomain-imx8m-blk-ctrl-fix-out-of-range-access-of-bc-domains.patch rbd-check-for-eod-after-exclusive-lock-is-ensured-to-be-held.patch arm-9468-1-fix-memset64-on-big-endian.patch revert-drm-amd-check-if-aspm-is-enabled-from-pcie-subsystem.patch +kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch