]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 7 Feb 2026 15:17:29 +0000 (16:17 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 7 Feb 2026 15:17:29 +0000 (16:17 +0100)
added patches:
kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch

queue-6.12/kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch [new file with mode: 0644]
queue-6.12/series

diff --git a/queue-6.12/kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch b/queue-6.12/kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch
new file mode 100644 (file)
index 0000000..b7584ad
--- /dev/null
@@ -0,0 +1,203 @@
+From b4d37cdb77a0015f51fee083598fa227cc07aaf1 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 13 Jan 2026 09:46:05 -0800
+Subject: KVM: Don't clobber irqfd routing type when deassigning irqfd
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit b4d37cdb77a0015f51fee083598fa227cc07aaf1 upstream.
+
+When deassigning a KVM_IRQFD, don't clobber the irqfd's copy of the IRQ's
+routing entry as doing so breaks kvm_arch_irq_bypass_del_producer() on x86
+and arm64, which explicitly look for KVM_IRQ_ROUTING_MSI.  Instead, to
+handle a concurrent routing update, verify that the irqfd is still active
+before consuming the routing information.  As evidenced by the x86 and
+arm64 bugs, and another bug in kvm_arch_update_irqfd_routing() (see below),
+clobbering the entry type without notifying arch code is surprising and
+error prone.
+
+As a bonus, checking that the irqfd is active provides a convenient
+location for documenting _why_ KVM must not consume the routing entry for
+an irqfd that is in the process of being deassigned: once the irqfd is
+deleted from the list (which happens *before* the eventfd is detached), it
+will no longer receive updates via kvm_irq_routing_update(), and so KVM
+could deliver an event using stale routing information (relative to
+KVM_SET_GSI_ROUTING returning to userspace).
+
+As an even better bonus, explicitly checking for the irqfd being active
+fixes a similar bug to the one the clobbering is trying to prevent: if an
+irqfd is deactivated, and then its routing is changed,
+kvm_irq_routing_update() won't invoke kvm_arch_update_irqfd_routing()
+(because the irqfd isn't in the list).  And so if the irqfd is in bypass
+mode, IRQs will continue to be posted using the old routing information.
+
+As for kvm_arch_irq_bypass_del_producer(), clobbering the routing type
+results in KVM incorrectly keeping the IRQ in bypass mode, which is
+especially problematic on AMD as KVM tracks IRQs that are being posted to
+a vCPU in a list whose lifetime is tied to the irqfd.
+
+Without the help of KASAN to detect use-after-free, the most common
+sympton on AMD is a NULL pointer deref in amd_iommu_update_ga() due to
+the memory for irqfd structure being re-allocated and zeroed, resulting
+in irqfd->irq_bypass_data being NULL when read by
+avic_update_iommu_vcpu_affinity():
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000018
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 40cf2b9067 P4D 40cf2b9067 PUD 408362a067 PMD 0
+  Oops: Oops: 0000 [#1] SMP
+  CPU: 6 UID: 0 PID: 40383 Comm: vfio_irq_test
+  Tainted: G     U  W  O        6.19.0-smp--5dddc257e6b2-irqfd #31 NONE
+  Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE
+  Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.78.2-0 09/05/2025
+  RIP: 0010:amd_iommu_update_ga+0x19/0xe0
+  Call Trace:
+   <TASK>
+   avic_update_iommu_vcpu_affinity+0x3d/0x90 [kvm_amd]
+   __avic_vcpu_load+0xf4/0x130 [kvm_amd]
+   kvm_arch_vcpu_load+0x89/0x210 [kvm]
+   vcpu_load+0x30/0x40 [kvm]
+   kvm_arch_vcpu_ioctl_run+0x45/0x620 [kvm]
+   kvm_vcpu_ioctl+0x571/0x6a0 [kvm]
+   __se_sys_ioctl+0x6d/0xb0
+   do_syscall_64+0x6f/0x9d0
+   entry_SYSCALL_64_after_hwframe+0x4b/0x53
+  RIP: 0033:0x46893b
+    </TASK>
+  ---[ end trace 0000000000000000 ]---
+
+If AVIC is inhibited when the irfd is deassigned, the bug will manifest as
+list corruption, e.g. on the next irqfd assignment.
+
+  list_add corruption. next->prev should be prev (ffff8d474d5cd588),
+                       but was 0000000000000000. (next=ffff8d8658f86530).
+  ------------[ cut here ]------------
+  kernel BUG at lib/list_debug.c:31!
+  Oops: invalid opcode: 0000 [#1] SMP
+  CPU: 128 UID: 0 PID: 80818 Comm: vfio_irq_test
+  Tainted: G     U  W  O        6.19.0-smp--f19dc4d680ba-irqfd #28 NONE
+  Tainted: [U]=USER, [W]=WARN, [O]=OOT_MODULE
+  Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 34.78.2-0 09/05/2025
+  RIP: 0010:__list_add_valid_or_report+0x97/0xc0
+  Call Trace:
+   <TASK>
+   avic_pi_update_irte+0x28e/0x2b0 [kvm_amd]
+   kvm_pi_update_irte+0xbf/0x190 [kvm]
+   kvm_arch_irq_bypass_add_producer+0x72/0x90 [kvm]
+   irq_bypass_register_consumer+0xcd/0x170 [irqbypass]
+   kvm_irqfd+0x4c6/0x540 [kvm]
+   kvm_vm_ioctl+0x118/0x5d0 [kvm]
+   __se_sys_ioctl+0x6d/0xb0
+   do_syscall_64+0x6f/0x9d0
+   entry_SYSCALL_64_after_hwframe+0x4b/0x53
+   </TASK>
+  ---[ end trace 0000000000000000 ]---
+
+On Intel and arm64, the bug is less noisy, as the end result is that the
+device keeps posting IRQs to the vCPU even after it's been deassigned.
+
+Note, the worst of the breakage can be traced back to commit cb210737675e
+("KVM: Pass new routing entries and irqfd when updating IRTEs"), as before
+that commit KVM would pull the routing information from the per-VM routing
+table.  But as above, similar bugs have existed since support for IRQ
+bypass was added.  E.g. if a routing change finished before irq_shutdown()
+invoked kvm_arch_irq_bypass_del_producer(), VMX and SVM would see stale
+routing information and potentially leave the irqfd in bypass mode.
+
+Alternatively, x86 could be fixed by explicitly checking irq_bypass_vcpu
+instead of irq_entry.type in kvm_arch_irq_bypass_del_producer(), and arm64
+could be modified to utilize irq_bypass_vcpu in a similar manner.  But (a)
+that wouldn't fix the routing updates bug, and (b) fixing core code doesn't
+preclude x86 (or arm64) from adding such code as a sanity check (spoiler
+alert).
+
+Fixes: f70c20aaf141 ("KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'")
+Fixes: cb210737675e ("KVM: Pass new routing entries and irqfd when updating IRTEs")
+Fixes: a0d7e2fc61ab ("KVM: arm64: vgic-v4: Only attempt vLPI mapping for actual MSIs")
+Cc: stable@vger.kernel.org
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oupton@kernel.org>
+Link: https://patch.msgid.link/20260113174606.104978-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/eventfd.c |   44 ++++++++++++++++++++++++--------------------
+ 1 file changed, 24 insertions(+), 20 deletions(-)
+
+--- a/virt/kvm/eventfd.c
++++ b/virt/kvm/eventfd.c
+@@ -157,21 +157,28 @@ irqfd_shutdown(struct work_struct *work)
+ }
+-/* assumes kvm->irqfds.lock is held */
+-static bool
+-irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
++static bool irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
+ {
++      /*
++       * Assert that either irqfds.lock or SRCU is held, as irqfds.lock must
++       * be held to prevent false positives (on the irqfd being active), and
++       * while false negatives are impossible as irqfds are never added back
++       * to the list once they're deactivated, the caller must at least hold
++       * SRCU to guard against routing changes if the irqfd is deactivated.
++       */
++      lockdep_assert_once(lockdep_is_held(&irqfd->kvm->irqfds.lock) ||
++                          srcu_read_lock_held(&irqfd->kvm->irq_srcu));
++
+       return list_empty(&irqfd->list) ? false : true;
+ }
+ /*
+  * Mark the irqfd as inactive and schedule it for removal
+- *
+- * assumes kvm->irqfds.lock is held
+  */
+-static void
+-irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
++static void irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
+ {
++      lockdep_assert_held(&irqfd->kvm->irqfds.lock);
++
+       BUG_ON(!irqfd_is_active(irqfd));
+       list_del_init(&irqfd->list);
+@@ -212,8 +219,15 @@ irqfd_wakeup(wait_queue_entry_t *wait, u
+                       seq = read_seqcount_begin(&irqfd->irq_entry_sc);
+                       irq = irqfd->irq_entry;
+               } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
+-              /* An event has been signaled, inject an interrupt */
+-              if (kvm_arch_set_irq_inatomic(&irq, kvm,
++
++              /*
++               * An event has been signaled, inject an interrupt unless the
++               * irqfd is being deassigned (isn't active), in which case the
++               * routing information may be stale (once the irqfd is removed
++               * from the list, it will stop receiving routing updates).
++               */
++              if (unlikely(!irqfd_is_active(irqfd)) ||
++                  kvm_arch_set_irq_inatomic(&irq, kvm,
+                                             KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+                                             false) == -EWOULDBLOCK)
+                       schedule_work(&irqfd->inject);
+@@ -544,18 +558,8 @@ kvm_irqfd_deassign(struct kvm *kvm, stru
+       spin_lock_irq(&kvm->irqfds.lock);
+       list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
+-              if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
+-                      /*
+-                       * This clearing of irq_entry.type is needed for when
+-                       * another thread calls kvm_irq_routing_update before
+-                       * we flush workqueue below (we synchronize with
+-                       * kvm_irq_routing_update using irqfds.lock).
+-                       */
+-                      write_seqcount_begin(&irqfd->irq_entry_sc);
+-                      irqfd->irq_entry.type = 0;
+-                      write_seqcount_end(&irqfd->irq_entry_sc);
++              if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi)
+                       irqfd_deactivate(irqfd);
+-              }
+       }
+       spin_unlock_irq(&kvm->irqfds.lock);
index b253c8b27c65a09f08e3f89af084c80762499994..f0e0e913a6283251897f2706940b5462b2b38a86 100644 (file)
@@ -17,3 +17,4 @@ gve-fix-stats-report-corruption-on-queue-count-change.patch
 gve-correct-ethtool-rx_dropped-calculation.patch
 mm-shmem-prevent-infinite-loop-on-truncate-race.patch
 revert-drm-amd-check-if-aspm-is-enabled-from-pcie-subsystem.patch
+kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch