]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
virt: acrn: Fix irqfd use-after-free during eventfd shutdown
authorSicong Huang <congei42@163.com>
Tue, 19 May 2026 11:20:18 +0000 (19:20 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 23 May 2026 11:47:34 +0000 (13:47 +0200)
acrn_irqfd_deassign() and the eventfd EPOLLHUP wakeup can race and free
the same struct hsm_irqfd:

  CPU0                                 CPU1
  ----                                 ----
  eventfd_release()
    wake_up_poll(EPOLLHUP)
      hsm_irqfd_wakeup()
        queue_work(&irqfd->shutdown)
                                       acrn_irqfd_deassign()
                                         hsm_irqfd_shutdown()
                                           list_del_init()
                                           eventfd_ctx_remove_wait_queue()
                                           eventfd_ctx_put()
                                         kfree(irqfd)
  hsm_irqfd_shutdown_work()
    container_of(work, ..., shutdown)
    irqfd->vm                  <-- use-after-free

The deassign path freed the irqfd while a shutdown work item was
already queued by EPOLLHUP (or vice versa), so the work item could
resurrect a dangling pointer through container_of().

Switch to the lifetime model used by KVM irqfds:

 - Deassign/deinit only deactivate the irqfd: remove it from vm->irqfds
   under irqfds_lock and queue the cleanup work.
 - hsm_irqfd_shutdown_work() becomes the sole owner that unhooks the
   eventfd waitqueue entry, drops the eventfd reference and frees the
   irqfd.
 - A new HSM_IRQFD_FLAG_SHUTDOWN bit guarded by test_and_set_bit()
   ensures the cleanup work is queued at most once, no matter how many
   of {EPOLLHUP, deassign, deinit} fire concurrently.  This is safe to
   call from the waitqueue callback, which runs with wqh->lock held and
   IRQs disabled and therefore cannot take irqfds_lock.
 - acrn_irqfd_deassign() flushes vm->irqfd_wq before returning so the
   eventfd is fully detached on return.  acrn_irqfd_deinit() deactivates
   every irqfd, flushes the workqueue and only then destroys it, so no
   path can queue_work() onto a torn-down workqueue.
 - acrn_irqfd_assign() now installs the eventfd waitqueue entry and
   publishes the irqfd to vm->irqfds under irqfds_lock, so the irqfd is
   never visible to deassign/deinit before its waitqueue entry is in
   place, and any EPOLLHUP that fires in the assign window queues
   cleanup work that blocks on irqfds_lock until publication is done.

Signed-off-by: Sicong Huang <congei42@163.com>
Reviewed-by: Fei Li <fei1.li@intel.com>
Link: https://patch.msgid.link/20260519112018.2135000-2-congei42@163.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/virt/acrn/irqfd.c

index acf8cd5f8f8c25657e12d5e6303ef6bb776f2bf9..feeba7eda49415ca0793a984772f9efe5985f952 100644 (file)
@@ -16,6 +16,9 @@
 
 #include "acrn_drv.h"
 
+/* Cleanup work has been queued; set via test_and_set_bit(). */
+#define HSM_IRQFD_FLAG_SHUTDOWN        0
+
 /**
  * struct hsm_irqfd - Properties of HSM irqfd
  * @vm:                Associated VM pointer
@@ -25,6 +28,7 @@
  * @list:      Entry within &acrn_vm.irqfds of irqfds of a VM
  * @pt:                Structure for select/poll on the associated eventfd
  * @msi:       MSI data
+ * @flags:     Internal lifecycle flags (HSM_IRQFD_FLAG_*)
  */
 struct hsm_irqfd {
        struct acrn_vm          *vm;
@@ -34,6 +38,7 @@ struct hsm_irqfd {
        struct list_head        list;
        poll_table              pt;
        struct acrn_msi_entry   msi;
+       unsigned long           flags;
 };
 
 static void acrn_irqfd_inject(struct hsm_irqfd *irqfd)
@@ -44,30 +49,29 @@ static void acrn_irqfd_inject(struct hsm_irqfd *irqfd)
                        irqfd->msi.msi_data);
 }
 
-static void hsm_irqfd_shutdown(struct hsm_irqfd *irqfd)
+/* Queue the cleanup work at most once. Safe from atomic context. */
+static void hsm_irqfd_queue_shutdown(struct hsm_irqfd *irqfd)
 {
-       u64 cnt;
-
-       lockdep_assert_held(&irqfd->vm->irqfds_lock);
-
-       /* remove from wait queue */
-       list_del_init(&irqfd->list);
-       eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
-       eventfd_ctx_put(irqfd->eventfd);
-       kfree(irqfd);
+       if (!test_and_set_bit(HSM_IRQFD_FLAG_SHUTDOWN, &irqfd->flags))
+               queue_work(irqfd->vm->irqfd_wq, &irqfd->shutdown);
 }
 
+/* Sole owner of @irqfd: unhook waitqueue, drop eventfd ref, free. */
 static void hsm_irqfd_shutdown_work(struct work_struct *work)
 {
-       struct hsm_irqfd *irqfd;
-       struct acrn_vm *vm;
+       struct hsm_irqfd *irqfd = container_of(work, struct hsm_irqfd,
+                                              shutdown);
+       struct acrn_vm *vm = irqfd->vm;
+       u64 cnt;
 
-       irqfd = container_of(work, struct hsm_irqfd, shutdown);
-       vm = irqfd->vm;
        mutex_lock(&vm->irqfds_lock);
        if (!list_empty(&irqfd->list))
-               hsm_irqfd_shutdown(irqfd);
+               list_del_init(&irqfd->list);
        mutex_unlock(&vm->irqfds_lock);
+
+       eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
+       eventfd_ctx_put(irqfd->eventfd);
+       kfree(irqfd);
 }
 
 /* Called with wqh->lock held and interrupts disabled */
@@ -76,17 +80,16 @@ static int hsm_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
 {
        unsigned long poll_bits = (unsigned long)key;
        struct hsm_irqfd *irqfd;
-       struct acrn_vm *vm;
 
        irqfd = container_of(wait, struct hsm_irqfd, wait);
-       vm = irqfd->vm;
+
        if (poll_bits & POLLIN)
                /* An event has been signaled, inject an interrupt */
                acrn_irqfd_inject(irqfd);
 
        if (poll_bits & POLLHUP)
-               /* Do shutdown work in thread to hold wqh->lock */
-               queue_work(vm->irqfd_wq, &irqfd->shutdown);
+               /* Defer teardown to the cleanup work; can't sleep here. */
+               hsm_irqfd_queue_shutdown(irqfd);
 
        return 0;
 }
@@ -142,6 +145,12 @@ static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args)
        init_waitqueue_func_entry(&irqfd->wait, hsm_irqfd_wakeup);
        init_poll_funcptr(&irqfd->pt, hsm_irqfd_poll_func);
 
+       /*
+        * Hold irqfds_lock across waitqueue install and list_add so the
+        * irqfd is not visible to deassign/deinit before its waitqueue
+        * entry is in place, and any racing EPOLLHUP cleanup work blocks
+        * on irqfds_lock until publication completes.
+        */
        mutex_lock(&vm->irqfds_lock);
        list_for_each_entry(tmp, &vm->irqfds, list) {
                if (irqfd->eventfd != tmp->eventfd)
@@ -150,14 +159,12 @@ static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args)
                mutex_unlock(&vm->irqfds_lock);
                goto fail;
        }
-       list_add_tail(&irqfd->list, &vm->irqfds);
-       mutex_unlock(&vm->irqfds_lock);
 
-       /* Check the pending event in this stage */
        events = vfs_poll(fd_file(f), &irqfd->pt);
-
+       list_add_tail(&irqfd->list, &vm->irqfds);
        if (events & EPOLLIN)
                acrn_irqfd_inject(irqfd);
+       mutex_unlock(&vm->irqfds_lock);
 
        return 0;
 fail:
@@ -180,13 +187,17 @@ static int acrn_irqfd_deassign(struct acrn_vm *vm,
        mutex_lock(&vm->irqfds_lock);
        list_for_each_entry_safe(irqfd, tmp, &vm->irqfds, list) {
                if (irqfd->eventfd == eventfd) {
-                       hsm_irqfd_shutdown(irqfd);
+                       list_del_init(&irqfd->list);
+                       hsm_irqfd_queue_shutdown(irqfd);
                        break;
                }
        }
        mutex_unlock(&vm->irqfds_lock);
        eventfd_ctx_put(eventfd);
 
+       /* Wait for cleanup work to finish so the eventfd is fully detached. */
+       flush_workqueue(vm->irqfd_wq);
+
        return 0;
 }
 
@@ -219,9 +230,15 @@ void acrn_irqfd_deinit(struct acrn_vm *vm)
        struct hsm_irqfd *irqfd, *next;
 
        dev_dbg(acrn_dev.this_device, "VM %u irqfd deinit.\n", vm->vmid);
-       destroy_workqueue(vm->irqfd_wq);
+
        mutex_lock(&vm->irqfds_lock);
-       list_for_each_entry_safe(irqfd, next, &vm->irqfds, list)
-               hsm_irqfd_shutdown(irqfd);
+       list_for_each_entry_safe(irqfd, next, &vm->irqfds, list) {
+               list_del_init(&irqfd->list);
+               hsm_irqfd_queue_shutdown(irqfd);
+       }
        mutex_unlock(&vm->irqfds_lock);
+
+       /* Drain all cleanup work before tearing the workqueue down. */
+       flush_workqueue(vm->irqfd_wq);
+       destroy_workqueue(vm->irqfd_wq);
 }