6.1-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 30 Mar 2024 08:59:39 +0000 (09:59 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 30 Mar 2024 08:59:39 +0000 (09:59 +0100)
diff --git a/queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch b/queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch

new file mode 100644 (file)

index 0000000..bd2f10e
--- /dev/null
+++ b/queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch
@@ -0,0 +1,51 @@
+From 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 Mon Sep 17 00:00:00 2001
+From: Geliang Tang <tanggeliang@kylinos.cn>
+Date: Fri, 1 Mar 2024 18:11:22 +0100
+Subject: selftests: mptcp: diag: return KSFT_FAIL not test_cnt
+
+From: Geliang Tang <tanggeliang@kylinos.cn>
+
+commit 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 upstream.
+
+The test counter 'test_cnt' should not be returned in diag.sh: e.g. if
+only the 4th test fails, the script does 'exit 4', which is 'exit ${KSFT_SKIP}',
+and the whole test is marked as skipped instead of 'failed'!
+
+So we should do ret=${KSFT_FAIL} instead.
+
+Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests")
+Cc: stable@vger.kernel.org
+Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests")
+Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/mptcp/diag.sh |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/tools/testing/selftests/net/mptcp/diag.sh
++++ b/tools/testing/selftests/net/mptcp/diag.sh
+@@ -56,7 +56,7 @@ __chk_nr()
+                       echo "[ skip ] Feature probably not supported"
+               else
+                       echo "[ fail ] expected $expected found $nr"
+-                      ret=$test_cnt
++                      ret=${KSFT_FAIL}
+               fi
+       else
+               echo "[  ok  ]"
+@@ -100,10 +100,10 @@ wait_msk_nr()
+       printf "%-50s" "$msg"
+       if [ $i -ge $timeout ]; then
+               echo "[ fail ] timeout while expecting $expected max $max last $nr"
+-              ret=$test_cnt
++              ret=${KSFT_FAIL}
+       elif [ $nr != $expected ]; then
+               echo "[ fail ] expected $expected found $nr"
+-              ret=$test_cnt
++              ret=${KSFT_FAIL}
+       else
+               echo "[  ok  ]"
+       fi
diff --git a/queue-6.1/series b/queue-6.1/series

index 41849b9bd4f4d7c439233ddb80a5f83135822df6..715f4d8e53a57f115d0274a52f1a40bbece26378 100644 (file)
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -101,8 +101,6 @@ pci-hv-fix-ring-buffer-size-calculation.patch
  vfio-use-gfp_kernel_account-for-userspace-persistent.patch
  vfio-pci-consolidate-irq-cleanup-on-msi-msi-x-disabl.patch
  vfio-pci-remove-negative-check-on-unsigned-vector.patch
-vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch
-vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch
  vfio-pci-lock-external-intx-masking-ops.patch
  vfio-platform-disable-virqfds-on-cleanup.patch
  ksmbd-retrieve-number-of-blocks-using-vfs_getattr-in.patch
@@ -198,3 +196,9 @@ pwm-img-fix-pwm-clock-lookup.patch
  tty-serial-imx-fix-broken-rs485.patch
  block-fix-page-refcounts-for-unaligned-buffers-in-__bio_release_pages.patch
  blk-mq-release-scheduler-resource-when-request-completes.patch
+selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch
+vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch
+vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch
+vfio-pci-create-persistent-intx-handler.patch
+vfio-platform-create-persistent-irq-handlers.patch
+vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch
diff --git a/queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch b/queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch

new file mode 100644 (file)

index 0000000..6a07c4d
--- /dev/null
+++ b/queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch
@@ -0,0 +1,56 @@
+From stable+bounces-33770-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:20 2024
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Fri, 29 Mar 2024 15:38:54 -0600
+Subject: vfio/fsl-mc: Block calling interrupt handler without trigger
+To: stable@vger.kernel.org
+Cc: Alex Williamson <alex.williamson@redhat.com>, sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Diana Craciun <diana.craciun@oss.nxp.com>, Kevin Tian <kevin.tian@intel.com>
+Message-ID: <20240329213856.2550762-8-alex.williamson@redhat.com>
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+[ Upstream commit 7447d911af699a15f8d050dfcb7c680a86f87012 ]
+
+The eventfd_ctx trigger pointer of the vfio_fsl_mc_irq object is
+initially NULL and may become NULL if the user sets the trigger
+eventfd to -1.  The interrupt handler itself is guaranteed that
+trigger is always valid between request_irq() and free_irq(), but
+the loopback testing mechanisms to invoke the handler function
+need to test the trigger.  The triggering and setting ioctl paths
+both make use of igate and are therefore mutually exclusive.
+
+The vfio-fsl-mc driver does not make use of irqfds, nor does it
+support any sort of masking operations, therefore unlike vfio-pci
+and vfio-platform, the flow can remain essentially unchanged.
+
+Cc: Diana Craciun <diana.craciun@oss.nxp.com>
+Cc:  <stable@vger.kernel.org>
+Fixes: cc0ee20bd969 ("vfio/fsl-mc: trigger an interrupt via eventfd")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Link: https://lore.kernel.org/r/20240308230557.805580-8-alex.williamson@redhat.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
++++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
+@@ -142,13 +142,14 @@ static int vfio_fsl_mc_set_irq_trigger(s
+       irq = &vdev->mc_irqs[index];
+ 
+       if (flags & VFIO_IRQ_SET_DATA_NONE) {
+-              vfio_fsl_mc_irq_handler(hwirq, irq);
++              if (irq->trigger)
++                      eventfd_signal(irq->trigger, 1);
+ 
+       } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+               u8 trigger = *(u8 *)data;
+ 
+-              if (trigger)
+-                      vfio_fsl_mc_irq_handler(hwirq, irq);
++              if (trigger && irq->trigger)
++                      eventfd_signal(irq->trigger, 1);
+       }
+ 
+       return 0;
diff --git a/queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch b/queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch

new file mode 100644 (file)

index 0000000..ee8c0d3
--- /dev/null
+++ b/queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch
@@ -0,0 +1,92 @@
+From stable+bounces-33768-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:16 2024
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Fri, 29 Mar 2024 15:38:51 -0600
+Subject: vfio: Introduce interface to flush virqfd inject workqueue
+To: stable@vger.kernel.org
+Cc: Alex Williamson <alex.williamson@redhat.com>, sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Kevin Tian <kevin.tian@intel.com>, Reinette Chatre <reinette.chatre@intel.com>
+Message-ID: <20240329213856.2550762-5-alex.williamson@redhat.com>
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+[ Upstream commit b620ecbd17a03cacd06f014a5d3f3a11285ce053 ]
+
+In order to synchronize changes that can affect the thread callback,
+introduce an interface to force a flush of the inject workqueue.  The
+irqfd pointer is only valid under spinlock, but the workqueue cannot
+be flushed under spinlock.  Therefore the flush work for the irqfd is
+queued under spinlock.  The vfio_irqfd_cleanup_wq workqueue is re-used
+for queuing this work such that flushing the workqueue is also ordered
+relative to shutdown.
+
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Link: https://lore.kernel.org/r/20240308230557.805580-4-alex.williamson@redhat.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/virqfd.c |   21 +++++++++++++++++++++
+ include/linux/vfio.h  |    2 ++
+ 2 files changed, 23 insertions(+)
+
+--- a/drivers/vfio/virqfd.c
++++ b/drivers/vfio/virqfd.c
+@@ -104,6 +104,13 @@ static void virqfd_inject(struct work_st
+               virqfd->thread(virqfd->opaque, virqfd->data);
+ }
+ 
++static void virqfd_flush_inject(struct work_struct *work)
++{
++      struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject);
++
++      flush_work(&virqfd->inject);
++}
++
+ int vfio_virqfd_enable(void *opaque,
+                      int (*handler)(void *, void *),
+                      void (*thread)(void *, void *),
+@@ -127,6 +134,7 @@ int vfio_virqfd_enable(void *opaque,
+ 
+       INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
+       INIT_WORK(&virqfd->inject, virqfd_inject);
++      INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject);
+ 
+       irqfd = fdget(fd);
+       if (!irqfd.file) {
+@@ -217,6 +225,19 @@ void vfio_virqfd_disable(struct virqfd *
+ }
+ EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
+ 
++void vfio_virqfd_flush_thread(struct virqfd **pvirqfd)
++{
++      unsigned long flags;
++
++      spin_lock_irqsave(&virqfd_lock, flags);
++      if (*pvirqfd && (*pvirqfd)->thread)
++              queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
++      spin_unlock_irqrestore(&virqfd_lock, flags);
++
++      flush_workqueue(vfio_irqfd_cleanup_wq);
++}
++EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread);
++
+ module_init(vfio_virqfd_init);
+ module_exit(vfio_virqfd_exit);
+ 
+--- a/include/linux/vfio.h
++++ b/include/linux/vfio.h
+@@ -268,6 +268,7 @@ struct virqfd {
+       wait_queue_entry_t              wait;
+       poll_table              pt;
+       struct work_struct      shutdown;
++      struct work_struct      flush_inject;
+       struct virqfd           **pvirqfd;
+ };
+ 
+@@ -275,5 +276,6 @@ int vfio_virqfd_enable(void *opaque, int
+                      void (*thread)(void *, void *), void *data,
+                      struct virqfd **pvirqfd, int fd);
+ void vfio_virqfd_disable(struct virqfd **pvirqfd);
++void vfio_virqfd_flush_thread(struct virqfd **pvirqfd);
+ 
+ #endif /* VFIO_H */
diff --git a/queue-6.1/vfio-pci-create-persistent-intx-handler.patch b/queue-6.1/vfio-pci-create-persistent-intx-handler.patch

new file mode 100644 (file)

index 0000000..56e0555
--- /dev/null
+++ b/queue-6.1/vfio-pci-create-persistent-intx-handler.patch
@@ -0,0 +1,257 @@
+From stable+bounces-33769-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:17 2024
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Fri, 29 Mar 2024 15:38:52 -0600
+Subject: vfio/pci: Create persistent INTx handler
+To: stable@vger.kernel.org
+Cc: Alex Williamson <alex.williamson@redhat.com>, sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Reinette Chatre <reinette.chatre@intel.com>, Kevin Tian <kevin.tian@intel.com>
+Message-ID: <20240329213856.2550762-6-alex.williamson@redhat.com>
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+[ Upstream commit 18c198c96a815c962adc2b9b77909eec0be7df4d ]
+
+A vulnerability exists where the eventfd for INTx signaling can be
+deconfigured, which unregisters the IRQ handler but still allows
+eventfds to be signaled with a NULL context through the SET_IRQS ioctl
+or through unmask irqfd if the device interrupt is pending.
+
+Ideally this could be solved with some additional locking; the igate
+mutex serializes the ioctl and config space accesses, and the interrupt
+handler is unregistered relative to the trigger, but the irqfd path
+runs asynchronous to those.  The igate mutex cannot be acquired from the
+atomic context of the eventfd wake function.  Disabling the irqfd
+relative to the eventfd registration is potentially incompatible with
+existing userspace.
+
+As a result, the solution implemented here moves configuration of the
+INTx interrupt handler to track the lifetime of the INTx context object
+and irq_type configuration, rather than registration of a particular
+trigger eventfd.  Synchronization is added between the ioctl path and
+eventfd_signal() wrapper such that the eventfd trigger can be
+dynamically updated relative to in-flight interrupts or irqfd callbacks.
+
+Cc:  <stable@vger.kernel.org>
+Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver")
+Reported-by: Reinette Chatre <reinette.chatre@intel.com>
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/pci/vfio_pci_intrs.c |  149 ++++++++++++++++++++------------------
+ 1 file changed, 82 insertions(+), 67 deletions(-)
+
+--- a/drivers/vfio/pci/vfio_pci_intrs.c
++++ b/drivers/vfio/pci/vfio_pci_intrs.c
+@@ -55,8 +55,13 @@ static void vfio_send_intx_eventfd(void
+ {
+       struct vfio_pci_core_device *vdev = opaque;
+ 
+-      if (likely(is_intx(vdev) && !vdev->virq_disabled))
+-              eventfd_signal(vdev->ctx[0].trigger, 1);
++      if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
++              struct eventfd_ctx *trigger;
++
++              trigger = READ_ONCE(vdev->ctx[0].trigger);
++              if (likely(trigger))
++                      eventfd_signal(trigger, 1);
++      }
+ }
+ 
+ /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
+@@ -191,98 +196,104 @@ static irqreturn_t vfio_intx_handler(int
+       return ret;
+ }
+ 
+-static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
++static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
++                          struct eventfd_ctx *trigger)
+ {
++      struct pci_dev *pdev = vdev->pdev;
++      unsigned long irqflags;
++      char *name;
++      int ret;
++
+       if (!is_irq_none(vdev))
+               return -EINVAL;
+ 
+-      if (!vdev->pdev->irq)
++      if (!pdev->irq)
+               return -ENODEV;
+ 
++      name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
++      if (!name)
++              return -ENOMEM;
++
+       vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
+       if (!vdev->ctx)
+               return -ENOMEM;
+ 
+       vdev->num_ctx = 1;
+ 
++      vdev->ctx[0].name = name;
++      vdev->ctx[0].trigger = trigger;
++
+       /*
+-       * If the virtual interrupt is masked, restore it.  Devices
+-       * supporting DisINTx can be masked at the hardware level
+-       * here, non-PCI-2.3 devices will have to wait until the
+-       * interrupt is enabled.
++       * Fill the initial masked state based on virq_disabled.  After
++       * enable, changing the DisINTx bit in vconfig directly changes INTx
++       * masking.  igate prevents races during setup, once running masked
++       * is protected via irqlock.
++       *
++       * Devices supporting DisINTx also reflect the current mask state in
++       * the physical DisINTx bit, which is not affected during IRQ setup.
++       *
++       * Devices without DisINTx support require an exclusive interrupt.
++       * IRQ masking is performed at the IRQ chip.  Again, igate protects
++       * against races during setup and IRQ handlers and irqfds are not
++       * yet active, therefore masked is stable and can be used to
++       * conditionally auto-enable the IRQ.
++       *
++       * irq_type must be stable while the IRQ handler is registered,
++       * therefore it must be set before request_irq().
+        */
+       vdev->ctx[0].masked = vdev->virq_disabled;
+-      if (vdev->pci_2_3)
+-              pci_intx(vdev->pdev, !vdev->ctx[0].masked);
++      if (vdev->pci_2_3) {
++              pci_intx(pdev, !vdev->ctx[0].masked);
++              irqflags = IRQF_SHARED;
++      } else {
++              irqflags = vdev->ctx[0].masked ? IRQF_NO_AUTOEN : 0;
++      }
+ 
+       vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
+ 
++      ret = request_irq(pdev->irq, vfio_intx_handler,
++                        irqflags, vdev->ctx[0].name, vdev);
++      if (ret) {
++              vdev->irq_type = VFIO_PCI_NUM_IRQS;
++              kfree(name);
++              vdev->num_ctx = 0;
++              kfree(vdev->ctx);
++              return ret;
++      }
++
+       return 0;
+ }
+ 
+-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
++static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
++                              struct eventfd_ctx *trigger)
+ {
+       struct pci_dev *pdev = vdev->pdev;
+-      unsigned long irqflags = IRQF_SHARED;
+-      struct eventfd_ctx *trigger;
+-      unsigned long flags;
+-      int ret;
+-
+-      if (vdev->ctx[0].trigger) {
+-              free_irq(pdev->irq, vdev);
+-              kfree(vdev->ctx[0].name);
+-              eventfd_ctx_put(vdev->ctx[0].trigger);
+-              vdev->ctx[0].trigger = NULL;
+-      }
+-
+-      if (fd < 0) /* Disable only */
+-              return 0;
+-
+-      vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
+-                                    pci_name(pdev));
+-      if (!vdev->ctx[0].name)
+-              return -ENOMEM;
+-
+-      trigger = eventfd_ctx_fdget(fd);
+-      if (IS_ERR(trigger)) {
+-              kfree(vdev->ctx[0].name);
+-              return PTR_ERR(trigger);
+-      }
++      struct eventfd_ctx *old;
+ 
+-      vdev->ctx[0].trigger = trigger;
++      old = vdev->ctx[0].trigger;
+ 
+-      /*
+-       * Devices without DisINTx support require an exclusive interrupt,
+-       * IRQ masking is performed at the IRQ chip.  The masked status is
+-       * protected by vdev->irqlock. Setup the IRQ without auto-enable and
+-       * unmask as necessary below under lock.  DisINTx is unmodified by
+-       * the IRQ configuration and may therefore use auto-enable.
+-       */
+-      if (!vdev->pci_2_3)
+-              irqflags = IRQF_NO_AUTOEN;
++      WRITE_ONCE(vdev->ctx[0].trigger, trigger);
+ 
+-      ret = request_irq(pdev->irq, vfio_intx_handler,
+-                        irqflags, vdev->ctx[0].name, vdev);
+-      if (ret) {
+-              vdev->ctx[0].trigger = NULL;
+-              kfree(vdev->ctx[0].name);
+-              eventfd_ctx_put(trigger);
+-              return ret;
++      /* Releasing an old ctx requires synchronizing in-flight users */
++      if (old) {
++              synchronize_irq(pdev->irq);
++              vfio_virqfd_flush_thread(&vdev->ctx[0].unmask);
++              eventfd_ctx_put(old);
+       }
+ 
+-      spin_lock_irqsave(&vdev->irqlock, flags);
+-      if (!vdev->pci_2_3 && !vdev->ctx[0].masked)
+-              enable_irq(pdev->irq);
+-      spin_unlock_irqrestore(&vdev->irqlock, flags);
+-
+       return 0;
+ }
+ 
+ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
+ {
++      struct pci_dev *pdev = vdev->pdev;
++
+       vfio_virqfd_disable(&vdev->ctx[0].unmask);
+       vfio_virqfd_disable(&vdev->ctx[0].mask);
+-      vfio_intx_set_signal(vdev, -1);
++      free_irq(pdev->irq, vdev);
++      if (vdev->ctx[0].trigger)
++              eventfd_ctx_put(vdev->ctx[0].trigger);
++      kfree(vdev->ctx[0].name);
+       vdev->irq_type = VFIO_PCI_NUM_IRQS;
+       vdev->num_ctx = 0;
+       kfree(vdev->ctx);
+@@ -534,19 +545,23 @@ static int vfio_pci_set_intx_trigger(str
+               return -EINVAL;
+ 
+       if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
++              struct eventfd_ctx *trigger = NULL;
+               int32_t fd = *(int32_t *)data;
+               int ret;
+ 
++              if (fd >= 0) {
++                      trigger = eventfd_ctx_fdget(fd);
++                      if (IS_ERR(trigger))
++                              return PTR_ERR(trigger);
++              }
++
+               if (is_intx(vdev))
+-                      return vfio_intx_set_signal(vdev, fd);
++                      ret = vfio_intx_set_signal(vdev, trigger);
++              else
++                      ret = vfio_intx_enable(vdev, trigger);
+ 
+-              ret = vfio_intx_enable(vdev);
+-              if (ret)
+-                      return ret;
+-
+-              ret = vfio_intx_set_signal(vdev, fd);
+-              if (ret)
+-                      vfio_intx_disable(vdev);
++              if (ret && trigger)
++                      eventfd_ctx_put(trigger);
+ 
+               return ret;
+       }
diff --git a/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch b/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch

index 66b1cfe4ec7639e0f6a53d2d1c884ee518701893..aaa18a4776d9bb72129649d6139cc43181d3a7fd 100644 (file)
--- a/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch
+++ b/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch
@@ -1,7 +1,10 @@
-From 4450a484681c5c31687830485192c35625232427 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Fri, 8 Mar 2024 16:05:22 -0700
+From stable+bounces-33771-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:19 2024
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Fri, 29 Mar 2024 15:38:50 -0600
  Subject: vfio/pci: Disable auto-enable of exclusive INTx IRQ
+To: stable@vger.kernel.org
+Cc: Alex Williamson <alex.williamson@redhat.com>, sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Kevin Tian <kevin.tian@intel.com>
+Message-ID: <20240329213856.2550762-4-alex.williamson@redhat.com>
  
  From: Alex Williamson <alex.williamson@redhat.com>
  
@@ -24,18 +27,16 @@ Reviewed-by: Kevin Tian <kevin.tian@intel.com>
  Reviewed-by: Eric Auger <eric.auger@redhat.com>
  Link: https://lore.kernel.org/r/20240308230557.805580-2-alex.williamson@redhat.com
  Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  ---
- drivers/vfio/pci/vfio_pci_intrs.c | 17 ++++++++++-------
+ drivers/vfio/pci/vfio_pci_intrs.c |   17 ++++++++++-------
   1 file changed, 10 insertions(+), 7 deletions(-)
  
-diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
-index 6094679349d9c..e64f118c4156f 100644
  --- a/drivers/vfio/pci/vfio_pci_intrs.c
  +++ b/drivers/vfio/pci/vfio_pci_intrs.c
-@@ -297,8 +297,15 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
+@@ -251,8 +251,15 @@ static int vfio_intx_set_signal(struct v
   
-       ctx->trigger = trigger;
+       vdev->ctx[0].trigger = trigger;
   
  +      /*
  +       * Devices without DisINTx support require an exclusive interrupt,
@@ -49,8 +50,8 @@ index 6094679349d9c..e64f118c4156f 100644
  +              irqflags = IRQF_NO_AUTOEN;
   
         ret = request_irq(pdev->irq, vfio_intx_handler,
-                         irqflags, ctx->name, vdev);
-@@ -309,13 +316,9 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
+                         irqflags, vdev->ctx[0].name, vdev);
+@@ -263,13 +270,9 @@ static int vfio_intx_set_signal(struct v
                 return ret;
         }
   
@@ -59,13 +60,10 @@ index 6094679349d9c..e64f118c4156f 100644
  -       * disable_irq won't.
  -       */
         spin_lock_irqsave(&vdev->irqlock, flags);
--      if (!vdev->pci_2_3 && ctx->masked)
+-      if (!vdev->pci_2_3 && vdev->ctx[0].masked)
  -              disable_irq_nosync(pdev->irq);
-+      if (!vdev->pci_2_3 && !ctx->masked)
++      if (!vdev->pci_2_3 && !vdev->ctx[0].masked)
  +              enable_irq(pdev->irq);
         spin_unlock_irqrestore(&vdev->irqlock, flags);
   
         return 0;
--- 
-2.43.0
-
diff --git a/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch b/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch

index 00232bf7d55fb405914f6853a5afc7720cf21fc8..29c91abbb2e530e2afc0e653cbbfd9e03ddc3d61 100644 (file)
--- a/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch
+++ b/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch
@@ -30,14 +30,12 @@ Link: https://lore.kernel.org/r/20240308230557.805580-3-alex.williamson@redhat.c
  Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
  Signed-off-by: Sasha Levin <sashal@kernel.org>
  ---
- drivers/vfio/pci/vfio_pci_intrs.c | 34 +++++++++++++++++++++++++------
+ drivers/vfio/pci/vfio_pci_intrs.c |   34 ++++++++++++++++++++++++++++------
   1 file changed, 28 insertions(+), 6 deletions(-)
  
-diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
-index e64f118c4156f..0deb51c820d2e 100644
  --- a/drivers/vfio/pci/vfio_pci_intrs.c
  +++ b/drivers/vfio/pci/vfio_pci_intrs.c
-@@ -91,13 +91,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
+@@ -60,12 +60,14 @@ static void vfio_send_intx_eventfd(void
   }
   
   /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
@@ -45,7 +43,6 @@ index e64f118c4156f..0deb51c820d2e 100644
  +static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
   {
         struct pci_dev *pdev = vdev->pdev;
-       struct vfio_pci_irq_ctx *ctx;
         unsigned long flags;
         bool masked_changed = false;
   
@@ -54,7 +51,7 @@ index e64f118c4156f..0deb51c820d2e 100644
         spin_lock_irqsave(&vdev->irqlock, flags);
   
         /*
-@@ -135,6 +137,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
+@@ -95,6 +97,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_
         return masked_changed;
   }
   
@@ -72,7 +69,7 @@ index e64f118c4156f..0deb51c820d2e 100644
   /*
    * If this is triggered by an eventfd, we can't call eventfd_signal
    * or else we'll deadlock on the eventfd wait queue.  Return >0 when
-@@ -186,12 +199,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
+@@ -137,12 +150,21 @@ static int vfio_pci_intx_unmask_handler(
         return ret;
   }
   
@@ -95,7 +92,7 @@ index e64f118c4156f..0deb51c820d2e 100644
   static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
   {
         struct vfio_pci_core_device *vdev = dev_id;
-@@ -537,11 +559,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
+@@ -457,11 +479,11 @@ static int vfio_pci_set_intx_unmask(stru
                 return -EINVAL;
   
         if (flags & VFIO_IRQ_SET_DATA_NONE) {
@@ -107,9 +104,9 @@ index e64f118c4156f..0deb51c820d2e 100644
  -                      vfio_pci_intx_unmask(vdev);
  +                      __vfio_pci_intx_unmask(vdev);
         } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
-               struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
                 int32_t fd = *(int32_t *)data;
-@@ -568,11 +590,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
+               if (fd >= 0)
+@@ -484,11 +506,11 @@ static int vfio_pci_set_intx_mask(struct
                 return -EINVAL;
   
         if (flags & VFIO_IRQ_SET_DATA_NONE) {
@@ -123,6 +120,3 @@ index e64f118c4156f..0deb51c820d2e 100644
         } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                 return -ENOTTY; /* XXX implement me */
         }
--- 
-2.43.0
-
diff --git a/queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch b/queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch

deleted file mode 100644 (file)

index 6283a4b..0000000
--- a/queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch
+++ /dev/null
@@ -1,515 +0,0 @@
-From bca808da62c6a87ef168554caa318c2801d19b70 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 11 May 2023 08:44:30 -0700
-Subject: vfio/pci: Prepare for dynamic interrupt context storage
-
-From: Reinette Chatre <reinette.chatre@intel.com>
-
-[ Upstream commit d977e0f7663961368f6442589e52d27484c2f5c2 ]
-
-Interrupt context storage is statically allocated at the time
-interrupts are allocated. Following allocation, the interrupt
-context is managed by directly accessing the elements of the
-array using the vector as index.
-
-It is possible to allocate additional MSI-X vectors after
-MSI-X has been enabled. Dynamic storage of interrupt context
-is needed to support adding new MSI-X vectors after initial
-allocation.
-
-Replace direct access of array elements with pointers to the
-array elements. Doing so reduces impact of moving to a new data
-structure. Move interactions with the array to helpers to
-mostly contain changes needed to transition to a dynamic
-data structure.
-
-No functional change intended.
-
-Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-Acked-by: Thomas Gleixner <tglx@linutronix.de>
-Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
-Link: https://lore.kernel.org/r/eab289693c8325ede9aba99380f8b8d5143980a4.1683740667.git.reinette.chatre@intel.com
-Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- drivers/vfio/pci/vfio_pci_intrs.c | 215 +++++++++++++++++++++---------
- 1 file changed, 149 insertions(+), 66 deletions(-)
-
-diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
-index 258de57ef9564..6094679349d9c 100644
---- a/drivers/vfio/pci/vfio_pci_intrs.c
-+++ b/drivers/vfio/pci/vfio_pci_intrs.c
-@@ -48,6 +48,31 @@ static bool is_irq_none(struct vfio_pci_core_device *vdev)
-                vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
- }
- 
-+static
-+struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
-+                                        unsigned long index)
-+{
-+      if (index >= vdev->num_ctx)
-+              return NULL;
-+      return &vdev->ctx[index];
-+}
-+
-+static void vfio_irq_ctx_free_all(struct vfio_pci_core_device *vdev)
-+{
-+      kfree(vdev->ctx);
-+}
-+
-+static int vfio_irq_ctx_alloc_num(struct vfio_pci_core_device *vdev,
-+                                unsigned long num)
-+{
-+      vdev->ctx = kcalloc(num, sizeof(struct vfio_pci_irq_ctx),
-+                          GFP_KERNEL_ACCOUNT);
-+      if (!vdev->ctx)
-+              return -ENOMEM;
-+
-+      return 0;
-+}
-+
- /*
-  * INTx
-  */
-@@ -55,14 +80,21 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
- {
-       struct vfio_pci_core_device *vdev = opaque;
- 
--      if (likely(is_intx(vdev) && !vdev->virq_disabled))
--              eventfd_signal(vdev->ctx[0].trigger, 1);
-+      if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
-+              struct vfio_pci_irq_ctx *ctx;
-+
-+              ctx = vfio_irq_ctx_get(vdev, 0);
-+              if (WARN_ON_ONCE(!ctx))
-+                      return;
-+              eventfd_signal(ctx->trigger, 1);
-+      }
- }
- 
- /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
- bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
- {
-       struct pci_dev *pdev = vdev->pdev;
-+      struct vfio_pci_irq_ctx *ctx;
-       unsigned long flags;
-       bool masked_changed = false;
- 
-@@ -77,7 +109,14 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
-       if (unlikely(!is_intx(vdev))) {
-               if (vdev->pci_2_3)
-                       pci_intx(pdev, 0);
--      } else if (!vdev->ctx[0].masked) {
-+              goto out_unlock;
-+      }
-+
-+      ctx = vfio_irq_ctx_get(vdev, 0);
-+      if (WARN_ON_ONCE(!ctx))
-+              goto out_unlock;
-+
-+      if (!ctx->masked) {
-               /*
-                * Can't use check_and_mask here because we always want to
-                * mask, not just when something is pending.
-@@ -87,10 +126,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
-               else
-                       disable_irq_nosync(pdev->irq);
- 
--              vdev->ctx[0].masked = true;
-+              ctx->masked = true;
-               masked_changed = true;
-       }
- 
-+out_unlock:
-       spin_unlock_irqrestore(&vdev->irqlock, flags);
-       return masked_changed;
- }
-@@ -105,6 +145,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
- {
-       struct vfio_pci_core_device *vdev = opaque;
-       struct pci_dev *pdev = vdev->pdev;
-+      struct vfio_pci_irq_ctx *ctx;
-       unsigned long flags;
-       int ret = 0;
- 
-@@ -117,7 +158,14 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
-       if (unlikely(!is_intx(vdev))) {
-               if (vdev->pci_2_3)
-                       pci_intx(pdev, 1);
--      } else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
-+              goto out_unlock;
-+      }
-+
-+      ctx = vfio_irq_ctx_get(vdev, 0);
-+      if (WARN_ON_ONCE(!ctx))
-+              goto out_unlock;
-+
-+      if (ctx->masked && !vdev->virq_disabled) {
-               /*
-                * A pending interrupt here would immediately trigger,
-                * but we can avoid that overhead by just re-sending
-@@ -129,9 +177,10 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
-               } else
-                       enable_irq(pdev->irq);
- 
--              vdev->ctx[0].masked = (ret > 0);
-+              ctx->masked = (ret > 0);
-       }
- 
-+out_unlock:
-       spin_unlock_irqrestore(&vdev->irqlock, flags);
- 
-       return ret;
-@@ -146,18 +195,23 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
- static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
- {
-       struct vfio_pci_core_device *vdev = dev_id;
-+      struct vfio_pci_irq_ctx *ctx;
-       unsigned long flags;
-       int ret = IRQ_NONE;
- 
-+      ctx = vfio_irq_ctx_get(vdev, 0);
-+      if (WARN_ON_ONCE(!ctx))
-+              return ret;
-+
-       spin_lock_irqsave(&vdev->irqlock, flags);
- 
-       if (!vdev->pci_2_3) {
-               disable_irq_nosync(vdev->pdev->irq);
--              vdev->ctx[0].masked = true;
-+              ctx->masked = true;
-               ret = IRQ_HANDLED;
--      } else if (!vdev->ctx[0].masked &&  /* may be shared */
-+      } else if (!ctx->masked &&  /* may be shared */
-                  pci_check_and_mask_intx(vdev->pdev)) {
--              vdev->ctx[0].masked = true;
-+              ctx->masked = true;
-               ret = IRQ_HANDLED;
-       }
- 
-@@ -171,15 +225,24 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
- 
- static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
- {
-+      struct vfio_pci_irq_ctx *ctx;
-+      int ret;
-+
-       if (!is_irq_none(vdev))
-               return -EINVAL;
- 
-       if (!vdev->pdev->irq)
-               return -ENODEV;
- 
--      vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
--      if (!vdev->ctx)
--              return -ENOMEM;
-+      ret = vfio_irq_ctx_alloc_num(vdev, 1);
-+      if (ret)
-+              return ret;
-+
-+      ctx = vfio_irq_ctx_get(vdev, 0);
-+      if (!ctx) {
-+              vfio_irq_ctx_free_all(vdev);
-+              return -EINVAL;
-+      }
- 
-       vdev->num_ctx = 1;
- 
-@@ -189,9 +252,9 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
-        * here, non-PCI-2.3 devices will have to wait until the
-        * interrupt is enabled.
-        */
--      vdev->ctx[0].masked = vdev->virq_disabled;
-+      ctx->masked = vdev->virq_disabled;
-       if (vdev->pci_2_3)
--              pci_intx(vdev->pdev, !vdev->ctx[0].masked);
-+              pci_intx(vdev->pdev, !ctx->masked);
- 
-       vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
- 
-@@ -202,41 +265,46 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
- {
-       struct pci_dev *pdev = vdev->pdev;
-       unsigned long irqflags = IRQF_SHARED;
-+      struct vfio_pci_irq_ctx *ctx;
-       struct eventfd_ctx *trigger;
-       unsigned long flags;
-       int ret;
- 
--      if (vdev->ctx[0].trigger) {
-+      ctx = vfio_irq_ctx_get(vdev, 0);
-+      if (WARN_ON_ONCE(!ctx))
-+              return -EINVAL;
-+
-+      if (ctx->trigger) {
-               free_irq(pdev->irq, vdev);
--              kfree(vdev->ctx[0].name);
--              eventfd_ctx_put(vdev->ctx[0].trigger);
--              vdev->ctx[0].trigger = NULL;
-+              kfree(ctx->name);
-+              eventfd_ctx_put(ctx->trigger);
-+              ctx->trigger = NULL;
-       }
- 
-       if (fd < 0) /* Disable only */
-               return 0;
- 
--      vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
--                                    pci_name(pdev));
--      if (!vdev->ctx[0].name)
-+      ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
-+                            pci_name(pdev));
-+      if (!ctx->name)
-               return -ENOMEM;
- 
-       trigger = eventfd_ctx_fdget(fd);
-       if (IS_ERR(trigger)) {
--              kfree(vdev->ctx[0].name);
-+              kfree(ctx->name);
-               return PTR_ERR(trigger);
-       }
- 
--      vdev->ctx[0].trigger = trigger;
-+      ctx->trigger = trigger;
- 
-       if (!vdev->pci_2_3)
-               irqflags = 0;
- 
-       ret = request_irq(pdev->irq, vfio_intx_handler,
--                        irqflags, vdev->ctx[0].name, vdev);
-+                        irqflags, ctx->name, vdev);
-       if (ret) {
--              vdev->ctx[0].trigger = NULL;
--              kfree(vdev->ctx[0].name);
-+              ctx->trigger = NULL;
-+              kfree(ctx->name);
-               eventfd_ctx_put(trigger);
-               return ret;
-       }
-@@ -246,7 +314,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
-        * disable_irq won't.
-        */
-       spin_lock_irqsave(&vdev->irqlock, flags);
--      if (!vdev->pci_2_3 && vdev->ctx[0].masked)
-+      if (!vdev->pci_2_3 && ctx->masked)
-               disable_irq_nosync(pdev->irq);
-       spin_unlock_irqrestore(&vdev->irqlock, flags);
- 
-@@ -255,12 +323,18 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
- 
- static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
- {
--      vfio_virqfd_disable(&vdev->ctx[0].unmask);
--      vfio_virqfd_disable(&vdev->ctx[0].mask);
-+      struct vfio_pci_irq_ctx *ctx;
-+
-+      ctx = vfio_irq_ctx_get(vdev, 0);
-+      WARN_ON_ONCE(!ctx);
-+      if (ctx) {
-+              vfio_virqfd_disable(&ctx->unmask);
-+              vfio_virqfd_disable(&ctx->mask);
-+      }
-       vfio_intx_set_signal(vdev, -1);
-       vdev->irq_type = VFIO_PCI_NUM_IRQS;
-       vdev->num_ctx = 0;
--      kfree(vdev->ctx);
-+      vfio_irq_ctx_free_all(vdev);
- }
- 
- /*
-@@ -284,10 +358,9 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
-       if (!is_irq_none(vdev))
-               return -EINVAL;
- 
--      vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx),
--                          GFP_KERNEL_ACCOUNT);
--      if (!vdev->ctx)
--              return -ENOMEM;
-+      ret = vfio_irq_ctx_alloc_num(vdev, nvec);
-+      if (ret)
-+              return ret;
- 
-       /* return the number of supported vectors if we can't get all: */
-       cmd = vfio_pci_memory_lock_and_enable(vdev);
-@@ -296,7 +369,7 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
-               if (ret > 0)
-                       pci_free_irq_vectors(pdev);
-               vfio_pci_memory_unlock_and_restore(vdev, cmd);
--              kfree(vdev->ctx);
-+              vfio_irq_ctx_free_all(vdev);
-               return ret;
-       }
-       vfio_pci_memory_unlock_and_restore(vdev, cmd);
-@@ -320,6 +393,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
-                                     unsigned int vector, int fd, bool msix)
- {
-       struct pci_dev *pdev = vdev->pdev;
-+      struct vfio_pci_irq_ctx *ctx;
-       struct eventfd_ctx *trigger;
-       int irq, ret;
-       u16 cmd;
-@@ -327,33 +401,33 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
-       if (vector >= vdev->num_ctx)
-               return -EINVAL;
- 
-+      ctx = vfio_irq_ctx_get(vdev, vector);
-+      if (!ctx)
-+              return -EINVAL;
-       irq = pci_irq_vector(pdev, vector);
- 
--      if (vdev->ctx[vector].trigger) {
--              irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
-+      if (ctx->trigger) {
-+              irq_bypass_unregister_producer(&ctx->producer);
- 
-               cmd = vfio_pci_memory_lock_and_enable(vdev);
--              free_irq(irq, vdev->ctx[vector].trigger);
-+              free_irq(irq, ctx->trigger);
-               vfio_pci_memory_unlock_and_restore(vdev, cmd);
--
--              kfree(vdev->ctx[vector].name);
--              eventfd_ctx_put(vdev->ctx[vector].trigger);
--              vdev->ctx[vector].trigger = NULL;
-+              kfree(ctx->name);
-+              eventfd_ctx_put(ctx->trigger);
-+              ctx->trigger = NULL;
-       }
- 
-       if (fd < 0)
-               return 0;
- 
--      vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT,
--                                         "vfio-msi%s[%d](%s)",
--                                         msix ? "x" : "", vector,
--                                         pci_name(pdev));
--      if (!vdev->ctx[vector].name)
-+      ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
-+                            msix ? "x" : "", vector, pci_name(pdev));
-+      if (!ctx->name)
-               return -ENOMEM;
- 
-       trigger = eventfd_ctx_fdget(fd);
-       if (IS_ERR(trigger)) {
--              kfree(vdev->ctx[vector].name);
-+              kfree(ctx->name);
-               return PTR_ERR(trigger);
-       }
- 
-@@ -372,26 +446,25 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
-               pci_write_msi_msg(irq, &msg);
-       }
- 
--      ret = request_irq(irq, vfio_msihandler, 0,
--                        vdev->ctx[vector].name, trigger);
-+      ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
-       vfio_pci_memory_unlock_and_restore(vdev, cmd);
-       if (ret) {
--              kfree(vdev->ctx[vector].name);
-+              kfree(ctx->name);
-               eventfd_ctx_put(trigger);
-               return ret;
-       }
- 
--      vdev->ctx[vector].producer.token = trigger;
--      vdev->ctx[vector].producer.irq = irq;
--      ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
-+      ctx->producer.token = trigger;
-+      ctx->producer.irq = irq;
-+      ret = irq_bypass_register_producer(&ctx->producer);
-       if (unlikely(ret)) {
-               dev_info(&pdev->dev,
-               "irq bypass producer (token %p) registration fails: %d\n",
--              vdev->ctx[vector].producer.token, ret);
-+              ctx->producer.token, ret);
- 
--              vdev->ctx[vector].producer.token = NULL;
-+              ctx->producer.token = NULL;
-       }
--      vdev->ctx[vector].trigger = trigger;
-+      ctx->trigger = trigger;
- 
-       return 0;
- }
-@@ -421,13 +494,17 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
- static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
- {
-       struct pci_dev *pdev = vdev->pdev;
-+      struct vfio_pci_irq_ctx *ctx;
-       unsigned int i;
-       u16 cmd;
- 
-       for (i = 0; i < vdev->num_ctx; i++) {
--              vfio_virqfd_disable(&vdev->ctx[i].unmask);
--              vfio_virqfd_disable(&vdev->ctx[i].mask);
--              vfio_msi_set_vector_signal(vdev, i, -1, msix);
-+              ctx = vfio_irq_ctx_get(vdev, i);
-+              if (ctx) {
-+                      vfio_virqfd_disable(&ctx->unmask);
-+                      vfio_virqfd_disable(&ctx->mask);
-+                      vfio_msi_set_vector_signal(vdev, i, -1, msix);
-+              }
-       }
- 
-       cmd = vfio_pci_memory_lock_and_enable(vdev);
-@@ -443,7 +520,7 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
- 
-       vdev->irq_type = VFIO_PCI_NUM_IRQS;
-       vdev->num_ctx = 0;
--      kfree(vdev->ctx);
-+      vfio_irq_ctx_free_all(vdev);
- }
- 
- /*
-@@ -463,14 +540,18 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
-               if (unmask)
-                       vfio_pci_intx_unmask(vdev);
-       } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
-+              struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
-               int32_t fd = *(int32_t *)data;
-+
-+              if (WARN_ON_ONCE(!ctx))
-+                      return -EINVAL;
-               if (fd >= 0)
-                       return vfio_virqfd_enable((void *) vdev,
-                                                 vfio_pci_intx_unmask_handler,
-                                                 vfio_send_intx_eventfd, NULL,
--                                                &vdev->ctx[0].unmask, fd);
-+                                                &ctx->unmask, fd);
- 
--              vfio_virqfd_disable(&vdev->ctx[0].unmask);
-+              vfio_virqfd_disable(&ctx->unmask);
-       }
- 
-       return 0;
-@@ -543,6 +624,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
-                                   unsigned index, unsigned start,
-                                   unsigned count, uint32_t flags, void *data)
- {
-+      struct vfio_pci_irq_ctx *ctx;
-       unsigned int i;
-       bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
- 
-@@ -577,14 +659,15 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
-               return -EINVAL;
- 
-       for (i = start; i < start + count; i++) {
--              if (!vdev->ctx[i].trigger)
-+              ctx = vfio_irq_ctx_get(vdev, i);
-+              if (!ctx || !ctx->trigger)
-                       continue;
-               if (flags & VFIO_IRQ_SET_DATA_NONE) {
--                      eventfd_signal(vdev->ctx[i].trigger, 1);
-+                      eventfd_signal(ctx->trigger, 1);
-               } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
-                       uint8_t *bools = data;
-                       if (bools[i - start])
--                              eventfd_signal(vdev->ctx[i].trigger, 1);
-+                              eventfd_signal(ctx->trigger, 1);
-               }
-       }
-       return 0;
--- 
-2.43.0
-
diff --git a/queue-6.1/vfio-platform-create-persistent-irq-handlers.patch b/queue-6.1/vfio-platform-create-persistent-irq-handlers.patch

new file mode 100644 (file)

index 0000000..33127f0
--- /dev/null
+++ b/queue-6.1/vfio-platform-create-persistent-irq-handlers.patch
@@ -0,0 +1,253 @@
+From stable+bounces-33772-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:32 2024
+From: Alex Williamson <alex.williamson@redhat.com>
+Date: Fri, 29 Mar 2024 15:38:53 -0600
+Subject: vfio/platform: Create persistent IRQ handlers
+To: stable@vger.kernel.org
+Cc: Alex Williamson <alex.williamson@redhat.com>, sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Kevin Tian <kevin.tian@intel.com>
+Message-ID: <20240329213856.2550762-7-alex.williamson@redhat.com>
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+[ Upstream commit 675daf435e9f8e5a5eab140a9864dfad6668b375 ]
+
+The vfio-platform SET_IRQS ioctl currently allows loopback triggering of
+an interrupt before a signaling eventfd has been configured by the user,
+which thereby allows a NULL pointer dereference.
+
+Rather than register the IRQ relative to a valid trigger, register all
+IRQs in a disabled state in the device open path.  This allows mask
+operations on the IRQ to nest within the overall enable state governed
+by a valid eventfd signal.  This decouples @masked, protected by the
+@locked spinlock from @trigger, protected via the @igate mutex.
+
+In doing so, it's guaranteed that changes to @trigger cannot race the
+IRQ handlers because the IRQ handler is synchronously disabled before
+modifying the trigger, and loopback triggering of the IRQ via ioctl is
+safe due to serialization with trigger changes via igate.
+
+For compatibility, request_irq() failures are maintained to be local to
+the SET_IRQS ioctl rather than a fatal error in the open device path.
+This allows, for example, a userspace driver with polling mode support
+to continue to work regardless of moving the request_irq() call site.
+This necessarily blocks all SET_IRQS access to the failed index.
+
+Cc: Eric Auger <eric.auger@redhat.com>
+Cc:  <stable@vger.kernel.org>
+Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reviewed-by: Eric Auger <eric.auger@redhat.com>
+Link: https://lore.kernel.org/r/20240308230557.805580-7-alex.williamson@redhat.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vfio/platform/vfio_platform_irq.c |  101 ++++++++++++++++++++----------
+ 1 file changed, 68 insertions(+), 33 deletions(-)
+
+--- a/drivers/vfio/platform/vfio_platform_irq.c
++++ b/drivers/vfio/platform/vfio_platform_irq.c
+@@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(
+       return 0;
+ }
+ 
++/*
++ * The trigger eventfd is guaranteed valid in the interrupt path
++ * and protected by the igate mutex when triggered via ioctl.
++ */
++static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx)
++{
++      if (likely(irq_ctx->trigger))
++              eventfd_signal(irq_ctx->trigger, 1);
++}
++
+ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
+ {
+       struct vfio_platform_irq *irq_ctx = dev_id;
+@@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_h
+       spin_unlock_irqrestore(&irq_ctx->lock, flags);
+ 
+       if (ret == IRQ_HANDLED)
+-              eventfd_signal(irq_ctx->trigger, 1);
++              vfio_send_eventfd(irq_ctx);
+ 
+       return ret;
+ }
+@@ -164,22 +174,19 @@ static irqreturn_t vfio_irq_handler(int
+ {
+       struct vfio_platform_irq *irq_ctx = dev_id;
+ 
+-      eventfd_signal(irq_ctx->trigger, 1);
++      vfio_send_eventfd(irq_ctx);
+ 
+       return IRQ_HANDLED;
+ }
+ 
+ static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
+-                          int fd, irq_handler_t handler)
++                          int fd)
+ {
+       struct vfio_platform_irq *irq = &vdev->irqs[index];
+       struct eventfd_ctx *trigger;
+-      int ret;
+ 
+       if (irq->trigger) {
+-              irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
+-              free_irq(irq->hwirq, irq);
+-              kfree(irq->name);
++              disable_irq(irq->hwirq);
+               eventfd_ctx_put(irq->trigger);
+               irq->trigger = NULL;
+       }
+@@ -187,30 +194,20 @@ static int vfio_set_trigger(struct vfio_
+       if (fd < 0) /* Disable only */
+               return 0;
+ 
+-      irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
+-                                              irq->hwirq, vdev->name);
+-      if (!irq->name)
+-              return -ENOMEM;
+-
+       trigger = eventfd_ctx_fdget(fd);
+-      if (IS_ERR(trigger)) {
+-              kfree(irq->name);
++      if (IS_ERR(trigger))
+               return PTR_ERR(trigger);
+-      }
+ 
+       irq->trigger = trigger;
+ 
+-      irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN);
+-      ret = request_irq(irq->hwirq, handler, 0, irq->name, irq);
+-      if (ret) {
+-              kfree(irq->name);
+-              eventfd_ctx_put(trigger);
+-              irq->trigger = NULL;
+-              return ret;
+-      }
+-
+-      if (!irq->masked)
+-              enable_irq(irq->hwirq);
++      /*
++       * irq->masked effectively provides nested disables within the overall
++       * enable relative to trigger.  Specifically request_irq() is called
++       * with NO_AUTOEN, therefore the IRQ is initially disabled.  The user
++       * may only further disable the IRQ with a MASK operations because
++       * irq->masked is initially false.
++       */
++      enable_irq(irq->hwirq);
+ 
+       return 0;
+ }
+@@ -229,7 +226,7 @@ static int vfio_platform_set_irq_trigger
+               handler = vfio_irq_handler;
+ 
+       if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
+-              return vfio_set_trigger(vdev, index, -1, handler);
++              return vfio_set_trigger(vdev, index, -1);
+ 
+       if (start != 0 || count != 1)
+               return -EINVAL;
+@@ -237,7 +234,7 @@ static int vfio_platform_set_irq_trigger
+       if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+               int32_t fd = *(int32_t *)data;
+ 
+-              return vfio_set_trigger(vdev, index, fd, handler);
++              return vfio_set_trigger(vdev, index, fd);
+       }
+ 
+       if (flags & VFIO_IRQ_SET_DATA_NONE) {
+@@ -261,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct
+                   unsigned start, unsigned count, uint32_t flags,
+                   void *data) = NULL;
+ 
++      /*
++       * For compatibility, errors from request_irq() are local to the
++       * SET_IRQS path and reflected in the name pointer.  This allows,
++       * for example, polling mode fallback for an exclusive IRQ failure.
++       */
++      if (IS_ERR(vdev->irqs[index].name))
++              return PTR_ERR(vdev->irqs[index].name);
++
+       switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+       case VFIO_IRQ_SET_ACTION_MASK:
+               func = vfio_platform_set_irq_mask;
+@@ -281,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct
+ 
+ int vfio_platform_irq_init(struct vfio_platform_device *vdev)
+ {
+-      int cnt = 0, i;
++      int cnt = 0, i, ret = 0;
+ 
+       while (vdev->get_irq(vdev, cnt) >= 0)
+               cnt++;
+@@ -292,29 +297,54 @@ int vfio_platform_irq_init(struct vfio_p
+ 
+       for (i = 0; i < cnt; i++) {
+               int hwirq = vdev->get_irq(vdev, i);
++              irq_handler_t handler = vfio_irq_handler;
+ 
+-              if (hwirq < 0)
++              if (hwirq < 0) {
++                      ret = -EINVAL;
+                       goto err;
++              }
+ 
+               spin_lock_init(&vdev->irqs[i].lock);
+ 
+               vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD;
+ 
+-              if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK)
++              if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) {
+                       vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE
+                                               | VFIO_IRQ_INFO_AUTOMASKED;
++                      handler = vfio_automasked_irq_handler;
++              }
+ 
+               vdev->irqs[i].count = 1;
+               vdev->irqs[i].hwirq = hwirq;
+               vdev->irqs[i].masked = false;
++              vdev->irqs[i].name = kasprintf(GFP_KERNEL,
++                                             "vfio-irq[%d](%s)", hwirq,
++                                             vdev->name);
++              if (!vdev->irqs[i].name) {
++                      ret = -ENOMEM;
++                      goto err;
++              }
++
++              ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN,
++                                vdev->irqs[i].name, &vdev->irqs[i]);
++              if (ret) {
++                      kfree(vdev->irqs[i].name);
++                      vdev->irqs[i].name = ERR_PTR(ret);
++              }
+       }
+ 
+       vdev->num_irqs = cnt;
+ 
+       return 0;
+ err:
++      for (--i; i >= 0; i--) {
++              if (!IS_ERR(vdev->irqs[i].name)) {
++                      free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
++                      kfree(vdev->irqs[i].name);
++              }
++      }
+       kfree(vdev->irqs);
+-      return -EINVAL;
++      return ret;
+ }
+ 
+ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
+@@ -324,7 +354,12 @@ void vfio_platform_irq_cleanup(struct vf
+       for (i = 0; i < vdev->num_irqs; i++) {
+               vfio_virqfd_disable(&vdev->irqs[i].mask);
+               vfio_virqfd_disable(&vdev->irqs[i].unmask);
+-              vfio_set_trigger(vdev, i, -1, NULL);
++              if (!IS_ERR(vdev->irqs[i].name)) {
++                      free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
++                      if (vdev->irqs[i].trigger)
++                              eventfd_ctx_put(vdev->irqs[i].trigger);
++                      kfree(vdev->irqs[i].name);
++              }
+       }
+ 
+       vdev->num_irqs = 0;
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 30 Mar 2024 08:59:39 +0000 (09:59 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 30 Mar 2024 08:59:39 +0000 (09:59 +0100)
queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/series		patch \| blob \| blame \| history
queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/vfio-pci-create-persistent-intx-handler.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch		patch \| blob \| blame \| history
queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch		patch \| blob \| blame \| history
queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch	[deleted file]	patch \| blob \| blame \| history
queue-6.1/vfio-platform-create-persistent-irq-handlers.patch	[new file with mode: 0644]	patch \| blob