From: Greg Kroah-Hartman Date: Sat, 30 Mar 2024 08:59:39 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.7.12~129 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0f7f39aa26172a10f0988ee8626a593a07a9d11e;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch vfio-pci-create-persistent-intx-handler.patch vfio-platform-create-persistent-irq-handlers.patch --- diff --git a/queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch b/queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch new file mode 100644 index 00000000000..bd2f10ebf61 --- /dev/null +++ b/queue-6.1/selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch @@ -0,0 +1,51 @@ +From 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 Mon Sep 17 00:00:00 2001 +From: Geliang Tang +Date: Fri, 1 Mar 2024 18:11:22 +0100 +Subject: selftests: mptcp: diag: return KSFT_FAIL not test_cnt + +From: Geliang Tang + +commit 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 upstream. + +The test counter 'test_cnt' should not be returned in diag.sh, e.g. what +if only the 4th test fail? Will do 'exit 4' which is 'exit ${KSFT_SKIP}', +the whole test will be marked as skipped instead of 'failed'! + +So we should do ret=${KSFT_FAIL} instead. + +Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") +Cc: stable@vger.kernel.org +Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests") +Signed-off-by: Geliang Tang +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/mptcp/diag.sh | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/net/mptcp/diag.sh ++++ b/tools/testing/selftests/net/mptcp/diag.sh +@@ -56,7 +56,7 @@ __chk_nr() + echo "[ skip ] Feature probably not supported" + else + echo "[ fail ] expected $expected found $nr" +- ret=$test_cnt ++ ret=${KSFT_FAIL} + fi + else + echo "[ ok ]" +@@ -100,10 +100,10 @@ wait_msk_nr() + printf "%-50s" "$msg" + if [ $i -ge $timeout ]; then + echo "[ fail ] timeout while expecting $expected max $max last $nr" +- ret=$test_cnt ++ ret=${KSFT_FAIL} + elif [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" +- ret=$test_cnt ++ ret=${KSFT_FAIL} + else + echo "[ ok ]" + fi diff --git a/queue-6.1/series b/queue-6.1/series index 41849b9bd4f..715f4d8e53a 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -101,8 +101,6 @@ pci-hv-fix-ring-buffer-size-calculation.patch vfio-use-gfp_kernel_account-for-userspace-persistent.patch vfio-pci-consolidate-irq-cleanup-on-msi-msi-x-disabl.patch vfio-pci-remove-negative-check-on-unsigned-vector.patch -vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch -vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch vfio-pci-lock-external-intx-masking-ops.patch vfio-platform-disable-virqfds-on-cleanup.patch ksmbd-retrieve-number-of-blocks-using-vfs_getattr-in.patch @@ -198,3 +196,9 @@ pwm-img-fix-pwm-clock-lookup.patch tty-serial-imx-fix-broken-rs485.patch block-fix-page-refcounts-for-unaligned-buffers-in-__bio_release_pages.patch blk-mq-release-scheduler-resource-when-request-completes.patch +selftests-mptcp-diag-return-ksft_fail-not-test_cnt.patch +vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch +vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch +vfio-pci-create-persistent-intx-handler.patch +vfio-platform-create-persistent-irq-handlers.patch 
+vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch diff --git a/queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch b/queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch new file mode 100644 index 00000000000..6a07c4dd018 --- /dev/null +++ b/queue-6.1/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch @@ -0,0 +1,56 @@ +From stable+bounces-33770-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:20 2024 +From: Alex Williamson +Date: Fri, 29 Mar 2024 15:38:54 -0600 +Subject: vfio/fsl-mc: Block calling interrupt handler without trigger +To: stable@vger.kernel.org +Cc: Alex Williamson , sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Diana Craciun , Kevin Tian +Message-ID: <20240329213856.2550762-8-alex.williamson@redhat.com> + +From: Alex Williamson + +[ Upstream commit 7447d911af699a15f8d050dfcb7c680a86f87012 ] + +The eventfd_ctx trigger pointer of the vfio_fsl_mc_irq object is +initially NULL and may become NULL if the user sets the trigger +eventfd to -1. The interrupt handler itself is guaranteed that +trigger is always valid between request_irq() and free_irq(), but +the loopback testing mechanisms to invoke the handler function +need to test the trigger. The triggering and setting ioctl paths +both make use of igate and are therefore mutually exclusive. + +The vfio-fsl-mc driver does not make use of irqfds, nor does it +support any sort of masking operations, therefore unlike vfio-pci +and vfio-platform, the flow can remain essentially unchanged. 
+ +Cc: Diana Craciun +Cc: stable@vger.kernel.org +Fixes: cc0ee20bd969 ("vfio/fsl-mc: trigger an interrupt via eventfd") +Reviewed-by: Kevin Tian +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-8-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c ++++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +@@ -142,13 +142,14 @@ static int vfio_fsl_mc_set_irq_trigger(s + irq = &vdev->mc_irqs[index]; + + if (flags & VFIO_IRQ_SET_DATA_NONE) { +- vfio_fsl_mc_irq_handler(hwirq, irq); ++ if (irq->trigger) ++ eventfd_signal(irq->trigger, 1); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + u8 trigger = *(u8 *)data; + +- if (trigger) +- vfio_fsl_mc_irq_handler(hwirq, irq); ++ if (trigger && irq->trigger) ++ eventfd_signal(irq->trigger, 1); + } + + return 0; diff --git a/queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch b/queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch new file mode 100644 index 00000000000..ee8c0d37538 --- /dev/null +++ b/queue-6.1/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch @@ -0,0 +1,92 @@ +From stable+bounces-33768-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:16 2024 +From: Alex Williamson +Date: Fri, 29 Mar 2024 15:38:51 -0600 +Subject: vfio: Introduce interface to flush virqfd inject workqueue +To: stable@vger.kernel.org +Cc: Alex Williamson , sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Kevin Tian , Reinette Chatre +Message-ID: <20240329213856.2550762-5-alex.williamson@redhat.com> + +From: Alex Williamson + +[ Upstream commit b620ecbd17a03cacd06f014a5d3f3a11285ce053 ] + +In order to synchronize changes that can affect the thread callback, +introduce an interface to force a flush of the inject workqueue. 
The +irqfd pointer is only valid under spinlock, but the workqueue cannot +be flushed under spinlock. Therefore the flush work for the irqfd is +queued under spinlock. The vfio_irqfd_cleanup_wq workqueue is re-used +for queuing this work such that flushing the workqueue is also ordered +relative to shutdown. + +Reviewed-by: Kevin Tian +Reviewed-by: Reinette Chatre +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-4-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/virqfd.c | 21 +++++++++++++++++++++ + include/linux/vfio.h | 2 ++ + 2 files changed, 23 insertions(+) + +--- a/drivers/vfio/virqfd.c ++++ b/drivers/vfio/virqfd.c +@@ -104,6 +104,13 @@ static void virqfd_inject(struct work_st + virqfd->thread(virqfd->opaque, virqfd->data); + } + ++static void virqfd_flush_inject(struct work_struct *work) ++{ ++ struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); ++ ++ flush_work(&virqfd->inject); ++} ++ + int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), +@@ -127,6 +134,7 @@ int vfio_virqfd_enable(void *opaque, + + INIT_WORK(&virqfd->shutdown, virqfd_shutdown); + INIT_WORK(&virqfd->inject, virqfd_inject); ++ INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); + + irqfd = fdget(fd); + if (!irqfd.file) { +@@ -217,6 +225,19 @@ void vfio_virqfd_disable(struct virqfd * + } + EXPORT_SYMBOL_GPL(vfio_virqfd_disable); + ++void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&virqfd_lock, flags); ++ if (*pvirqfd && (*pvirqfd)->thread) ++ queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); ++ spin_unlock_irqrestore(&virqfd_lock, flags); ++ ++ flush_workqueue(vfio_irqfd_cleanup_wq); ++} ++EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); ++ + module_init(vfio_virqfd_init); + module_exit(vfio_virqfd_exit); + +--- a/include/linux/vfio.h ++++ 
b/include/linux/vfio.h +@@ -268,6 +268,7 @@ struct virqfd { + wait_queue_entry_t wait; + poll_table pt; + struct work_struct shutdown; ++ struct work_struct flush_inject; + struct virqfd **pvirqfd; + }; + +@@ -275,5 +276,6 @@ int vfio_virqfd_enable(void *opaque, int + void (*thread)(void *, void *), void *data, + struct virqfd **pvirqfd, int fd); + void vfio_virqfd_disable(struct virqfd **pvirqfd); ++void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); + + #endif /* VFIO_H */ diff --git a/queue-6.1/vfio-pci-create-persistent-intx-handler.patch b/queue-6.1/vfio-pci-create-persistent-intx-handler.patch new file mode 100644 index 00000000000..56e0555c4ed --- /dev/null +++ b/queue-6.1/vfio-pci-create-persistent-intx-handler.patch @@ -0,0 +1,257 @@ +From stable+bounces-33769-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:17 2024 +From: Alex Williamson +Date: Fri, 29 Mar 2024 15:38:52 -0600 +Subject: vfio/pci: Create persistent INTx handler +To: stable@vger.kernel.org +Cc: Alex Williamson , sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Reinette Chatre , Kevin Tian +Message-ID: <20240329213856.2550762-6-alex.williamson@redhat.com> + +From: Alex Williamson + +[ Upstream commit 18c198c96a815c962adc2b9b77909eec0be7df4d ] + +A vulnerability exists where the eventfd for INTx signaling can be +deconfigured, which unregisters the IRQ handler but still allows +eventfds to be signaled with a NULL context through the SET_IRQS ioctl +or through unmask irqfd if the device interrupt is pending. + +Ideally this could be solved with some additional locking; the igate +mutex serializes the ioctl and config space accesses, and the interrupt +handler is unregistered relative to the trigger, but the irqfd path +runs asynchronous to those. The igate mutex cannot be acquired from the +atomic context of the eventfd wake function. Disabling the irqfd +relative to the eventfd registration is potentially incompatible with +existing userspace. 
+ +As a result, the solution implemented here moves configuration of the +INTx interrupt handler to track the lifetime of the INTx context object +and irq_type configuration, rather than registration of a particular +trigger eventfd. Synchronization is added between the ioctl path and +eventfd_signal() wrapper such that the eventfd trigger can be +dynamically updated relative to in-flight interrupts or irqfd callbacks. + +Cc: stable@vger.kernel.org +Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") +Reported-by: Reinette Chatre +Reviewed-by: Kevin Tian +Reviewed-by: Reinette Chatre +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/pci/vfio_pci_intrs.c | 149 ++++++++++++++++++++------------------ + 1 file changed, 82 insertions(+), 67 deletions(-) + +--- a/drivers/vfio/pci/vfio_pci_intrs.c ++++ b/drivers/vfio/pci/vfio_pci_intrs.c +@@ -55,8 +55,13 @@ static void vfio_send_intx_eventfd(void + { + struct vfio_pci_core_device *vdev = opaque; + +- if (likely(is_intx(vdev) && !vdev->virq_disabled)) +- eventfd_signal(vdev->ctx[0].trigger, 1); ++ if (likely(is_intx(vdev) && !vdev->virq_disabled)) { ++ struct eventfd_ctx *trigger; ++ ++ trigger = READ_ONCE(vdev->ctx[0].trigger); ++ if (likely(trigger)) ++ eventfd_signal(trigger, 1); ++ } + } + + /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. 
*/ +@@ -191,98 +196,104 @@ static irqreturn_t vfio_intx_handler(int + return ret; + } + +-static int vfio_intx_enable(struct vfio_pci_core_device *vdev) ++static int vfio_intx_enable(struct vfio_pci_core_device *vdev, ++ struct eventfd_ctx *trigger) + { ++ struct pci_dev *pdev = vdev->pdev; ++ unsigned long irqflags; ++ char *name; ++ int ret; ++ + if (!is_irq_none(vdev)) + return -EINVAL; + +- if (!vdev->pdev->irq) ++ if (!pdev->irq) + return -ENODEV; + ++ name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); ++ if (!name) ++ return -ENOMEM; ++ + vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); + if (!vdev->ctx) + return -ENOMEM; + + vdev->num_ctx = 1; + ++ vdev->ctx[0].name = name; ++ vdev->ctx[0].trigger = trigger; ++ + /* +- * If the virtual interrupt is masked, restore it. Devices +- * supporting DisINTx can be masked at the hardware level +- * here, non-PCI-2.3 devices will have to wait until the +- * interrupt is enabled. ++ * Fill the initial masked state based on virq_disabled. After ++ * enable, changing the DisINTx bit in vconfig directly changes INTx ++ * masking. igate prevents races during setup, once running masked ++ * is protected via irqlock. ++ * ++ * Devices supporting DisINTx also reflect the current mask state in ++ * the physical DisINTx bit, which is not affected during IRQ setup. ++ * ++ * Devices without DisINTx support require an exclusive interrupt. ++ * IRQ masking is performed at the IRQ chip. Again, igate protects ++ * against races during setup and IRQ handlers and irqfds are not ++ * yet active, therefore masked is stable and can be used to ++ * conditionally auto-enable the IRQ. ++ * ++ * irq_type must be stable while the IRQ handler is registered, ++ * therefore it must be set before request_irq(). 
+ */ + vdev->ctx[0].masked = vdev->virq_disabled; +- if (vdev->pci_2_3) +- pci_intx(vdev->pdev, !vdev->ctx[0].masked); ++ if (vdev->pci_2_3) { ++ pci_intx(pdev, !vdev->ctx[0].masked); ++ irqflags = IRQF_SHARED; ++ } else { ++ irqflags = vdev->ctx[0].masked ? IRQF_NO_AUTOEN : 0; ++ } + + vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; + ++ ret = request_irq(pdev->irq, vfio_intx_handler, ++ irqflags, vdev->ctx[0].name, vdev); ++ if (ret) { ++ vdev->irq_type = VFIO_PCI_NUM_IRQS; ++ kfree(name); ++ vdev->num_ctx = 0; ++ kfree(vdev->ctx); ++ return ret; ++ } ++ + return 0; + } + +-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) ++static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, ++ struct eventfd_ctx *trigger) + { + struct pci_dev *pdev = vdev->pdev; +- unsigned long irqflags = IRQF_SHARED; +- struct eventfd_ctx *trigger; +- unsigned long flags; +- int ret; +- +- if (vdev->ctx[0].trigger) { +- free_irq(pdev->irq, vdev); +- kfree(vdev->ctx[0].name); +- eventfd_ctx_put(vdev->ctx[0].trigger); +- vdev->ctx[0].trigger = NULL; +- } +- +- if (fd < 0) /* Disable only */ +- return 0; +- +- vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", +- pci_name(pdev)); +- if (!vdev->ctx[0].name) +- return -ENOMEM; +- +- trigger = eventfd_ctx_fdget(fd); +- if (IS_ERR(trigger)) { +- kfree(vdev->ctx[0].name); +- return PTR_ERR(trigger); +- } ++ struct eventfd_ctx *old; + +- vdev->ctx[0].trigger = trigger; ++ old = vdev->ctx[0].trigger; + +- /* +- * Devices without DisINTx support require an exclusive interrupt, +- * IRQ masking is performed at the IRQ chip. The masked status is +- * protected by vdev->irqlock. Setup the IRQ without auto-enable and +- * unmask as necessary below under lock. DisINTx is unmodified by +- * the IRQ configuration and may therefore use auto-enable. 
+- */ +- if (!vdev->pci_2_3) +- irqflags = IRQF_NO_AUTOEN; ++ WRITE_ONCE(vdev->ctx[0].trigger, trigger); + +- ret = request_irq(pdev->irq, vfio_intx_handler, +- irqflags, vdev->ctx[0].name, vdev); +- if (ret) { +- vdev->ctx[0].trigger = NULL; +- kfree(vdev->ctx[0].name); +- eventfd_ctx_put(trigger); +- return ret; ++ /* Releasing an old ctx requires synchronizing in-flight users */ ++ if (old) { ++ synchronize_irq(pdev->irq); ++ vfio_virqfd_flush_thread(&vdev->ctx[0].unmask); ++ eventfd_ctx_put(old); + } + +- spin_lock_irqsave(&vdev->irqlock, flags); +- if (!vdev->pci_2_3 && !vdev->ctx[0].masked) +- enable_irq(pdev->irq); +- spin_unlock_irqrestore(&vdev->irqlock, flags); +- + return 0; + } + + static void vfio_intx_disable(struct vfio_pci_core_device *vdev) + { ++ struct pci_dev *pdev = vdev->pdev; ++ + vfio_virqfd_disable(&vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].mask); +- vfio_intx_set_signal(vdev, -1); ++ free_irq(pdev->irq, vdev); ++ if (vdev->ctx[0].trigger) ++ eventfd_ctx_put(vdev->ctx[0].trigger); ++ kfree(vdev->ctx[0].name); + vdev->irq_type = VFIO_PCI_NUM_IRQS; + vdev->num_ctx = 0; + kfree(vdev->ctx); +@@ -534,19 +545,23 @@ static int vfio_pci_set_intx_trigger(str + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { ++ struct eventfd_ctx *trigger = NULL; + int32_t fd = *(int32_t *)data; + int ret; + ++ if (fd >= 0) { ++ trigger = eventfd_ctx_fdget(fd); ++ if (IS_ERR(trigger)) ++ return PTR_ERR(trigger); ++ } ++ + if (is_intx(vdev)) +- return vfio_intx_set_signal(vdev, fd); ++ ret = vfio_intx_set_signal(vdev, trigger); ++ else ++ ret = vfio_intx_enable(vdev, trigger); + +- ret = vfio_intx_enable(vdev); +- if (ret) +- return ret; +- +- ret = vfio_intx_set_signal(vdev, fd); +- if (ret) +- vfio_intx_disable(vdev); ++ if (ret && trigger) ++ eventfd_ctx_put(trigger); + + return ret; + } diff --git a/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch b/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch 
index 66b1cfe4ec7..aaa18a4776d 100644 --- a/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch +++ b/queue-6.1/vfio-pci-disable-auto-enable-of-exclusive-intx-irq.patch @@ -1,7 +1,10 @@ -From 4450a484681c5c31687830485192c35625232427 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Fri, 8 Mar 2024 16:05:22 -0700 +From stable+bounces-33771-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:19 2024 +From: Alex Williamson +Date: Fri, 29 Mar 2024 15:38:50 -0600 Subject: vfio/pci: Disable auto-enable of exclusive INTx IRQ +To: stable@vger.kernel.org +Cc: Alex Williamson , sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Kevin Tian +Message-ID: <20240329213856.2550762-4-alex.williamson@redhat.com> From: Alex Williamson @@ -24,18 +27,16 @@ Reviewed-by: Kevin Tian Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240308230557.805580-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson -Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman --- - drivers/vfio/pci/vfio_pci_intrs.c | 17 ++++++++++------- + drivers/vfio/pci/vfio_pci_intrs.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) -diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c -index 6094679349d9c..e64f118c4156f 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c -@@ -297,8 +297,15 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) +@@ -251,8 +251,15 @@ static int vfio_intx_set_signal(struct v - ctx->trigger = trigger; + vdev->ctx[0].trigger = trigger; + /* + * Devices without DisINTx support require an exclusive interrupt, @@ -49,8 +50,8 @@ index 6094679349d9c..e64f118c4156f 100644 + irqflags = IRQF_NO_AUTOEN; ret = request_irq(pdev->irq, vfio_intx_handler, - irqflags, ctx->name, vdev); -@@ -309,13 +316,9 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) + irqflags, vdev->ctx[0].name, vdev); +@@ -263,13 +270,9 @@ 
static int vfio_intx_set_signal(struct v return ret; } @@ -59,13 +60,10 @@ index 6094679349d9c..e64f118c4156f 100644 - * disable_irq won't. - */ spin_lock_irqsave(&vdev->irqlock, flags); -- if (!vdev->pci_2_3 && ctx->masked) +- if (!vdev->pci_2_3 && vdev->ctx[0].masked) - disable_irq_nosync(pdev->irq); -+ if (!vdev->pci_2_3 && !ctx->masked) ++ if (!vdev->pci_2_3 && !vdev->ctx[0].masked) + enable_irq(pdev->irq); spin_unlock_irqrestore(&vdev->irqlock, flags); return 0; --- -2.43.0 - diff --git a/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch b/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch index 00232bf7d55..29c91abbb2e 100644 --- a/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch +++ b/queue-6.1/vfio-pci-lock-external-intx-masking-ops.patch @@ -30,14 +30,12 @@ Link: https://lore.kernel.org/r/20240308230557.805580-3-alex.williamson@redhat.c Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- - drivers/vfio/pci/vfio_pci_intrs.c | 34 +++++++++++++++++++++++++------ + drivers/vfio/pci/vfio_pci_intrs.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) -diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c -index e64f118c4156f..0deb51c820d2e 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c -@@ -91,13 +91,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) +@@ -60,12 +60,14 @@ static void vfio_send_intx_eventfd(void } /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. 
*/ @@ -45,7 +43,6 @@ index e64f118c4156f..0deb51c820d2e 100644 +static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; - struct vfio_pci_irq_ctx *ctx; unsigned long flags; bool masked_changed = false; @@ -54,7 +51,7 @@ index e64f118c4156f..0deb51c820d2e 100644 spin_lock_irqsave(&vdev->irqlock, flags); /* -@@ -135,6 +137,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) +@@ -95,6 +97,17 @@ bool vfio_pci_intx_mask(struct vfio_pci_ return masked_changed; } @@ -72,7 +69,7 @@ index e64f118c4156f..0deb51c820d2e 100644 /* * If this is triggered by an eventfd, we can't call eventfd_signal * or else we'll deadlock on the eventfd wait queue. Return >0 when -@@ -186,12 +199,21 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) +@@ -137,12 +150,21 @@ static int vfio_pci_intx_unmask_handler( return ret; } @@ -95,7 +92,7 @@ index e64f118c4156f..0deb51c820d2e 100644 static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { struct vfio_pci_core_device *vdev = dev_id; -@@ -537,11 +559,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, +@@ -457,11 +479,11 @@ static int vfio_pci_set_intx_unmask(stru return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { @@ -107,9 +104,9 @@ index e64f118c4156f..0deb51c820d2e 100644 - vfio_pci_intx_unmask(vdev); + __vfio_pci_intx_unmask(vdev); } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { - struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0); int32_t fd = *(int32_t *)data; -@@ -568,11 +590,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev, + if (fd >= 0) +@@ -484,11 +506,11 @@ static int vfio_pci_set_intx_mask(struct return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { @@ -123,6 +120,3 @@ index e64f118c4156f..0deb51c820d2e 100644 } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { return -ENOTTY; /* XXX implement me */ } --- -2.43.0 - diff --git 
a/queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch b/queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch deleted file mode 100644 index 6283a4b6f50..00000000000 --- a/queue-6.1/vfio-pci-prepare-for-dynamic-interrupt-context-stora.patch +++ /dev/null @@ -1,515 +0,0 @@ -From bca808da62c6a87ef168554caa318c2801d19b70 Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 11 May 2023 08:44:30 -0700 -Subject: vfio/pci: Prepare for dynamic interrupt context storage - -From: Reinette Chatre - -[ Upstream commit d977e0f7663961368f6442589e52d27484c2f5c2 ] - -Interrupt context storage is statically allocated at the time -interrupts are allocated. Following allocation, the interrupt -context is managed by directly accessing the elements of the -array using the vector as index. - -It is possible to allocate additional MSI-X vectors after -MSI-X has been enabled. Dynamic storage of interrupt context -is needed to support adding new MSI-X vectors after initial -allocation. - -Replace direct access of array elements with pointers to the -array elements. Doing so reduces impact of moving to a new data -structure. Move interactions with the array to helpers to -mostly contain changes needed to transition to a dynamic -data structure. - -No functional change intended. 
- -Signed-off-by: Reinette Chatre -Reviewed-by: Kevin Tian -Acked-by: Thomas Gleixner -Reviewed-by: Jason Gunthorpe -Link: https://lore.kernel.org/r/eab289693c8325ede9aba99380f8b8d5143980a4.1683740667.git.reinette.chatre@intel.com -Signed-off-by: Alex Williamson -Stable-dep-of: fe9a7082684e ("vfio/pci: Disable auto-enable of exclusive INTx IRQ") -Signed-off-by: Sasha Levin ---- - drivers/vfio/pci/vfio_pci_intrs.c | 215 +++++++++++++++++++++--------- - 1 file changed, 149 insertions(+), 66 deletions(-) - -diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c -index 258de57ef9564..6094679349d9c 100644 ---- a/drivers/vfio/pci/vfio_pci_intrs.c -+++ b/drivers/vfio/pci/vfio_pci_intrs.c -@@ -48,6 +48,31 @@ static bool is_irq_none(struct vfio_pci_core_device *vdev) - vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX); - } - -+static -+struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev, -+ unsigned long index) -+{ -+ if (index >= vdev->num_ctx) -+ return NULL; -+ return &vdev->ctx[index]; -+} -+ -+static void vfio_irq_ctx_free_all(struct vfio_pci_core_device *vdev) -+{ -+ kfree(vdev->ctx); -+} -+ -+static int vfio_irq_ctx_alloc_num(struct vfio_pci_core_device *vdev, -+ unsigned long num) -+{ -+ vdev->ctx = kcalloc(num, sizeof(struct vfio_pci_irq_ctx), -+ GFP_KERNEL_ACCOUNT); -+ if (!vdev->ctx) -+ return -ENOMEM; -+ -+ return 0; -+} -+ - /* - * INTx - */ -@@ -55,14 +80,21 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused) - { - struct vfio_pci_core_device *vdev = opaque; - -- if (likely(is_intx(vdev) && !vdev->virq_disabled)) -- eventfd_signal(vdev->ctx[0].trigger, 1); -+ if (likely(is_intx(vdev) && !vdev->virq_disabled)) { -+ struct vfio_pci_irq_ctx *ctx; -+ -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ if (WARN_ON_ONCE(!ctx)) -+ return; -+ eventfd_signal(ctx->trigger, 1); -+ } - } - - /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. 
*/ - bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) - { - struct pci_dev *pdev = vdev->pdev; -+ struct vfio_pci_irq_ctx *ctx; - unsigned long flags; - bool masked_changed = false; - -@@ -77,7 +109,14 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) - if (unlikely(!is_intx(vdev))) { - if (vdev->pci_2_3) - pci_intx(pdev, 0); -- } else if (!vdev->ctx[0].masked) { -+ goto out_unlock; -+ } -+ -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ if (WARN_ON_ONCE(!ctx)) -+ goto out_unlock; -+ -+ if (!ctx->masked) { - /* - * Can't use check_and_mask here because we always want to - * mask, not just when something is pending. -@@ -87,10 +126,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) - else - disable_irq_nosync(pdev->irq); - -- vdev->ctx[0].masked = true; -+ ctx->masked = true; - masked_changed = true; - } - -+out_unlock: - spin_unlock_irqrestore(&vdev->irqlock, flags); - return masked_changed; - } -@@ -105,6 +145,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) - { - struct vfio_pci_core_device *vdev = opaque; - struct pci_dev *pdev = vdev->pdev; -+ struct vfio_pci_irq_ctx *ctx; - unsigned long flags; - int ret = 0; - -@@ -117,7 +158,14 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) - if (unlikely(!is_intx(vdev))) { - if (vdev->pci_2_3) - pci_intx(pdev, 1); -- } else if (vdev->ctx[0].masked && !vdev->virq_disabled) { -+ goto out_unlock; -+ } -+ -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ if (WARN_ON_ONCE(!ctx)) -+ goto out_unlock; -+ -+ if (ctx->masked && !vdev->virq_disabled) { - /* - * A pending interrupt here would immediately trigger, - * but we can avoid that overhead by just re-sending -@@ -129,9 +177,10 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) - } else - enable_irq(pdev->irq); - -- vdev->ctx[0].masked = (ret > 0); -+ ctx->masked = (ret > 0); - } - -+out_unlock: - spin_unlock_irqrestore(&vdev->irqlock, flags); - - return ret; -@@ -146,18 +195,23 @@ void 
vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) - static irqreturn_t vfio_intx_handler(int irq, void *dev_id) - { - struct vfio_pci_core_device *vdev = dev_id; -+ struct vfio_pci_irq_ctx *ctx; - unsigned long flags; - int ret = IRQ_NONE; - -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ if (WARN_ON_ONCE(!ctx)) -+ return ret; -+ - spin_lock_irqsave(&vdev->irqlock, flags); - - if (!vdev->pci_2_3) { - disable_irq_nosync(vdev->pdev->irq); -- vdev->ctx[0].masked = true; -+ ctx->masked = true; - ret = IRQ_HANDLED; -- } else if (!vdev->ctx[0].masked && /* may be shared */ -+ } else if (!ctx->masked && /* may be shared */ - pci_check_and_mask_intx(vdev->pdev)) { -- vdev->ctx[0].masked = true; -+ ctx->masked = true; - ret = IRQ_HANDLED; - } - -@@ -171,15 +225,24 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id) - - static int vfio_intx_enable(struct vfio_pci_core_device *vdev) - { -+ struct vfio_pci_irq_ctx *ctx; -+ int ret; -+ - if (!is_irq_none(vdev)) - return -EINVAL; - - if (!vdev->pdev->irq) - return -ENODEV; - -- vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT); -- if (!vdev->ctx) -- return -ENOMEM; -+ ret = vfio_irq_ctx_alloc_num(vdev, 1); -+ if (ret) -+ return ret; -+ -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ if (!ctx) { -+ vfio_irq_ctx_free_all(vdev); -+ return -EINVAL; -+ } - - vdev->num_ctx = 1; - -@@ -189,9 +252,9 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev) - * here, non-PCI-2.3 devices will have to wait until the - * interrupt is enabled. 
- */ -- vdev->ctx[0].masked = vdev->virq_disabled; -+ ctx->masked = vdev->virq_disabled; - if (vdev->pci_2_3) -- pci_intx(vdev->pdev, !vdev->ctx[0].masked); -+ pci_intx(vdev->pdev, !ctx->masked); - - vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; - -@@ -202,41 +265,46 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) - { - struct pci_dev *pdev = vdev->pdev; - unsigned long irqflags = IRQF_SHARED; -+ struct vfio_pci_irq_ctx *ctx; - struct eventfd_ctx *trigger; - unsigned long flags; - int ret; - -- if (vdev->ctx[0].trigger) { -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ if (WARN_ON_ONCE(!ctx)) -+ return -EINVAL; -+ -+ if (ctx->trigger) { - free_irq(pdev->irq, vdev); -- kfree(vdev->ctx[0].name); -- eventfd_ctx_put(vdev->ctx[0].trigger); -- vdev->ctx[0].trigger = NULL; -+ kfree(ctx->name); -+ eventfd_ctx_put(ctx->trigger); -+ ctx->trigger = NULL; - } - - if (fd < 0) /* Disable only */ - return 0; - -- vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", -- pci_name(pdev)); -- if (!vdev->ctx[0].name) -+ ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", -+ pci_name(pdev)); -+ if (!ctx->name) - return -ENOMEM; - - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { -- kfree(vdev->ctx[0].name); -+ kfree(ctx->name); - return PTR_ERR(trigger); - } - -- vdev->ctx[0].trigger = trigger; -+ ctx->trigger = trigger; - - if (!vdev->pci_2_3) - irqflags = 0; - - ret = request_irq(pdev->irq, vfio_intx_handler, -- irqflags, vdev->ctx[0].name, vdev); -+ irqflags, ctx->name, vdev); - if (ret) { -- vdev->ctx[0].trigger = NULL; -- kfree(vdev->ctx[0].name); -+ ctx->trigger = NULL; -+ kfree(ctx->name); - eventfd_ctx_put(trigger); - return ret; - } -@@ -246,7 +314,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) - * disable_irq won't. 
- */ - spin_lock_irqsave(&vdev->irqlock, flags); -- if (!vdev->pci_2_3 && vdev->ctx[0].masked) -+ if (!vdev->pci_2_3 && ctx->masked) - disable_irq_nosync(pdev->irq); - spin_unlock_irqrestore(&vdev->irqlock, flags); - -@@ -255,12 +323,18 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) - - static void vfio_intx_disable(struct vfio_pci_core_device *vdev) - { -- vfio_virqfd_disable(&vdev->ctx[0].unmask); -- vfio_virqfd_disable(&vdev->ctx[0].mask); -+ struct vfio_pci_irq_ctx *ctx; -+ -+ ctx = vfio_irq_ctx_get(vdev, 0); -+ WARN_ON_ONCE(!ctx); -+ if (ctx) { -+ vfio_virqfd_disable(&ctx->unmask); -+ vfio_virqfd_disable(&ctx->mask); -+ } - vfio_intx_set_signal(vdev, -1); - vdev->irq_type = VFIO_PCI_NUM_IRQS; - vdev->num_ctx = 0; -- kfree(vdev->ctx); -+ vfio_irq_ctx_free_all(vdev); - } - - /* -@@ -284,10 +358,9 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi - if (!is_irq_none(vdev)) - return -EINVAL; - -- vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), -- GFP_KERNEL_ACCOUNT); -- if (!vdev->ctx) -- return -ENOMEM; -+ ret = vfio_irq_ctx_alloc_num(vdev, nvec); -+ if (ret) -+ return ret; - - /* return the number of supported vectors if we can't get all: */ - cmd = vfio_pci_memory_lock_and_enable(vdev); -@@ -296,7 +369,7 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi - if (ret > 0) - pci_free_irq_vectors(pdev); - vfio_pci_memory_unlock_and_restore(vdev, cmd); -- kfree(vdev->ctx); -+ vfio_irq_ctx_free_all(vdev); - return ret; - } - vfio_pci_memory_unlock_and_restore(vdev, cmd); -@@ -320,6 +393,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, - unsigned int vector, int fd, bool msix) - { - struct pci_dev *pdev = vdev->pdev; -+ struct vfio_pci_irq_ctx *ctx; - struct eventfd_ctx *trigger; - int irq, ret; - u16 cmd; -@@ -327,33 +401,33 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, - if (vector >= vdev->num_ctx) - 
return -EINVAL; - -+ ctx = vfio_irq_ctx_get(vdev, vector); -+ if (!ctx) -+ return -EINVAL; - irq = pci_irq_vector(pdev, vector); - -- if (vdev->ctx[vector].trigger) { -- irq_bypass_unregister_producer(&vdev->ctx[vector].producer); -+ if (ctx->trigger) { -+ irq_bypass_unregister_producer(&ctx->producer); - - cmd = vfio_pci_memory_lock_and_enable(vdev); -- free_irq(irq, vdev->ctx[vector].trigger); -+ free_irq(irq, ctx->trigger); - vfio_pci_memory_unlock_and_restore(vdev, cmd); -- -- kfree(vdev->ctx[vector].name); -- eventfd_ctx_put(vdev->ctx[vector].trigger); -- vdev->ctx[vector].trigger = NULL; -+ kfree(ctx->name); -+ eventfd_ctx_put(ctx->trigger); -+ ctx->trigger = NULL; - } - - if (fd < 0) - return 0; - -- vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT, -- "vfio-msi%s[%d](%s)", -- msix ? "x" : "", vector, -- pci_name(pdev)); -- if (!vdev->ctx[vector].name) -+ ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)", -+ msix ? "x" : "", vector, pci_name(pdev)); -+ if (!ctx->name) - return -ENOMEM; - - trigger = eventfd_ctx_fdget(fd); - if (IS_ERR(trigger)) { -- kfree(vdev->ctx[vector].name); -+ kfree(ctx->name); - return PTR_ERR(trigger); - } - -@@ -372,26 +446,25 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, - pci_write_msi_msg(irq, &msg); - } - -- ret = request_irq(irq, vfio_msihandler, 0, -- vdev->ctx[vector].name, trigger); -+ ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger); - vfio_pci_memory_unlock_and_restore(vdev, cmd); - if (ret) { -- kfree(vdev->ctx[vector].name); -+ kfree(ctx->name); - eventfd_ctx_put(trigger); - return ret; - } - -- vdev->ctx[vector].producer.token = trigger; -- vdev->ctx[vector].producer.irq = irq; -- ret = irq_bypass_register_producer(&vdev->ctx[vector].producer); -+ ctx->producer.token = trigger; -+ ctx->producer.irq = irq; -+ ret = irq_bypass_register_producer(&ctx->producer); - if (unlikely(ret)) { - dev_info(&pdev->dev, - "irq bypass producer (token %p) registration fails: 
%d\n", -- vdev->ctx[vector].producer.token, ret); -+ ctx->producer.token, ret); - -- vdev->ctx[vector].producer.token = NULL; -+ ctx->producer.token = NULL; - } -- vdev->ctx[vector].trigger = trigger; -+ ctx->trigger = trigger; - - return 0; - } -@@ -421,13 +494,17 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start, - static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) - { - struct pci_dev *pdev = vdev->pdev; -+ struct vfio_pci_irq_ctx *ctx; - unsigned int i; - u16 cmd; - - for (i = 0; i < vdev->num_ctx; i++) { -- vfio_virqfd_disable(&vdev->ctx[i].unmask); -- vfio_virqfd_disable(&vdev->ctx[i].mask); -- vfio_msi_set_vector_signal(vdev, i, -1, msix); -+ ctx = vfio_irq_ctx_get(vdev, i); -+ if (ctx) { -+ vfio_virqfd_disable(&ctx->unmask); -+ vfio_virqfd_disable(&ctx->mask); -+ vfio_msi_set_vector_signal(vdev, i, -1, msix); -+ } - } - - cmd = vfio_pci_memory_lock_and_enable(vdev); -@@ -443,7 +520,7 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix) - - vdev->irq_type = VFIO_PCI_NUM_IRQS; - vdev->num_ctx = 0; -- kfree(vdev->ctx); -+ vfio_irq_ctx_free_all(vdev); - } - - /* -@@ -463,14 +540,18 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, - if (unmask) - vfio_pci_intx_unmask(vdev); - } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { -+ struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0); - int32_t fd = *(int32_t *)data; -+ -+ if (WARN_ON_ONCE(!ctx)) -+ return -EINVAL; - if (fd >= 0) - return vfio_virqfd_enable((void *) vdev, - vfio_pci_intx_unmask_handler, - vfio_send_intx_eventfd, NULL, -- &vdev->ctx[0].unmask, fd); -+ &ctx->unmask, fd); - -- vfio_virqfd_disable(&vdev->ctx[0].unmask); -+ vfio_virqfd_disable(&ctx->unmask); - } - - return 0; -@@ -543,6 +624,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, - unsigned index, unsigned start, - unsigned count, uint32_t flags, void *data) - { -+ struct vfio_pci_irq_ctx *ctx; - unsigned int 
i; - bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false; - -@@ -577,14 +659,15 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev, - return -EINVAL; - - for (i = start; i < start + count; i++) { -- if (!vdev->ctx[i].trigger) -+ ctx = vfio_irq_ctx_get(vdev, i); -+ if (!ctx || !ctx->trigger) - continue; - if (flags & VFIO_IRQ_SET_DATA_NONE) { -- eventfd_signal(vdev->ctx[i].trigger, 1); -+ eventfd_signal(ctx->trigger, 1); - } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { - uint8_t *bools = data; - if (bools[i - start]) -- eventfd_signal(vdev->ctx[i].trigger, 1); -+ eventfd_signal(ctx->trigger, 1); - } - } - return 0; --- -2.43.0 - diff --git a/queue-6.1/vfio-platform-create-persistent-irq-handlers.patch b/queue-6.1/vfio-platform-create-persistent-irq-handlers.patch new file mode 100644 index 00000000000..33127f0789b --- /dev/null +++ b/queue-6.1/vfio-platform-create-persistent-irq-handlers.patch @@ -0,0 +1,253 @@ +From stable+bounces-33772-greg=kroah.com@vger.kernel.org Fri Mar 29 22:39:32 2024 +From: Alex Williamson +Date: Fri, 29 Mar 2024 15:38:53 -0600 +Subject: vfio/platform: Create persistent IRQ handlers +To: stable@vger.kernel.org +Cc: Alex Williamson , sashal@kernel.org, gregkh@linuxfoundation.org, eric.auger@redhat.com, Kevin Tian +Message-ID: <20240329213856.2550762-7-alex.williamson@redhat.com> + +From: Alex Williamson + +[ Upstream commit 675daf435e9f8e5a5eab140a9864dfad6668b375 ] + +The vfio-platform SET_IRQS ioctl currently allows loopback triggering of +an interrupt before a signaling eventfd has been configured by the user, +which thereby allows a NULL pointer dereference. + +Rather than register the IRQ relative to a valid trigger, register all +IRQs in a disabled state in the device open path. This allows mask +operations on the IRQ to nest within the overall enable state governed +by a valid eventfd signal. This decouples @masked, protected by the +@locked spinlock from @trigger, protected via the @igate mutex. 
+ +In doing so, it's guaranteed that changes to @trigger cannot race the +IRQ handlers because the IRQ handler is synchronously disabled before +modifying the trigger, and loopback triggering of the IRQ via ioctl is +safe due to serialization with trigger changes via igate. + +For compatibility, request_irq() failures are maintained to be local to +the SET_IRQS ioctl rather than a fatal error in the open device path. +This allows, for example, a userspace driver with polling mode support +to continue to work regardless of moving the request_irq() call site. +This necessarily blocks all SET_IRQS access to the failed index. + +Cc: Eric Auger +Cc: stable@vger.kernel.org +Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd") +Reviewed-by: Kevin Tian +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-7-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/platform/vfio_platform_irq.c | 101 ++++++++++++++++++++---------- + 1 file changed, 68 insertions(+), 33 deletions(-) + +--- a/drivers/vfio/platform/vfio_platform_irq.c ++++ b/drivers/vfio/platform/vfio_platform_irq.c +@@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask( + return 0; + } + ++/* ++ * The trigger eventfd is guaranteed valid in the interrupt path ++ * and protected by the igate mutex when triggered via ioctl. 
++ */ ++static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) ++{ ++ if (likely(irq_ctx->trigger)) ++ eventfd_signal(irq_ctx->trigger, 1); ++} ++ + static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) + { + struct vfio_platform_irq *irq_ctx = dev_id; +@@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_h + spin_unlock_irqrestore(&irq_ctx->lock, flags); + + if (ret == IRQ_HANDLED) +- eventfd_signal(irq_ctx->trigger, 1); ++ vfio_send_eventfd(irq_ctx); + + return ret; + } +@@ -164,22 +174,19 @@ static irqreturn_t vfio_irq_handler(int + { + struct vfio_platform_irq *irq_ctx = dev_id; + +- eventfd_signal(irq_ctx->trigger, 1); ++ vfio_send_eventfd(irq_ctx); + + return IRQ_HANDLED; + } + + static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, +- int fd, irq_handler_t handler) ++ int fd) + { + struct vfio_platform_irq *irq = &vdev->irqs[index]; + struct eventfd_ctx *trigger; +- int ret; + + if (irq->trigger) { +- irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); +- free_irq(irq->hwirq, irq); +- kfree(irq->name); ++ disable_irq(irq->hwirq); + eventfd_ctx_put(irq->trigger); + irq->trigger = NULL; + } +@@ -187,30 +194,20 @@ static int vfio_set_trigger(struct vfio_ + if (fd < 0) /* Disable only */ + return 0; + +- irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", +- irq->hwirq, vdev->name); +- if (!irq->name) +- return -ENOMEM; +- + trigger = eventfd_ctx_fdget(fd); +- if (IS_ERR(trigger)) { +- kfree(irq->name); ++ if (IS_ERR(trigger)) + return PTR_ERR(trigger); +- } + + irq->trigger = trigger; + +- irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); +- ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); +- if (ret) { +- kfree(irq->name); +- eventfd_ctx_put(trigger); +- irq->trigger = NULL; +- return ret; +- } +- +- if (!irq->masked) +- enable_irq(irq->hwirq); ++ /* ++ * irq->masked effectively provides nested disables within the overall ++ * enable relative to trigger. 
Specifically request_irq() is called ++ * with NO_AUTOEN, therefore the IRQ is initially disabled. The user ++ * may only further disable the IRQ with a MASK operation because ++ * irq->masked is initially false. ++ */ ++ enable_irq(irq->hwirq); + + return 0; + } +@@ -229,7 +226,7 @@ static int vfio_platform_set_irq_trigger + handler = vfio_irq_handler; + + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) +- return vfio_set_trigger(vdev, index, -1, handler); ++ return vfio_set_trigger(vdev, index, -1); + + if (start != 0 || count != 1) + return -EINVAL; +@@ -237,7 +234,7 @@ static int vfio_platform_set_irq_trigger + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + +- return vfio_set_trigger(vdev, index, fd, handler); ++ return vfio_set_trigger(vdev, index, fd); + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { +@@ -261,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct + unsigned start, unsigned count, uint32_t flags, + void *data) = NULL; + ++ /* ++ * For compatibility, errors from request_irq() are local to the ++ * SET_IRQS path and reflected in the name pointer. This allows, ++ * for example, polling mode fallback for an exclusive IRQ failure. 
++ */ ++ if (IS_ERR(vdev->irqs[index].name)) ++ return PTR_ERR(vdev->irqs[index].name); ++ + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + func = vfio_platform_set_irq_mask; +@@ -281,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct + + int vfio_platform_irq_init(struct vfio_platform_device *vdev) + { +- int cnt = 0, i; ++ int cnt = 0, i, ret = 0; + + while (vdev->get_irq(vdev, cnt) >= 0) + cnt++; +@@ -292,29 +297,54 @@ int vfio_platform_irq_init(struct vfio_p + + for (i = 0; i < cnt; i++) { + int hwirq = vdev->get_irq(vdev, i); ++ irq_handler_t handler = vfio_irq_handler; + +- if (hwirq < 0) ++ if (hwirq < 0) { ++ ret = -EINVAL; + goto err; ++ } + + spin_lock_init(&vdev->irqs[i].lock); + + vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; + +- if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) ++ if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { + vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE + | VFIO_IRQ_INFO_AUTOMASKED; ++ handler = vfio_automasked_irq_handler; ++ } + + vdev->irqs[i].count = 1; + vdev->irqs[i].hwirq = hwirq; + vdev->irqs[i].masked = false; ++ vdev->irqs[i].name = kasprintf(GFP_KERNEL, ++ "vfio-irq[%d](%s)", hwirq, ++ vdev->name); ++ if (!vdev->irqs[i].name) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, ++ vdev->irqs[i].name, &vdev->irqs[i]); ++ if (ret) { ++ kfree(vdev->irqs[i].name); ++ vdev->irqs[i].name = ERR_PTR(ret); ++ } + } + + vdev->num_irqs = cnt; + + return 0; + err: ++ for (--i; i >= 0; i--) { ++ if (!IS_ERR(vdev->irqs[i].name)) { ++ free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); ++ kfree(vdev->irqs[i].name); ++ } ++ } + kfree(vdev->irqs); +- return -EINVAL; ++ return ret; + } + + void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) +@@ -324,7 +354,12 @@ void vfio_platform_irq_cleanup(struct vf + for (i = 0; i < vdev->num_irqs; i++) { + vfio_virqfd_disable(&vdev->irqs[i].mask); + vfio_virqfd_disable(&vdev->irqs[i].unmask); +- 
vfio_set_trigger(vdev, i, -1, NULL); ++ if (!IS_ERR(vdev->irqs[i].name)) { ++ free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); ++ if (vdev->irqs[i].trigger) ++ eventfd_ctx_put(vdev->irqs[i].trigger); ++ kfree(vdev->irqs[i].name); ++ } + } + + vdev->num_irqs = 0;