From: Greg Kroah-Hartman Date: Fri, 29 Mar 2024 12:00:29 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v6.7.12~170 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f8a235fbfe73d860c6019674d285f39a9e010111;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch vfio-pci-create-persistent-intx-handler.patch vfio-platform-create-persistent-irq-handlers.patch x86-kconfig-remove-config_amd_mem_encrypt_active_by_default.patch x86-sev-fix-position-dependent-variable-references-in-startup-code.patch --- diff --git a/queue-6.6/series b/queue-6.6/series index 778fcd44813..dffb4d0a025 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -275,3 +275,9 @@ xfs-update-dir3-leaf-block-metadata-after-swap.patch xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch btrfs-fix-deadlock-with-fiemap-and-extent-locking.patch +vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch +vfio-pci-create-persistent-intx-handler.patch +vfio-platform-create-persistent-irq-handlers.patch +vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch +x86-kconfig-remove-config_amd_mem_encrypt_active_by_default.patch +x86-sev-fix-position-dependent-variable-references-in-startup-code.patch diff --git a/queue-6.6/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch b/queue-6.6/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch new file mode 100644 index 00000000000..d022ee8bd88 --- /dev/null +++ b/queue-6.6/vfio-fsl-mc-block-calling-interrupt-handler-without-trigger.patch @@ -0,0 +1,53 @@ +From 7447d911af699a15f8d050dfcb7c680a86f87012 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Fri, 8 Mar 2024 16:05:28 -0700 +Subject: vfio/fsl-mc: Block calling interrupt handler without trigger + +From: Alex Williamson + +commit 7447d911af699a15f8d050dfcb7c680a86f87012 upstream. + +The eventfd_ctx trigger pointer of the vfio_fsl_mc_irq object is +initially NULL and may become NULL if the user sets the trigger +eventfd to -1. The interrupt handler itself is guaranteed that +trigger is always valid between request_irq() and free_irq(), but +the loopback testing mechanisms to invoke the handler function +need to test the trigger. The triggering and setting ioctl paths +both make use of igate and are therefore mutually exclusive. + +The vfio-fsl-mc driver does not make use of irqfds, nor does it +support any sort of masking operations, therefore unlike vfio-pci +and vfio-platform, the flow can remain essentially unchanged. 
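
For readers following the hunk below, the change reduces to guarding every loopback invocation on a configured trigger. A condensed sketch of that pattern, with a hypothetical helper name (the hunk itself open-codes the check) and using the two-argument eventfd_signal() signature that v6.6 still provides:

	#include <linux/eventfd.h>
	#include "vfio_fsl_mc_private.h"	/* struct vfio_fsl_mc_irq */

	/*
	 * Hypothetical helper, shown for illustration only.  Both loopback
	 * paths run under igate, so irq->trigger cannot change while we
	 * test and signal it.
	 */
	static void vfio_fsl_mc_send_eventfd(struct vfio_fsl_mc_irq *irq)
	{
		if (irq->trigger)	/* NULL until the user installs an eventfd */
			eventfd_signal(irq->trigger, 1);
	}
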
+ +Cc: Diana Craciun +Cc: +Fixes: cc0ee20bd969 ("vfio/fsl-mc: trigger an interrupt via eventfd") +Reviewed-by: Kevin Tian +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-8-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c ++++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +@@ -141,13 +141,14 @@ static int vfio_fsl_mc_set_irq_trigger(s + irq = &vdev->mc_irqs[index]; + + if (flags & VFIO_IRQ_SET_DATA_NONE) { +- vfio_fsl_mc_irq_handler(hwirq, irq); ++ if (irq->trigger) ++ eventfd_signal(irq->trigger); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + u8 trigger = *(u8 *)data; + +- if (trigger) +- vfio_fsl_mc_irq_handler(hwirq, irq); ++ if (trigger && irq->trigger) ++ eventfd_signal(irq->trigger); + } + + return 0; diff --git a/queue-6.6/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch b/queue-6.6/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch new file mode 100644 index 00000000000..e5b1f24cb72 --- /dev/null +++ b/queue-6.6/vfio-introduce-interface-to-flush-virqfd-inject-workqueue.patch @@ -0,0 +1,87 @@ +From b620ecbd17a03cacd06f014a5d3f3a11285ce053 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Fri, 8 Mar 2024 16:05:24 -0700 +Subject: vfio: Introduce interface to flush virqfd inject workqueue + +From: Alex Williamson + +commit b620ecbd17a03cacd06f014a5d3f3a11285ce053 upstream. + +In order to synchronize changes that can affect the thread callback, +introduce an interface to force a flush of the inject workqueue. The +irqfd pointer is only valid under spinlock, but the workqueue cannot +be flushed under spinlock. Therefore the flush work for the irqfd is +queued under spinlock. The vfio_irqfd_cleanup_wq workqueue is re-used +for queuing this work such that flushing the workqueue is also ordered +relative to shutdown. 
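
The new interface is consumed by the vfio/pci INTx patch later in this queue; its call site, condensed here from that patch, shows the intended ordering when an old trigger is dropped:

	/* condensed excerpt from vfio_intx_set_signal() as reworked later in this series */
	old = ctx->trigger;

	WRITE_ONCE(ctx->trigger, trigger);

	/* Releasing an old ctx requires synchronizing in-flight users */
	if (old) {
		synchronize_irq(pdev->irq);		/* hard IRQ handler is done with old */
		vfio_virqfd_flush_thread(&ctx->unmask);	/* unmask irqfd thread work is done with old */
		eventfd_ctx_put(old);
	}
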
+ +Reviewed-by: Kevin Tian +Reviewed-by: Reinette Chatre +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-4-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Stable-dep-of: 18c198c96a81 ("vfio/pci: Create persistent INTx handler") +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/virqfd.c | 21 +++++++++++++++++++++ + include/linux/vfio.h | 2 ++ + 2 files changed, 23 insertions(+) + +--- a/drivers/vfio/virqfd.c ++++ b/drivers/vfio/virqfd.c +@@ -101,6 +101,13 @@ static void virqfd_inject(struct work_st + virqfd->thread(virqfd->opaque, virqfd->data); + } + ++static void virqfd_flush_inject(struct work_struct *work) ++{ ++ struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); ++ ++ flush_work(&virqfd->inject); ++} ++ + int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), +@@ -124,6 +131,7 @@ int vfio_virqfd_enable(void *opaque, + + INIT_WORK(&virqfd->shutdown, virqfd_shutdown); + INIT_WORK(&virqfd->inject, virqfd_inject); ++ INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); + + irqfd = fdget(fd); + if (!irqfd.file) { +@@ -213,3 +221,16 @@ void vfio_virqfd_disable(struct virqfd * + flush_workqueue(vfio_irqfd_cleanup_wq); + } + EXPORT_SYMBOL_GPL(vfio_virqfd_disable); ++ ++void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&virqfd_lock, flags); ++ if (*pvirqfd && (*pvirqfd)->thread) ++ queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject); ++ spin_unlock_irqrestore(&virqfd_lock, flags); ++ ++ flush_workqueue(vfio_irqfd_cleanup_wq); ++} ++EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); +--- a/include/linux/vfio.h ++++ b/include/linux/vfio.h +@@ -349,6 +349,7 @@ struct virqfd { + wait_queue_entry_t wait; + poll_table pt; + struct work_struct shutdown; ++ struct work_struct flush_inject; + struct virqfd **pvirqfd; + }; + +@@ -356,5 +357,6 @@ int vfio_virqfd_enable(void *opaque, int + void (*thread)(void *, void *), void *data, + struct virqfd **pvirqfd, int fd); + void vfio_virqfd_disable(struct virqfd **pvirqfd); ++void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); + + #endif /* VFIO_H */ diff --git a/queue-6.6/vfio-pci-create-persistent-intx-handler.patch b/queue-6.6/vfio-pci-create-persistent-intx-handler.patch new file mode 100644 index 00000000000..b271accaaaa --- /dev/null +++ b/queue-6.6/vfio-pci-create-persistent-intx-handler.patch @@ -0,0 +1,266 @@ +From 18c198c96a815c962adc2b9b77909eec0be7df4d Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Fri, 8 Mar 2024 16:05:25 -0700 +Subject: vfio/pci: Create persistent INTx handler + +From: Alex Williamson + +commit 18c198c96a815c962adc2b9b77909eec0be7df4d upstream. + +A vulnerability exists where the eventfd for INTx signaling can be +deconfigured, which unregisters the IRQ handler but still allows +eventfds to be signaled with a NULL context through the SET_IRQS ioctl +or through unmask irqfd if the device interrupt is pending. + +Ideally this could be solved with some additional locking; the igate +mutex serializes the ioctl and config space accesses, and the interrupt +handler is unregistered relative to the trigger, but the irqfd path +runs asynchronous to those. The igate mutex cannot be acquired from the +atomic context of the eventfd wake function. Disabling the irqfd +relative to the eventfd registration is potentially incompatible with +existing userspace. 
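
Condensed from the pre-patch code visible in the hunks below, the window described above looks roughly like this; the interleaving is illustrative only:

	/* ioctl path (pre-patch), under igate: user clears the trigger with fd == -1 */
	free_irq(pdev->irq, vdev);
	kfree(ctx->name);
	eventfd_ctx_put(ctx->trigger);
	ctx->trigger = NULL;

	/* meanwhile, unmask irqfd callback in atomic context, not under igate */
	eventfd_signal(ctx->trigger, 1);	/* trigger may already be NULL here */
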
+ +As a result, the solution implemented here moves configuration of the +INTx interrupt handler to track the lifetime of the INTx context object +and irq_type configuration, rather than registration of a particular +trigger eventfd. Synchronization is added between the ioctl path and +eventfd_signal() wrapper such that the eventfd trigger can be +dynamically updated relative to in-flight interrupts or irqfd callbacks. + +Cc: +Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver") +Reported-by: Reinette Chatre +Reviewed-by: Kevin Tian +Reviewed-by: Reinette Chatre +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/pci/vfio_pci_intrs.c | 145 ++++++++++++++++++++------------------ + 1 file changed, 78 insertions(+), 67 deletions(-) + +--- a/drivers/vfio/pci/vfio_pci_intrs.c ++++ b/drivers/vfio/pci/vfio_pci_intrs.c +@@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void + + if (likely(is_intx(vdev) && !vdev->virq_disabled)) { + struct vfio_pci_irq_ctx *ctx; ++ struct eventfd_ctx *trigger; + + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + return; +- eventfd_signal(ctx->trigger, 1); ++ ++ trigger = READ_ONCE(ctx->trigger); ++ if (likely(trigger)) ++ eventfd_signal(trigger, 1); + } + } + +@@ -253,100 +257,100 @@ static irqreturn_t vfio_intx_handler(int + return ret; + } + +-static int vfio_intx_enable(struct vfio_pci_core_device *vdev) ++static int vfio_intx_enable(struct vfio_pci_core_device *vdev, ++ struct eventfd_ctx *trigger) + { ++ struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_irq_ctx *ctx; ++ unsigned long irqflags; ++ char *name; ++ int ret; + + if (!is_irq_none(vdev)) + return -EINVAL; + +- if (!vdev->pdev->irq) ++ if (!pdev->irq) + return -ENODEV; + ++ name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev)); ++ if (!name) ++ return -ENOMEM; ++ + ctx = vfio_irq_ctx_alloc(vdev, 0); + if (!ctx) + return -ENOMEM; + ++ ctx->name = name; ++ ctx->trigger = trigger; ++ + /* +- * If the virtual interrupt is masked, restore it. Devices +- * supporting DisINTx can be masked at the hardware level +- * here, non-PCI-2.3 devices will have to wait until the +- * interrupt is enabled. ++ * Fill the initial masked state based on virq_disabled. After ++ * enable, changing the DisINTx bit in vconfig directly changes INTx ++ * masking. igate prevents races during setup, once running masked ++ * is protected via irqlock. ++ * ++ * Devices supporting DisINTx also reflect the current mask state in ++ * the physical DisINTx bit, which is not affected during IRQ setup. ++ * ++ * Devices without DisINTx support require an exclusive interrupt. ++ * IRQ masking is performed at the IRQ chip. Again, igate protects ++ * against races during setup and IRQ handlers and irqfds are not ++ * yet active, therefore masked is stable and can be used to ++ * conditionally auto-enable the IRQ. ++ * ++ * irq_type must be stable while the IRQ handler is registered, ++ * therefore it must be set before request_irq(). + */ + ctx->masked = vdev->virq_disabled; +- if (vdev->pci_2_3) +- pci_intx(vdev->pdev, !ctx->masked); ++ if (vdev->pci_2_3) { ++ pci_intx(pdev, !ctx->masked); ++ irqflags = IRQF_SHARED; ++ } else { ++ irqflags = ctx->masked ? 
IRQF_NO_AUTOEN : 0; ++ } + + vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; + ++ ret = request_irq(pdev->irq, vfio_intx_handler, ++ irqflags, ctx->name, vdev); ++ if (ret) { ++ vdev->irq_type = VFIO_PCI_NUM_IRQS; ++ kfree(name); ++ vfio_irq_ctx_free(vdev, ctx, 0); ++ return ret; ++ } ++ + return 0; + } + +-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd) ++static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, ++ struct eventfd_ctx *trigger) + { + struct pci_dev *pdev = vdev->pdev; +- unsigned long irqflags = IRQF_SHARED; + struct vfio_pci_irq_ctx *ctx; +- struct eventfd_ctx *trigger; +- unsigned long flags; +- int ret; ++ struct eventfd_ctx *old; + + ctx = vfio_irq_ctx_get(vdev, 0); + if (WARN_ON_ONCE(!ctx)) + return -EINVAL; + +- if (ctx->trigger) { +- free_irq(pdev->irq, vdev); +- kfree(ctx->name); +- eventfd_ctx_put(ctx->trigger); +- ctx->trigger = NULL; +- } +- +- if (fd < 0) /* Disable only */ +- return 0; ++ old = ctx->trigger; + +- ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", +- pci_name(pdev)); +- if (!ctx->name) +- return -ENOMEM; ++ WRITE_ONCE(ctx->trigger, trigger); + +- trigger = eventfd_ctx_fdget(fd); +- if (IS_ERR(trigger)) { +- kfree(ctx->name); +- return PTR_ERR(trigger); ++ /* Releasing an old ctx requires synchronizing in-flight users */ ++ if (old) { ++ synchronize_irq(pdev->irq); ++ vfio_virqfd_flush_thread(&ctx->unmask); ++ eventfd_ctx_put(old); + } + +- ctx->trigger = trigger; +- +- /* +- * Devices without DisINTx support require an exclusive interrupt, +- * IRQ masking is performed at the IRQ chip. The masked status is +- * protected by vdev->irqlock. Setup the IRQ without auto-enable and +- * unmask as necessary below under lock. DisINTx is unmodified by +- * the IRQ configuration and may therefore use auto-enable. 
+- */ +- if (!vdev->pci_2_3) +- irqflags = IRQF_NO_AUTOEN; +- +- ret = request_irq(pdev->irq, vfio_intx_handler, +- irqflags, ctx->name, vdev); +- if (ret) { +- ctx->trigger = NULL; +- kfree(ctx->name); +- eventfd_ctx_put(trigger); +- return ret; +- } +- +- spin_lock_irqsave(&vdev->irqlock, flags); +- if (!vdev->pci_2_3 && !ctx->masked) +- enable_irq(pdev->irq); +- spin_unlock_irqrestore(&vdev->irqlock, flags); +- + return 0; + } + + static void vfio_intx_disable(struct vfio_pci_core_device *vdev) + { ++ struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_irq_ctx *ctx; + + ctx = vfio_irq_ctx_get(vdev, 0); +@@ -354,10 +358,13 @@ static void vfio_intx_disable(struct vfi + if (ctx) { + vfio_virqfd_disable(&ctx->unmask); + vfio_virqfd_disable(&ctx->mask); ++ free_irq(pdev->irq, vdev); ++ if (ctx->trigger) ++ eventfd_ctx_put(ctx->trigger); ++ kfree(ctx->name); ++ vfio_irq_ctx_free(vdev, ctx, 0); + } +- vfio_intx_set_signal(vdev, -1); + vdev->irq_type = VFIO_PCI_NUM_IRQS; +- vfio_irq_ctx_free(vdev, ctx, 0); + } + + /* +@@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(str + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { ++ struct eventfd_ctx *trigger = NULL; + int32_t fd = *(int32_t *)data; + int ret; + ++ if (fd >= 0) { ++ trigger = eventfd_ctx_fdget(fd); ++ if (IS_ERR(trigger)) ++ return PTR_ERR(trigger); ++ } ++ + if (is_intx(vdev)) +- return vfio_intx_set_signal(vdev, fd); ++ ret = vfio_intx_set_signal(vdev, trigger); ++ else ++ ret = vfio_intx_enable(vdev, trigger); + +- ret = vfio_intx_enable(vdev); +- if (ret) +- return ret; +- +- ret = vfio_intx_set_signal(vdev, fd); +- if (ret) +- vfio_intx_disable(vdev); ++ if (ret && trigger) ++ eventfd_ctx_put(trigger); + + return ret; + } diff --git a/queue-6.6/vfio-platform-create-persistent-irq-handlers.patch b/queue-6.6/vfio-platform-create-persistent-irq-handlers.patch new file mode 100644 index 00000000000..4a479baf14f --- /dev/null +++ b/queue-6.6/vfio-platform-create-persistent-irq-handlers.patch @@ -0,0 +1,249 @@ +From 675daf435e9f8e5a5eab140a9864dfad6668b375 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Fri, 8 Mar 2024 16:05:27 -0700 +Subject: vfio/platform: Create persistent IRQ handlers + +From: Alex Williamson + +commit 675daf435e9f8e5a5eab140a9864dfad6668b375 upstream. + +The vfio-platform SET_IRQS ioctl currently allows loopback triggering of +an interrupt before a signaling eventfd has been configured by the user, +which thereby allows a NULL pointer dereference. + +Rather than register the IRQ relative to a valid trigger, register all +IRQs in a disabled state in the device open path. This allows mask +operations on the IRQ to nest within the overall enable state governed +by a valid eventfd signal. This decouples @masked, protected by the +@locked spinlock from @trigger, protected via the @igate mutex. + +In doing so, it's guaranteed that changes to @trigger cannot race the +IRQ handlers because the IRQ handler is synchronously disabled before +modifying the trigger, and loopback triggering of the IRQ via ioctl is +safe due to serialization with trigger changes via igate. + +For compatibility, request_irq() failures are maintained to be local to +the SET_IRQS ioctl rather than a fatal error in the open device path. +This allows, for example, a userspace driver with polling mode support +to continue to work regardless of moving the request_irq() call site. +This necessarily blocks all SET_IRQS access to the failed index. 
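
Condensed from the hunks below (error handling trimmed), the resulting lifecycle registers each handler once at open time and then treats enable/disable as the per-trigger operation; all names come from the driver:

	/* vfio_platform_irq_init(): register each handler once, line left disabled */
	ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN,
			  vdev->irqs[i].name, &vdev->irqs[i]);
	if (ret) {
		kfree(vdev->irqs[i].name);
		vdev->irqs[i].name = ERR_PTR(ret);	/* surfaced later from SET_IRQS */
	}

	/* vfio_set_trigger(), fd >= 0: publish the eventfd, then turn the line on */
	irq->trigger = eventfd_ctx_fdget(fd);
	enable_irq(irq->hwirq);

	/* vfio_set_trigger(), fd < 0: quiesce the handler before dropping the eventfd */
	disable_irq(irq->hwirq);
	eventfd_ctx_put(irq->trigger);
	irq->trigger = NULL;
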
+ +Cc: Eric Auger +Cc: +Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd") +Reviewed-by: Kevin Tian +Reviewed-by: Eric Auger +Link: https://lore.kernel.org/r/20240308230557.805580-7-alex.williamson@redhat.com +Signed-off-by: Alex Williamson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vfio/platform/vfio_platform_irq.c | 100 ++++++++++++++++++++---------- + 1 file changed, 68 insertions(+), 32 deletions(-) + +--- a/drivers/vfio/platform/vfio_platform_irq.c ++++ b/drivers/vfio/platform/vfio_platform_irq.c +@@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask( + return 0; + } + ++/* ++ * The trigger eventfd is guaranteed valid in the interrupt path ++ * and protected by the igate mutex when triggered via ioctl. ++ */ ++static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx) ++{ ++ if (likely(irq_ctx->trigger)) ++ eventfd_signal(irq_ctx->trigger, 1); ++} ++ + static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) + { + struct vfio_platform_irq *irq_ctx = dev_id; +@@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_h + spin_unlock_irqrestore(&irq_ctx->lock, flags); + + if (ret == IRQ_HANDLED) +- eventfd_signal(irq_ctx->trigger, 1); ++ vfio_send_eventfd(irq_ctx); + + return ret; + } +@@ -164,52 +174,40 @@ static irqreturn_t vfio_irq_handler(int + { + struct vfio_platform_irq *irq_ctx = dev_id; + +- eventfd_signal(irq_ctx->trigger, 1); ++ vfio_send_eventfd(irq_ctx); + + return IRQ_HANDLED; + } + + static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, +- int fd, irq_handler_t handler) ++ int fd) + { + struct vfio_platform_irq *irq = &vdev->irqs[index]; + struct eventfd_ctx *trigger; +- int ret; + + if (irq->trigger) { +- irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN); +- free_irq(irq->hwirq, irq); +- kfree(irq->name); ++ disable_irq(irq->hwirq); + eventfd_ctx_put(irq->trigger); + irq->trigger = NULL; + } + + if (fd < 0) /* Disable only */ + return 0; +- irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)", +- irq->hwirq, vdev->name); +- if (!irq->name) +- return -ENOMEM; + + trigger = eventfd_ctx_fdget(fd); +- if (IS_ERR(trigger)) { +- kfree(irq->name); ++ if (IS_ERR(trigger)) + return PTR_ERR(trigger); +- } + + irq->trigger = trigger; + +- irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); +- ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); +- if (ret) { +- kfree(irq->name); +- eventfd_ctx_put(trigger); +- irq->trigger = NULL; +- return ret; +- } +- +- if (!irq->masked) +- enable_irq(irq->hwirq); ++ /* ++ * irq->masked effectively provides nested disables within the overall ++ * enable relative to trigger. Specifically request_irq() is called ++ * with NO_AUTOEN, therefore the IRQ is initially disabled. The user ++ * may only further disable the IRQ with a MASK operations because ++ * irq->masked is initially false. 
++ */ ++ enable_irq(irq->hwirq); + + return 0; + } +@@ -228,7 +226,7 @@ static int vfio_platform_set_irq_trigger + handler = vfio_irq_handler; + + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) +- return vfio_set_trigger(vdev, index, -1, handler); ++ return vfio_set_trigger(vdev, index, -1); + + if (start != 0 || count != 1) + return -EINVAL; +@@ -236,7 +234,7 @@ static int vfio_platform_set_irq_trigger + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + +- return vfio_set_trigger(vdev, index, fd, handler); ++ return vfio_set_trigger(vdev, index, fd); + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { +@@ -260,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct + unsigned start, unsigned count, uint32_t flags, + void *data) = NULL; + ++ /* ++ * For compatibility, errors from request_irq() are local to the ++ * SET_IRQS path and reflected in the name pointer. This allows, ++ * for example, polling mode fallback for an exclusive IRQ failure. ++ */ ++ if (IS_ERR(vdev->irqs[index].name)) ++ return PTR_ERR(vdev->irqs[index].name); ++ + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + func = vfio_platform_set_irq_mask; +@@ -280,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct + + int vfio_platform_irq_init(struct vfio_platform_device *vdev) + { +- int cnt = 0, i; ++ int cnt = 0, i, ret = 0; + + while (vdev->get_irq(vdev, cnt) >= 0) + cnt++; +@@ -292,29 +298,54 @@ int vfio_platform_irq_init(struct vfio_p + + for (i = 0; i < cnt; i++) { + int hwirq = vdev->get_irq(vdev, i); ++ irq_handler_t handler = vfio_irq_handler; + +- if (hwirq < 0) ++ if (hwirq < 0) { ++ ret = -EINVAL; + goto err; ++ } + + spin_lock_init(&vdev->irqs[i].lock); + + vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; + +- if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) ++ if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) { + vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE + | VFIO_IRQ_INFO_AUTOMASKED; ++ handler = vfio_automasked_irq_handler; ++ } + + vdev->irqs[i].count = 1; + vdev->irqs[i].hwirq = hwirq; + vdev->irqs[i].masked = false; ++ vdev->irqs[i].name = kasprintf(GFP_KERNEL_ACCOUNT, ++ "vfio-irq[%d](%s)", hwirq, ++ vdev->name); ++ if (!vdev->irqs[i].name) { ++ ret = -ENOMEM; ++ goto err; ++ } ++ ++ ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN, ++ vdev->irqs[i].name, &vdev->irqs[i]); ++ if (ret) { ++ kfree(vdev->irqs[i].name); ++ vdev->irqs[i].name = ERR_PTR(ret); ++ } + } + + vdev->num_irqs = cnt; + + return 0; + err: ++ for (--i; i >= 0; i--) { ++ if (!IS_ERR(vdev->irqs[i].name)) { ++ free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); ++ kfree(vdev->irqs[i].name); ++ } ++ } + kfree(vdev->irqs); +- return -EINVAL; ++ return ret; + } + + void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) +@@ -324,7 +355,12 @@ void vfio_platform_irq_cleanup(struct vf + for (i = 0; i < vdev->num_irqs; i++) { + vfio_virqfd_disable(&vdev->irqs[i].mask); + vfio_virqfd_disable(&vdev->irqs[i].unmask); +- vfio_set_trigger(vdev, i, -1, NULL); ++ if (!IS_ERR(vdev->irqs[i].name)) { ++ free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]); ++ if (vdev->irqs[i].trigger) ++ eventfd_ctx_put(vdev->irqs[i].trigger); ++ kfree(vdev->irqs[i].name); ++ } + } + + vdev->num_irqs = 0; diff --git a/queue-6.6/x86-kconfig-remove-config_amd_mem_encrypt_active_by_default.patch b/queue-6.6/x86-kconfig-remove-config_amd_mem_encrypt_active_by_default.patch new file mode 100644 index 00000000000..23afa2b70df --- /dev/null +++ 
b/queue-6.6/x86-kconfig-remove-config_amd_mem_encrypt_active_by_default.patch @@ -0,0 +1,123 @@ +From 29956748339aa8757a7e2f927a8679dd08f24bb6 Mon Sep 17 00:00:00 2001 +From: "Borislav Petkov (AMD)" +Date: Fri, 2 Feb 2024 17:29:32 +0100 +Subject: x86/Kconfig: Remove CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT + +From: Borislav Petkov (AMD) + +commit 29956748339aa8757a7e2f927a8679dd08f24bb6 upstream. + +It was meant well at the time but nothing's using it so get rid of it. + +Signed-off-by: Borislav Petkov (AMD) +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20240202163510.GDZb0Zvj8qOndvFOiZ@fat_crate.local +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/admin-guide/kernel-parameters.txt | 4 +--- + Documentation/arch/x86/amd-memory-encryption.rst | 16 ++++++++-------- + arch/x86/Kconfig | 13 ------------- + arch/x86/mm/mem_encrypt_identity.c | 11 +---------- + 4 files changed, 10 insertions(+), 34 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -3269,9 +3269,7 @@ + + mem_encrypt= [X86-64] AMD Secure Memory Encryption (SME) control + Valid arguments: on, off +- Default (depends on kernel configuration option): +- on (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) +- off (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=n) ++ Default: off + mem_encrypt=on: Activate SME + mem_encrypt=off: Do not activate SME + +--- a/Documentation/arch/x86/amd-memory-encryption.rst ++++ b/Documentation/arch/x86/amd-memory-encryption.rst +@@ -87,14 +87,14 @@ The state of SME in the Linux kernel can + kernel is non-zero). + + SME can also be enabled and activated in the BIOS. If SME is enabled and +-activated in the BIOS, then all memory accesses will be encrypted and it will +-not be necessary to activate the Linux memory encryption support. If the BIOS +-merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate +-memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or +-by supplying mem_encrypt=on on the kernel command line. However, if BIOS does +-not enable SME, then Linux will not be able to activate memory encryption, even +-if configured to do so by default or the mem_encrypt=on command line parameter +-is specified. ++activated in the BIOS, then all memory accesses will be encrypted and it ++will not be necessary to activate the Linux memory encryption support. ++ ++If the BIOS merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), ++then memory encryption can be enabled by supplying mem_encrypt=on on the ++kernel command line. However, if BIOS does not enable SME, then Linux ++will not be able to activate memory encryption, even if configured to do ++so by default or the mem_encrypt=on command line parameter is specified. + + Secure Nested Paging (SNP) + ========================== +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1514,19 +1514,6 @@ config AMD_MEM_ENCRYPT + This requires an AMD processor that supports Secure Memory + Encryption (SME). + +-config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT +- bool "Activate AMD Secure Memory Encryption (SME) by default" +- depends on AMD_MEM_ENCRYPT +- help +- Say yes to have system memory encrypted by default if running on +- an AMD processor that supports Secure Memory Encryption (SME). +- +- If set to Y, then the encryption of system memory can be +- deactivated with the mem_encrypt=off command line option. +- +- If set to N, then the encryption of system memory can be +- activated with the mem_encrypt=on command line option. 
+- + # Common NUMA Features + config NUMA + bool "NUMA Memory Allocation and Scheduler Support" +--- a/arch/x86/mm/mem_encrypt_identity.c ++++ b/arch/x86/mm/mem_encrypt_identity.c +@@ -97,7 +97,6 @@ static char sme_workarea[2 * PMD_SIZE] _ + + static char sme_cmdline_arg[] __initdata = "mem_encrypt"; + static char sme_cmdline_on[] __initdata = "on"; +-static char sme_cmdline_off[] __initdata = "off"; + + static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) + { +@@ -504,7 +503,7 @@ void __init sme_encrypt_kernel(struct bo + + void __init sme_enable(struct boot_params *bp) + { +- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; ++ const char *cmdline_ptr, *cmdline_arg, *cmdline_on; + unsigned int eax, ebx, ecx, edx; + unsigned long feature_mask; + unsigned long me_mask; +@@ -587,12 +586,6 @@ void __init sme_enable(struct boot_param + asm ("lea sme_cmdline_on(%%rip), %0" + : "=r" (cmdline_on) + : "p" (sme_cmdline_on)); +- asm ("lea sme_cmdline_off(%%rip), %0" +- : "=r" (cmdline_off) +- : "p" (sme_cmdline_off)); +- +- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) +- sme_me_mask = me_mask; + + cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | + ((u64)bp->ext_cmd_line_ptr << 32)); +@@ -602,8 +595,6 @@ void __init sme_enable(struct boot_param + + if (!strncmp(buffer, cmdline_on, sizeof(buffer))) + sme_me_mask = me_mask; +- else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) +- sme_me_mask = 0; + + out: + if (sme_me_mask) { diff --git a/queue-6.6/x86-sev-fix-position-dependent-variable-references-in-startup-code.patch b/queue-6.6/x86-sev-fix-position-dependent-variable-references-in-startup-code.patch new file mode 100644 index 00000000000..20bd8d0a501 --- /dev/null +++ b/queue-6.6/x86-sev-fix-position-dependent-variable-references-in-startup-code.patch @@ -0,0 +1,279 @@ +From 1c811d403afd73f04bde82b83b24c754011bd0e8 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel +Date: Sat, 3 Feb 2024 13:53:06 +0100 +Subject: x86/sev: Fix position dependent variable references in startup code + +From: Ard Biesheuvel + +commit 1c811d403afd73f04bde82b83b24c754011bd0e8 upstream. + +The early startup code executes from a 1:1 mapping of memory, which +differs from the mapping that the code was linked and/or relocated to +run at. The latter mapping is not active yet at this point, and so +symbol references that rely on it will fault. + +Given that the core kernel is built without -fPIC, symbol references are +typically emitted as absolute, and so any such references occuring in +the early startup code will therefore crash the kernel. + +While an attempt was made to work around this for the early SEV/SME +startup code, by forcing RIP-relative addressing for certain global +SEV/SME variables via inline assembly (see snp_cpuid_get_table() for +example), RIP-relative addressing must be pervasively enforced for +SEV/SME global variables when accessed prior to page table fixups. + +__startup_64() already handles this issue for select non-SEV/SME global +variables using fixup_pointer(), which adjusts the pointer relative to a +`physaddr` argument. To avoid having to pass around this `physaddr` +argument across all functions needing to apply pointer fixups, introduce +a macro RIP_RELATIVE_REF() which generates a RIP-relative reference to +a given global variable. It is used where necessary to force +RIP-relative accesses to global variables. 
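
Side by side, the accessor added by the asm.h hunk and typical startup-time accesses taken from the later hunks (condensed; all names come from the patch itself):

	/* arch/x86/include/asm/asm.h, non-PIC build */
	static __always_inline __pure void *rip_rel_ptr(void *p)
	{
		asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));

		return p;
	}
	#define RIP_REL_REF(var)	(*(typeof(&(var)))rip_rel_ptr(&(var)))

	/* startup-time globals are then read and written through the 1:1 mapping safely */
	RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
	RIP_REL_REF(sme_me_mask) = me_mask;

	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;
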
+ +For backporting purposes, this patch makes no attempt at cleaning up +other occurrences of this pattern, involving either inline asm or +fixup_pointer(). Those will be addressed later. + + [ bp: Call it "rip_rel_ref" everywhere like other code shortens + "rIP-relative reference" and make the asm wrapper __always_inline. ] + +Co-developed-by: Kevin Loughlin +Signed-off-by: Kevin Loughlin +Signed-off-by: Ard Biesheuvel +Signed-off-by: Borislav Petkov (AMD) +Cc: +Link: https://lore.kernel.org/all/20240130220845.1978329-1-kevinloughlin@google.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/coco/core.c | 7 +------ + arch/x86/include/asm/asm.h | 14 ++++++++++++++ + arch/x86/include/asm/coco.h | 8 +++++++- + arch/x86/include/asm/mem_encrypt.h | 15 +++++++++------ + arch/x86/kernel/sev-shared.c | 12 ++++++------ + arch/x86/kernel/sev.c | 4 ++-- + arch/x86/mm/mem_encrypt_identity.c | 27 ++++++++++++--------------- + 7 files changed, 51 insertions(+), 36 deletions(-) + +--- a/arch/x86/coco/core.c ++++ b/arch/x86/coco/core.c +@@ -14,7 +14,7 @@ + #include + + enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; +-static u64 cc_mask __ro_after_init; ++u64 cc_mask __ro_after_init; + + static bool noinstr intel_cc_platform_has(enum cc_attr attr) + { +@@ -148,8 +148,3 @@ u64 cc_mkdec(u64 val) + } + } + EXPORT_SYMBOL_GPL(cc_mkdec); +- +-__init void cc_set_mask(u64 mask) +-{ +- cc_mask = mask; +-} +--- a/arch/x86/include/asm/asm.h ++++ b/arch/x86/include/asm/asm.h +@@ -113,6 +113,20 @@ + + #endif + ++#ifndef __ASSEMBLY__ ++#ifndef __pic__ ++static __always_inline __pure void *rip_rel_ptr(void *p) ++{ ++ asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p)); ++ ++ return p; ++} ++#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var))) ++#else ++#define RIP_REL_REF(var) (var) ++#endif ++#endif ++ + /* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". 
+--- a/arch/x86/include/asm/coco.h ++++ b/arch/x86/include/asm/coco.h +@@ -2,6 +2,7 @@ + #ifndef _ASM_X86_COCO_H + #define _ASM_X86_COCO_H + ++#include + #include + + enum cc_vendor { +@@ -11,9 +12,14 @@ enum cc_vendor { + }; + + extern enum cc_vendor cc_vendor; ++extern u64 cc_mask; + + #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +-void cc_set_mask(u64 mask); ++static inline void cc_set_mask(u64 mask) ++{ ++ RIP_REL_REF(cc_mask) = mask; ++} ++ + u64 cc_mkenc(u64 val); + u64 cc_mkdec(u64 val); + #else +--- a/arch/x86/include/asm/mem_encrypt.h ++++ b/arch/x86/include/asm/mem_encrypt.h +@@ -15,7 +15,8 @@ + #include + #include + +-#include ++#include ++struct boot_params; + + #ifdef CONFIG_X86_MEM_ENCRYPT + void __init mem_encrypt_init(void); +@@ -57,6 +58,11 @@ void __init mem_encrypt_free_decrypted_m + + void __init sev_es_init_vc_handling(void); + ++static inline u64 sme_get_me_mask(void) ++{ ++ return RIP_REL_REF(sme_me_mask); ++} ++ + #define __bss_decrypted __section(".bss..decrypted") + + #else /* !CONFIG_AMD_MEM_ENCRYPT */ +@@ -89,6 +95,8 @@ early_set_mem_enc_dec_hypercall(unsigned + + static inline void mem_encrypt_free_decrypted_mem(void) { } + ++static inline u64 sme_get_me_mask(void) { return 0; } ++ + #define __bss_decrypted + + #endif /* CONFIG_AMD_MEM_ENCRYPT */ +@@ -106,11 +114,6 @@ void add_encrypt_protection_map(void); + + extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; + +-static inline u64 sme_get_me_mask(void) +-{ +- return sme_me_mask; +-} +- + #endif /* __ASSEMBLY__ */ + + #endif /* __X86_MEM_ENCRYPT_H__ */ +--- a/arch/x86/kernel/sev-shared.c ++++ b/arch/x86/kernel/sev-shared.c +@@ -556,9 +556,9 @@ static int snp_cpuid(struct ghcb *ghcb, + leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; + + /* Skip post-processing for out-of-range zero leafs. */ +- if (!(leaf->fn <= cpuid_std_range_max || +- (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || +- (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) ++ if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) || ++ (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) || ++ (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max)))) + return 0; + } + +@@ -1063,11 +1063,11 @@ static void __init setup_cpuid_table(con + const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; + + if (fn->eax_in == 0x0) +- cpuid_std_range_max = fn->eax; ++ RIP_REL_REF(cpuid_std_range_max) = fn->eax; + else if (fn->eax_in == 0x40000000) +- cpuid_hyp_range_max = fn->eax; ++ RIP_REL_REF(cpuid_hyp_range_max) = fn->eax; + else if (fn->eax_in == 0x80000000) +- cpuid_ext_range_max = fn->eax; ++ RIP_REL_REF(cpuid_ext_range_max) = fn->eax; + } + } + +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -748,7 +748,7 @@ void __init early_snp_set_memory_private + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ +- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) ++ if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* +@@ -767,7 +767,7 @@ void __init early_snp_set_memory_shared( + * This eliminates worries about jump tables or checking boot_cpu_data + * in the cc_platform_has() function. + */ +- if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) ++ if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) + return; + + /* Ask hypervisor to mark the memory pages shared in the RMP table. 
*/ +--- a/arch/x86/mm/mem_encrypt_identity.c ++++ b/arch/x86/mm/mem_encrypt_identity.c +@@ -304,7 +304,8 @@ void __init sme_encrypt_kernel(struct bo + * instrumentation or checking boot_cpu_data in the cc_platform_has() + * function. + */ +- if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED) ++ if (!sme_get_me_mask() || ++ RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED) + return; + + /* +@@ -541,11 +542,11 @@ void __init sme_enable(struct boot_param + me_mask = 1UL << (ebx & 0x3f); + + /* Check the SEV MSR whether SEV or SME is enabled */ +- sev_status = __rdmsr(MSR_AMD64_SEV); +- feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; ++ RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); ++ feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + + /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */ +- if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) ++ if (snp && !(msr & MSR_AMD64_SEV_SNP_ENABLED)) + snp_abort(); + + /* Check if memory encryption is enabled */ +@@ -571,7 +572,6 @@ void __init sme_enable(struct boot_param + return; + } else { + /* SEV state cannot be controlled by a command line option */ +- sme_me_mask = me_mask; + goto out; + } + +@@ -590,16 +590,13 @@ void __init sme_enable(struct boot_param + cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | + ((u64)bp->ext_cmd_line_ptr << 32)); + +- if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0) +- goto out; +- +- if (!strncmp(buffer, cmdline_on, sizeof(buffer))) +- sme_me_mask = me_mask; ++ if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 || ++ strncmp(buffer, cmdline_on, sizeof(buffer))) ++ return; + + out: +- if (sme_me_mask) { +- physical_mask &= ~sme_me_mask; +- cc_vendor = CC_VENDOR_AMD; +- cc_set_mask(sme_me_mask); +- } ++ RIP_REL_REF(sme_me_mask) = me_mask; ++ physical_mask &= ~me_mask; ++ cc_vendor = CC_VENDOR_AMD; ++ cc_set_mask(me_mask); + }