int id;
struct iommu_domain *default_domain;
struct iommu_domain *blocking_domain;
+ /*
+ * During a group device reset, @resetting_domain points to the domain
+ * that the hardware is physically attached to (the blocking domain),
+ * while @domain keeps pointing to the domain attached before the reset.
+ */
+ struct iommu_domain *resetting_domain;
struct iommu_domain *domain;
struct list_head entry;
unsigned int owner_cnt;
guard(mutex)(&dev->iommu_group->mutex);
+ /*
+ * This is a concurrent attach during a device reset. Reject it until
+ * pci_dev_reset_iommu_done() attaches the device to group->domain.
+ *
+ * Note that this may fail an iommu_dma_map(), but there is nothing
+ * more we can do here.
+ */
+ if (dev->iommu_group->resetting_domain)
+ return -EBUSY;
return __iommu_attach_device(domain, dev, NULL);
}
lockdep_assert_held(&group->mutex);
+ /*
+ * The driver handles the low-level __iommu_attach_device(), including
+ * the one invoked by pci_dev_reset_iommu_done() that re-attaches the
+ * device to the cached group->domain. In that case, the driver must
+ * take the old domain from group->resetting_domain rather than from
+ * group->domain, which prevents it from seeing a no-op re-attach from
+ * group->domain (old) to group->domain (new).
+ */
+ if (group->resetting_domain)
+ return group->resetting_domain;
+
return group->domain;
}
EXPORT_SYMBOL_GPL(iommu_driver_get_domain_for_dev);
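+
+/*
+ * Illustrative sketch, not part of this change: an IOMMU driver's attach path
+ * is expected to look up the currently-attached domain through
+ * iommu_driver_get_domain_for_dev() instead of dereferencing group->domain
+ * directly, so that the re-attach issued by pci_dev_reset_iommu_done() sees
+ * the blocking domain as the old one. The driver name, callback, and exact
+ * helper signature below are assumptions:
+ *
+ *	static int mydrv_attach_dev(struct iommu_domain *new_domain,
+ *				    struct device *dev)
+ *	{
+ *		struct iommu_domain *old_domain =
+ *			iommu_driver_get_domain_for_dev(dev);
+ *
+ *		if (old_domain == new_domain)
+ *			return 0;
+ *		... detach from old_domain, then install new_domain ...
+ *	}
+ *
+ * If the driver read group->domain directly, the post-reset re-attach would
+ * compare group->domain (old) against group->domain (new) and wrongly skip
+ * the re-install.
+ */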
if (WARN_ON(!new_domain))
return -EINVAL;
+ /*
+ * This is a concurrent attach during a device reset. Reject it until
+ * pci_dev_reset_iommu_done() attaches the device to group->domain.
+ */
+ if (group->resetting_domain)
+ return -EBUSY;
+
/*
* Changing the domain is done by calling attach_dev() on the new
* domain. This switch does not have to be atomic and DMA can be
return -EINVAL;
mutex_lock(&group->mutex);
+
+ /*
+ * This is a concurrent attach during a device reset. Reject it until
+ * pci_dev_reset_iommu_done() attaches the device to group->domain.
+ */
+ if (group->resetting_domain) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
for_each_group_device(group, device) {
/*
* Skip PASID validation for devices without PASID support
return -EINVAL;
mutex_lock(&group->mutex);
+
+ /*
+ * This is a concurrent attach during a device reset. Reject it until
+ * pci_dev_reset_iommu_done() attaches the device to group->domain.
+ */
+ if (group->resetting_domain) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
entry = iommu_make_pasid_array_entry(domain, handle);
curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,
XA_ZERO_ENTRY, GFP_KERNEL);
}
EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
+/**
+ * pci_dev_reset_iommu_prepare() - Block IOMMU to prepare for a PCI device reset
+ * @pdev: PCI device that is going to enter a reset routine
+ *
+ * The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends disabling and
+ * blocking ATS before initiating a reset. This means that, for the duration of
+ * its reset routine, a PCIe device needs all IOMMU activity blocked: both
+ * translation and ATS invalidation.
+ *
+ * This function attaches the device's RID/PASID(s) to the group->blocking_domain
+ * and sets group->resetting_domain. This lets the IOMMU driver pause any IOMMU
+ * activity while leaving the group->domain pointer intact. Later, when the
+ * reset is finished, pci_dev_reset_iommu_done() restores everything.
+ *
+ * The caller must pair pci_dev_reset_iommu_prepare() with
+ * pci_dev_reset_iommu_done(), before and after the core-level reset routine,
+ * so that resetting_domain is unset again.
+ *
+ * These two functions are designed to be used by PCI reset functions that do
+ * not race with iommu_release_device(), since the PCI sysfs node is removed
+ * before the BUS_NOTIFY_REMOVED_DEVICE notification is issued. When using them
+ * in any other case, callers must ensure there is no racy
+ * iommu_release_device() call, which would otherwise use-after-free the
+ * dev->iommu_group pointer.
+ *
+ * Return: 0 on success or a negative error code if the preparation failed.
+ */
+int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)
+{
+ struct iommu_group *group = pdev->dev.iommu_group;
+ unsigned long pasid;
+ void *entry;
+ int ret;
+
+ if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))
+ return 0;
+
+ guard(mutex)(&group->mutex);
+
+ /* Re-entry is not allowed */
+ if (WARN_ON(group->resetting_domain))
+ return -EBUSY;
+
+ ret = __iommu_group_alloc_blocking_domain(group);
+ if (ret)
+ return ret;
+
+ /* Stage RID domain at blocking_domain while retaining group->domain */
+ if (group->domain != group->blocking_domain) {
+ ret = __iommu_attach_device(group->blocking_domain, &pdev->dev,
+ group->domain);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Stage PASID domains at blocking_domain while retaining pasid_array.
+ *
+ * The pasid_array is mostly fenced by group->mutex, except one reader
+ * in iommu_attach_handle_get(), so it's safe to read without xa_lock.
+ */
+ xa_for_each_start(&group->pasid_array, pasid, entry, 1)
+ iommu_remove_dev_pasid(&pdev->dev, pasid,
+ pasid_array_entry_to_domain(entry));
+
+ group->resetting_domain = group->blocking_domain;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare);
+
+/**
+ * pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done
+ * @pdev: PCI device that has finished a reset routine
+ *
+ * After a PCIe device finishes its reset routine, its IOMMU activity needs to
+ * be restored, including new translation as well as cache invalidation, by
+ * re-attaching the device's RID/PASID(s) back to the domains retained in the
+ * core-level structures.
+ *
+ * Caller must pair it with a successful pci_dev_reset_iommu_prepare().
+ *
+ * Note that, although unlikely, re-attaching a domain may still fail due to an
+ * unexpected condition such as an OOM.
+ */
+void pci_dev_reset_iommu_done(struct pci_dev *pdev)
+{
+ struct iommu_group *group = pdev->dev.iommu_group;
+ unsigned long pasid;
+ void *entry;
+
+ if (!pci_ats_supported(pdev) || !dev_has_iommu(&pdev->dev))
+ return;
+
+ guard(mutex)(&group->mutex);
+
+ /* pci_dev_reset_iommu_prepare() was bypassed for the device */
+ if (!group->resetting_domain)
+ return;
+
+ /* pci_dev_reset_iommu_prepare() was not successfully called */
+ if (WARN_ON(!group->blocking_domain))
+ return;
+
+ /* Re-attach RID domain back to group->domain */
+ if (group->domain != group->blocking_domain) {
+ WARN_ON(__iommu_attach_device(group->domain, &pdev->dev,
+ group->blocking_domain));
+ }
+
+ /*
+ * Re-attach PASID domains back to the domains retained in pasid_array.
+ *
+ * The pasid_array is mostly fenced by group->mutex, except one reader
+ * in iommu_attach_handle_get(), so it's safe to read without xa_lock.
+ */
+ xa_for_each_start(&group->pasid_array, pasid, entry, 1)
+ WARN_ON(__iommu_set_group_pasid(
+ pasid_array_entry_to_domain(entry), group, pasid,
+ group->blocking_domain));
+
+ group->resetting_domain = NULL;
+}
+EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done);
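+
+/*
+ * Illustrative pairing sketch, not part of this change: a core-level PCI reset
+ * path would bracket the actual reset with the two helpers while holding the
+ * device lock. The reset helper and the exact call site shown below are
+ * assumptions for illustration only:
+ *
+ *	ret = pci_dev_reset_iommu_prepare(pdev);
+ *	if (ret)
+ *		return ret;
+ *
+ *	ret = __pci_reset_function_locked(pdev);
+ *
+ *	pci_dev_reset_iommu_done(pdev);
+ *	return ret;
+ */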
+
#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
/**
* iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain