iommu/vt-d: Fix NULL domain on device release
author     Lu Baolu <baolu.lu@linux.intel.com>
           Tue, 5 Mar 2024 12:21:18 +0000 (20:21 +0800)
committer  Joerg Roedel <jroedel@suse.de>
           Wed, 6 Mar 2024 16:35:57 +0000 (17:35 +0100)
In the kdump kernel, the IOMMU operates in deferred_attach mode. In this
mode, info->domain may not yet be assigned by the time the release_device
function is called. This leads to the following crash:

    BUG: kernel NULL pointer dereference, address: 000000000000003c
    ...
    RIP: 0010:do_raw_spin_lock+0xa/0xa0
    ...
    _raw_spin_lock_irqsave+0x1b/0x30
    intel_iommu_release_device+0x96/0x170
    iommu_deinit_device+0x39/0xf0
    __iommu_group_remove_device+0xa0/0xd0
    iommu_bus_notifier+0x55/0xb0
    notifier_call_chain+0x5a/0xd0
    blocking_notifier_call_chain+0x41/0x60
    bus_notify+0x34/0x50
    device_del+0x269/0x3d0
    pci_remove_bus_device+0x77/0x100
    p2sb_bar+0xae/0x1d0
    ...
    i801_probe+0x423/0x740

Fix it by using the release_domain mechanism. The scalable-mode context
entry, which is not part of the release domain, is still cleared in
release_device().

Fixes: 586081d3f6b1 ("iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO")
Reported-by: Eric Badger <ebadger@purestorage.com>
Closes: https://lore.kernel.org/r/20240113181713.1817855-1-ebadger@purestorage.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20240305013305.204605-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
drivers/iommu/intel/iommu.c
drivers/iommu/intel/pasid.c
drivers/iommu/intel/pasid.h
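
For context, the release_domain hook set in this patch is consumed by the
IOMMU core on the device release path (the core-side plumbing is added
earlier in this series). The sketch below is a simplified illustration of
that flow, not the exact iommu_deinit_device() code; the function name
release_path_sketch is illustrative only.

/*
 * Simplified, illustrative sketch of the core-side release path. It assumes
 * the core attaches the driver's static release_domain (the blocking domain
 * for intel-iommu) before invoking release_device(), so the driver no longer
 * needs a valid info->domain at that point.
 */
static void release_path_sketch(struct device *dev)
{
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	/* Park the device in the driver-provided release domain first. */
	if (ops->release_domain)
		ops->release_domain->ops->attach_dev(ops->release_domain, dev);

	/* Then let the driver tear down its per-device state. */
	if (ops->release_device)
		ops->release_device(dev);
}
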

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 60aa2dce32ef69db766922abc19ebd65090578f2..eff7abcc420b87d9239b0d5ca799b0b54f803a10 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3818,30 +3818,6 @@ static void domain_context_clear(struct device_domain_info *info)
                               &domain_context_clear_one_cb, info);
 }
 
-static void dmar_remove_one_dev_info(struct device *dev)
-{
-       struct device_domain_info *info = dev_iommu_priv_get(dev);
-       struct dmar_domain *domain = info->domain;
-       struct intel_iommu *iommu = info->iommu;
-       unsigned long flags;
-
-       if (!dev_is_real_dma_subdevice(info->dev)) {
-               if (dev_is_pci(info->dev) && sm_supported(iommu))
-                       intel_pasid_tear_down_entry(iommu, info->dev,
-                                       IOMMU_NO_PASID, false);
-
-               iommu_disable_pci_caps(info);
-               domain_context_clear(info);
-       }
-
-       spin_lock_irqsave(&domain->lock, flags);
-       list_del(&info->link);
-       spin_unlock_irqrestore(&domain->lock, flags);
-
-       domain_detach_iommu(domain, iommu);
-       info->domain = NULL;
-}
-
 /*
  * Clear the page table pointer in context or pasid table entries so that
  * all DMA requests without PASID from the device are blocked. If the page
@@ -4367,7 +4343,11 @@ static void intel_iommu_release_device(struct device *dev)
        mutex_lock(&iommu->iopf_lock);
        device_rbtree_remove(info);
        mutex_unlock(&iommu->iopf_lock);
-       dmar_remove_one_dev_info(dev);
+
+       if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
+           !context_copied(iommu, info->bus, info->devfn))
+               intel_pasid_teardown_sm_context(dev);
+
        intel_pasid_free_table(dev);
        intel_iommu_debugfs_remove_dev(info);
        kfree(info);
@@ -4826,6 +4806,7 @@ static const struct iommu_dirty_ops intel_dirty_ops = {
 
 const struct iommu_ops intel_iommu_ops = {
        .blocked_domain         = &blocking_domain,
+       .release_domain         = &blocking_domain,
        .capable                = intel_iommu_capable,
        .hw_info                = intel_iommu_hw_info,
        .domain_alloc           = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 953592125e4ac6891e70cda2f6ee89e631836b61..135ed16511245d5f5082e8141e614e5175ad7b3d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -669,3 +669,67 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
 
        return 0;
 }
+
+/*
+ * Interfaces to setup or teardown a pasid table to the scalable-mode
+ * context table entry:
+ */
+
+static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
+{
+       struct device_domain_info *info = dev_iommu_priv_get(dev);
+       struct intel_iommu *iommu = info->iommu;
+       struct context_entry *context;
+
+       spin_lock(&iommu->lock);
+       context = iommu_context_addr(iommu, bus, devfn, false);
+       if (!context) {
+               spin_unlock(&iommu->lock);
+               return;
+       }
+
+       context_clear_entry(context);
+       __iommu_flush_cache(iommu, context, sizeof(*context));
+       spin_unlock(&iommu->lock);
+
+       /*
+        * Cache invalidation for changes to a scalable-mode context table
+        * entry.
+        *
+        * Section 6.5.3.3 of the VT-d spec:
+        * - Device-selective context-cache invalidation;
+        * - Domain-selective PASID-cache invalidation to affected domains
+        *   (can be skipped if all PASID entries were not-present);
+        * - Domain-selective IOTLB invalidation to affected domains;
+        * - Global Device-TLB invalidation to affected functions.
+        *
+        * The iommu has been parked in the blocking state. All domains have
+        * been detached from the device or PASID. The PASID and IOTLB caches
+        * have been invalidated during the domain detach path.
+        */
+       iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn),
+                                  DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL);
+       devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID);
+}
+
+static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
+{
+       struct device *dev = data;
+
+       if (dev == &pdev->dev)
+               device_pasid_table_teardown(dev, PCI_BUS_NUM(alias), alias & 0xff);
+
+       return 0;
+}
+
+void intel_pasid_teardown_sm_context(struct device *dev)
+{
+       struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+       if (!dev_is_pci(dev)) {
+               device_pasid_table_teardown(dev, info->bus, info->devfn);
+               return;
+       }
+
+       pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_teardown, dev);
+}
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 8d40d4c66e3198a7ce90c83168a3f86491d79f71..c299e4c6e94b898da48f5d4b86b3ed0b84af4ee7 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -319,4 +319,5 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
                                 bool fault_ignore);
 void intel_pasid_setup_page_snoop_control(struct intel_iommu *iommu,
                                          struct device *dev, u32 pasid);
+void intel_pasid_teardown_sm_context(struct device *dev);
 #endif /* __INTEL_PASID_H */