git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
iommu/vt-d: Use device rbtree in iopf reporting path
author Lu Baolu <baolu.lu@linux.intel.com>
Tue, 27 Feb 2024 02:14:41 +0000 (10:14 +0800)
committer Joerg Roedel <jroedel@suse.de>
Fri, 1 Mar 2024 12:51:21 +0000 (13:51 +0100)
The I/O page fault handler currently locates the PCI device by calling
pci_get_domain_bus_and_slot(), which searches the list of all PCI devices
until the desired device is found. To improve lookup efficiency, replace
it with device_rbtree_find(), which looks the device up in the rbtree of
probed devices.

The I/O page fault is initiated by the device, which does not have any
synchronization mechanism with the software to ensure that the device
stays in the probed device tree. Theoretically, a device could be released
by the IOMMU subsystem after device_rbtree_find() and before
iopf_get_dev_fault_param(), which would cause a use-after-free problem.

Add a mutex to synchronize the I/O page fault reporting path and the IOMMU
release device path. This lock doesn't introduce any performance overhead,
as the conflict between I/O page fault reporting and device releasing is
very rare.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240220065939.121116-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
drivers/iommu/intel/dmar.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/iommu.h
drivers/iommu/intel/svm.c

index f9b63c2875f715c3b1818bf4d265f19b428492ad..d14797aabb7ad6f325c6ec53987449f422e7aa72 100644 (file)
@@ -1097,6 +1097,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
        iommu->segment = drhd->segment;
        iommu->device_rbtree = RB_ROOT;
        spin_lock_init(&iommu->device_rbtree_lock);
+       mutex_init(&iommu->iopf_lock);
        iommu->node = NUMA_NO_NODE;
 
        ver = readl(iommu->reg + DMAR_VER_REG);
index 025d7385cf581324d5ba289e3617b51ffbfda4f5..60aa2dce32ef69db766922abc19ebd65090578f2 100644 (file)
@@ -4362,8 +4362,11 @@ free:
 static void intel_iommu_release_device(struct device *dev)
 {
        struct device_domain_info *info = dev_iommu_priv_get(dev);
+       struct intel_iommu *iommu = info->iommu;
 
+       mutex_lock(&iommu->iopf_lock);
        device_rbtree_remove(info);
+       mutex_unlock(&iommu->iopf_lock);
        dmar_remove_one_dev_info(dev);
        intel_pasid_free_table(dev);
        intel_iommu_debugfs_remove_dev(info);
index 50d1e196db52ca749c63ea2706a02d0bb197aa13..a0feab099f1209b99a1a17c9a31b1acdd7439270 100644 (file)
@@ -713,6 +713,8 @@ struct intel_iommu {
 #endif
        struct iopf_queue *iopf_queue;
        unsigned char iopfq_name[16];
+       /* Synchronization between fault report and iommu device release. */
+       struct mutex iopf_lock;
        struct q_inval  *qi;            /* Queued invalidation info */
        u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
 
index 1dd56d4eb88c5e6c885f2a278450c0f909b0354c..bdf3584ca0af74be9a67ef95562698d1e163832b 100644 (file)
@@ -643,7 +643,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
        struct intel_iommu *iommu = d;
        struct page_req_dsc *req;
        int head, tail, handled;
-       struct pci_dev *pdev;
+       struct device *dev;
        u64 address;
 
        /*
@@ -689,23 +689,24 @@ bad_req:
                if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
                        goto prq_advance;
 
-               pdev = pci_get_domain_bus_and_slot(iommu->segment,
-                                                  PCI_BUS_NUM(req->rid),
-                                                  req->rid & 0xff);
                /*
                 * If prq is to be handled outside iommu driver via receiver of
                 * the fault notifiers, we skip the page response here.
                 */
-               if (!pdev)
+               mutex_lock(&iommu->iopf_lock);
+               dev = device_rbtree_find(iommu, req->rid);
+               if (!dev) {
+                       mutex_unlock(&iommu->iopf_lock);
                        goto bad_req;
+               }
 
-               if (intel_svm_prq_report(iommu, &pdev->dev, req))
+               if (intel_svm_prq_report(iommu, dev, req))
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                else
-                       trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+                       trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
                                         req->priv_data[0], req->priv_data[1],
                                         iommu->prq_seq_number++);
-               pci_dev_put(pdev);
+               mutex_unlock(&iommu->iopf_lock);
 prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }