git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
iommu: Fix nested pci_dev_reset_iommu_prepare/done()
author: Nicolin Chen <nicolinc@nvidia.com>
Sat, 25 Apr 2026 01:15:24 +0000 (18:15 -0700)
committer: Joerg Roedel <joerg.roedel@amd.com>
Mon, 11 May 2026 08:12:44 +0000 (10:12 +0200)
Shuai found that cxl_reset_bus_function() calls pci_reset_bus_function()
internally, and that both call pci_dev_reset_iommu_prepare/done().

Since pci_dev_reset_iommu_prepare() does not support re-entry, the inner
call triggers a WARN_ON and returns -EBUSY, causing the entire device reset
to fail.

On the other hand, removing the outer calls in the PCI callers is unsafe.
As pointed out by Kevin, device-specific quirks like reset_hinic_vf_dev()
execute custom firmware waits after their inner pcie_flr() completes. If
the IOMMU protection relies solely on the inner reset, the IOMMU will be
unblocked prematurely while the device is still resetting.

Instead, fix this by making pci_dev_reset_iommu_prepare/done() reentrant.

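Introduce gdev->reset_depth to handle re-entries on the same device: only
the outermost prepare() call stages the blocking domain, and only the
matching outermost done() call proceeds to undo it; nested calls just
adjust the counter and return.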

Fixes: c279e83953d9 ("iommu: Introduce pci_dev_reset_iommu_prepare/done()")
Cc: stable@vger.kernel.org
Reported-by: Shuai Xue <xueshuai@linux.alibaba.com>
Closes: https://lore.kernel.org/all/absKsk7qQOwzhpzv@Asurada-Nvidia/
Suggested-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
drivers/iommu/iommu.c

index 221be84db5ad54c489871fc41a8f9861b9b5704c..301c76c40e3d0bb703284f1eb63fb8ec28bf192d 100644 (file)
@@ -83,6 +83,7 @@ struct group_device {
         *  - Device is undergoing a reset
         */
        bool blocked;
+       unsigned int reset_depth;
 };
 
 /* Iterate over each struct group_device in a struct iommu_group */
@@ -4045,20 +4046,23 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)
        if (WARN_ON(!gdev))
                return -ENODEV;
 
-       /* Re-entry is not allowed */
-       if (WARN_ON(gdev->blocked))
-               return -EBUSY;
+       if (gdev->reset_depth++)
+               return 0;
 
        ret = __iommu_group_alloc_blocking_domain(group);
-       if (ret)
+       if (ret) {
+               gdev->reset_depth--;
                return ret;
+       }
 
        /* Stage RID domain at blocking_domain while retaining group->domain */
        if (group->domain != group->blocking_domain) {
                ret = __iommu_attach_device(group->blocking_domain, &pdev->dev,
                                            group->domain);
-               if (ret)
+               if (ret) {
+                       gdev->reset_depth--;
                        return ret;
+               }
        }
 
        /*
@@ -4118,7 +4122,10 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev)
        if (WARN_ON(!gdev))
                return;
 
-       if (!gdev->blocked)
+       /* Unbalanced done() calls would underflow the counter */
+       if (WARN_ON(gdev->reset_depth == 0))
+               return;
+       if (--gdev->reset_depth)
                return;
 
        if (WARN_ON(!group->blocking_domain))