]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
vfio/nvgrace-gpu: split the code to wait for GPU ready
authorAnkit Agrawal <ankita@nvidia.com>
Thu, 27 Nov 2025 17:06:30 +0000 (17:06 +0000)
committerAlex Williamson <alex@shazbot.org>
Fri, 28 Nov 2025 17:07:25 +0000 (10:07 -0700)
Split the function that check for the GPU device being ready on
the probe.

Move the code to wait for the GPU to be ready through BAR0 register
reads to a separate function. This would help reuse the code.

This also fixes a bug where the return status in case of timeout
gets overridden by return from pci_enable_device. With the fix,
a timeout generate an error as initially intended.

Fixes: d85f69d520e6 ("vfio/nvgrace-gpu: Check the HBM training and C2C link status")
Reviewed-by: Zhi Wang <zhiw@nvidia.com>
Reviewed-by: Shameer Kolothum <skolothumtho@nvidia.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
Link: https://lore.kernel.org/r/20251127170632.3477-5-ankita@nvidia.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
drivers/vfio/pci/nvgrace-gpu/main.c

index 3034d6adf576a863d267789da32d82e3b5af613e..5f122929dd443b9956ca0e4a47f49832537fe4cb 100644 (file)
@@ -131,6 +131,20 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
        vfio_pci_core_close_device(core_vdev);
 }
 
+static int nvgrace_gpu_wait_device_ready(void __iomem *io)
+{
+       unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS);
+
+       do {
+               if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) &&
+                   (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY))
+                       return 0;
+               msleep(POLL_QUANTUM_MS);
+       } while (!time_after(jiffies, timeout));
+
+       return -ETIME;
+}
+
 static unsigned long addr_to_pgoff(struct vm_area_struct *vma,
                                   unsigned long addr)
 {
@@ -950,11 +964,10 @@ static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev)
  * Ensure that the BAR0 region is enabled before accessing the
  * registers.
  */
-static int nvgrace_gpu_wait_device_ready(struct pci_dev *pdev)
+static int nvgrace_gpu_probe_check_device_ready(struct pci_dev *pdev)
 {
-       unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS);
        void __iomem *io;
-       int ret = -ETIME;
+       int ret;
 
        ret = pci_enable_device(pdev);
        if (ret)
@@ -970,16 +983,8 @@ static int nvgrace_gpu_wait_device_ready(struct pci_dev *pdev)
                goto iomap_exit;
        }
 
-       do {
-               if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) &&
-                   (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY)) {
-                       ret = 0;
-                       goto reg_check_exit;
-               }
-               msleep(POLL_QUANTUM_MS);
-       } while (!time_after(jiffies, timeout));
+       ret = nvgrace_gpu_wait_device_ready(io);
 
-reg_check_exit:
        pci_iounmap(pdev, io);
 iomap_exit:
        pci_release_selected_regions(pdev, 1 << 0);
@@ -996,7 +1001,7 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
        u64 memphys, memlength;
        int ret;
 
-       ret = nvgrace_gpu_wait_device_ready(pdev);
+       ret = nvgrace_gpu_probe_check_device_ready(pdev);
        if (ret)
                return ret;