From: Ankit Agrawal Date: Thu, 27 Nov 2025 17:06:30 +0000 (+0000) Subject: vfio/nvgrace-gpu: split the code to wait for GPU ready X-Git-Tag: v6.19-rc1~132^2~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7d055071d73bac7acc3a0b441f0632f564f048fe;p=thirdparty%2Fkernel%2Flinux.git vfio/nvgrace-gpu: split the code to wait for GPU ready Split the function that check for the GPU device being ready on the probe. Move the code to wait for the GPU to be ready through BAR0 register reads to a separate function. This would help reuse the code. This also fixes a bug where the return status in case of timeout gets overridden by return from pci_enable_device. With the fix, a timeout generate an error as initially intended. Fixes: d85f69d520e6 ("vfio/nvgrace-gpu: Check the HBM training and C2C link status") Reviewed-by: Zhi Wang Reviewed-by: Shameer Kolothum Signed-off-by: Ankit Agrawal Link: https://lore.kernel.org/r/20251127170632.3477-5-ankita@nvidia.com Signed-off-by: Alex Williamson --- diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index 3034d6adf576a..5f122929dd443 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -131,6 +131,20 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev) vfio_pci_core_close_device(core_vdev); } +static int nvgrace_gpu_wait_device_ready(void __iomem *io) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS); + + do { + if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) && + (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY)) + return 0; + msleep(POLL_QUANTUM_MS); + } while (!time_after(jiffies, timeout)); + + return -ETIME; +} + static unsigned long addr_to_pgoff(struct vm_area_struct *vma, unsigned long addr) { @@ -950,11 +964,10 @@ static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev) * Ensure that the BAR0 region is enabled before accessing the * registers. */ -static int nvgrace_gpu_wait_device_ready(struct pci_dev *pdev) +static int nvgrace_gpu_probe_check_device_ready(struct pci_dev *pdev) { - unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS); void __iomem *io; - int ret = -ETIME; + int ret; ret = pci_enable_device(pdev); if (ret) @@ -970,16 +983,8 @@ static int nvgrace_gpu_wait_device_ready(struct pci_dev *pdev) goto iomap_exit; } - do { - if ((ioread32(io + C2C_LINK_BAR0_OFFSET) == STATUS_READY) && - (ioread32(io + HBM_TRAINING_BAR0_OFFSET) == STATUS_READY)) { - ret = 0; - goto reg_check_exit; - } - msleep(POLL_QUANTUM_MS); - } while (!time_after(jiffies, timeout)); + ret = nvgrace_gpu_wait_device_ready(io); -reg_check_exit: pci_iounmap(pdev, io); iomap_exit: pci_release_selected_regions(pdev, 1 << 0); @@ -996,7 +1001,7 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev, u64 memphys, memlength; int ret; - ret = nvgrace_gpu_wait_device_ready(pdev); + ret = nvgrace_gpu_probe_check_device_ready(pdev); if (ret) return ret;