#include <linux/jiffies.h>
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
+#include <linux/memory-failure.h>
/*
* The device memory usable to the workloads running in the VM is cached
void *memaddr;
void __iomem *ioaddr;
}; /* Base virtual address of the region */
+ struct pfn_address_space pfn_address_space;
};
struct nvgrace_gpu_pci_core_device {
return NULL;
}
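+/*
+ * Locate the device memory region selected by @index and, if @pfn falls
+ * inside it, return its page offset from the region base through
+ * @pfn_offset_in_region.
+ */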
+static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev,
+ unsigned int index,
+ unsigned long pfn,
+ pgoff_t *pfn_offset_in_region)
+{
+ struct mem_region *region;
+ unsigned long start_pfn, num_pages;
+
+ region = nvgrace_gpu_memregion(index, nvdev);
+ if (!region)
+ return -EINVAL;
+
+ start_pfn = PHYS_PFN(region->memphys);
+ num_pages = region->memlength >> PAGE_SHIFT;
+
+ if (pfn < start_pfn || pfn >= start_pfn + num_pages)
+ return -EFAULT;
+
+ *pfn_offset_in_region = pfn - start_pfn;
+
+ return 0;
+}
+
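+/*
+ * vma_to_nvdev() checks against the mmap ops defined further down, so only
+ * forward-declare it here.
+ */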
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma);
+
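+/*
+ * Callback for the pfn_address_space registered below: translate a PFN
+ * (e.g. one reported as poisoned by the memory-failure code) into a page
+ * offset within the mapping backing @vma.
+ */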
+static int nvgrace_gpu_pfn_to_vma_pgoff(struct vm_area_struct *vma,
+ unsigned long pfn,
+ pgoff_t *pgoff)
+{
+ struct nvgrace_gpu_pci_core_device *nvdev;
+ unsigned int index =
+ vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+ pgoff_t vma_offset_in_region = vma->vm_pgoff &
+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ pgoff_t pfn_offset_in_region;
+ int ret;
+
+ nvdev = vma_to_nvdev(vma);
+ if (!nvdev)
+ return -ENOENT;
+
+ ret = pfn_memregion_offset(nvdev, index, pfn, &pfn_offset_in_region);
+ if (ret)
+ return ret;
+
+ /* Ensure PFN is not before VMA's start within the region */
+ if (pfn_offset_in_region < vma_offset_in_region)
+ return -EFAULT;
+
+ /*
+ * Return the page offset in the mapping: the VMA's base pgoff plus
+ * the PFN's displacement from the VMA's start within the region.
+ */
+ *pgoff = vma->vm_pgoff +
+ (pfn_offset_in_region - vma_offset_in_region);
+
+ return 0;
+}
+
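+/*
+ * Register the PFN range backing @region with the memory-failure machinery
+ * via register_pfn_address_space(), so poison hitting this device memory
+ * can be reported against the VMAs that map it.
+ */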
+static int
+nvgrace_gpu_vfio_pci_register_pfn_range(struct vfio_device *core_vdev,
+ struct mem_region *region)
+{
+ unsigned long pfn, nr_pages;
+
+ pfn = PHYS_PFN(region->memphys);
+ nr_pages = region->memlength >> PAGE_SHIFT;
+
+ region->pfn_address_space.node.start = pfn;
+ region->pfn_address_space.node.last = pfn + nr_pages - 1;
+ region->pfn_address_space.mapping = core_vdev->inode->i_mapping;
+ region->pfn_address_space.pfn_to_vma_pgoff = nvgrace_gpu_pfn_to_vma_pgoff;
+
+ return register_pfn_address_space(&region->pfn_address_space);
+}
+
static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
* memory mapping.
*/
ret = vfio_pci_core_setup_barmap(vdev, 0);
- if (ret) {
- vfio_pci_core_disable(vdev);
- return ret;
+ if (ret)
+ goto error_exit;
+
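+ /*
+ * Register the resmem (when present) and usemem ranges for poison
+ * handling. -EOPNOTSUPP is tolerated so the device can still be
+ * opened when PFN range registration is unsupported.
+ */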
+ if (nvdev->resmem.memlength) {
+ ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->resmem);
+ if (ret && ret != -EOPNOTSUPP)
+ goto error_exit;
}
- vfio_pci_core_finish_enable(vdev);
+ ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->usemem);
+ if (ret && ret != -EOPNOTSUPP)
+ goto register_mem_failed;
+ vfio_pci_core_finish_enable(vdev);
return 0;
+
+register_mem_failed:
+ if (nvdev->resmem.memlength)
+ unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+error_exit:
+ vfio_pci_core_disable(vdev);
+ return ret;
}
static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
core_device.vdev);
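+ /* Tear down the poison-handling registrations made at open time */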
+ if (nvdev->resmem.memlength)
+ unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+
+ unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
+
/* Unmap the mapping to the device memory cached region */
if (nvdev->usemem.memaddr) {
memunmap(nvdev->usemem.memaddr);
#endif
};
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma)
+{
+ /* Check if this VMA belongs to us */
+ if (vma->vm_ops != &nvgrace_gpu_vfio_pci_mmap_ops)
+ return NULL;
+
+ return vma->vm_private_data;
+}
+
static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
struct vm_area_struct *vma)
{