#include <linux/jiffies.h>
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
+#include <linux/memory-failure.h>
/*
* The device memory usable to the workloads running in the VM is cached
void *memaddr;
void __iomem *ioaddr;
}; /* Base virtual address of the region */
+ struct pfn_address_space pfn_address_space;
};
struct nvgrace_gpu_pci_core_device {
return NULL;
}
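+/*
+ * Locate the device memory region selected by @index and, if @pfn falls
+ * inside it, return its page offset from the region base through
+ * @pfn_offset_in_region.
+ */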
+static int pfn_memregion_offset(struct nvgrace_gpu_pci_core_device *nvdev,
+ unsigned int index,
+ unsigned long pfn,
+ pgoff_t *pfn_offset_in_region)
+{
+ struct mem_region *region;
+ unsigned long start_pfn, num_pages;
+
+ region = nvgrace_gpu_memregion(index, nvdev);
+ if (!region)
+ return -EINVAL;
+
+ start_pfn = PHYS_PFN(region->memphys);
+ num_pages = region->memlength >> PAGE_SHIFT;
+
+ if (pfn < start_pfn || pfn >= start_pfn + num_pages)
+ return -EFAULT;
+
+ *pfn_offset_in_region = pfn - start_pfn;
+
+ return 0;
+}
+
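+/*
+ * vma_to_nvdev() checks against the mmap ops defined further down, so only
+ * forward-declare it here.
+ */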
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma);
+
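+/*
+ * Callback for the pfn_address_space registered below: translate a PFN
+ * (e.g. one reported as poisoned by the memory-failure code) into a page
+ * offset within the mapping backing @vma.
+ */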
+static int nvgrace_gpu_pfn_to_vma_pgoff(struct vm_area_struct *vma,
+ unsigned long pfn,
+ pgoff_t *pgoff)
+{
+ struct nvgrace_gpu_pci_core_device *nvdev;
+ unsigned int index =
+ vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+ pgoff_t vma_offset_in_region = vma->vm_pgoff &
+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ pgoff_t pfn_offset_in_region;
+ int ret;
+
+ nvdev = vma_to_nvdev(vma);
+ if (!nvdev)
+ return -ENOENT;
+
+ ret = pfn_memregion_offset(nvdev, index, pfn, &pfn_offset_in_region);
+ if (ret)
+ return ret;
+
+ /* Ensure PFN is not before VMA's start within the region */
+ if (pfn_offset_in_region < vma_offset_in_region)
+ return -EFAULT;
+
+ /*
+ * Return the page offset in the mapping: the VMA's base pgoff plus
+ * the PFN's displacement from the VMA's start within the region.
+ */
+ *pgoff = vma->vm_pgoff +
+ (pfn_offset_in_region - vma_offset_in_region);
+
+ return 0;
+}
+
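+/*
+ * Register the PFN range backing @region with the memory-failure machinery
+ * via register_pfn_address_space(), so poison hitting this device memory
+ * can be reported against the VMAs that map it.
+ */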
+static int
+nvgrace_gpu_vfio_pci_register_pfn_range(struct vfio_device *core_vdev,
+ struct mem_region *region)
+{
+ unsigned long pfn, nr_pages;
+
+ pfn = PHYS_PFN(region->memphys);
+ nr_pages = region->memlength >> PAGE_SHIFT;
+
+ region->pfn_address_space.node.start = pfn;
+ region->pfn_address_space.node.last = pfn + nr_pages - 1;
+ region->pfn_address_space.mapping = core_vdev->inode->i_mapping;
+ region->pfn_address_space.pfn_to_vma_pgoff = nvgrace_gpu_pfn_to_vma_pgoff;
+
+ return register_pfn_address_space(&region->pfn_address_space);
+}
+
static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
* memory mapping.
*/
ret = vfio_pci_core_setup_barmap(vdev, 0);
- if (ret) {
- vfio_pci_core_disable(vdev);
- return ret;
+ if (ret)
+ goto error_exit;
+
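+ /*
+ * Register the resmem (when present) and usemem ranges for poison
+ * handling. -EOPNOTSUPP is tolerated so the device can still be
+ * opened when PFN range registration is unsupported.
+ */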
+ if (nvdev->resmem.memlength) {
+ ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->resmem);
+ if (ret && ret != -EOPNOTSUPP)
+ goto error_exit;
}
- vfio_pci_core_finish_enable(vdev);
+ ret = nvgrace_gpu_vfio_pci_register_pfn_range(core_vdev, &nvdev->usemem);
+ if (ret && ret != -EOPNOTSUPP)
+ goto register_mem_failed;
+ vfio_pci_core_finish_enable(vdev);
return 0;
+
+register_mem_failed:
+ if (nvdev->resmem.memlength)
+ unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+error_exit:
+ vfio_pci_core_disable(vdev);
+ return ret;
}
static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
core_device.vdev);
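+ /* Tear down the poison-handling registrations made at open time */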
+ if (nvdev->resmem.memlength)
+ unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+
+ unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
+
/* Unmap the mapping to the device memory cached region */
if (nvdev->usemem.memaddr) {
memunmap(nvdev->usemem.memaddr);
#endif
};
+static inline
+struct nvgrace_gpu_pci_core_device *vma_to_nvdev(struct vm_area_struct *vma)
+{
+ /* Check if this VMA belongs to us */
+ if (vma->vm_ops != &nvgrace_gpu_vfio_pci_mmap_ops)
+ return NULL;
+
+ return vma->vm_private_data;
+}
+
static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
struct vm_area_struct *vma)
{