]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
vfio/nvgrace-gpu: register device memory for poison handling
author: Ankit Agrawal <ankita@nvidia.com>
Sun, 2 Nov 2025 18:44:34 +0000 (18:44 +0000)
committer: Andrew Morton <akpm@linux-foundation.org>
Mon, 17 Nov 2025 01:28:30 +0000 (17:28 -0800)
The nvgrace-gpu-vfio-pci module [1] maps the device memory to the user VA
(Qemu) using remap_pfn_range() without adding the memory to the kernel.
The device memory pages are not backed by struct page.  The previous patch
implements the mechanism to handle ECC/poison on memory pages that have no
struct page.  This patch makes the module use that new mechanism.

The module registers its memory region and the address_space with the
kernel MM for ECC handling using the register_pfn_address_space()
registration API exposed by the kernel.

Link: https://lore.kernel.org/all/20240220115055.23546-1-ankita@nvidia.com/ [1]
Link: https://lkml.kernel.org/r/20251102184434.2406-4-ankita@nvidia.com
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
Acked-by: Alex Williamson <alex@shazbot.org>
Cc: Aniket Agashe <aniketa@nvidia.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew R. Ochs <mochs@nvidia.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Neo Jia <cjia@nvidia.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuai Xue <xueshuai@linux.alibaba.com>
Cc: Smita Koralahalli Channabasappa <smita.koralahallichannabasappa@amd.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tarun Gupta <targupta@nvidia.com>
Cc: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Cc: Vikram Sethi <vsethi@nvidia.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zhi Wang <zhiw@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
drivers/vfio/pci/nvgrace-gpu/main.c

index e346392b72f6ab9ccb916ce39df5124b9bd5fa73..3ce56d039cbec42215411bc6328fe177e78bd36b 100644 (file)
@@ -8,6 +8,10 @@
 #include <linux/delay.h>
 #include <linux/jiffies.h>
 
+#ifdef CONFIG_MEMORY_FAILURE
+#include <linux/memory-failure.h>
+#endif
+
 /*
  * The device memory usable to the workloads running in the VM is cached
  * and showcased as a 64b device BAR (comprising of BAR4 and BAR5 region)
@@ -47,6 +51,9 @@ struct mem_region {
                void *memaddr;
                void __iomem *ioaddr;
        };                      /* Base virtual address of the region */
+#ifdef CONFIG_MEMORY_FAILURE
+       struct pfn_address_space pfn_address_space;
+#endif
 };
 
 struct nvgrace_gpu_pci_core_device {
@@ -60,6 +67,28 @@ struct nvgrace_gpu_pci_core_device {
        bool has_mig_hw_bug;
 };
 
+#ifdef CONFIG_MEMORY_FAILURE
+/* Describe @region's PFN span and mapping from @vma, then register it. */
+static int
+nvgrace_gpu_vfio_pci_register_pfn_range(struct mem_region *region,
+                                       struct vm_area_struct *vma)
+{
+       unsigned long nr_pages;
+       int ret = 0;
+
+       nr_pages = region->memlength >> PAGE_SHIFT; /* length in pages */
+
+       region->pfn_address_space.node.start = vma->vm_pgoff; /* first PFN */
+       region->pfn_address_space.node.last = vma->vm_pgoff + nr_pages - 1; /* inclusive */
+       region->pfn_address_space.mapping = vma->vm_file->f_mapping; /* mapping of the VMA's file */
+
+       ret = register_pfn_address_space(&region->pfn_address_space);
+
+       return ret; /* whatever register_pfn_address_space() reported */
+}
+
+#endif
+
 static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
 {
        struct nvgrace_gpu_pci_core_device *nvdev =
@@ -127,6 +156,13 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
 
        mutex_destroy(&nvdev->remap_lock);
 
+#ifdef CONFIG_MEMORY_FAILURE
+       if (nvdev->resmem.memlength)
+               unregister_pfn_address_space(&nvdev->resmem.pfn_address_space);
+
+       unregister_pfn_address_space(&nvdev->usemem.pfn_address_space);
+#endif
+
        vfio_pci_core_close_device(core_vdev);
 }
 
@@ -202,7 +238,14 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 
        vma->vm_pgoff = start_pfn;
 
-       return 0;
+#ifdef CONFIG_MEMORY_FAILURE
+       if (nvdev->resmem.memlength && index == VFIO_PCI_BAR2_REGION_INDEX)
+               ret = nvgrace_gpu_vfio_pci_register_pfn_range(&nvdev->resmem, vma);
+       else if (index == VFIO_PCI_BAR4_REGION_INDEX)
+               ret = nvgrace_gpu_vfio_pci_register_pfn_range(&nvdev->usemem, vma);
+#endif
+
+       return ret;
 }
 
 static long