vfio/type1: Use mapping page mask for pfnmaps
author     Alex Williamson <alex.williamson@redhat.com>
           Tue, 18 Feb 2025 22:22:06 +0000 (15:22 -0700)
committer  Alex Williamson <alex.williamson@redhat.com>
           Thu, 27 Feb 2025 18:55:54 +0000 (11:55 -0700)
vfio-pci supports huge_fault for PCI MMIO BARs and will insert pud and
pmd mappings for well aligned mappings.  follow_pfnmap_start() walks the
page table and therefore knows the page mask of the level where the
address is found and returns this through follow_pfnmap_args.addr_mask.
Subsequent pfns from this address until the end of the mapping page are
necessarily consecutive.  Use this information to retrieve a range of
pfnmap pfns in a single pass.
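As a rough sketch of the idea (a hypothetical helper, not the vfio code itself; it assumes the follow_pfnmap_args fields used in the diff below and that the caller holds the mmap read lock, as the vfio path does):

#include <linux/mm.h>

/*
 * Sketch: look up one pfnmap address and report how many pfns,
 * starting at *pfn, are guaranteed consecutive based on the
 * page-table level reported via addr_mask.
 */
static long pfnmap_consecutive_pfns(struct vm_area_struct *vma,
				    unsigned long vaddr, unsigned long *pfn)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
	unsigned long epfn;
	int ret;

	ret = follow_pfnmap_start(&args);
	if (ret)
		return ret;

	*pfn = args.pfn;

	/*
	 * addr_mask identifies the start of the mapping page (4KB, 2MB
	 * pmd, or 1GB pud) containing vaddr; pfns are consecutive from
	 * there to the end of that mapping page.  epfn is the first pfn
	 * past that page, so epfn - *pfn pfns can be taken in one pass.
	 */
	epfn = (args.pfn | (~args.addr_mask >> PAGE_SHIFT)) + 1;

	follow_pfnmap_end(&args);
	return epfn - *pfn;
}

The real follow_fault_pfn() in the diff also faults the mapping in and retries when the first lookup fails; the sketch skips that.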

With optimal mappings and alignment on systems with 1GB pud and 4KB
page size, this reduces iterations for DMA mapping PCI BARs by a
factor of 256K.  In real world testing, the overhead of iterating
pfns for a VM DMA mapping a 32GB PCI BAR is reduced from ~1s to
sub-millisecond overhead.
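
(For scale: with a 4KB base page, a 1GB pud-level mapping page covers 2^30 / 2^12 = 2^18 = 262144 pfns, so one page-table walk can now cover what previously took 256K per-page iterations.)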

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mitchell Augustin <mitchell.augustin@canonical.com>
Tested-by: Mitchell Augustin <mitchell.augustin@canonical.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20250218222209.1382449-7-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index ce661f03f13917e45e96f5a76f68697ec420cf13..0ac56072af9f23684184b4969a31c28f3caf7fa7 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -520,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)
 
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                            unsigned long vaddr, unsigned long *pfn,
-                           bool write_fault)
+                           unsigned long *addr_mask, bool write_fault)
 {
        struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
        int ret;
@@ -544,10 +544,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                        return ret;
        }
 
-       if (write_fault && !args.writable)
+       if (write_fault && !args.writable) {
                ret = -EFAULT;
-       else
+       } else {
                *pfn = args.pfn;
+               *addr_mask = args.addr_mask;
+       }
 
        follow_pfnmap_end(&args);
        return ret;
@@ -590,15 +592,22 @@ retry:
        vma = vma_lookup(mm, vaddr);
 
        if (vma && vma->vm_flags & VM_PFNMAP) {
-               ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+               unsigned long addr_mask;
+
+               ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
+                                      prot & IOMMU_WRITE);
                if (ret == -EAGAIN)
                        goto retry;
 
                if (!ret) {
-                       if (is_invalid_reserved_pfn(*pfn))
-                               ret = 1;
-                       else
+                       if (is_invalid_reserved_pfn(*pfn)) {
+                               unsigned long epfn;
+
+                               epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
+                               ret = min_t(long, npages, epfn - *pfn);
+                       } else {
                                ret = -EFAULT;
+                       }
                }
        }
 done:
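
To make the epfn arithmetic in the second hunk concrete, a hypothetical worked example (illustrative values only; it assumes PAGE_SHIFT == 12 and a pmd-level 2MB mapping):

/*
 * addr_mask                = ~0x1fffffUL  (masks an address to a 2MB boundary)
 * ~addr_mask >> PAGE_SHIFT = 0x1ff        (511: pfn offset bits within 2MB)
 *
 * With *pfn == 0x100010, i.e. 16 pfns into its 2MB mapping page:
 *
 *   epfn        = (0x100010 | 0x1ff) + 1 = 0x100200
 *   epfn - *pfn = 0x1f0 = 496
 *
 * so min_t(long, npages, 496) pfns are returned from a single walk
 * rather than one pfn per iteration.
 */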