From: Barry Song <21cnbao@gmail.com> Date: Tue, 21 Oct 2025 04:20:22 +0000 (+1300) Subject: dma-buf: system_heap: use larger contiguous mappings instead of per-page mmap X-Git-Tag: v6.19-rc1~157^2~1^2~82 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=04c7adb5871ad04c9e3fd645570e21c93f1b2f54;p=thirdparty%2Flinux.git dma-buf: system_heap: use larger contiguous mappings instead of per-page mmap We can allocate high-order pages, but mapping them one by one is inefficient. This patch changes the code to map as large a chunk as possible. The code looks somewhat complicated mainly because supporting mmap with a non-zero offset is a bit tricky. Using the micro-benchmark below, we see that mmap becomes 35X faster: #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <unistd.h> #include <time.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <linux/dma-heap.h> #define SIZE (512UL * 1024 * 1024) #define PAGE 4096 #define STRIDE (PAGE/sizeof(int)) #define PAGES (SIZE/PAGE) int main(void) { int heap = open("/dev/dma_heap/system", O_RDONLY); struct dma_heap_allocation_data d = { .len = SIZE, .fd_flags = O_RDWR|O_CLOEXEC }; ioctl(heap, DMA_HEAP_IOCTL_ALLOC, &d); struct timespec t0, t1; clock_gettime(CLOCK_MONOTONIC, &t0); int *p = mmap(NULL, SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, d.fd, 0); clock_gettime(CLOCK_MONOTONIC, &t1); for (int i = 0; i < PAGES; i++) p[i*STRIDE] = i; for (int i = 0; i < PAGES; i++) if (p[i*STRIDE] != i) { fprintf(stderr, "mismatch at page %d\n", i); exit(1); } long ns = (t1.tv_sec-t0.tv_sec)*1000000000L + (t1.tv_nsec-t0.tv_nsec); printf("mmap 512MB took %.3f us, verify OK\n", ns/1000.0); return 0; } W/ patch: ~ # ./a.out mmap 512MB took 200266.000 us, verify OK ~ # ./a.out mmap 512MB took 198151.000 us, verify OK ~ # ./a.out mmap 512MB took 197069.000 us, verify OK ~ # ./a.out mmap 512MB took 196781.000 us, verify OK ~ # ./a.out mmap 512MB took 198102.000 us, verify OK ~ # ./a.out mmap 512MB took 195552.000 us, verify OK W/o patch: ~ # ./a.out mmap 512MB took 6987470.000 us, verify OK ~ # ./a.out mmap 512MB took 6970739.000 us, verify OK ~ # 
./a.out mmap 512MB took 6984383.000 us, verify OK ~ # ./a.out mmap 512MB took 6971311.000 us, verify OK ~ # ./a.out mmap 512MB took 6991680.000 us, verify OK Signed-off-by: Barry Song Acked-by: John Stultz Reviewed-by: Maxime Ripard Signed-off-by: Sumit Semwal [sumits: correct from 3.5x to 35x] Link: https://patch.msgid.link/20251021042022.47919-1-21cnbao@gmail.com --- diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index bbe7881f13601..4c782fe33fd49 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -186,20 +186,35 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) struct system_heap_buffer *buffer = dmabuf->priv; struct sg_table *table = &buffer->sg_table; unsigned long addr = vma->vm_start; - struct sg_page_iter piter; - int ret; + unsigned long pgoff = vma->vm_pgoff; + struct scatterlist *sg; + int i, ret; + + for_each_sgtable_sg(table, sg, i) { + unsigned long n = sg->length >> PAGE_SHIFT; - for_each_sgtable_page(table, &piter, vma->vm_pgoff) { - struct page *page = sg_page_iter_page(&piter); + if (pgoff < n) + break; + pgoff -= n; + } + + for (; sg && addr < vma->vm_end; sg = sg_next(sg)) { + unsigned long n = (sg->length >> PAGE_SHIFT) - pgoff; + struct page *page = sg_page(sg) + pgoff; + unsigned long size = n << PAGE_SHIFT; + + if (addr + size > vma->vm_end) + size = vma->vm_end - addr; - ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE, - vma->vm_page_prot); + ret = remap_pfn_range(vma, addr, page_to_pfn(page), + size, vma->vm_page_prot); if (ret) return ret; - addr += PAGE_SIZE; - if (addr >= vma->vm_end) - return 0; + + addr += size; + pgoff = 0; } + return 0; }