vduse: Use fixed 4KB bounce pages for non-4KB page size
author      Sheng Zhao <sheng.zhao@bytedance.com>
            Thu, 25 Sep 2025 11:35:16 +0000 (19:35 +0800)
committer   Michael S. Tsirkin <mst@redhat.com>
            Wed, 1 Oct 2025 11:24:55 +0000 (07:24 -0400)
The allocation granularity of bounce pages is PAGE_SIZE. This may cause
even small IO requests to occupy an entire bounce page exclusively. This
kind of memory waste becomes more significant when PAGE_SIZE is larger
than 4KB (e.g. arm64 with 64KB pages).

So, optimize it by using a fixed 4KB granularity for bounce maps and
iova allocation. A single IO request then occupies at least one 4KB
bounce page rather than an entire memory page of PAGE_SIZE.

Signed-off-by: Sheng Zhao <sheng.zhao@bytedance.com>
Message-Id: <20250925113516.60305-1-sheng.zhao@bytedance.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
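
For illustration, a minimal userspace sketch of the slot-selection change,
assuming a hypothetical 64KB-page system (PAGE_SHIFT = 16); this is not the
kernel code. Before the patch, an IOVA indexed bounce maps by PAGE_SHIFT, so
even a tiny IO pinned a whole 64KB bounce page; after it, indexing uses the
fixed BOUNCE_MAP_SHIFT of 12 and pins only a 4KB slot:

    /* Sketch only, not kernel code; values are hypothetical. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT        16   /* 64KB pages, e.g. an arm64 config */
    #define BOUNCE_MAP_SHIFT  12   /* fixed 4KB bounce granularity */

    int main(void)
    {
        uint64_t iova = 0x5200;    /* a small IO request */

        /* Old indexing: one bounce map per PAGE_SIZE page. */
        printf("old map index: %llu\n",
               (unsigned long long)(iova >> PAGE_SHIFT));       /* 0 */

        /* New indexing: one bounce map per 4KB slot; 16 slots
         * share each 64KB backing page. */
        printf("new map index: %llu\n",
               (unsigned long long)(iova >> BOUNCE_MAP_SHIFT)); /* 5 */
        return 0;
    }

On a 4KB-page system PAGE_SHIFT already equals BOUNCE_MAP_SHIFT, so indexing
(and behavior) is unchanged there.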
drivers/vdpa/vdpa_user/iova_domain.c
drivers/vdpa/vdpa_user/iova_domain.h

diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c
index ccaed24b7ef8ded1c622bd0bd1f1ccdcacb92adc..4352b5cf74f0783c8d4a6dce9f9db80bab5c5bea 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.c
+++ b/drivers/vdpa/vdpa_user/iova_domain.c
@@ -103,19 +103,38 @@ void vduse_domain_clear_map(struct vduse_iova_domain *domain,
 static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
                                         u64 iova, u64 size, u64 paddr)
 {
-       struct vduse_bounce_map *map;
+       struct vduse_bounce_map *map, *head_map;
+       struct page *tmp_page;
        u64 last = iova + size - 1;
 
        while (iova <= last) {
-               map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+               /*
+                * When PAGE_SIZE is larger than 4KB, multiple adjacent bounce_maps will
+                * point to the same memory page of PAGE_SIZE. Since bounce_maps originate
+                * from IO requests, we may not be able to guarantee that the orig_phys
+                * values of all IO requests within the same 64KB memory page are contiguous.
+                * Therefore, we need to store them separately.
+                *
+                * Bounce pages are allocated on demand. As a result, it may occur that
+                * multiple bounce pages corresponding to the same 64KB memory page attempt
+                * to allocate memory simultaneously, so we use cmpxchg to handle this
+                * concurrency.
+                */
+               map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
                if (!map->bounce_page) {
-                       map->bounce_page = alloc_page(GFP_ATOMIC);
-                       if (!map->bounce_page)
-                               return -ENOMEM;
+                       head_map = &domain->bounce_maps[(iova & PAGE_MASK) >> BOUNCE_MAP_SHIFT];
+                       if (!head_map->bounce_page) {
+                               tmp_page = alloc_page(GFP_ATOMIC);
+                               if (!tmp_page)
+                                       return -ENOMEM;
+                               if (cmpxchg(&head_map->bounce_page, NULL, tmp_page))
+                                       __free_page(tmp_page);
+                       }
+                       map->bounce_page = head_map->bounce_page;
                }
                map->orig_phys = paddr;
-               paddr += PAGE_SIZE;
-               iova += PAGE_SIZE;
+               paddr += BOUNCE_MAP_SIZE;
+               iova += BOUNCE_MAP_SIZE;
        }
        return 0;
 }
@@ -127,12 +146,17 @@ static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
        u64 last = iova + size - 1;
 
        while (iova <= last) {
-               map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+               map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
                map->orig_phys = INVALID_PHYS_ADDR;
-               iova += PAGE_SIZE;
+               iova += BOUNCE_MAP_SIZE;
        }
 }
 
+static unsigned int offset_in_bounce_page(dma_addr_t addr)
+{
+       return (addr & ~BOUNCE_MAP_MASK);
+}
+
 static void do_bounce(phys_addr_t orig, void *addr, size_t size,
                      enum dma_data_direction dir)
 {
@@ -163,7 +187,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
 {
        struct vduse_bounce_map *map;
        struct page *page;
-       unsigned int offset;
+       unsigned int offset, head_offset;
        void *addr;
        size_t sz;
 
@@ -171,9 +195,10 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
                return;
 
        while (size) {
-               map = &domain->bounce_maps[iova >> PAGE_SHIFT];
-               offset = offset_in_page(iova);
-               sz = min_t(size_t, PAGE_SIZE - offset, size);
+               map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
+               head_offset = offset_in_page(iova);
+               offset = offset_in_bounce_page(iova);
+               sz = min_t(size_t, BOUNCE_MAP_SIZE - offset, size);
 
                if (WARN_ON(!map->bounce_page ||
                            map->orig_phys == INVALID_PHYS_ADDR))
@@ -183,7 +208,7 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
                       map->user_bounce_page : map->bounce_page;
 
                addr = kmap_local_page(page);
-               do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
+               do_bounce(map->orig_phys + offset, addr + head_offset, sz, dir);
                kunmap_local(addr);
                size -= sz;
                iova += sz;
@@ -218,7 +243,7 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
        struct page *page = NULL;
 
        read_lock(&domain->bounce_lock);
-       map = &domain->bounce_maps[iova >> PAGE_SHIFT];
+       map = &domain->bounce_maps[iova >> BOUNCE_MAP_SHIFT];
        if (domain->user_bounce_pages || !map->bounce_page)
                goto out;
 
@@ -236,7 +261,7 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
        struct vduse_bounce_map *map;
        unsigned long pfn, bounce_pfns;
 
-       bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
+       bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
 
        for (pfn = 0; pfn < bounce_pfns; pfn++) {
                map = &domain->bounce_maps[pfn];
@@ -246,7 +271,8 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
                if (!map->bounce_page)
                        continue;
 
-               __free_page(map->bounce_page);
+               if (!((pfn << BOUNCE_MAP_SHIFT) & ~PAGE_MASK))
+                       __free_page(map->bounce_page);
                map->bounce_page = NULL;
        }
 }
@@ -254,8 +280,12 @@ vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
 int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
                                       struct page **pages, int count)
 {
-       struct vduse_bounce_map *map;
-       int i, ret;
+       struct vduse_bounce_map *map, *head_map;
+       int i, j, ret;
+       int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
+       int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
+       struct page *head_page = NULL;
+       bool need_copy;
 
        /* Now we don't support partial mapping */
        if (count != (domain->bounce_size >> PAGE_SHIFT))
@@ -267,16 +297,23 @@ int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
                goto out;
 
        for (i = 0; i < count; i++) {
-               map = &domain->bounce_maps[i];
-               if (map->bounce_page) {
+               need_copy = false;
+               head_map = &domain->bounce_maps[(i * inner_pages)];
+               head_page = head_map->bounce_page;
+               for (j = 0; j < inner_pages; j++) {
+                       if ((i * inner_pages + j) >= bounce_pfns)
+                               break;
+                       map = &domain->bounce_maps[(i * inner_pages + j)];
                        /* Copy kernel page to user page if it's in use */
-                       if (map->orig_phys != INVALID_PHYS_ADDR)
-                               memcpy_to_page(pages[i], 0,
-                                              page_address(map->bounce_page),
-                                              PAGE_SIZE);
+                       if ((head_page) && (map->orig_phys != INVALID_PHYS_ADDR))
+                               need_copy = true;
+                       map->user_bounce_page = pages[i];
                }
-               map->user_bounce_page = pages[i];
                get_page(pages[i]);
+               if ((head_page) && (need_copy))
+                       memcpy_to_page(pages[i], 0,
+                                      page_address(head_page),
+                                      PAGE_SIZE);
        }
        domain->user_bounce_pages = true;
        ret = 0;
@@ -288,8 +325,12 @@ out:
 
 void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
 {
-       struct vduse_bounce_map *map;
-       unsigned long i, count;
+       struct vduse_bounce_map *map, *head_map;
+       unsigned long i, j, count;
+       int inner_pages = PAGE_SIZE / BOUNCE_MAP_SIZE;
+       int bounce_pfns = domain->bounce_size >> BOUNCE_MAP_SHIFT;
+       struct page *head_page = NULL;
+       bool need_copy;
 
        write_lock(&domain->bounce_lock);
        if (!domain->user_bounce_pages)
@@ -297,20 +338,27 @@ void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
 
        count = domain->bounce_size >> PAGE_SHIFT;
        for (i = 0; i < count; i++) {
-               struct page *page = NULL;
-
-               map = &domain->bounce_maps[i];
-               if (WARN_ON(!map->user_bounce_page))
+               need_copy = false;
+               head_map = &domain->bounce_maps[(i * inner_pages)];
+               if (WARN_ON(!head_map->user_bounce_page))
                        continue;
-
-               /* Copy user page to kernel page if it's in use */
-               if (map->orig_phys != INVALID_PHYS_ADDR) {
-                       page = map->bounce_page;
-                       memcpy_from_page(page_address(page),
-                                        map->user_bounce_page, 0, PAGE_SIZE);
+               head_page = head_map->user_bounce_page;
+
+               for (j = 0; j < inner_pages; j++) {
+                       if ((i * inner_pages + j) >= bounce_pfns)
+                               break;
+                       map = &domain->bounce_maps[(i * inner_pages + j)];
+                       if (WARN_ON(!map->user_bounce_page))
+                               continue;
+                       /* Copy user page to kernel page if it's in use */
+                       if ((map->orig_phys != INVALID_PHYS_ADDR) && (head_map->bounce_page))
+                               need_copy = true;
+                       map->user_bounce_page = NULL;
                }
-               put_page(map->user_bounce_page);
-               map->user_bounce_page = NULL;
+               if (need_copy)
+                       memcpy_from_page(page_address(head_map->bounce_page),
+                                        head_page, 0, PAGE_SIZE);
+               put_page(head_page);
        }
        domain->user_bounce_pages = false;
 out:
@@ -581,7 +629,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
        unsigned long pfn, bounce_pfns;
        int ret;
 
-       bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
+       bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
        if (iova_limit <= bounce_size)
                return NULL;
 
@@ -613,7 +661,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
        rwlock_init(&domain->bounce_lock);
        spin_lock_init(&domain->iotlb_lock);
        init_iova_domain(&domain->stream_iovad,
-                       PAGE_SIZE, IOVA_START_PFN);
+                       BOUNCE_MAP_SIZE, IOVA_START_PFN);
        ret = iova_domain_init_rcaches(&domain->stream_iovad);
        if (ret)
                goto err_iovad_stream;
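
For illustration, the allocate-then-cmpxchg idiom used in
vduse_domain_map_bounce_page() above can be sketched with C11 atomics (a
hedged userspace analogue; all names below are hypothetical, not the kernel
API):

    /* Sketch only: lock-free one-time allocation of a shared page.
     * Racing callers may both allocate; the cmpxchg loser frees its
     * copy and adopts the winner's, mirroring the kernel hunk. */
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    static _Atomic(void *) head_page;

    static void *get_head_page(size_t size)
    {
        void *page = atomic_load(&head_page);

        if (!page) {
            void *tmp = malloc(size);
            void *expected = NULL;

            if (!tmp)
                return NULL;
            /* Analogue of cmpxchg(&head_map->bounce_page, NULL, tmp) */
            if (!atomic_compare_exchange_strong(&head_page, &expected, tmp))
                free(tmp);   /* another caller won the race */
            page = atomic_load(&head_page);
        }
        return page;
    }

    int main(void)
    {
        printf("head page at %p\n", get_head_page(4096));
        free(atomic_load(&head_page));
        return 0;
    }

This avoids taking a lock on the mapping fast path while still guaranteeing
that all 4KB slots sharing one PAGE_SIZE backing page see the same page.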
diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h
index 1f3c30be272af1f4be47028115067de5294e7711..775cad5238f3aa54716a9e34faa4a1ff1bf18a64 100644
--- a/drivers/vdpa/vdpa_user/iova_domain.h
+++ b/drivers/vdpa/vdpa_user/iova_domain.h
 
 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
+#define BOUNCE_MAP_SHIFT       12
+#define BOUNCE_MAP_SIZE        (1 << BOUNCE_MAP_SHIFT)
+#define BOUNCE_MAP_MASK        (~(BOUNCE_MAP_SIZE - 1))
+#define BOUNCE_MAP_ALIGN(addr) (((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1))
+
 struct vduse_bounce_map {
        struct page *bounce_page;
        struct page *user_bounce_page;
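
For illustration, how these helpers decompose an IOVA under the fixed 4KB
granularity (a minimal sketch with a hypothetical value; the
offset_in_bounce_page() helper in iova_domain.c computes the same
addr & ~BOUNCE_MAP_MASK):

    /* Sketch only; mirrors the macros above with an example IOVA. */
    #include <stdio.h>

    #define BOUNCE_MAP_SHIFT       12
    #define BOUNCE_MAP_SIZE        (1 << BOUNCE_MAP_SHIFT)
    #define BOUNCE_MAP_MASK        (~(BOUNCE_MAP_SIZE - 1))
    #define BOUNCE_MAP_ALIGN(addr) (((addr) + BOUNCE_MAP_SIZE - 1) & ~(BOUNCE_MAP_SIZE - 1))

    int main(void)
    {
        unsigned long iova = 0x12345;

        printf("slot index : %#lx\n", iova >> BOUNCE_MAP_SHIFT); /* 0x12 */
        printf("slot offset: %#lx\n", iova & ~BOUNCE_MAP_MASK);  /* 0x345 */
        printf("aligned up : %#lx\n", BOUNCE_MAP_ALIGN(iova));   /* 0x13000 */
        return 0;
    }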