io_uring/zcrx: dmabuf backed zerocopy receive
author     Pavel Begunkov <asml.silence@gmail.com>
           Thu, 1 May 2025 12:17:18 +0000 (13:17 +0100)
committer  Jens Axboe <axboe@kernel.dk>
           Tue, 6 May 2025 16:11:00 +0000 (10:11 -0600)
Add support for dmabuf backed zcrx areas. To use one, set
IORING_ZCRX_AREA_DMABUF in the flags field of struct
io_uring_zcrx_area_reg and pass the dmabuf fd in the dmabuf_fd field.
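
For illustration, a minimal userspace sketch of registering such an
area could look like the following (dmabuf_fd and area_len are
placeholder names, and the surrounding interface queue registration
via IORING_REGISTER_ZCRX_IFQ / area_ptr is assumed setup rather than
part of this patch):

    /* dmabuf_fd: fd exported by a dmabuf provider (e.g. a device driver) */
    struct io_uring_zcrx_area_reg area = {
        .addr      = 0,                       /* page aligned offset into the dmabuf */
        .len       = area_len,                /* page aligned, must fit within the dmabuf */
        .flags     = IORING_ZCRX_AREA_DMABUF,
        .dmabuf_fd = dmabuf_fd,
    };

    /* The area is then hung off the ifq registration, e.g.:
     *   reg.area_ptr = (__u64)(uintptr_t)&area;
     *   io_uring_register(ring_fd, IORING_REGISTER_ZCRX_IFQ, &reg, 1);
     */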

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20bb1890e60a82ec945ab36370d1fd54be414ab6.1746097431.git.asml.silence@gmail.com
Link: https://lore.kernel.org/io-uring/6e37db97303212bbd8955f9501cf99b579f8aece.1746547722.git.asml.silence@gmail.com
[axboe: fold in fixup]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/uapi/linux/io_uring.h
io_uring/zcrx.c
io_uring/zcrx.h

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 130f3bc71a69117ddd210f7f9864ec3ba8220526..5ce096090b0c6c758a1f79d579dbafe3cea22603 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -990,12 +990,16 @@ struct io_uring_zcrx_offsets {
        __u64   __resv[2];
 };
 
+enum io_uring_zcrx_area_flags {
+       IORING_ZCRX_AREA_DMABUF         = 1,
+};
+
 struct io_uring_zcrx_area_reg {
        __u64   addr;
        __u64   len;
        __u64   rq_area_token;
        __u32   flags;
-       __u32   __resv1;
+       __u32   dmabuf_fd;
        __u64   __resv2[2];
 };
 
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 34b09beba9926b4ac072829c4bb1d5f79f4fad37..9a568d04920470df99308d381e65bceec043982e 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -47,30 +47,118 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
        return area->mem.pages[net_iov_idx(niov)];
 }
 
-static void io_release_area_mem(struct io_zcrx_mem *mem)
+static void io_release_dmabuf(struct io_zcrx_mem *mem)
 {
-       if (mem->pages) {
-               unpin_user_pages(mem->pages, mem->nr_folios);
-               kvfree(mem->pages);
+       if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+               return;
+
+       if (mem->sgt)
+               dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt,
+                                                 DMA_FROM_DEVICE);
+       if (mem->attach)
+               dma_buf_detach(mem->dmabuf, mem->attach);
+       if (mem->dmabuf)
+               dma_buf_put(mem->dmabuf);
+
+       mem->sgt = NULL;
+       mem->attach = NULL;
+       mem->dmabuf = NULL;
+}
+
+static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
+                           struct io_zcrx_mem *mem,
+                           struct io_uring_zcrx_area_reg *area_reg)
+{
+       unsigned long off = (unsigned long)area_reg->addr;
+       unsigned long len = (unsigned long)area_reg->len;
+       unsigned long total_size = 0;
+       struct scatterlist *sg;
+       int dmabuf_fd = area_reg->dmabuf_fd;
+       int i, ret;
+
+       if (WARN_ON_ONCE(!ifq->dev))
+               return -EFAULT;
+       if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+               return -EINVAL;
+
+       mem->is_dmabuf = true;
+       mem->dmabuf = dma_buf_get(dmabuf_fd);
+       if (IS_ERR(mem->dmabuf)) {
+               ret = PTR_ERR(mem->dmabuf);
+               mem->dmabuf = NULL;
+               goto err;
        }
+
+       mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev);
+       if (IS_ERR(mem->attach)) {
+               ret = PTR_ERR(mem->attach);
+               mem->attach = NULL;
+               goto err;
+       }
+
+       mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE);
+       if (IS_ERR(mem->sgt)) {
+               ret = PTR_ERR(mem->sgt);
+               mem->sgt = NULL;
+               goto err;
+       }
+
+       for_each_sgtable_dma_sg(mem->sgt, sg, i)
+               total_size += sg_dma_len(sg);
+
+       if (total_size < off + len)
+               return -EINVAL;
+
+       mem->dmabuf_offset = off;
+       mem->size = len;
+       return 0;
+err:
+       io_release_dmabuf(mem);
+       return ret;
 }
 
-static int io_import_area(struct io_zcrx_ifq *ifq,
+static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+{
+       unsigned long off = area->mem.dmabuf_offset;
+       struct scatterlist *sg;
+       unsigned i, niov_idx = 0;
+
+       if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+               return -EINVAL;
+
+       for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
+               dma_addr_t dma = sg_dma_address(sg);
+               unsigned long sg_len = sg_dma_len(sg);
+               unsigned long sg_off = min(sg_len, off);
+
+               off -= sg_off;
+               sg_len -= sg_off;
+               dma += sg_off;
+
+               while (sg_len && niov_idx < area->nia.num_niovs) {
+                       struct net_iov *niov = &area->nia.niovs[niov_idx];
+
+                       if (net_mp_niov_set_dma_addr(niov, dma))
+                               return 0;
+                       sg_len -= PAGE_SIZE;
+                       dma += PAGE_SIZE;
+                       niov_idx++;
+               }
+       }
+       return niov_idx;
+}
+
+static int io_import_umem(struct io_zcrx_ifq *ifq,
                          struct io_zcrx_mem *mem,
                          struct io_uring_zcrx_area_reg *area_reg)
 {
        struct page **pages;
        int nr_pages;
-       int ret;
 
-       ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
-       if (ret)
-               return ret;
+       if (area_reg->dmabuf_fd)
+               return -EINVAL;
        if (!area_reg->addr)
                return -EFAULT;
-       if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
-               return -EINVAL;
-
        pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
                                   &nr_pages);
        if (IS_ERR(pages))
@@ -82,6 +170,35 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
        return 0;
 }
 
+static void io_release_area_mem(struct io_zcrx_mem *mem)
+{
+       if (mem->is_dmabuf) {
+               io_release_dmabuf(mem);
+               return;
+       }
+       if (mem->pages) {
+               unpin_user_pages(mem->pages, mem->nr_folios);
+               kvfree(mem->pages);
+       }
+}
+
+static int io_import_area(struct io_zcrx_ifq *ifq,
+                         struct io_zcrx_mem *mem,
+                         struct io_uring_zcrx_area_reg *area_reg)
+{
+       int ret;
+
+       ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
+       if (ret)
+               return ret;
+       if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
+               return -EINVAL;
+
+       if (area_reg->flags & IORING_ZCRX_AREA_DMABUF)
+               return io_import_dmabuf(ifq, mem, area_reg);
+       return io_import_umem(ifq, mem, area_reg);
+}
+
 static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
                                struct io_zcrx_area *area, int nr_mapped)
 {
@@ -101,7 +218,10 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
 {
        int i;
 
-       io_zcrx_unmap_umem(ifq, area, nr_mapped);
+       if (area->mem.is_dmabuf)
+               io_release_dmabuf(&area->mem);
+       else
+               io_zcrx_unmap_umem(ifq, area, nr_mapped);
 
        for (i = 0; i < area->nia.num_niovs; i++)
                net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0);
@@ -145,7 +265,11 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
        if (area->is_mapped)
                return 0;
 
-       nr = io_zcrx_map_area_umem(ifq, area);
+       if (area->mem.is_dmabuf)
+               nr = io_zcrx_map_area_dmabuf(ifq, area);
+       else
+               nr = io_zcrx_map_area_umem(ifq, area);
+
        if (nr != area->nia.num_niovs) {
                __io_zcrx_unmap_area(ifq, area, nr);
                return -EINVAL;
@@ -251,6 +375,8 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
        kfree(area);
 }
 
+#define IO_ZCRX_AREA_SUPPORTED_FLAGS   (IORING_ZCRX_AREA_DMABUF)
+
 static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
                               struct io_zcrx_area **res,
                               struct io_uring_zcrx_area_reg *area_reg)
@@ -259,9 +385,11 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
        unsigned nr_iovs;
        int i, ret;
 
-       if (area_reg->flags || area_reg->rq_area_token)
+       if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
+               return -EINVAL;
+       if (area_reg->rq_area_token)
                return -EINVAL;
-       if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
+       if (area_reg->__resv2[0] || area_reg->__resv2[1])
                return -EINVAL;
 
        ret = -ENOMEM;
@@ -819,6 +947,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
        size_t copied = 0;
        int ret = 0;
 
+       if (area->mem.is_dmabuf)
+               return -EFAULT;
+
        while (len) {
                size_t copy_size = min_t(size_t, PAGE_SIZE, len);
                const int dst_off = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 9c22807af8072730de61aebf9a952a0c46c6409b..2f5e26389f2218b15469a3c78bfc03affdbbea8d 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -3,15 +3,22 @@
 #define IOU_ZC_RX_H
 
 #include <linux/io_uring_types.h>
+#include <linux/dma-buf.h>
 #include <linux/socket.h>
 #include <net/page_pool/types.h>
 #include <net/net_trackers.h>
 
 struct io_zcrx_mem {
        unsigned long                   size;
+       bool                            is_dmabuf;
 
        struct page                     **pages;
        unsigned long                   nr_folios;
+
+       struct dma_buf_attachment       *attach;
+       struct dma_buf                  *dmabuf;
+       struct sg_table                 *sgt;
+       unsigned long                   dmabuf_offset;
 };
 
 struct io_zcrx_area {