io_uring/zcrx: add copy fallback
author		Pavel Begunkov <asml.silence@gmail.com>
		Sat, 15 Feb 2025 00:09:44 +0000 (16:09 -0800)
committer	Jens Axboe <axboe@kernel.dk>
		Mon, 17 Feb 2025 12:41:09 +0000 (05:41 -0700)
There are scenarios in which the zerocopy path can get a kernel buffer
instead of a net_iov and needs to copy it to the user, whether because
of mis-steering or simply because the skb carries data in its linear
part. In this case, grab a net_iov, copy into it and return it to the
user as normal.

At the moment the user doesn't get any indication of whether a copy
took place or not; that is left for follow-up work.
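
For context on the new io_zcrx_copy_chunk() in the diff below: the
fallback is a chunked copy loop that pulls a spare net_iov off the
area's freelist, copies at most one page into it, posts a CQE for the
chunk, and reports partial progress in preference to an error code.
A minimal userspace sketch of that control flow follows;
alloc_fallback_buf() and queue_cqe() are hypothetical stand-ins for
io_zcrx_alloc_fallback() and io_zcrx_queue_cqe(), and this illustrates
the loop's shape only, it is not the kernel implementation.

	#include <errno.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/types.h>

	#define FAKE_PAGE_SIZE ((size_t)4096)

	/* stand-in: grab one spare page-sized buffer, as from the area freelist */
	static void *alloc_fallback_buf(void)
	{
		return malloc(FAKE_PAGE_SIZE);
	}

	/* stand-in: post a completion for the chunk; 0 means the CQ was full */
	static int queue_cqe(void *buf, size_t len)
	{
		(void)buf;
		(void)len;
		return 1;
	}

	static ssize_t copy_chunk(const char *src, size_t len)
	{
		size_t copied = 0;
		int ret = 0;

		while (len) {
			/* never copy more than one page per fallback buffer */
			size_t copy_size = len < FAKE_PAGE_SIZE ? len : FAKE_PAGE_SIZE;
			void *dst = alloc_fallback_buf();

			if (!dst) {
				ret = -ENOMEM;
				break;
			}
			memcpy(dst, src + copied, copy_size);

			if (!queue_cqe(dst, copy_size)) {
				/* undo the allocation, like io_zcrx_return_niov() */
				free(dst);
				ret = -ENOSPC;
				break;
			}
			/* on success, dst now belongs to the completion consumer */
			len -= copy_size;
			copied += copy_size;
		}
		/* report partial progress in preference to the error code */
		return copied ? (ssize_t)copied : ret;
	}

The same "copied ? copied : ret" convention is what lets the reworked
io_zcrx_recv_skb() below advance offset by however much of the linear
part made it through before bailing out.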

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-10-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
io_uring/zcrx.c

index 7e0cba1e0f3965d7cf5a020ff51f7de82117ab3d..026efb8dd381448db11eb95f29efec0a5c8ea0a7 100644
@@ -7,6 +7,7 @@
 #include <linux/io_uring.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/skbuff_ref.h>
 
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
@@ -134,6 +135,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
        atomic_inc(io_get_user_counter(niov));
 }
 
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+       struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+
+       return area->pages[net_iov_idx(niov)];
+}
+
 static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
                                 struct io_uring_zcrx_ifq_reg *reg,
                                 struct io_uring_region_desc *rd)
@@ -448,6 +456,11 @@ static void io_zcrx_return_niov(struct net_iov *niov)
 {
        netmem_ref netmem = net_iov_to_netmem(niov);
 
+       if (!niov->pp) {
+               /* copy fallback allocated niovs */
+               io_zcrx_return_niov_freelist(niov);
+               return;
+       }
        page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false);
 }
 
@@ -686,13 +699,93 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
        return true;
 }
 
+static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
+{
+       struct net_iov *niov = NULL;
+
+       spin_lock_bh(&area->freelist_lock);
+       if (area->free_count)
+               niov = __io_zcrx_get_free_niov(area);
+       spin_unlock_bh(&area->freelist_lock);
+
+       if (niov)
+               page_pool_fragment_netmem(net_iov_to_netmem(niov), 1);
+       return niov;
+}
+
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+                                 void *src_base, struct page *src_page,
+                                 unsigned int src_offset, size_t len)
+{
+       struct io_zcrx_area *area = ifq->area;
+       size_t copied = 0;
+       int ret = 0;
+
+       while (len) {
+               size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+               const int dst_off = 0;
+               struct net_iov *niov;
+               struct page *dst_page;
+               void *dst_addr;
+
+               niov = io_zcrx_alloc_fallback(area);
+               if (!niov) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               dst_page = io_zcrx_iov_page(niov);
+               dst_addr = kmap_local_page(dst_page);
+               if (src_page)
+                       src_base = kmap_local_page(src_page);
+
+               memcpy(dst_addr, src_base + src_offset, copy_size);
+
+               if (src_page)
+                       kunmap_local(src_base);
+               kunmap_local(dst_addr);
+
+               if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+                       io_zcrx_return_niov(niov);
+                       ret = -ENOSPC;
+                       break;
+               }
+
+               io_zcrx_get_niov_uref(niov);
+               src_offset += copy_size;
+               len -= copy_size;
+               copied += copy_size;
+       }
+
+       return copied ? copied : ret;
+}
+
+static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+                            const skb_frag_t *frag, int off, int len)
+{
+       struct page *page = skb_frag_page(frag);
+       u32 p_off, p_len, t, copied = 0;
+       int ret = 0;
+
+       off += skb_frag_off(frag);
+
+       skb_frag_foreach_page(frag, off, len,
+                             page, p_off, p_len, t) {
+               ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len);
+               if (ret < 0)
+                       return copied ? copied : ret;
+               copied += ret;
+       }
+       return copied;
+}
+
 static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
                             const skb_frag_t *frag, int off, int len)
 {
        struct net_iov *niov;
 
        if (unlikely(!skb_frag_is_net_iov(frag)))
-               return -EOPNOTSUPP;
+               return io_zcrx_copy_frag(req, ifq, frag, off, len);
 
        niov = netmem_to_net_iov(frag->netmem);
        if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
@@ -719,18 +812,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
        struct io_zcrx_ifq *ifq = args->ifq;
        struct io_kiocb *req = args->req;
        struct sk_buff *frag_iter;
-       unsigned start, start_off;
+       unsigned start, start_off = offset;
        int i, copy, end, off;
        int ret = 0;
 
        if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
                return -EAGAIN;
 
-       start = skb_headlen(skb);
-       start_off = offset;
+       if (unlikely(offset < skb_headlen(skb))) {
+               ssize_t copied;
+               size_t to_copy;
 
-       if (offset < start)
-               return -EOPNOTSUPP;
+               to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+               copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
+                                           offset, to_copy);
+               if (copied < 0) {
+                       ret = copied;
+                       goto out;
+               }
+               offset += copied;
+               len -= copied;
+               if (!len)
+                       goto out;
+               if (offset != skb_headlen(skb))
+                       goto out;
+       }
+
+       start = skb_headlen(skb);
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                const skb_frag_t *frag;