]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
io_uring/net: Support multishot receive len cap
authorNorman Maurer <norman_maurer@apple.com>
Tue, 15 Jul 2025 14:02:50 +0000 (16:02 +0200)
committerJens Axboe <axboe@kernel.dk>
Wed, 16 Jul 2025 14:28:11 +0000 (08:28 -0600)
At the moment it's very hard to do fine-grained backpressure when using
multishot, as the kernel might produce a lot of completions before the
user has a chance to cancel a previously submitted multishot recv.

This change adds support for issuing a multishot recv that is capped by a
total length (passed in sqe->optlen), which means the kernel will only
rearm until that amount of data has been received. When the limit is
reached, the completion is posted without the IORING_CQE_F_MORE flag set,
signalling to the user that the request must be re-armed manually.

Signed-off-by: Norman Maurer <norman_maurer@apple.com>
Link: https://lore.kernel.org/r/20250715140249.31186-1-norman_maurer@apple.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
io_uring/net.c

index 639f111408a1db197661ae11467a3f0b6b33d0a2..ba2d0abea3498565e54bb9aa45696c03e97c9ed3 100644 (file)
@@ -75,7 +75,10 @@ struct io_sr_msg {
        u16                             flags;
        /* initialised and used only by !msg send variants */
        u16                             buf_group;
+       /* per-invocation mshot limit */
        unsigned                        mshot_len;
+       /* overall mshot byte limit */
+       unsigned                        mshot_total_len;
        void __user                     *msg_control;
        /* used only for send zerocopy */
        struct io_kiocb                 *notif;
@@ -89,10 +92,12 @@ enum sr_retry_flags {
        IORING_RECV_RETRY       = (1U << 15),
        IORING_RECV_PARTIAL_MAP = (1U << 14),
        IORING_RECV_MSHOT_CAP   = (1U << 13),
+       IORING_RECV_MSHOT_LIM   = (1U << 12),
+       IORING_RECV_MSHOT_DONE  = (1U << 11),
 
        IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
        IORING_RECV_NO_RETRY    = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
-                                 IORING_RECV_MSHOT_CAP,
+                                 IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
 };
 
 /*
@@ -765,7 +770,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
        sr->done_io = 0;
 
-       if (unlikely(sqe->file_index || sqe->addr2))
+       if (unlikely(sqe->addr2))
                return -EINVAL;
 
        sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -790,16 +795,25 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                sr->buf_group = req->buf_index;
                req->buf_list = NULL;
        }
-       sr->mshot_len = 0;
+       sr->mshot_total_len = sr->mshot_len = 0;
        if (sr->flags & IORING_RECV_MULTISHOT) {
                if (!(req->flags & REQ_F_BUFFER_SELECT))
                        return -EINVAL;
                if (sr->msg_flags & MSG_WAITALL)
                        return -EINVAL;
-               if (req->opcode == IORING_OP_RECV)
+               if (req->opcode == IORING_OP_RECV) {
                        sr->mshot_len = sr->len;
+                       sr->mshot_total_len = READ_ONCE(sqe->optlen);
+                       if (sr->mshot_total_len)
+                               sr->flags |= IORING_RECV_MSHOT_LIM;
+               } else if (sqe->optlen) {
+                       return -EINVAL;
+               }
                req->flags |= REQ_F_APOLL_MULTISHOT;
+       } else if (sqe->optlen) {
+               return -EINVAL;
        }
+
        if (sr->flags & IORING_RECVSEND_BUNDLE) {
                if (req->opcode == IORING_OP_RECVMSG)
                        return -EINVAL;
@@ -831,6 +845,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
        if (kmsg->msg.msg_inq > 0)
                cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 
+       if (*ret > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
+               /*
+                * If sr->mshot_total_len hits zero, the limit has been
+                * reached. Mark mshot as finished, and flag MSHOT_DONE as
+                * well to prevent a potential bundle from being retried.
+                */
+               sr->mshot_total_len -= min_t(int, *ret, sr->mshot_total_len);
+               if (!sr->mshot_total_len) {
+                       sr->flags |= IORING_RECV_MSHOT_DONE;
+                       mshot_finished = true;
+               }
+       }
+
        if (sr->flags & IORING_RECVSEND_BUNDLE) {
                size_t this_ret = *ret - sr->done_io;
 
@@ -1094,6 +1121,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
                else if (kmsg->msg.msg_inq > 1)
                        arg.max_len = min_not_zero(*len, (size_t) kmsg->msg.msg_inq);
 
+               /* if mshot limited, ensure we don't go over */
+               if (sr->flags & IORING_RECV_MSHOT_LIM)
+                       arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
                ret = io_buffers_peek(req, &arg);
                if (unlikely(ret < 0))
                        return ret;