git.ipfire.org Git - thirdparty/linux.git/commitdiff
io_uring: introduce non-circular SQ
author: Pavel Begunkov <asml.silence@gmail.com>
Tue, 20 Jan 2026 20:47:40 +0000 (20:47 +0000)
committer: Jens Axboe <axboe@kernel.dk>
Thu, 22 Jan 2026 22:47:23 +0000 (15:47 -0700)
Outside of SQPOLL, normally SQ entries are consumed by the time the
submission syscall returns. For those cases we don't need a circular
buffer and the head/tail tracking, instead the kernel can assume that
entries always start from the beginning of the SQ at index 0. This patch
introduces a setup flag doing exactly that. It's simpler and helps
to keep SQEs hot in cache.

The feature is optional and enabled by setting IORING_SETUP_SQ_REWIND.
The flag is rejected if passed together with SQPOLL as it'd require
waiting for SQ before each submission. It also requires
IORING_SETUP_NO_SQARRAY; the SQ array could be supported, but it's
unlikely there would be users, so leave more room for future optimisations.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/uapi/linux/io_uring.h
io_uring/io_uring.c
io_uring/io_uring.h

index b5b23c0d5283419b77c5f3000defbc19903421bc..475094c7a6681247f864c24b39a6af99b3f4deaf 100644 (file)
@@ -237,6 +237,18 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_SQE_MIXED         (1U << 19)
 
+/*
+ * When set, io_uring ignores SQ head and tail and fetches SQEs to submit
+ * starting from index 0 instead of from the index stored in the head pointer.
+ * IOW, the user should place all SQEs at the beginning of the SQ memory
+ * before issuing a submission syscall.
+ *
+ * It requires IORING_SETUP_NO_SQARRAY and is incompatible with
+ * IORING_SETUP_SQPOLL. The user must also never change the SQ head and tail
+ * values and must keep them set to 0. Any other value is undefined behaviour.
+ */
+#define IORING_SETUP_SQ_REWIND         (1U << 20)
+
 enum io_uring_op {
        IORING_OP_NOP,
        IORING_OP_READV,
index a50459238bee078020806c5206e523342781e7ce..0f88ec74e55d13f56e6d7b53acbe56a629d42304 100644 (file)
@@ -1945,12 +1945,16 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
 {
        struct io_rings *rings = ctx->rings;
 
-       /*
-        * Ensure any loads from the SQEs are done at this point,
-        * since once we write the new head, the application could
-        * write new data to them.
-        */
-       smp_store_release(&rings->sq.head, ctx->cached_sq_head);
+       if (ctx->flags & IORING_SETUP_SQ_REWIND) {
+               ctx->cached_sq_head = 0;
+       } else {
+               /*
+                * Ensure any loads from the SQEs are done at this point,
+                * since once we write the new head, the application could
+                * write new data to them.
+                */
+               smp_store_release(&rings->sq.head, ctx->cached_sq_head);
+       }
 }
 
 /*
@@ -1996,10 +2000,15 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
        __must_hold(&ctx->uring_lock)
 {
-       unsigned int entries = io_sqring_entries(ctx);
+       unsigned int entries;
        unsigned int left;
        int ret;
 
+       if (ctx->flags & IORING_SETUP_SQ_REWIND)
+               entries = ctx->sq_entries;
+       else
+               entries = io_sqring_entries(ctx);
+
        entries = min(nr, entries);
        if (unlikely(!entries))
                return 0;
@@ -2728,6 +2737,12 @@ static int io_uring_sanitise_params(struct io_uring_params *p)
        if (flags & ~IORING_SETUP_FLAGS)
                return -EINVAL;
 
+       if (flags & IORING_SETUP_SQ_REWIND) {
+               if ((flags & IORING_SETUP_SQPOLL) ||
+                   !(flags & IORING_SETUP_NO_SQARRAY))
+               return -EINVAL;
+       }
+
        /* There is no way to mmap rings without a real fd */
        if ((flags & IORING_SETUP_REGISTERED_FD_ONLY) &&
            !(flags & IORING_SETUP_NO_MMAP))
index 29b8f90fdabf7d249a3ad2d8d2ec8f77933240d4..acdc39b9f8d67c87a466599d9cf80a177b1bd103 100644 (file)
@@ -69,7 +69,8 @@ struct io_ctx_config {
                        IORING_SETUP_NO_SQARRAY |\
                        IORING_SETUP_HYBRID_IOPOLL |\
                        IORING_SETUP_CQE_MIXED |\
-                       IORING_SETUP_SQE_MIXED)
+                       IORING_SETUP_SQE_MIXED |\
+                       IORING_SETUP_SQ_REWIND)
 
 #define IORING_ENTER_FLAGS (IORING_ENTER_GETEVENTS |\
                        IORING_ENTER_SQ_WAKEUP |\