git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Apr 2026 11:47:33 +0000 (13:47 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Apr 2026 11:47:33 +0000 (13:47 +0200)
added patches:
io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch
io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch
io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch
io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch
io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch
io_uring-kbuf-introduce-struct-io_br_sel.patch
io_uring-kbuf-open-code-__io_put_kbuf.patch
io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch
io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch
io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch
io_uring-kbuf-remove-legacy-kbuf-caching.patch
io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch
io_uring-kbuf-simplify-__io_put_kbuf.patch
io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch
io_uring-kbuf-uninline-__io_put_kbufs.patch
io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch
io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch
io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch
io_uring-net-clarify-io_recv_buf_select-return-value.patch
io_uring-net-correct-type-for-min_not_zero-cast.patch
io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch
io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch
io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch
io_uring-remove-async-poll-related-provided-buffer-recycles.patch
io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch
series

26 files changed:
queue-6.12/io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-introduce-struct-io_br_sel.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-open-code-__io_put_kbuf.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-remove-legacy-kbuf-caching.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-simplify-__io_put_kbuf.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-uninline-__io_put_kbufs.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch [new file with mode: 0644]
queue-6.12/io_uring-net-clarify-io_recv_buf_select-return-value.patch [new file with mode: 0644]
queue-6.12/io_uring-net-correct-type-for-min_not_zero-cast.patch [new file with mode: 0644]
queue-6.12/io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch [new file with mode: 0644]
queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch [new file with mode: 0644]
queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch [new file with mode: 0644]
queue-6.12/io_uring-remove-async-poll-related-provided-buffer-recycles.patch [new file with mode: 0644]
queue-6.12/io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch [new file with mode: 0644]
queue-6.12/series [new file with mode: 0644]

diff --git a/queue-6.12/io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch b/queue-6.12/io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch
new file mode 100644 (file)
index 0000000..2a75cf9
--- /dev/null
@@ -0,0 +1,85 @@
+From 07642299c8028add4bd03aa4f85d7bb736b865b2 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 27 Aug 2025 15:27:30 -0600
+Subject: io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 98b6fa62c84f2e129161e976a5b9b3cb4ccd117b upstream.
+
+Since the buffers are mapped from userspace, it is prudent to use
+READ_ONCE() to read the value into a local variable, and use that for
+any other actions taken. Having a stable read of the buffer length
+avoids worrying about it changing after checking, or being read multiple
+times.
+
+Similarly, the buffer may well change in between it being picked and
+being committed. Ensure the looping for incremental ring buffer commit
+stops if it hits a zero sized buffer, as no further progress can be made
+at that point.
+
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/
+Reported-by: Qingyue Zhang <chunzhennn@qq.com>
+Reported-by: Suoxing Zhang <aftern00n@qq.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |   20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -36,15 +36,19 @@ static bool io_kbuf_inc_commit(struct io
+ {
+       while (len) {
+               struct io_uring_buf *buf;
+-              u32 this_len;
++              u32 buf_len, this_len;
+               buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+-              this_len = min_t(u32, len, buf->len);
+-              buf->len -= this_len;
+-              if (buf->len) {
++              buf_len = READ_ONCE(buf->len);
++              this_len = min_t(u32, len, buf_len);
++              buf_len -= this_len;
++              /* Stop looping for invalid buffer length of 0 */
++              if (buf_len || !this_len) {
+                       buf->addr += this_len;
++                      buf->len = buf_len;
+                       return false;
+               }
++              buf->len = 0;
+               bl->head++;
+               len -= this_len;
+       }
+@@ -167,6 +171,7 @@ static struct io_br_sel io_ring_buffer_s
+       __u16 tail, head = bl->head;
+       struct io_br_sel sel = { };
+       struct io_uring_buf *buf;
++      u32 buf_len;
+       tail = smp_load_acquire(&br->tail);
+       if (unlikely(tail == head))
+@@ -176,8 +181,9 @@ static struct io_br_sel io_ring_buffer_s
+               req->flags |= REQ_F_BL_EMPTY;
+       buf = io_ring_head_to_buf(br, head, bl->mask);
+-      if (*len == 0 || *len > buf->len)
+-              *len = buf->len;
++      buf_len = READ_ONCE(buf->len);
++      if (*len == 0 || *len > buf_len)
++              *len = buf_len;
+       req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+       req->buf_index = buf->bid;
+       sel.buf_list = bl;
+@@ -274,7 +280,7 @@ static int io_ring_buffers_peek(struct i
+       req->buf_index = buf->bid;
+       do {
+-              u32 len = buf->len;
++              u32 len = READ_ONCE(buf->len);
+               /* truncate end piece, if needed, for non partial buffers */
+               if (len > arg->max_len) {
diff --git a/queue-6.12/io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch b/queue-6.12/io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch
new file mode 100644 (file)
index 0000000..214b2bb
--- /dev/null
@@ -0,0 +1,156 @@
+From 9f348b1b92e7a9a7e90c28e943a528e79e84cc12 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:30 -0600
+Subject: io_uring/kbuf: drop 'issue_flags' from io_put_kbuf(s)() arguments
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 5e73b402cbbea51bcab90fc5ee6c6d06af76ae1b upstream.
+
+Picking multiple buffers always requires the ring lock to be held across
+the operation, so there's no need to pass in the issue_flags to
+io_put_kbufs(). On the single buffer side, if the initial picking of a
+ring buffer was unlocked, then it will have been committed already. For
+legacy buffers, no locking is required, as they will simply be freed.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    2 +-
+ io_uring/kbuf.h     |    5 ++---
+ io_uring/net.c      |   14 ++++++--------
+ io_uring/rw.c       |   10 +++++-----
+ 4 files changed, 14 insertions(+), 17 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb
+       lockdep_assert_held(&req->ctx->uring_lock);
+       req_set_fail(req);
+-      io_req_set_res(req, res, io_put_kbuf(req, res, IO_URING_F_UNLOCKED));
++      io_req_set_res(req, res, io_put_kbuf(req, res));
+       if (def->fail)
+               def->fail(req);
+       io_req_complete_defer(req);
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -131,8 +131,7 @@ static inline bool io_kbuf_recycle(struc
+ /* Mapped buffer ring, return io_uring_buf from head */
+ #define io_ring_head_to_buf(br, head, mask)   &(br)->bufs[(head) & (mask)]
+-static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
+-                                     unsigned issue_flags)
++static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len)
+ {
+       if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+               return 0;
+@@ -140,7 +139,7 @@ static inline unsigned int io_put_kbuf(s
+ }
+ static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
+-                                      int nbufs, unsigned issue_flags)
++                                      int nbufs)
+ {
+       if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+               return 0;
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -508,19 +508,18 @@ static int io_net_kbuf_recyle(struct io_
+ }
+ static inline bool io_send_finish(struct io_kiocb *req, int *ret,
+-                                struct io_async_msghdr *kmsg,
+-                                unsigned issue_flags)
++                                struct io_async_msghdr *kmsg)
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       bool bundle_finished = *ret <= 0;
+       unsigned int cflags;
+       if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+-              cflags = io_put_kbuf(req, *ret, issue_flags);
++              cflags = io_put_kbuf(req, *ret);
+               goto finish;
+       }
+-      cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);
++      cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret));
+       /*
+        * Don't start new bundles if the buffer list is empty, or if the
+@@ -687,7 +686,7 @@ retry_bundle:
+       else if (sr->done_io)
+               ret = sr->done_io;
+-      if (!io_send_finish(req, &ret, kmsg, issue_flags))
++      if (!io_send_finish(req, &ret, kmsg))
+               goto retry_bundle;
+       io_req_msg_cleanup(req, issue_flags);
+@@ -870,8 +869,7 @@ static inline bool io_recv_finish(struct
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+               size_t this_ret = *ret - sr->done_io;
+-              cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
+-                                    issue_flags);
++              cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret));
+               if (sr->retry_flags & IO_SR_MSG_RETRY)
+                       cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+               /* bundle with no more immediate buffers, we're done */
+@@ -890,7 +888,7 @@ static inline bool io_recv_finish(struct
+                       return false;
+               }
+       } else {
+-              cflags |= io_put_kbuf(req, *ret, issue_flags);
++              cflags |= io_put_kbuf(req, *ret);
+       }
+       /*
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -520,7 +520,7 @@ void io_req_rw_complete(struct io_kiocb
+       io_req_io_end(req);
+       if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+-              req->cqe.flags |= io_put_kbuf(req, req->cqe.res, 0);
++              req->cqe.flags |= io_put_kbuf(req, req->cqe.res);
+       io_req_rw_cleanup(req, 0);
+       io_req_task_complete(req, ts);
+@@ -602,7 +602,7 @@ static int kiocb_done(struct io_kiocb *r
+                        */
+                       io_req_io_end(req);
+                       io_req_set_res(req, final_ret,
+-                                     io_put_kbuf(req, ret, issue_flags));
++                                     io_put_kbuf(req, ret));
+                       io_req_rw_cleanup(req, issue_flags);
+                       return IOU_OK;
+               }
+@@ -991,7 +991,7 @@ int io_read_mshot(struct io_kiocb *req,
+               if (ret < 0)
+                       req_set_fail(req);
+       } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+-              cflags = io_put_kbuf(req, ret, issue_flags);
++              cflags = io_put_kbuf(req, ret);
+       } else {
+               /*
+                * Any successful return value will keep the multishot read
+@@ -999,7 +999,7 @@ int io_read_mshot(struct io_kiocb *req,
+                * we fail to post a CQE, or multishot is no longer set, then
+                * jump to the termination path. This request is then done.
+                */
+-              cflags = io_put_kbuf(req, ret, issue_flags);
++              cflags = io_put_kbuf(req, ret);
+               rw->len = 0; /* similarly to above, reset len to 0 */
+               if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+@@ -1210,7 +1210,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+               if (!smp_load_acquire(&req->iopoll_completed))
+                       break;
+               nr_events++;
+-              req->cqe.flags = io_put_kbuf(req, req->cqe.res, 0);
++              req->cqe.flags = io_put_kbuf(req, req->cqe.res);
+               if (req->opcode != IORING_OP_URING_CMD)
+                       io_req_rw_cleanup(req, 0);
+       }
diff --git a/queue-6.12/io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch b/queue-6.12/io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch
new file mode 100644 (file)
index 0000000..3d09b99
--- /dev/null
@@ -0,0 +1,112 @@
+From 145c6a4a6e0d6d44b2bf75e60cb495c3d05d0461 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 10 Mar 2025 14:01:49 -0600
+Subject: io_uring/kbuf: enable bundles for incrementally consumed buffers
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit cf9536e550dd243a1681fdbf804221527da20a80 upstream.
+
+The original support for incrementally consumed buffers didn't allow it
+to be used with bundles, with the assumption being that incremental
+buffers are generally larger, and hence there's less of a need to
+support it.
+
+But that assumption may not be correct - it's perfectly viable to use
+smaller buffers with incremental consumption, and there may be valid
+reasons for an application or framework to do so.
+
+As there's really no need to explicitly disable bundles with
+incrementally consumed buffers, allow it. This actually makes the peek
+side cheaper and simpler, with the completion side basically the same,
+just needing to iterate for the consumed length.
+
+Reported-by: Norman Maurer <norman_maurer@apple.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |   56 ++++++++++++++++++++++++++------------------------------
+ 1 file changed, 26 insertions(+), 30 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -32,6 +32,25 @@ struct io_provide_buf {
+       __u16                           bid;
+ };
++static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
++{
++      while (len) {
++              struct io_uring_buf *buf;
++              u32 this_len;
++
++              buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
++              this_len = min_t(u32, len, buf->len);
++              buf->len -= this_len;
++              if (buf->len) {
++                      buf->addr += this_len;
++                      return false;
++              }
++              bl->head++;
++              len -= this_len;
++      }
++      return true;
++}
++
+ bool io_kbuf_commit(struct io_kiocb *req,
+                   struct io_buffer_list *bl, int len, int nr)
+ {
+@@ -42,20 +61,8 @@ bool io_kbuf_commit(struct io_kiocb *req
+       if (unlikely(len < 0))
+               return true;
+-
+-      if (bl->flags & IOBL_INC) {
+-              struct io_uring_buf *buf;
+-
+-              buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+-              if (WARN_ON_ONCE(len > buf->len))
+-                      len = buf->len;
+-              buf->len -= len;
+-              if (buf->len) {
+-                      buf->addr += len;
+-                      return false;
+-              }
+-      }
+-
++      if (bl->flags & IOBL_INC)
++              return io_kbuf_inc_commit(bl, len);
+       bl->head += nr;
+       return true;
+ }
+@@ -235,25 +242,14 @@ static int io_ring_buffers_peek(struct i
+       buf = io_ring_head_to_buf(br, head, bl->mask);
+       if (arg->max_len) {
+               u32 len = READ_ONCE(buf->len);
++              size_t needed;
+               if (unlikely(!len))
+                       return -ENOBUFS;
+-              /*
+-               * Limit incremental buffers to 1 segment. No point trying
+-               * to peek ahead and map more than we need, when the buffers
+-               * themselves should be large when setup with
+-               * IOU_PBUF_RING_INC.
+-               */
+-              if (bl->flags & IOBL_INC) {
+-                      nr_avail = 1;
+-              } else {
+-                      size_t needed;
+-
+-                      needed = (arg->max_len + len - 1) / len;
+-                      needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
+-                      if (nr_avail > needed)
+-                              nr_avail = needed;
+-              }
++              needed = (arg->max_len + len - 1) / len;
++              needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
++              if (nr_avail > needed)
++                      nr_avail = needed;
+       }
+       /*
diff --git a/queue-6.12/io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch b/queue-6.12/io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch
new file mode 100644 (file)
index 0000000..afae3e9
--- /dev/null
@@ -0,0 +1,38 @@
+From 17b4417dca061d29ab2975564e1a33ce7c4fa4d5 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 19 Mar 2026 14:29:09 -0600
+Subject: io_uring/kbuf: fix missing BUF_MORE for incremental buffers at EOF
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 3ecd3e03144b38a21a3b70254f1b9d2e16629b09 upstream.
+
+For a zero length transfer, io_kbuf_inc_commit() is called with !len.
+Since we never enter the while loop to consume the buffers,
+io_kbuf_inc_commit() ends up returning true, consuming the buffer. But
+if no data was consumed, by definition it cannot have consumed the
+buffer. Return false for that case.
+
+Reported-by: Martin Michaelis <code@mgjm.de>
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1553
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -34,6 +34,10 @@ struct io_provide_buf {
+ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
+ {
++      /* No data consumed, return false early to avoid consuming the buffer */
++      if (!len)
++              return false;
++
+       while (len) {
+               struct io_uring_buf *buf;
+               u32 buf_len, this_len;
diff --git a/queue-6.12/io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch b/queue-6.12/io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch
new file mode 100644 (file)
index 0000000..6f16ab9
--- /dev/null
@@ -0,0 +1,104 @@
+From 7414a76696e4561ba2fe0b31bf66d4bdfd7641c9 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:48 +0000
+Subject: io_uring/kbuf: introduce io_kbuf_drop_legacy()
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 54e00d9a612ab93f37f612a5ccd7c0c4f8a31cea upstream.
+
+io_kbuf_drop() is only used for legacy provided buffers, and so
+__io_put_kbuf_list() is never called for REQ_F_BUFFER_RING. Remove the
+dead branch out of __io_put_kbuf_list(), rename it into
+io_kbuf_drop_legacy() and use it directly instead of io_kbuf_drop().
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/c8cc73e2272f09a86ecbdad9ebdd8304f8e583c0.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    5 ++---
+ io_uring/kbuf.c     |   10 ++++++++++
+ io_uring/kbuf.h     |   24 ++----------------------
+ 3 files changed, 14 insertions(+), 25 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -382,9 +382,8 @@ static bool req_need_defer(struct io_kio
+ static void io_clean_op(struct io_kiocb *req)
+ {
+-      if (req->flags & REQ_F_BUFFER_SELECTED) {
+-              io_kbuf_drop(req);
+-      }
++      if (unlikely(req->flags & REQ_F_BUFFER_SELECTED))
++              io_kbuf_drop_legacy(req);
+       if (req->flags & REQ_F_NEED_CLEANUP) {
+               const struct io_cold_def *def = &io_cold_defs[req->opcode];
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -50,6 +50,16 @@ static int io_buffer_add_list(struct io_
+       return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
+ }
++void io_kbuf_drop_legacy(struct io_kiocb *req)
++{
++      if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
++              return;
++      req->buf_index = req->kbuf->bgid;
++      req->flags &= ~REQ_F_BUFFER_SELECTED;
++      kfree(req->kbuf);
++      req->kbuf = NULL;
++}
++
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -82,6 +82,7 @@ int io_unregister_pbuf_ring(struct io_ri
+ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
++void io_kbuf_drop_legacy(struct io_kiocb *req);
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+@@ -169,27 +170,6 @@ static inline bool __io_put_kbuf_ring(st
+       return ret;
+ }
+-static inline void __io_put_kbuf_list(struct io_kiocb *req, int len)
+-{
+-      if (req->flags & REQ_F_BUFFER_RING) {
+-              __io_put_kbuf_ring(req, len, 1);
+-      } else {
+-              req->buf_index = req->kbuf->bgid;
+-              req->flags &= ~REQ_F_BUFFER_SELECTED;
+-              kfree(req->kbuf);
+-              req->kbuf = NULL;
+-      }
+-}
+-
+-static inline void io_kbuf_drop(struct io_kiocb *req)
+-{
+-      if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+-              return;
+-
+-      /* len == 0 is fine here, non-ring will always drop all of it */
+-      __io_put_kbuf_list(req, 0);
+-}
+-
+ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
+                                         int nbufs, unsigned issue_flags)
+ {
+@@ -203,7 +183,7 @@ static inline unsigned int __io_put_kbuf
+               if (!__io_put_kbuf_ring(req, len, nbufs))
+                       ret |= IORING_CQE_F_BUF_MORE;
+       } else {
+-              __io_put_kbuf_list(req, len);
++              io_kbuf_drop_legacy(req);
+       }
+       return ret;
+ }
diff --git a/queue-6.12/io_uring-kbuf-introduce-struct-io_br_sel.patch b/queue-6.12/io_uring-kbuf-introduce-struct-io_br_sel.patch
new file mode 100644 (file)
index 0000000..e579aee
--- /dev/null
@@ -0,0 +1,295 @@
+From 725d49239061ab49650f767bdc3c45fea32dbe4f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:34 -0600
+Subject: io_uring/kbuf: introduce struct io_br_sel
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit ab6559bdbb08f6bee606435cd014fc5ba0f7b750 upstream.
+
+Rather than return addresses directly from buffer selection, add a
+struct around it. No functional changes in this patch, it's in
+preparation for storing more buffer related information locally, rather
+than in struct io_kiocb.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-7-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |   26 +++++++++++++-------------
+ io_uring/kbuf.h |   19 +++++++++++++++++--
+ io_uring/net.c  |   18 +++++++++---------
+ io_uring/rw.c   |   31 ++++++++++++++++++-------------
+ 4 files changed, 57 insertions(+), 37 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -152,18 +152,18 @@ static int io_provided_buffers_select(st
+       return 1;
+ }
+-static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
+-                                        struct io_buffer_list *bl,
+-                                        unsigned int issue_flags)
++static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
++                                            struct io_buffer_list *bl,
++                                            unsigned int issue_flags)
+ {
+       struct io_uring_buf_ring *br = bl->buf_ring;
+       __u16 tail, head = bl->head;
++      struct io_br_sel sel = { };
+       struct io_uring_buf *buf;
+-      void __user *ret;
+       tail = smp_load_acquire(&br->tail);
+       if (unlikely(tail == head))
+-              return NULL;
++              return sel;
+       if (head + 1 == tail)
+               req->flags |= REQ_F_BL_EMPTY;
+@@ -174,7 +174,7 @@ static void __user *io_ring_buffer_selec
+       req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+       req->buf_list = bl;
+       req->buf_index = buf->bid;
+-      ret = u64_to_user_ptr(buf->addr);
++      sel.addr = u64_to_user_ptr(buf->addr);
+       if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
+               /*
+@@ -191,27 +191,27 @@ static void __user *io_ring_buffer_selec
+                       req->flags |= REQ_F_BUF_MORE;
+               req->buf_list = NULL;
+       }
+-      return ret;
++      return sel;
+ }
+-void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+-                            unsigned int issue_flags)
++struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
++                                unsigned int issue_flags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
++      struct io_br_sel sel = { };
+       struct io_buffer_list *bl;
+-      void __user *ret = NULL;
+       io_ring_submit_lock(req->ctx, issue_flags);
+       bl = io_buffer_get_list(ctx, req->buf_index);
+       if (likely(bl)) {
+               if (bl->flags & IOBL_BUF_RING)
+-                      ret = io_ring_buffer_select(req, len, bl, issue_flags);
++                      sel = io_ring_buffer_select(req, len, bl, issue_flags);
+               else
+-                      ret = io_provided_buffer_select(req, len, bl);
++                      sel.addr = io_provided_buffer_select(req, len, bl);
+       }
+       io_ring_submit_unlock(req->ctx, issue_flags);
+-      return ret;
++      return sel;
+ }
+ /* cap it at a reasonable 256, will be one page even for 4K */
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -64,8 +64,23 @@ struct buf_sel_arg {
+       unsigned short partial_map;
+ };
+-void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+-                            unsigned int issue_flags);
++/*
++ * Return value from io_buffer_list selection. Just returns the error or
++ * user address for now, will be extended to return the buffer list in the
++ * future.
++ */
++struct io_br_sel {
++      /*
++       * Some selection parts return the user address, others return an error.
++       */
++      union {
++              void __user *addr;
++              ssize_t val;
++      };
++};
++
++struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
++                               unsigned int issue_flags);
+ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+                     unsigned int issue_flags);
+ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1035,22 +1035,22 @@ int io_recvmsg(struct io_kiocb *req, uns
+ retry_multishot:
+       if (io_do_buffer_select(req)) {
+-              void __user *buf;
++              struct io_br_sel sel;
+               size_t len = sr->len;
+-              buf = io_buffer_select(req, &len, issue_flags);
+-              if (!buf)
++              sel = io_buffer_select(req, &len, issue_flags);
++              if (!sel.addr)
+                       return -ENOBUFS;
+               if (req->flags & REQ_F_APOLL_MULTISHOT) {
+-                      ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
++                      ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
+                       if (ret) {
+                               io_kbuf_recycle(req, req->buf_list, issue_flags);
+                               return ret;
+                       }
+               }
+-              iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
++              iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
+       }
+       kmsg->msg.msg_get_inq = 1;
+@@ -1144,13 +1144,13 @@ static int io_recv_buf_select(struct io_
+               iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
+                               arg.out_len);
+       } else {
+-              void __user *buf;
++              struct io_br_sel sel;
+               *len = sr->len;
+-              buf = io_buffer_select(req, len, issue_flags);
+-              if (!buf)
++              sel = io_buffer_select(req, len, issue_flags);
++              if (!sel.addr)
+                       return -ENOBUFS;
+-              sr->buf = buf;
++              sr->buf = sel.addr;
+               sr->len = *len;
+ map_ubuf:
+               ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -88,28 +88,28 @@ static int io_iov_buffer_select_prep(str
+ static int __io_import_iovec(int ddir, struct io_kiocb *req,
+                            struct io_async_rw *io,
++                           struct io_br_sel *sel,
+                            unsigned int issue_flags)
+ {
+       const struct io_issue_def *def = &io_issue_defs[req->opcode];
+       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+       struct iovec *iov;
+-      void __user *buf;
+       int nr_segs, ret;
+       size_t sqe_len;
+-      buf = u64_to_user_ptr(rw->addr);
++      sel->addr = u64_to_user_ptr(rw->addr);
+       sqe_len = rw->len;
+       if (!def->vectored || req->flags & REQ_F_BUFFER_SELECT) {
+               if (io_do_buffer_select(req)) {
+-                      buf = io_buffer_select(req, &sqe_len, issue_flags);
+-                      if (!buf)
++                      *sel = io_buffer_select(req, &sqe_len, issue_flags);
++                      if (!sel->addr)
+                               return -ENOBUFS;
+-                      rw->addr = (unsigned long) buf;
++                      rw->addr = (unsigned long) sel->addr;
+                       rw->len = sqe_len;
+               }
+-              return import_ubuf(ddir, buf, sqe_len, &io->iter);
++              return import_ubuf(ddir, sel->addr, sqe_len, &io->iter);
+       }
+       if (io->free_iovec) {
+@@ -119,7 +119,7 @@ static int __io_import_iovec(int ddir, s
+               iov = &io->fast_iov;
+               nr_segs = 1;
+       }
+-      ret = __import_iovec(ddir, buf, sqe_len, nr_segs, &iov, &io->iter,
++      ret = __import_iovec(ddir, sel->addr, sqe_len, nr_segs, &iov, &io->iter,
+                               req->ctx->compat);
+       if (unlikely(ret < 0))
+               return ret;
+@@ -134,11 +134,12 @@ static int __io_import_iovec(int ddir, s
+ static inline int io_import_iovec(int rw, struct io_kiocb *req,
+                                 struct io_async_rw *io,
++                                struct io_br_sel *sel,
+                                 unsigned int issue_flags)
+ {
+       int ret;
+-      ret = __io_import_iovec(rw, req, io, issue_flags);
++      ret = __io_import_iovec(rw, req, io, sel, issue_flags);
+       if (unlikely(ret < 0))
+               return ret;
+@@ -240,6 +241,7 @@ done:
+ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
+ {
+       struct io_async_rw *rw;
++      struct io_br_sel sel = { };
+       int ret;
+       if (io_rw_alloc_async(req))
+@@ -249,7 +251,7 @@ static int io_prep_rw_setup(struct io_ki
+               return 0;
+       rw = req->async_data;
+-      ret = io_import_iovec(ddir, req, rw, 0);
++      ret = io_import_iovec(ddir, req, rw, &sel, 0);
+       if (unlikely(ret < 0))
+               return ret;
+@@ -827,7 +829,8 @@ static int io_rw_init_file(struct io_kio
+       return 0;
+ }
+-static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
++static int __io_read(struct io_kiocb *req, struct io_br_sel *sel,
++                   unsigned int issue_flags)
+ {
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+@@ -837,7 +840,7 @@ static int __io_read(struct io_kiocb *re
+       loff_t *ppos;
+       if (io_do_buffer_select(req)) {
+-              ret = io_import_iovec(ITER_DEST, req, io, issue_flags);
++              ret = io_import_iovec(ITER_DEST, req, io, sel, issue_flags);
+               if (unlikely(ret < 0))
+                       return ret;
+       }
+@@ -947,9 +950,10 @@ done:
+ int io_read(struct io_kiocb *req, unsigned int issue_flags)
+ {
++      struct io_br_sel sel = { };
+       int ret;
+-      ret = __io_read(req, issue_flags);
++      ret = __io_read(req, &sel, issue_flags);
+       if (ret >= 0)
+               return kiocb_done(req, ret, issue_flags);
+@@ -961,6 +965,7 @@ int io_read(struct io_kiocb *req, unsign
+ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
++      struct io_br_sel sel = { };
+       unsigned int cflags = 0;
+       int ret;
+@@ -970,7 +975,7 @@ int io_read_mshot(struct io_kiocb *req,
+       if (!io_file_can_poll(req))
+               return -EBADFD;
+-      ret = __io_read(req, issue_flags);
++      ret = __io_read(req, &sel, issue_flags);
+       /*
+        * If we get -EAGAIN, recycle our buffer and just let normal poll
diff --git a/queue-6.12/io_uring-kbuf-open-code-__io_put_kbuf.patch b/queue-6.12/io_uring-kbuf-open-code-__io_put_kbuf.patch
new file mode 100644 (file)
index 0000000..53d2d0e
--- /dev/null
@@ -0,0 +1,55 @@
+From 7828a049e6c26ed764dcbe0579954a43d6e44edb Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:47 +0000
+Subject: io_uring/kbuf: open code __io_put_kbuf()
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit e150e70fce425e1cdfc227974893cad9fb90a0d3 upstream.
+
+__io_put_kbuf() is a trivial wrapper, open code it into
+__io_put_kbufs().
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/9dc17380272b48d56c95992c6f9eaacd5546e1d3.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |    5 -----
+ io_uring/kbuf.h |    4 +---
+ 2 files changed, 1 insertion(+), 8 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -76,11 +76,6 @@ bool io_kbuf_recycle_legacy(struct io_ki
+       return true;
+ }
+-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
+-{
+-      __io_put_kbuf_list(req, len);
+-}
+-
+ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+                                             struct io_buffer_list *bl)
+ {
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -81,8 +81,6 @@ int io_register_pbuf_ring(struct io_ring
+ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
+-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags);
+-
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+@@ -205,7 +203,7 @@ static inline unsigned int __io_put_kbuf
+               if (!__io_put_kbuf_ring(req, len, nbufs))
+                       ret |= IORING_CQE_F_BUF_MORE;
+       } else {
+-              __io_put_kbuf(req, len, issue_flags);
++              __io_put_kbuf_list(req, len);
+       }
+       return ret;
+ }
diff --git a/queue-6.12/io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch b/queue-6.12/io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch
new file mode 100644 (file)
index 0000000..1ac5965
--- /dev/null
@@ -0,0 +1,369 @@
+From 185e462830eaf7a6df916ba1400b46182b36ec9d Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:33 -0600
+Subject: io_uring/kbuf: pass in struct io_buffer_list to commit/recycle helpers
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 1b5add75d7c894c62506c9b55f1d9eaadae50ef1 upstream.
+
+Rather than have this implied being in the io_kiocb, pass it in directly
+so it's immediately obvious where these users of ->buf_list are coming
+from.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-6-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    6 +++---
+ io_uring/kbuf.c     |    9 +++++----
+ io_uring/kbuf.h     |   24 ++++++++++++++----------
+ io_uring/net.c      |   30 +++++++++++++-----------------
+ io_uring/poll.c     |    6 +++---
+ io_uring/rw.c       |   16 ++++++++--------
+ 6 files changed, 46 insertions(+), 45 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb
+       lockdep_assert_held(&req->ctx->uring_lock);
+       req_set_fail(req);
+-      io_req_set_res(req, res, io_put_kbuf(req, res));
++      io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list));
+       if (def->fail)
+               def->fail(req);
+       io_req_complete_defer(req);
+@@ -1921,11 +1921,11 @@ static void io_queue_async(struct io_kio
+       switch (io_arm_poll_handler(req, 0)) {
+       case IO_APOLL_READY:
+-              io_kbuf_recycle(req, 0);
++              io_kbuf_recycle(req, req->buf_list, 0);
+               io_req_task_queue(req);
+               break;
+       case IO_APOLL_ABORTED:
+-              io_kbuf_recycle(req, 0);
++              io_kbuf_recycle(req, req->buf_list, 0);
+               io_queue_iowq(req);
+               break;
+       case IO_APOLL_OK:
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -368,9 +368,9 @@ int io_buffers_peek(struct io_kiocb *req
+       return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+ }
+-static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
++static inline bool __io_put_kbuf_ring(struct io_kiocb *req,
++                                    struct io_buffer_list *bl, int len, int nr)
+ {
+-      struct io_buffer_list *bl = req->buf_list;
+       bool ret = true;
+       if (bl) {
+@@ -381,7 +381,8 @@ static inline bool __io_put_kbuf_ring(st
+       return ret;
+ }
+-unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
++unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
++                          int len, int nbufs)
+ {
+       unsigned int ret;
+@@ -392,7 +393,7 @@ unsigned int __io_put_kbufs(struct io_ki
+               return ret;
+       }
+-      if (!__io_put_kbuf_ring(req, len, nbufs))
++      if (!__io_put_kbuf_ring(req, bl, len, nbufs))
+               ret |= IORING_CQE_F_BUF_MORE;
+       return ret;
+ }
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -84,7 +84,8 @@ int io_register_pbuf_status(struct io_ri
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+ void io_kbuf_drop_legacy(struct io_kiocb *req);
+-unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs);
++unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
++                          int len, int nbufs);
+ bool io_kbuf_commit(struct io_kiocb *req,
+                   struct io_buffer_list *bl, int len, int nr);
+@@ -93,7 +94,8 @@ struct io_buffer_list *io_pbuf_get_bl(st
+                                     unsigned long bgid);
+ int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
+-static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
++static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
++                                      struct io_buffer_list *bl)
+ {
+       /*
+        * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
+@@ -102,8 +104,8 @@ static inline bool io_kbuf_recycle_ring(
+        * The exception is partial io, that case we should increment bl->head
+        * to monopolize the buffer.
+        */
+-      if (req->buf_list) {
+-              req->buf_index = req->buf_list->bgid;
++      if (bl) {
++              req->buf_index = bl->bgid;
+               req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
+               return true;
+       }
+@@ -117,32 +119,34 @@ static inline bool io_do_buffer_select(s
+       return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+ }
+-static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
++static inline bool io_kbuf_recycle(struct io_kiocb *req, struct io_buffer_list *bl,
++                                 unsigned issue_flags)
+ {
+       if (req->flags & REQ_F_BL_NO_RECYCLE)
+               return false;
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               return io_kbuf_recycle_legacy(req, issue_flags);
+       if (req->flags & REQ_F_BUFFER_RING)
+-              return io_kbuf_recycle_ring(req);
++              return io_kbuf_recycle_ring(req, bl);
+       return false;
+ }
+ /* Mapped buffer ring, return io_uring_buf from head */
+ #define io_ring_head_to_buf(br, head, mask)   &(br)->bufs[(head) & (mask)]
+-static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len)
++static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
++                                     struct io_buffer_list *bl)
+ {
+       if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+               return 0;
+-      return __io_put_kbufs(req, len, 1);
++      return __io_put_kbufs(req, bl, len, 1);
+ }
+ static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
+-                                      int nbufs)
++                                      struct io_buffer_list *bl, int nbufs)
+ {
+       if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+               return 0;
+-      return __io_put_kbufs(req, len, nbufs);
++      return __io_put_kbufs(req, bl, len, nbufs);
+ }
+ #endif
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -498,12 +498,12 @@ static int io_bundle_nbufs(struct io_asy
+       return nbufs;
+ }
+-static int io_net_kbuf_recyle(struct io_kiocb *req,
++static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
+                             struct io_async_msghdr *kmsg, int len)
+ {
+       req->flags |= REQ_F_BL_NO_RECYCLE;
+       if (req->flags & REQ_F_BUFFERS_COMMIT)
+-              io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len));
++              io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
+       return -EAGAIN;
+ }
+@@ -515,11 +515,11 @@ static inline bool io_send_finish(struct
+       unsigned int cflags;
+       if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+-              cflags = io_put_kbuf(req, *ret);
++              cflags = io_put_kbuf(req, *ret, req->buf_list);
+               goto finish;
+       }
+-      cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret));
++      cflags = io_put_kbufs(req, *ret, req->buf_list, io_bundle_nbufs(kmsg, *ret));
+       /*
+        * Don't start new bundles if the buffer list is empty, or if the
+@@ -675,7 +675,7 @@ retry_bundle:
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, kmsg, ret);
++                      return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+@@ -869,7 +869,7 @@ static inline bool io_recv_finish(struct
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+               size_t this_ret = *ret - sr->done_io;
+-              cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret));
++              cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
+               if (sr->retry_flags & IO_SR_MSG_RETRY)
+                       cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+               /* bundle with no more immediate buffers, we're done */
+@@ -888,7 +888,7 @@ static inline bool io_recv_finish(struct
+                       return false;
+               }
+       } else {
+-              cflags |= io_put_kbuf(req, *ret);
++              cflags |= io_put_kbuf(req, *ret, req->buf_list);
+       }
+       /*
+@@ -1045,7 +1045,7 @@ retry_multishot:
+               if (req->flags & REQ_F_APOLL_MULTISHOT) {
+                       ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
+                       if (ret) {
+-                              io_kbuf_recycle(req, issue_flags);
++                              io_kbuf_recycle(req, req->buf_list, issue_flags);
+                               return ret;
+                       }
+               }
+@@ -1070,15 +1070,11 @@ retry_multishot:
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && force_nonblock) {
+                       if (issue_flags & IO_URING_F_MULTISHOT) {
+-                              io_kbuf_recycle(req, issue_flags);
++                              io_kbuf_recycle(req, req->buf_list, issue_flags);
+                               return IOU_ISSUE_SKIP_COMPLETE;
+                       }
+                       return -EAGAIN;
+               }
+-              if (ret > 0 && io_net_retry(sock, flags)) {
+-                      sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, kmsg, ret);
+-              }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               req_set_fail(req);
+@@ -1091,7 +1087,7 @@ retry_multishot:
+       else if (sr->done_io)
+               ret = sr->done_io;
+       else
+-              io_kbuf_recycle(req, issue_flags);
++              io_kbuf_recycle(req, req->buf_list, issue_flags);
+       if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
+               goto retry_multishot;
+@@ -1209,7 +1205,7 @@ retry_multishot:
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && force_nonblock) {
+                       if (issue_flags & IO_URING_F_MULTISHOT) {
+-                              io_kbuf_recycle(req, issue_flags);
++                              io_kbuf_recycle(req, req->buf_list, issue_flags);
+                               return IOU_ISSUE_SKIP_COMPLETE;
+                       }
+@@ -1219,7 +1215,7 @@ retry_multishot:
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, kmsg, ret);
++                      return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+@@ -1235,7 +1231,7 @@ out_free:
+       else if (sr->done_io)
+               ret = sr->done_io;
+       else
+-              io_kbuf_recycle(req, issue_flags);
++              io_kbuf_recycle(req, req->buf_list, issue_flags);
+       if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
+               goto retry_multishot;
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -356,10 +356,10 @@ void io_poll_task_func(struct io_kiocb *
+       ret = io_poll_check_events(req, ts);
+       if (ret == IOU_POLL_NO_ACTION) {
+-              io_kbuf_recycle(req, 0);
++              io_kbuf_recycle(req, req->buf_list, 0);
+               return;
+       } else if (ret == IOU_POLL_REQUEUE) {
+-              io_kbuf_recycle(req, 0);
++              io_kbuf_recycle(req, req->buf_list, 0);
+               __io_poll_execute(req, 0);
+               return;
+       }
+@@ -753,7 +753,7 @@ int io_arm_poll_handler(struct io_kiocb
+       req->flags |= REQ_F_POLLED;
+       ipt.pt._qproc = io_async_queue_proc;
+-      io_kbuf_recycle(req, issue_flags);
++      io_kbuf_recycle(req, req->buf_list, issue_flags);
+       ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
+       if (ret)
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -520,7 +520,7 @@ void io_req_rw_complete(struct io_kiocb
+       io_req_io_end(req);
+       if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+-              req->cqe.flags |= io_put_kbuf(req, req->cqe.res);
++              req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list);
+       io_req_rw_cleanup(req, 0);
+       io_req_task_complete(req, ts);
+@@ -602,7 +602,7 @@ static int kiocb_done(struct io_kiocb *r
+                        */
+                       io_req_io_end(req);
+                       io_req_set_res(req, final_ret,
+-                                     io_put_kbuf(req, ret));
++                                     io_put_kbuf(req, ret, req->buf_list));
+                       io_req_rw_cleanup(req, issue_flags);
+                       return IOU_OK;
+               }
+@@ -954,7 +954,7 @@ int io_read(struct io_kiocb *req, unsign
+               return kiocb_done(req, ret, issue_flags);
+       if (req->flags & REQ_F_BUFFERS_COMMIT)
+-              io_kbuf_recycle(req, issue_flags);
++              io_kbuf_recycle(req, req->buf_list, issue_flags);
+       return ret;
+ }
+@@ -981,17 +981,17 @@ int io_read_mshot(struct io_kiocb *req,
+                * Reset rw->len to 0 again to avoid clamping future mshot
+                * reads, in case the buffer size varies.
+                */
+-              if (io_kbuf_recycle(req, issue_flags))
++              if (io_kbuf_recycle(req, req->buf_list, issue_flags))
+                       rw->len = 0;
+               if (issue_flags & IO_URING_F_MULTISHOT)
+                       return IOU_ISSUE_SKIP_COMPLETE;
+               return -EAGAIN;
+       } else if (ret <= 0) {
+-              io_kbuf_recycle(req, issue_flags);
++              io_kbuf_recycle(req, req->buf_list, issue_flags);
+               if (ret < 0)
+                       req_set_fail(req);
+       } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+-              cflags = io_put_kbuf(req, ret);
++              cflags = io_put_kbuf(req, ret, req->buf_list);
+       } else {
+               /*
+                * Any successful return value will keep the multishot read
+@@ -999,7 +999,7 @@ int io_read_mshot(struct io_kiocb *req,
+                * we fail to post a CQE, or multishot is no longer set, then
+                * jump to the termination path. This request is then done.
+                */
+-              cflags = io_put_kbuf(req, ret);
++              cflags = io_put_kbuf(req, ret, req->buf_list);
+               rw->len = 0; /* similarly to above, reset len to 0 */
+               if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+@@ -1210,7 +1210,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+               if (!smp_load_acquire(&req->iopoll_completed))
+                       break;
+               nr_events++;
+-              req->cqe.flags = io_put_kbuf(req, req->cqe.res);
++              req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list);
+               if (req->opcode != IORING_OP_URING_CMD)
+                       io_req_rw_cleanup(req, 0);
+       }
diff --git a/queue-6.12/io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch b/queue-6.12/io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch
new file mode 100644 (file)
index 0000000..f372dd0
--- /dev/null
@@ -0,0 +1,48 @@
+From aecbedeb4dd8a16964f1fc52778c421c136825f1 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 19 Mar 2026 14:29:20 -0600
+Subject: io_uring/kbuf: propagate BUF_MORE through early buffer commit path
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 418eab7a6f3c002d8e64d6e95ec27118017019af upstream.
+
+When io_should_commit() returns true (eg for non-pollable files), buffer
+commit happens at buffer selection time and sel->buf_list is set to
+NULL. When __io_put_kbufs() generates CQE flags at completion time, it
+calls __io_put_kbuf_ring() which finds a NULL buffer_list and hence
+cannot determine whether the buffer was consumed or not. This means that
+IORING_CQE_F_BUF_MORE is never set for non-pollable input with
+incrementally consumed buffers.
+
+Likewise for io_buffers_select(), which always commits upfront and
+discards the return value of io_kbuf_commit().
+
+Add REQ_F_BUF_MORE to store the result of io_kbuf_commit() during early
+commit. Then __io_put_kbuf_ring() can check this flag and set
+IORING_F_BUF_MORE accordingly.
+
+Reported-by: Martin Michaelis <code@mgjm.de>
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1553
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -387,7 +387,10 @@ static inline bool __io_put_kbuf_ring(st
+               ret = io_kbuf_commit(req, bl, len, nr);
+               req->buf_index = bl->bgid;
+       }
+-      req->flags &= ~REQ_F_BUFFER_RING;
++      if (ret && (req->flags & REQ_F_BUF_MORE))
++              ret = false;
++
++      req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE);
+       return ret;
+ }
diff --git a/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch
new file mode 100644 (file)
index 0000000..333ebdf
--- /dev/null
@@ -0,0 +1,69 @@
+From 1fc437c00b774e5b56d91a169298e558947f9e27 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:42 +0000
+Subject: io_uring/kbuf: remove legacy kbuf bulk allocation
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 7919292a961421bfdb22f83c16657684c96076b3 upstream.
+
+Legacy provided buffers are slow and discouraged in favour of the ring
+variant. Remove the bulk allocation to keep it simpler as we don't care
+about performance.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/a064d70370e590efed8076e9501ae4cfc20fe0ca.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |   30 +++++-------------------------
+ 1 file changed, 5 insertions(+), 25 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -521,12 +521,9 @@ int io_provide_buffers_prep(struct io_ki
+       return 0;
+ }
+-#define IO_BUFFER_ALLOC_BATCH 64
+-
+ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
+ {
+-      struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
+-      int allocated;
++      struct io_buffer *buf;
+       /*
+        * Completions that don't happen inline (eg not under uring_lock) will
+@@ -544,27 +541,10 @@ static int io_refill_buffer_cache(struct
+               spin_unlock(&ctx->completion_lock);
+       }
+-      /*
+-       * No free buffers and no completion entries either. Allocate a new
+-       * batch of buffer entries and add those to our freelist.
+-       */
+-
+-      allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
+-                                        ARRAY_SIZE(bufs), (void **) bufs);
+-      if (unlikely(!allocated)) {
+-              /*
+-               * Bulk alloc is all-or-nothing. If we fail to get a batch,
+-               * retry single alloc to be on the safe side.
+-               */
+-              bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
+-              if (!bufs[0])
+-                      return -ENOMEM;
+-              allocated = 1;
+-      }
+-
+-      while (allocated)
+-              list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);
+-
++      buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
++      if (!buf)
++              return -ENOMEM;
++      list_add_tail(&buf->list, &ctx->io_buffers_cache);
+       return 0;
+ }
diff --git a/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-caching.patch b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-caching.patch
new file mode 100644 (file)
index 0000000..2b290d7
--- /dev/null
@@ -0,0 +1,194 @@
+From 7d352330a367b477ac6c75a7dbffc850dec5a757 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:46 +0000
+Subject: io_uring/kbuf: remove legacy kbuf caching
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 13ee854e7c04236a47a5beaacdcf51eb0bc7a8fa upstream.
+
+Remove all struct io_buffer caches. It makes it a fair bit simpler.
+Apart from killing a bunch of lines and juggling between lists,
+__io_put_kbuf_list() doesn't need ->completion_lock locking now.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/18287217466ee2576ea0b1e72daccf7b22c7e856.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring_types.h |    3 --
+ io_uring/io_uring.c            |    2 -
+ io_uring/kbuf.c                |   58 ++++-------------------------------------
+ io_uring/kbuf.h                |    5 +--
+ 4 files changed, 9 insertions(+), 59 deletions(-)
+
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -341,7 +341,6 @@ struct io_ring_ctx {
+       spinlock_t              completion_lock;
+-      struct list_head        io_buffers_comp;
+       struct list_head        cq_overflow_list;
+       struct io_hash_table    cancel_table;
+@@ -361,8 +360,6 @@ struct io_ring_ctx {
+       unsigned int            file_alloc_start;
+       unsigned int            file_alloc_end;
+-      struct list_head        io_buffers_cache;
+-
+       /* Keep this last, we don't need it for the fast path */
+       struct wait_queue_head          poll_wq;
+       struct io_restriction           restrictions;
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -305,7 +305,6 @@ static __cold struct io_ring_ctx *io_rin
+       init_waitqueue_head(&ctx->sqo_sq_wait);
+       INIT_LIST_HEAD(&ctx->sqd_list);
+       INIT_LIST_HEAD(&ctx->cq_overflow_list);
+-      INIT_LIST_HEAD(&ctx->io_buffers_cache);
+       ret = io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
+                           sizeof(struct io_rsrc_node));
+       ret |= io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
+@@ -328,7 +327,6 @@ static __cold struct io_ring_ctx *io_rin
+       spin_lock_init(&ctx->completion_lock);
+       spin_lock_init(&ctx->timeout_lock);
+       INIT_WQ_LIST(&ctx->iopoll_list);
+-      INIT_LIST_HEAD(&ctx->io_buffers_comp);
+       INIT_LIST_HEAD(&ctx->defer_list);
+       INIT_LIST_HEAD(&ctx->timeout_list);
+       INIT_LIST_HEAD(&ctx->ltimeout_list);
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -78,9 +78,7 @@ bool io_kbuf_recycle_legacy(struct io_ki
+ void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
+ {
+-      spin_lock(&req->ctx->completion_lock);
+       __io_put_kbuf_list(req, len);
+-      spin_unlock(&req->ctx->completion_lock);
+ }
+ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+@@ -362,14 +360,15 @@ static int __io_remove_buffers(struct io
+               return i;
+       }
+-      /* protects io_buffers_cache */
+       lockdep_assert_held(&ctx->uring_lock);
+       while (!list_empty(&bl->buf_list)) {
+               struct io_buffer *nxt;
+               nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
+-              list_move(&nxt->list, &ctx->io_buffers_cache);
++              list_del(&nxt->list);
++              kfree(nxt);
++
+               if (++i == nbufs)
+                       return i;
+               cond_resched();
+@@ -389,27 +388,12 @@ void io_put_bl(struct io_ring_ctx *ctx,
+ void io_destroy_buffers(struct io_ring_ctx *ctx)
+ {
+       struct io_buffer_list *bl;
+-      struct list_head *item, *tmp;
+-      struct io_buffer *buf;
+       unsigned long index;
+       xa_for_each(&ctx->io_bl_xa, index, bl) {
+               xa_erase(&ctx->io_bl_xa, bl->bgid);
+               io_put_bl(ctx, bl);
+       }
+-
+-      /*
+-       * Move deferred locked entries to cache before pruning
+-       */
+-      spin_lock(&ctx->completion_lock);
+-      if (!list_empty(&ctx->io_buffers_comp))
+-              list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
+-      spin_unlock(&ctx->completion_lock);
+-
+-      list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
+-              buf = list_entry(item, struct io_buffer, list);
+-              kfree(buf);
+-      }
+ }
+ static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+@@ -499,33 +483,6 @@ int io_provide_buffers_prep(struct io_ki
+       return 0;
+ }
+-static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
+-{
+-      struct io_buffer *buf;
+-
+-      /*
+-       * Completions that don't happen inline (eg not under uring_lock) will
+-       * add to ->io_buffers_comp. If we don't have any free buffers, check
+-       * the completion list and splice those entries first.
+-       */
+-      if (!list_empty_careful(&ctx->io_buffers_comp)) {
+-              spin_lock(&ctx->completion_lock);
+-              if (!list_empty(&ctx->io_buffers_comp)) {
+-                      list_splice_init(&ctx->io_buffers_comp,
+-                                              &ctx->io_buffers_cache);
+-                      spin_unlock(&ctx->completion_lock);
+-                      return 0;
+-              }
+-              spin_unlock(&ctx->completion_lock);
+-      }
+-
+-      buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
+-      if (!buf)
+-              return -ENOMEM;
+-      list_add_tail(&buf->list, &ctx->io_buffers_cache);
+-      return 0;
+-}
+-
+ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
+                         struct io_buffer_list *bl)
+ {
+@@ -534,12 +491,11 @@ static int io_add_buffers(struct io_ring
+       int i, bid = pbuf->bid;
+       for (i = 0; i < pbuf->nbufs; i++) {
+-              if (list_empty(&ctx->io_buffers_cache) &&
+-                  io_refill_buffer_cache(ctx))
++              buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
++              if (!buf)
+                       break;
+-              buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
+-                                      list);
+-              list_move_tail(&buf->list, &bl->buf_list);
++
++              list_add_tail(&buf->list, &bl->buf_list);
+               buf->addr = addr;
+               buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
+               buf->bid = bid;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -177,8 +177,9 @@ static inline void __io_put_kbuf_list(st
+               __io_put_kbuf_ring(req, len, 1);
+       } else {
+               req->buf_index = req->kbuf->bgid;
+-              list_add(&req->kbuf->list, &req->ctx->io_buffers_comp);
+               req->flags &= ~REQ_F_BUFFER_SELECTED;
++              kfree(req->kbuf);
++              req->kbuf = NULL;
+       }
+ }
+@@ -187,10 +188,8 @@ static inline void io_kbuf_drop(struct i
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+               return;
+-      spin_lock(&req->ctx->completion_lock);
+       /* len == 0 is fine here, non-ring will always drop all of it */
+       __io_put_kbuf_list(req, 0);
+-      spin_unlock(&req->ctx->completion_lock);
+ }
+ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
diff --git a/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch
new file mode 100644 (file)
index 0000000..5c9d2c7
--- /dev/null
@@ -0,0 +1,80 @@
+From 3aa159b85a008972392a89b8a7cda51b674fc32d Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:43 +0000
+Subject: io_uring/kbuf: remove legacy kbuf kmem cache
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 9afe6847cff78e7f3aa8f4c920265cf298033251 upstream.
+
+Remove the kmem cache used by legacy provided buffers.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/8195c207d8524d94e972c0c82de99282289f7f5c.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    2 --
+ io_uring/io_uring.h |    1 -
+ io_uring/kbuf.c     |    8 +++-----
+ 3 files changed, 3 insertions(+), 8 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3867,8 +3867,6 @@ static int __init io_uring_init(void)
+       req_cachep = kmem_cache_create("io_kiocb", sizeof(struct io_kiocb), &kmem_args,
+                               SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT |
+                               SLAB_TYPESAFE_BY_RCU);
+-      io_buf_cachep = KMEM_CACHE(io_buffer,
+-                                        SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+       iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -389,7 +389,6 @@ static inline bool io_req_cache_empty(st
+ }
+ extern struct kmem_cache *req_cachep;
+-extern struct kmem_cache *io_buf_cachep;
+ static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx)
+ {
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -20,8 +20,6 @@
+ /* BIDs are addressed by a 16-bit field in a CQE */
+ #define MAX_BIDS_PER_BGID (1 << 16)
+-struct kmem_cache *io_buf_cachep;
+-
+ struct io_provide_buf {
+       struct file                     *file;
+       __u64                           addr;
+@@ -70,7 +68,7 @@ bool io_kbuf_recycle_legacy(struct io_ki
+       if (bl && !(bl->flags & IOBL_BUF_RING))
+               list_add(&buf->list, &bl->buf_list);
+       else
+-              kmem_cache_free(io_buf_cachep, buf);
++              kfree(buf);
+       req->flags &= ~REQ_F_BUFFER_SELECTED;
+       req->kbuf = NULL;
+@@ -430,7 +428,7 @@ void io_destroy_buffers(struct io_ring_c
+       list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
+               buf = list_entry(item, struct io_buffer, list);
+-              kmem_cache_free(io_buf_cachep, buf);
++              kfree(buf);
+       }
+ }
+@@ -541,7 +539,7 @@ static int io_refill_buffer_cache(struct
+               spin_unlock(&ctx->completion_lock);
+       }
+-      buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
++      buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
+       if (!buf)
+               return -ENOMEM;
+       list_add_tail(&buf->list, &ctx->io_buffers_cache);
diff --git a/queue-6.12/io_uring-kbuf-simplify-__io_put_kbuf.patch b/queue-6.12/io_uring-kbuf-simplify-__io_put_kbuf.patch
new file mode 100644 (file)
index 0000000..f757acb
--- /dev/null
@@ -0,0 +1,105 @@
+From e74da9819b43033a42cfc36f58b95b3e89d85fdf Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:45 +0000
+Subject: io_uring/kbuf: simplify __io_put_kbuf
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit dc39fb1093ea33019f192c93b77b863282e10162 upstream.
+
+As a preparation step remove an optimisation from __io_put_kbuf() trying
+to use the locked cache. With that __io_put_kbuf_list() is only used
+with ->io_buffers_comp, and we remove the explicit list argument.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/1b7f1394ec4afc7f96b35a61f5992e27c49fd067.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    2 --
+ io_uring/kbuf.c     |   26 +++-----------------------
+ io_uring/kbuf.h     |   11 +++++------
+ 3 files changed, 8 insertions(+), 31 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -385,9 +385,7 @@ static bool req_need_defer(struct io_kio
+ static void io_clean_op(struct io_kiocb *req)
+ {
+       if (req->flags & REQ_F_BUFFER_SELECTED) {
+-              spin_lock(&req->ctx->completion_lock);
+               io_kbuf_drop(req);
+-              spin_unlock(&req->ctx->completion_lock);
+       }
+       if (req->flags & REQ_F_NEED_CLEANUP) {
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -78,29 +78,9 @@ bool io_kbuf_recycle_legacy(struct io_ki
+ void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
+ {
+-      /*
+-       * We can add this buffer back to two lists:
+-       *
+-       * 1) The io_buffers_cache list. This one is protected by the
+-       *    ctx->uring_lock. If we already hold this lock, add back to this
+-       *    list as we can grab it from issue as well.
+-       * 2) The io_buffers_comp list. This one is protected by the
+-       *    ctx->completion_lock.
+-       *
+-       * We migrate buffers from the comp_list to the issue cache list
+-       * when we need one.
+-       */
+-      if (issue_flags & IO_URING_F_UNLOCKED) {
+-              struct io_ring_ctx *ctx = req->ctx;
+-
+-              spin_lock(&ctx->completion_lock);
+-              __io_put_kbuf_list(req, len, &ctx->io_buffers_comp);
+-              spin_unlock(&ctx->completion_lock);
+-      } else {
+-              lockdep_assert_held(&req->ctx->uring_lock);
+-
+-              __io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache);
+-      }
++      spin_lock(&req->ctx->completion_lock);
++      __io_put_kbuf_list(req, len);
++      spin_unlock(&req->ctx->completion_lock);
+ }
+ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -171,27 +171,26 @@ static inline bool __io_put_kbuf_ring(st
+       return ret;
+ }
+-static inline void __io_put_kbuf_list(struct io_kiocb *req, int len,
+-                                    struct list_head *list)
++static inline void __io_put_kbuf_list(struct io_kiocb *req, int len)
+ {
+       if (req->flags & REQ_F_BUFFER_RING) {
+               __io_put_kbuf_ring(req, len, 1);
+       } else {
+               req->buf_index = req->kbuf->bgid;
+-              list_add(&req->kbuf->list, list);
++              list_add(&req->kbuf->list, &req->ctx->io_buffers_comp);
+               req->flags &= ~REQ_F_BUFFER_SELECTED;
+       }
+ }
+ static inline void io_kbuf_drop(struct io_kiocb *req)
+ {
+-      lockdep_assert_held(&req->ctx->completion_lock);
+-
+       if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+               return;
++      spin_lock(&req->ctx->completion_lock);
+       /* len == 0 is fine here, non-ring will always drop all of it */
+-      __io_put_kbuf_list(req, 0, &req->ctx->io_buffers_comp);
++      __io_put_kbuf_list(req, 0);
++      spin_unlock(&req->ctx->completion_lock);
+ }
+ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
diff --git a/queue-6.12/io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch b/queue-6.12/io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch
new file mode 100644 (file)
index 0000000..049c746
--- /dev/null
@@ -0,0 +1,498 @@
+From 69db0294e6d11bc765f5cbee2f39602fa2b36f7b Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:39 -0600
+Subject: io_uring/kbuf: switch to storing struct io_buffer_list locally
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 5fda51255439addd1c9059098e30847a375a1008 upstream.
+
+Currently the buffer list is stored in struct io_kiocb. The buffer list
+can be of two types:
+
+1) Classic/legacy buffer list. These don't need to get referenced after
+   a buffer pick, and hence storing them in struct io_kiocb is perfectly
+   fine.
+
+2) Ring provided buffer lists. These DO need to be referenced after the
+   initial buffer pick, as they need to get consumed later on. This can
+   be either just incrementing the head of the ring, or it can be
+   consuming parts of a buffer if incremental buffer consumptions has
+   been configured.
+
+For case 2, io_uring needs to be careful not to access the buffer list
+after the initial pick-and-execute context. The core does recycling of
+these, but it's easy to make a mistake, because it's stored in the
+io_kiocb which does persist across multiple execution contexts. Either
+because it's a multishot request, or simply because it needed some kind
+of async trigger (eg poll) for retry purposes.
+
+Add a struct io_buffer_list to struct io_br_sel, which is always on
+stack for the various users of it. This prevents the buffer list from
+leaking outside of that execution context, and additionally it enables
+kbuf to not even pass back the struct io_buffer_list if the given
+context isn't appropriately locked already.
+
+This doesn't fix any bugs, it's simply a defensive measure to prevent
+any issues with reuse of a buffer list.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-12-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring_types.h |    6 ----
+ io_uring/io_uring.c            |    6 ++--
+ io_uring/kbuf.c                |   27 ++++++++++++---------
+ io_uring/kbuf.h                |   16 ++++--------
+ io_uring/net.c                 |   51 +++++++++++++++++------------------------
+ io_uring/poll.c                |    6 ++--
+ io_uring/rw.c                  |   22 ++++++++---------
+ 7 files changed, 60 insertions(+), 74 deletions(-)
+
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -624,12 +624,6 @@ struct io_kiocb {
+               /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
+               struct io_buffer        *kbuf;
+-
+-              /*
+-               * stores buffer ID for ring provided buffers, valid IFF
+-               * REQ_F_BUFFER_RING is set.
+-               */
+-              struct io_buffer_list   *buf_list;
+       };
+       union {
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb
+       lockdep_assert_held(&req->ctx->uring_lock);
+       req_set_fail(req);
+-      io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list));
++      io_req_set_res(req, res, io_put_kbuf(req, res, NULL));
+       if (def->fail)
+               def->fail(req);
+       io_req_complete_defer(req);
+@@ -1921,11 +1921,11 @@ static void io_queue_async(struct io_kio
+       switch (io_arm_poll_handler(req, 0)) {
+       case IO_APOLL_READY:
+-              io_kbuf_recycle(req, req->buf_list, 0);
++              io_kbuf_recycle(req, NULL, 0);
+               io_req_task_queue(req);
+               break;
+       case IO_APOLL_ABORTED:
+-              io_kbuf_recycle(req, req->buf_list, 0);
++              io_kbuf_recycle(req, NULL, 0);
+               io_queue_iowq(req);
+               break;
+       case IO_APOLL_OK:
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -172,8 +172,8 @@ static struct io_br_sel io_ring_buffer_s
+       if (*len == 0 || *len > buf->len)
+               *len = buf->len;
+       req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+-      req->buf_list = bl;
+       req->buf_index = buf->bid;
++      sel.buf_list = bl;
+       sel.addr = u64_to_user_ptr(buf->addr);
+       if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
+@@ -187,9 +187,9 @@ static struct io_br_sel io_ring_buffer_s
+                * the transfer completes (or if we get -EAGAIN and must poll of
+                * retry).
+                */
+-              if (!io_kbuf_commit(req, bl, *len, 1))
++              if (!io_kbuf_commit(req, sel.buf_list, *len, 1))
+                       req->flags |= REQ_F_BUF_MORE;
+-              req->buf_list = NULL;
++              sel.buf_list = NULL;
+       }
+       return sel;
+ }
+@@ -307,7 +307,6 @@ static int io_ring_buffers_peek(struct i
+               req->flags |= REQ_F_BL_EMPTY;
+       req->flags |= REQ_F_BUFFER_RING;
+-      req->buf_list = bl;
+       return iov - arg->iovs;
+ }
+@@ -315,16 +314,15 @@ int io_buffers_select(struct io_kiocb *r
+                     struct io_br_sel *sel, unsigned int issue_flags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+-      struct io_buffer_list *bl;
+       int ret = -ENOENT;
+       io_ring_submit_lock(ctx, issue_flags);
+-      bl = io_buffer_get_list(ctx, req->buf_index);
+-      if (unlikely(!bl))
++      sel->buf_list = io_buffer_get_list(ctx, req->buf_index);
++      if (unlikely(!sel->buf_list))
+               goto out_unlock;
+-      if (bl->flags & IOBL_BUF_RING) {
+-              ret = io_ring_buffers_peek(req, arg, bl);
++      if (sel->buf_list->flags & IOBL_BUF_RING) {
++              ret = io_ring_buffers_peek(req, arg, sel->buf_list);
+               /*
+                * Don't recycle these buffers if we need to go through poll.
+                * Nobody else can use them anyway, and holding on to provided
+@@ -334,14 +332,17 @@ int io_buffers_select(struct io_kiocb *r
+                */
+               if (ret > 0) {
+                       req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE;
+-                      if (!io_kbuf_commit(req, bl, arg->out_len, ret))
++                      if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret))
+                               req->flags |= REQ_F_BUF_MORE;
+               }
+       } else {
+-              ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs);
++              ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs);
+       }
+ out_unlock:
+-      io_ring_submit_unlock(ctx, issue_flags);
++      if (issue_flags & IO_URING_F_UNLOCKED) {
++              sel->buf_list = NULL;
++              mutex_unlock(&ctx->uring_lock);
++      }
+       return ret;
+ }
+@@ -362,10 +363,12 @@ int io_buffers_peek(struct io_kiocb *req
+               ret = io_ring_buffers_peek(req, arg, bl);
+               if (ret > 0)
+                       req->flags |= REQ_F_BUFFERS_COMMIT;
++              sel->buf_list = bl;
+               return ret;
+       }
+       /* don't support multiple buffer selections for legacy */
++      sel->buf_list = NULL;
+       return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+ }
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -65,11 +65,14 @@ struct buf_sel_arg {
+ };
+ /*
+- * Return value from io_buffer_list selection. Just returns the error or
+- * user address for now, will be extended to return the buffer list in the
+- * future.
++ * Return value from io_buffer_list selection, to avoid stashing it in
++ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
++ * across execution contexts are fine. But for ring provided buffers, the
++ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
++ * persists, it's better to just keep the buffer local for those cases.
+  */
+ struct io_br_sel {
++      struct io_buffer_list *buf_list;
+       /*
+        * Some selection parts return the user address, others return an error.
+        */
+@@ -113,13 +116,6 @@ int io_pbuf_mmap(struct file *file, stru
+ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
+                                       struct io_buffer_list *bl)
+ {
+-      /*
+-       * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
+-       * the flag and hence ensure that bl->head doesn't get incremented.
+-       * If the tail has already been incremented, hang on to it.
+-       * The exception is partial io, that case we should increment bl->head
+-       * to monopolize the buffer.
+-       */
+       if (bl) {
+               req->buf_index = bl->bgid;
+               req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -442,7 +442,6 @@ int io_sendmsg_prep(struct io_kiocb *req
+                       return -EINVAL;
+               sr->msg_flags |= MSG_WAITALL;
+               sr->buf_group = req->buf_index;
+-              req->buf_list = NULL;
+               req->flags |= REQ_F_MULTISHOT;
+       }
+@@ -516,11 +515,11 @@ static inline bool io_send_finish(struct
+       unsigned int cflags;
+       if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+-              cflags = io_put_kbuf(req, sel->val, req->buf_list);
++              cflags = io_put_kbuf(req, sel->val, sel->buf_list);
+               goto finish;
+       }
+-      cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val));
++      cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));
+       /*
+        * Don't start new bundles if the buffer list is empty, or if the
+@@ -617,6 +616,7 @@ int io_send(struct io_kiocb *req, unsign
+               flags |= MSG_DONTWAIT;
+ retry_bundle:
++      sel.buf_list = NULL;
+       if (io_do_buffer_select(req)) {
+               struct buf_sel_arg arg = {
+                       .iovs = &kmsg->fast_iov,
+@@ -677,7 +677,7 @@ retry_bundle:
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
++                      return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+@@ -816,18 +816,8 @@ int io_recvmsg_prep(struct io_kiocb *req
+               req->flags |= REQ_F_NOWAIT;
+       if (sr->msg_flags & MSG_ERRQUEUE)
+               req->flags |= REQ_F_CLEAR_POLLIN;
+-      if (req->flags & REQ_F_BUFFER_SELECT) {
+-              /*
+-               * Store the buffer group for this multishot receive separately,
+-               * as if we end up doing an io-wq based issue that selects a
+-               * buffer, it has to be committed immediately and that will
+-               * clear ->buf_list. This means we lose the link to the buffer
+-               * list, and the eventual buffer put on completion then cannot
+-               * restore it.
+-               */
++      if (req->flags & REQ_F_BUFFER_SELECT)
+               sr->buf_group = req->buf_index;
+-              req->buf_list = NULL;
+-      }
+       if (sr->flags & IORING_RECV_MULTISHOT) {
+               if (!(req->flags & REQ_F_BUFFER_SELECT))
+                       return -EINVAL;
+@@ -873,7 +863,7 @@ static inline bool io_recv_finish(struct
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+               size_t this_ret = sel->val - sr->done_io;
+-              cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
++              cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
+               if (sr->retry_flags & IO_SR_MSG_RETRY)
+                       cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+               /* bundle with no more immediate buffers, we're done */
+@@ -892,7 +882,7 @@ static inline bool io_recv_finish(struct
+                       return false;
+               }
+       } else {
+-              cflags |= io_put_kbuf(req, sel->val, req->buf_list);
++              cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
+       }
+       /*
+@@ -1039,6 +1029,7 @@ int io_recvmsg(struct io_kiocb *req, uns
+               flags |= MSG_DONTWAIT;
+ retry_multishot:
++      sel.buf_list = NULL;
+       if (io_do_buffer_select(req)) {
+               size_t len = sr->len;
+@@ -1049,7 +1040,7 @@ retry_multishot:
+               if (req->flags & REQ_F_APOLL_MULTISHOT) {
+                       ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
+                       if (ret) {
+-                              io_kbuf_recycle(req, req->buf_list, issue_flags);
++                              io_kbuf_recycle(req, sel.buf_list, issue_flags);
+                               return ret;
+                       }
+               }
+@@ -1073,12 +1064,15 @@ retry_multishot:
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && force_nonblock) {
+-                      if (issue_flags & IO_URING_F_MULTISHOT) {
+-                              io_kbuf_recycle(req, req->buf_list, issue_flags);
++                      io_kbuf_recycle(req, sel.buf_list, issue_flags);
++                      if (issue_flags & IO_URING_F_MULTISHOT)
+                               return IOU_ISSUE_SKIP_COMPLETE;
+-                      }
+                       return -EAGAIN;
+               }
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->done_io += ret;
++                      return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
++              }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+               req_set_fail(req);
+@@ -1091,7 +1085,7 @@ retry_multishot:
+       else if (sr->done_io)
+               ret = sr->done_io;
+       else
+-              io_kbuf_recycle(req, req->buf_list, issue_flags);
++              io_kbuf_recycle(req, sel.buf_list, issue_flags);
+       sel.val = ret;
+       if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+@@ -1172,7 +1166,7 @@ int io_recv(struct io_kiocb *req, unsign
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       struct io_async_msghdr *kmsg = req->async_data;
+-      struct io_br_sel sel = { };
++      struct io_br_sel sel;
+       struct socket *sock;
+       unsigned flags;
+       int ret, min_ret = 0;
+@@ -1192,6 +1186,7 @@ int io_recv(struct io_kiocb *req, unsign
+               flags |= MSG_DONTWAIT;
+ retry_multishot:
++      sel.buf_list = NULL;
+       if (io_do_buffer_select(req)) {
+               sel.val = sr->len;
+               ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
+@@ -1211,18 +1206,16 @@ retry_multishot:
+       ret = sock_recvmsg(sock, &kmsg->msg, flags);
+       if (ret < min_ret) {
+               if (ret == -EAGAIN && force_nonblock) {
+-                      if (issue_flags & IO_URING_F_MULTISHOT) {
+-                              io_kbuf_recycle(req, req->buf_list, issue_flags);
++                      io_kbuf_recycle(req, sel.buf_list, issue_flags);
++                      if (issue_flags & IO_URING_F_MULTISHOT)
+                               return IOU_ISSUE_SKIP_COMPLETE;
+-                      }
+-
+                       return -EAGAIN;
+               }
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
++                      return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+@@ -1238,7 +1231,7 @@ out_free:
+       else if (sr->done_io)
+               ret = sr->done_io;
+       else
+-              io_kbuf_recycle(req, req->buf_list, issue_flags);
++              io_kbuf_recycle(req, sel.buf_list, issue_flags);
+       sel.val = ret;
+       if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -356,10 +356,10 @@ void io_poll_task_func(struct io_kiocb *
+       ret = io_poll_check_events(req, ts);
+       if (ret == IOU_POLL_NO_ACTION) {
+-              io_kbuf_recycle(req, req->buf_list, 0);
++              io_kbuf_recycle(req, NULL, 0);
+               return;
+       } else if (ret == IOU_POLL_REQUEUE) {
+-              io_kbuf_recycle(req, req->buf_list, 0);
++              io_kbuf_recycle(req, NULL, 0);
+               __io_poll_execute(req, 0);
+               return;
+       }
+@@ -753,7 +753,7 @@ int io_arm_poll_handler(struct io_kiocb
+       req->flags |= REQ_F_POLLED;
+       ipt.pt._qproc = io_async_queue_proc;
+-      io_kbuf_recycle(req, req->buf_list, issue_flags);
++      io_kbuf_recycle(req, NULL, issue_flags);
+       ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
+       if (ret)
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -522,7 +522,7 @@ void io_req_rw_complete(struct io_kiocb
+       io_req_io_end(req);
+       if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+-              req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list);
++              req->cqe.flags |= io_put_kbuf(req, req->cqe.res, NULL);
+       io_req_rw_cleanup(req, 0);
+       io_req_task_complete(req, ts);
+@@ -589,7 +589,7 @@ static inline void io_rw_done(struct kio
+ }
+ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
+-                     unsigned int issue_flags)
++                    struct io_br_sel *sel, unsigned int issue_flags)
+ {
+       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+       unsigned final_ret = io_fixup_rw_res(req, ret);
+@@ -604,7 +604,7 @@ static int kiocb_done(struct io_kiocb *r
+                        */
+                       io_req_io_end(req);
+                       io_req_set_res(req, final_ret,
+-                                     io_put_kbuf(req, ret, req->buf_list));
++                                     io_put_kbuf(req, ret, sel->buf_list));
+                       io_req_rw_cleanup(req, issue_flags);
+                       return IOU_OK;
+               }
+@@ -955,10 +955,10 @@ int io_read(struct io_kiocb *req, unsign
+       ret = __io_read(req, &sel, issue_flags);
+       if (ret >= 0)
+-              return kiocb_done(req, ret, issue_flags);
++              return kiocb_done(req, ret, &sel, issue_flags);
+       if (req->flags & REQ_F_BUFFERS_COMMIT)
+-              io_kbuf_recycle(req, req->buf_list, issue_flags);
++              io_kbuf_recycle(req, sel.buf_list, issue_flags);
+       return ret;
+ }
+@@ -986,17 +986,17 @@ int io_read_mshot(struct io_kiocb *req,
+                * Reset rw->len to 0 again to avoid clamping future mshot
+                * reads, in case the buffer size varies.
+                */
+-              if (io_kbuf_recycle(req, req->buf_list, issue_flags))
++              if (io_kbuf_recycle(req, sel.buf_list, issue_flags))
+                       rw->len = 0;
+               if (issue_flags & IO_URING_F_MULTISHOT)
+                       return IOU_ISSUE_SKIP_COMPLETE;
+               return -EAGAIN;
+       } else if (ret <= 0) {
+-              io_kbuf_recycle(req, req->buf_list, issue_flags);
++              io_kbuf_recycle(req, sel.buf_list, issue_flags);
+               if (ret < 0)
+                       req_set_fail(req);
+       } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+-              cflags = io_put_kbuf(req, ret, req->buf_list);
++              cflags = io_put_kbuf(req, ret, sel.buf_list);
+       } else {
+               /*
+                * Any successful return value will keep the multishot read
+@@ -1004,7 +1004,7 @@ int io_read_mshot(struct io_kiocb *req,
+                * we fail to post a CQE, or multishot is no longer set, then
+                * jump to the termination path. This request is then done.
+                */
+-              cflags = io_put_kbuf(req, ret, req->buf_list);
++              cflags = io_put_kbuf(req, ret, sel.buf_list);
+               rw->len = 0; /* similarly to above, reset len to 0 */
+               if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+@@ -1135,7 +1135,7 @@ int io_write(struct io_kiocb *req, unsig
+                       return -EAGAIN;
+               }
+ done:
+-              return kiocb_done(req, ret2, issue_flags);
++              return kiocb_done(req, ret2, NULL, issue_flags);
+       } else {
+ ret_eagain:
+               iov_iter_restore(&io->iter, &io->iter_state);
+@@ -1215,7 +1215,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+               if (!smp_load_acquire(&req->iopoll_completed))
+                       break;
+               nr_events++;
+-              req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list);
++              req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL);
+               if (req->opcode != IORING_OP_URING_CMD)
+                       io_req_rw_cleanup(req, 0);
+       }
diff --git a/queue-6.12/io_uring-kbuf-uninline-__io_put_kbufs.patch b/queue-6.12/io_uring-kbuf-uninline-__io_put_kbufs.patch
new file mode 100644 (file)
index 0000000..aeca0dd
--- /dev/null
@@ -0,0 +1,203 @@
+From e7f2c429fac51f341094e974e2858949f3670941 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:49 +0000
+Subject: io_uring/kbuf: uninline __io_put_kbufs
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 5d3e51240d89678b87b5dc6987ea572048a0f0eb upstream.
+
+__io_put_kbufs() and other helper functions are too large to be inlined,
+compilers would normally refuse to do so. Uninline it and move together
+with io_kbuf_commit into kbuf.c.
+
+
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/3dade7f55ad590e811aff83b1ec55c9c04e17b2b.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |   60 ++++++++++++++++++++++++++++++++++++++++++++++
+ io_uring/kbuf.h |   73 +++++++-------------------------------------------------
+ 2 files changed, 70 insertions(+), 63 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -20,6 +20,9 @@
+ /* BIDs are addressed by a 16-bit field in a CQE */
+ #define MAX_BIDS_PER_BGID (1 << 16)
++/* Mapped buffer ring, return io_uring_buf from head */
++#define io_ring_head_to_buf(br, head, mask)   &(br)->bufs[(head) & (mask)]
++
+ struct io_provide_buf {
+       struct file                     *file;
+       __u64                           addr;
+@@ -29,6 +32,34 @@ struct io_provide_buf {
+       __u16                           bid;
+ };
++bool io_kbuf_commit(struct io_kiocb *req,
++                  struct io_buffer_list *bl, int len, int nr)
++{
++      if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
++              return true;
++
++      req->flags &= ~REQ_F_BUFFERS_COMMIT;
++
++      if (unlikely(len < 0))
++              return true;
++
++      if (bl->flags & IOBL_INC) {
++              struct io_uring_buf *buf;
++
++              buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
++              if (WARN_ON_ONCE(len > buf->len))
++                      len = buf->len;
++              buf->len -= len;
++              if (buf->len) {
++                      buf->addr += len;
++                      return false;
++              }
++      }
++
++      bl->head += nr;
++      return true;
++}
++
+ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+                                                       unsigned int bgid)
+ {
+@@ -337,6 +368,35 @@ int io_buffers_peek(struct io_kiocb *req
+       return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+ }
++static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
++{
++      struct io_buffer_list *bl = req->buf_list;
++      bool ret = true;
++
++      if (bl) {
++              ret = io_kbuf_commit(req, bl, len, nr);
++              req->buf_index = bl->bgid;
++      }
++      req->flags &= ~REQ_F_BUFFER_RING;
++      return ret;
++}
++
++unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
++{
++      unsigned int ret;
++
++      ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
++
++      if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
++              io_kbuf_drop_legacy(req);
++              return ret;
++      }
++
++      if (!__io_put_kbuf_ring(req, len, nbufs))
++              ret |= IORING_CQE_F_BUF_MORE;
++      return ret;
++}
++
+ static int __io_remove_buffers(struct io_ring_ctx *ctx,
+                              struct io_buffer_list *bl, unsigned nbufs)
+ {
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -84,6 +84,10 @@ int io_register_pbuf_status(struct io_ri
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+ void io_kbuf_drop_legacy(struct io_kiocb *req);
++unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs);
++bool io_kbuf_commit(struct io_kiocb *req,
++                  struct io_buffer_list *bl, int len, int nr);
++
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+                                     unsigned long bgid);
+@@ -127,76 +131,19 @@ static inline bool io_kbuf_recycle(struc
+ /* Mapped buffer ring, return io_uring_buf from head */
+ #define io_ring_head_to_buf(br, head, mask)   &(br)->bufs[(head) & (mask)]
+-static inline bool io_kbuf_commit(struct io_kiocb *req,
+-                                struct io_buffer_list *bl, int len, int nr)
+-{
+-      if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
+-              return true;
+-
+-      req->flags &= ~REQ_F_BUFFERS_COMMIT;
+-
+-      if (unlikely(len < 0))
+-              return true;
+-
+-      if (bl->flags & IOBL_INC) {
+-              struct io_uring_buf *buf;
+-
+-              buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+-              if (len > buf->len)
+-                      len = buf->len;
+-              buf->len -= len;
+-              if (buf->len) {
+-                      buf->addr += len;
+-                      return false;
+-              }
+-      }
+-
+-      bl->head += nr;
+-      return true;
+-}
+-
+-static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
+-{
+-      struct io_buffer_list *bl = req->buf_list;
+-      bool ret = true;
+-
+-      if (bl) {
+-              ret = io_kbuf_commit(req, bl, len, nr);
+-              req->buf_index = bl->bgid;
+-      }
+-      if (ret && (req->flags & REQ_F_BUF_MORE))
+-              ret = false;
+-      req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE);
+-      return ret;
+-}
+-
+-static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
+-                                        int nbufs, unsigned issue_flags)
+-{
+-      unsigned int ret;
+-
+-      if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+-              return 0;
+-
+-      ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
+-      if (req->flags & REQ_F_BUFFER_RING) {
+-              if (!__io_put_kbuf_ring(req, len, nbufs))
+-                      ret |= IORING_CQE_F_BUF_MORE;
+-      } else {
+-              io_kbuf_drop_legacy(req);
+-      }
+-      return ret;
+-}
+-
+ static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
+                                      unsigned issue_flags)
+ {
+-      return __io_put_kbufs(req, len, 1, issue_flags);
++      if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
++              return 0;
++      return __io_put_kbufs(req, len, 1);
+ }
+ static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
+                                       int nbufs, unsigned issue_flags)
+ {
+-      return __io_put_kbufs(req, len, nbufs, issue_flags);
++      if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
++              return 0;
++      return __io_put_kbufs(req, len, nbufs);
+ }
+ #endif
diff --git a/queue-6.12/io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch b/queue-6.12/io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch
new file mode 100644 (file)
index 0000000..6f5a2f2
--- /dev/null
@@ -0,0 +1,66 @@
+From d888bd5f65a50de9571cfd6333050ac399b8af47 Mon Sep 17 00:00:00 2001
+From: Caleb Sander Mateos <csander@purestorage.com>
+Date: Thu, 4 Dec 2025 15:43:31 -0700
+Subject: io_uring/kbuf: use READ_ONCE() for userspace-mapped memory
+
+From: Caleb Sander Mateos <csander@purestorage.com>
+
+Commit 78385c7299f7514697d196b3233a91bd5e485591 upstream.
+
+The struct io_uring_buf elements in a buffer ring are in a memory region
+accessible from userspace. A malicious/buggy userspace program could
+therefore write to them at any time, so they should be accessed with
+READ_ONCE() in the kernel. Commit 98b6fa62c84f ("io_uring/kbuf: always
+use READ_ONCE() to read ring provided buffer lengths") already switched
+the reads of the len field to READ_ONCE(). Do the same for bid and addr.
+
+Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
+Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers")
+Cc: Joanne Koong <joannelkoong@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -44,7 +44,7 @@ static bool io_kbuf_inc_commit(struct io
+               buf_len -= this_len;
+               /* Stop looping for invalid buffer length of 0 */
+               if (buf_len || !this_len) {
+-                      buf->addr += this_len;
++                      buf->addr = READ_ONCE(buf->addr) + this_len;
+                       buf->len = buf_len;
+                       return false;
+               }
+@@ -185,9 +185,9 @@ static struct io_br_sel io_ring_buffer_s
+       if (*len == 0 || *len > buf_len)
+               *len = buf_len;
+       req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+-      req->buf_index = buf->bid;
++      req->buf_index = READ_ONCE(buf->bid);
+       sel.buf_list = bl;
+-      sel.addr = u64_to_user_ptr(buf->addr);
++      sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr));
+       if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
+               /*
+@@ -278,7 +278,7 @@ static int io_ring_buffers_peek(struct i
+       if (!arg->max_len)
+               arg->max_len = INT_MAX;
+-      req->buf_index = buf->bid;
++      req->buf_index = READ_ONCE(buf->bid);
+       do {
+               u32 len = READ_ONCE(buf->len);
+@@ -293,7 +293,7 @@ static int io_ring_buffers_peek(struct i
+                       }
+               }
+-              iov->iov_base = u64_to_user_ptr(buf->addr);
++              iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr));
+               iov->iov_len = len;
+               iov++;
diff --git a/queue-6.12/io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch b/queue-6.12/io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch
new file mode 100644 (file)
index 0000000..a45c2ec
--- /dev/null
@@ -0,0 +1,159 @@
+From ae354ce85590e11d18bf61f13d951c2ee249b716 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:36 -0600
+Subject: io_uring/kbuf: use struct io_br_sel for multiple buffers picking
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 429884ff35f75a8ac3e8f822f483e220e3ea6394 upstream.
+
+The networking side uses bundles, which is picking multiple buffers at
+the same time. Pass in struct io_br_sel to those helpers.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-9-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |    5 +++--
+ io_uring/kbuf.h |    5 +++--
+ io_uring/net.c  |   31 +++++++++++++++++--------------
+ 3 files changed, 23 insertions(+), 18 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -312,7 +312,7 @@ static int io_ring_buffers_peek(struct i
+ }
+ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+-                    unsigned int issue_flags)
++                    struct io_br_sel *sel, unsigned int issue_flags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_buffer_list *bl;
+@@ -345,7 +345,8 @@ out_unlock:
+       return ret;
+ }
+-int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
++int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
++                  struct io_br_sel *sel)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_buffer_list *bl;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -82,8 +82,9 @@ struct io_br_sel {
+ struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
+                                unsigned int issue_flags);
+ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+-                    unsigned int issue_flags);
+-int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
++                    struct io_br_sel *sel, unsigned int issue_flags);
++int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
++                  struct io_br_sel *sel);
+ void io_destroy_buffers(struct io_ring_ctx *ctx);
+ int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -597,6 +597,7 @@ int io_send(struct io_kiocb *req, unsign
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       struct io_async_msghdr *kmsg = req->async_data;
++      struct io_br_sel sel = { };
+       struct socket *sock;
+       unsigned flags;
+       int min_ret = 0;
+@@ -633,7 +634,7 @@ retry_bundle:
+               else
+                       arg.mode |= KBUF_MODE_EXPAND;
+-              ret = io_buffers_select(req, &arg, issue_flags);
++              ret = io_buffers_select(req, &arg, &sel, issue_flags);
+               if (unlikely(ret < 0))
+                       return ret;
+@@ -1015,6 +1016,7 @@ int io_recvmsg(struct io_kiocb *req, uns
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       struct io_async_msghdr *kmsg = req->async_data;
++      struct io_br_sel sel = { };
+       struct socket *sock;
+       unsigned flags;
+       int ret, min_ret = 0;
+@@ -1035,7 +1037,6 @@ int io_recvmsg(struct io_kiocb *req, uns
+ retry_multishot:
+       if (io_do_buffer_select(req)) {
+-              struct io_br_sel sel;
+               size_t len = sr->len;
+               sel = io_buffer_select(req, &len, issue_flags);
+@@ -1096,7 +1097,7 @@ retry_multishot:
+ }
+ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+-                            size_t *len, unsigned int issue_flags)
++                            struct io_br_sel *sel, unsigned int issue_flags)
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       int ret;
+@@ -1120,10 +1121,12 @@ static int io_recv_buf_select(struct io_
+                       arg.mode |= KBUF_MODE_FREE;
+               }
+-              if (kmsg->msg.msg_inq > 1)
+-                      arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
++              if (sel->val)
++                      arg.max_len = sel->val;
++              else if (kmsg->msg.msg_inq > 1)
++                      arg.max_len = min_not_zero(sel->val, (size_t) kmsg->msg.msg_inq);
+-              ret = io_buffers_peek(req, &arg);
++              ret = io_buffers_peek(req, &arg, sel);
+               if (unlikely(ret < 0))
+                       return ret;
+@@ -1144,14 +1147,13 @@ static int io_recv_buf_select(struct io_
+               iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
+                               arg.out_len);
+       } else {
+-              struct io_br_sel sel;
++              size_t len = sel->val;
+-              *len = sr->len;
+-              sel = io_buffer_select(req, len, issue_flags);
+-              if (!sel.addr)
++              *sel = io_buffer_select(req, &len, issue_flags);
++              if (!sel->addr)
+                       return -ENOBUFS;
+-              sr->buf = sel.addr;
+-              sr->len = *len;
++              sr->buf = sel->addr;
++              sr->len = len;
+ map_ubuf:
+               ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
+                                 &kmsg->msg.msg_iter);
+@@ -1166,11 +1168,11 @@ int io_recv(struct io_kiocb *req, unsign
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       struct io_async_msghdr *kmsg = req->async_data;
++      struct io_br_sel sel = { };
+       struct socket *sock;
+       unsigned flags;
+       int ret, min_ret = 0;
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+-      size_t len = sr->len;
+       bool mshot_finished;
+       if (!(req->flags & REQ_F_POLLED) &&
+@@ -1187,7 +1189,8 @@ int io_recv(struct io_kiocb *req, unsign
+ retry_multishot:
+       if (io_do_buffer_select(req)) {
+-              ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
++              sel.val = sr->len;
++              ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
+               if (unlikely(ret < 0)) {
+                       kmsg->msg.msg_inq = -1;
+                       goto out_free;
diff --git a/queue-6.12/io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch b/queue-6.12/io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch
new file mode 100644 (file)
index 0000000..7dced38
--- /dev/null
@@ -0,0 +1,47 @@
+From e643249c87544d67259296480d7352d80b097d26 Mon Sep 17 00:00:00 2001
+From: Joanne Koong <joannelkoong@gmail.com>
+Date: Thu, 4 Dec 2025 15:54:50 -0800
+Subject: io_uring/kbuf: use WRITE_ONCE() for userspace-shared buffer ring fields
+
+From: Joanne Koong <joannelkoong@gmail.com>
+
+Commit a4c694bfc2455e82b7caf6045ca893d123e0ed11 upstream.
+
+buf->addr and buf->len reside in memory shared with userspace. They
+should be written with WRITE_ONCE() to guarantee atomic stores and
+prevent tearing or other unsafe compiler optimizations.
+
+Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
+Cc: Caleb Sander Mateos <csander@purestorage.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -44,11 +44,11 @@ static bool io_kbuf_inc_commit(struct io
+               buf_len -= this_len;
+               /* Stop looping for invalid buffer length of 0 */
+               if (buf_len || !this_len) {
+-                      buf->addr = READ_ONCE(buf->addr) + this_len;
+-                      buf->len = buf_len;
++                      WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
++                      WRITE_ONCE(buf->len, buf_len);
+                       return false;
+               }
+-              buf->len = 0;
++              WRITE_ONCE(buf->len, 0);
+               bl->head++;
+               len -= this_len;
+       }
+@@ -289,7 +289,7 @@ static int io_ring_buffers_peek(struct i
+                               arg->partial_map = 1;
+                               if (iov != arg->iovs)
+                                       break;
+-                              buf->len = len;
++                              WRITE_ONCE(buf->len, len);
+                       }
+               }
diff --git a/queue-6.12/io_uring-net-clarify-io_recv_buf_select-return-value.patch b/queue-6.12/io_uring-net-clarify-io_recv_buf_select-return-value.patch
new file mode 100644 (file)
index 0000000..5b1ea6c
--- /dev/null
@@ -0,0 +1,28 @@
+From 3b1c89768957bd90b8427d9c51648aaa87d8d2ec Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:32 -0600
+Subject: io_uring/net: clarify io_recv_buf_select() return value
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit b22743f29b7d3dc68c68f9bd39a1b2600ec6434e upstream.
+
+It returns 0 on success, less than zero on error.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1192,7 +1192,7 @@ int io_recv(struct io_kiocb *req, unsign
+ retry_multishot:
+       if (io_do_buffer_select(req)) {
+               ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
+-              if (unlikely(ret)) {
++              if (unlikely(ret < 0)) {
+                       kmsg->msg.msg_inq = -1;
+                       goto out_free;
+               }
diff --git a/queue-6.12/io_uring-net-correct-type-for-min_not_zero-cast.patch b/queue-6.12/io_uring-net-correct-type-for-min_not_zero-cast.patch
new file mode 100644 (file)
index 0000000..7e8c249
--- /dev/null
@@ -0,0 +1,33 @@
+From 73284d7ab3a12d22b5a8bb32a48a507b87d01c7c Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 2 Sep 2025 05:19:42 -0600
+Subject: io_uring/net: correct type for min_not_zero() cast
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 37500634d0a8f931e15879760fb70f9b6f5d5370 upstream.
+
+The kernel test robot reports that after a recent change, the signedness
+of a min_not_zero() compare is now incorrect. Fix that up and cast to
+the right type.
+
+Fixes: 429884ff35f7 ("io_uring/kbuf: use struct io_br_sel for multiple buffers picking")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202509020426.WJtrdwOU-lkp@intel.com/
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1122,7 +1122,7 @@ static int io_recv_buf_select(struct io_
+               if (sel->val)
+                       arg.max_len = sel->val;
+               else if (kmsg->msg.msg_inq > 1)
+-                      arg.max_len = min_not_zero(sel->val, (size_t) kmsg->msg.msg_inq);
++                      arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);
+               ret = io_buffers_peek(req, &arg, sel);
+               if (unlikely(ret < 0))
diff --git a/queue-6.12/io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch b/queue-6.12/io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch
new file mode 100644 (file)
index 0000000..09e7172
--- /dev/null
@@ -0,0 +1,50 @@
+From d4f37a9a013f83b00d4dcf27a5bf2e04eec43af3 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:31 -0600
+Subject: io_uring/net: don't use io_net_kbuf_recyle() for non-provided cases
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 15ba5e51e689ceb1c2e921c5180a70c88cfdc8e9 upstream.
+
+A previous commit used io_net_kbuf_recyle() for any network helper that
+did IO and needed partial retry. However, that's only needed if the
+opcode does buffer selection, which isn't supported for sendzc, sendmsg_zc,
+or sendmsg. Just remove them - they don't do any harm, but it is a bit
+confusing when reading the code.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -578,7 +578,7 @@ int io_sendmsg(struct io_kiocb *req, uns
+                       kmsg->msg.msg_controllen = 0;
+                       kmsg->msg.msg_control = NULL;
+                       sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, kmsg, ret);
++                      return -EAGAIN;
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+@@ -1448,7 +1448,7 @@ int io_send_zc(struct io_kiocb *req, uns
+                       zc->len -= ret;
+                       zc->buf += ret;
+                       zc->done_io += ret;
+-                      return io_net_kbuf_recyle(req, kmsg, ret);
++                      return -EAGAIN;
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
+@@ -1508,7 +1508,7 @@ int io_sendmsg_zc(struct io_kiocb *req,
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
+-                      return io_net_kbuf_recyle(req, kmsg, ret);
++                      return -EAGAIN;
+               }
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
diff --git a/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch b/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch
new file mode 100644 (file)
index 0000000..1619ebf
--- /dev/null
@@ -0,0 +1,117 @@
+From 31717e9230807656724fc50bb8f95ba51824ff2f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:37 -0600
+Subject: io_uring/net: use struct io_br_sel->val as the recv finish value
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 58d815091890e83aa2f83a9cce1fdfe3af02c7b4 upstream.
+
+Currently a pointer is passed in to the 'ret' in the receive handlers,
+but since we already have a value field in io_br_sel, just use that.
+This is also in preparation for needing to pass in struct io_br_sel
+to io_recv_finish() anyway.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-10-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |   31 +++++++++++++++++--------------
+ 1 file changed, 17 insertions(+), 14 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -857,9 +857,10 @@ int io_recvmsg_prep(struct io_kiocb *req
+  * Returns true if it is actually finished, or false if it should run
+  * again (for multishot).
+  */
+-static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
++static inline bool io_recv_finish(struct io_kiocb *req,
+                                 struct io_async_msghdr *kmsg,
+-                                bool mshot_finished, unsigned issue_flags)
++                                struct io_br_sel *sel, bool mshot_finished,
++                                unsigned issue_flags)
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       unsigned int cflags = 0;
+@@ -868,7 +869,7 @@ static inline bool io_recv_finish(struct
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+-              size_t this_ret = *ret - sr->done_io;
++              size_t this_ret = sel->val - sr->done_io;
+               cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
+               if (sr->retry_flags & IO_SR_MSG_RETRY)
+@@ -889,7 +890,7 @@ static inline bool io_recv_finish(struct
+                       return false;
+               }
+       } else {
+-              cflags |= io_put_kbuf(req, *ret, req->buf_list);
++              cflags |= io_put_kbuf(req, sel->val, req->buf_list);
+       }
+       /*
+@@ -897,7 +898,7 @@ static inline bool io_recv_finish(struct
+        * receive from this socket.
+        */
+       if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
+-          io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
++          io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
+               int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+               io_mshot_prep_retry(req, kmsg);
+@@ -910,20 +911,20 @@ static inline bool io_recv_finish(struct
+                       mshot_retry_ret = IOU_REQUEUE;
+               }
+               if (issue_flags & IO_URING_F_MULTISHOT)
+-                      *ret = mshot_retry_ret;
++                      sel->val = mshot_retry_ret;
+               else
+-                      *ret = -EAGAIN;
++                      sel->val = -EAGAIN;
+               return true;
+       }
+       /* Finish the request / stop multishot. */
+ finish:
+-      io_req_set_res(req, *ret, cflags);
++      io_req_set_res(req, sel->val, cflags);
+       if (issue_flags & IO_URING_F_MULTISHOT)
+-              *ret = IOU_STOP_MULTISHOT;
++              sel->val = IOU_STOP_MULTISHOT;
+       else
+-              *ret = IOU_OK;
++              sel->val = IOU_OK;
+       io_req_msg_cleanup(req, issue_flags);
+       return true;
+ }
+@@ -1090,10 +1091,11 @@ retry_multishot:
+       else
+               io_kbuf_recycle(req, req->buf_list, issue_flags);
+-      if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
++      sel.val = ret;
++      if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+               goto retry_multishot;
+-      return ret;
++      return sel.val;
+ }
+ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+@@ -1236,10 +1238,11 @@ out_free:
+       else
+               io_kbuf_recycle(req, req->buf_list, issue_flags);
+-      if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
++      sel.val = ret;
++      if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+               goto retry_multishot;
+-      return ret;
++      return sel.val;
+ }
+ void io_send_zc_cleanup(struct io_kiocb *req)
diff --git a/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch b/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch
new file mode 100644 (file)
index 0000000..4cbefb7
--- /dev/null
@@ -0,0 +1,83 @@
+From 1aebf001d6fcb771b6318bca438fd205993d2bc1 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:38 -0600
+Subject: io_uring/net: use struct io_br_sel->val as the send finish value
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 461382a51fb83a9c4b7c50e1f10d3ca94edff25e upstream.
+
+Currently a pointer is passed in to the 'ret' in the send mshot handler,
+but since we already have a value field in io_br_sel, just use that.
+This is also in preparation for needing to pass in struct io_br_sel
+to io_send_finish() anyway.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-11-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |   22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -507,19 +507,20 @@ static int io_net_kbuf_recyle(struct io_
+       return -EAGAIN;
+ }
+-static inline bool io_send_finish(struct io_kiocb *req, int *ret,
+-                                struct io_async_msghdr *kmsg)
++static inline bool io_send_finish(struct io_kiocb *req,
++                                struct io_async_msghdr *kmsg,
++                                struct io_br_sel *sel)
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+-      bool bundle_finished = *ret <= 0;
++      bool bundle_finished = sel->val <= 0;
+       unsigned int cflags;
+       if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+-              cflags = io_put_kbuf(req, *ret, req->buf_list);
++              cflags = io_put_kbuf(req, sel->val, req->buf_list);
+               goto finish;
+       }
+-      cflags = io_put_kbufs(req, *ret, req->buf_list, io_bundle_nbufs(kmsg, *ret));
++      cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val));
+       /*
+        * Don't start new bundles if the buffer list is empty, or if the
+@@ -532,15 +533,15 @@ static inline bool io_send_finish(struct
+        * Fill CQE for this receive and see if we should keep trying to
+        * receive from this socket.
+        */
+-      if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
++      if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
+               io_mshot_prep_retry(req, kmsg);
+               return false;
+       }
+       /* Otherwise stop bundle and use the current result. */
+ finish:
+-      io_req_set_res(req, *ret, cflags);
+-      *ret = IOU_OK;
++      io_req_set_res(req, sel->val, cflags);
++      sel->val = IOU_OK;
+       return true;
+ }
+@@ -687,11 +688,12 @@ retry_bundle:
+       else if (sr->done_io)
+               ret = sr->done_io;
+-      if (!io_send_finish(req, &ret, kmsg))
++      sel.val = ret;
++      if (!io_send_finish(req, kmsg, &sel))
+               goto retry_bundle;
+       io_req_msg_cleanup(req, issue_flags);
+-      return ret;
++      return sel.val;
+ }
+ static int io_recvmsg_mshot_prep(struct io_kiocb *req,
diff --git a/queue-6.12/io_uring-remove-async-poll-related-provided-buffer-recycles.patch b/queue-6.12/io_uring-remove-async-poll-related-provided-buffer-recycles.patch
new file mode 100644 (file)
index 0000000..11e0cec
--- /dev/null
@@ -0,0 +1,55 @@
+From d863f651b523f217ccdf7887364e993610f2b880 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:40 -0600
+Subject: io_uring: remove async/poll related provided buffer recycles
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit e973837b54024f070b2b48c7ee9725548548257a upstream.
+
+These aren't necessary anymore, get rid of them.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-13-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    2 --
+ io_uring/poll.c     |    4 ----
+ 2 files changed, 6 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1921,11 +1921,9 @@ static void io_queue_async(struct io_kio
+       switch (io_arm_poll_handler(req, 0)) {
+       case IO_APOLL_READY:
+-              io_kbuf_recycle(req, NULL, 0);
+               io_req_task_queue(req);
+               break;
+       case IO_APOLL_ABORTED:
+-              io_kbuf_recycle(req, NULL, 0);
+               io_queue_iowq(req);
+               break;
+       case IO_APOLL_OK:
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -356,10 +356,8 @@ void io_poll_task_func(struct io_kiocb *
+       ret = io_poll_check_events(req, ts);
+       if (ret == IOU_POLL_NO_ACTION) {
+-              io_kbuf_recycle(req, NULL, 0);
+               return;
+       } else if (ret == IOU_POLL_REQUEUE) {
+-              io_kbuf_recycle(req, NULL, 0);
+               __io_poll_execute(req, 0);
+               return;
+       }
+@@ -753,8 +751,6 @@ int io_arm_poll_handler(struct io_kiocb
+       req->flags |= REQ_F_POLLED;
+       ipt.pt._qproc = io_async_queue_proc;
+-      io_kbuf_recycle(req, NULL, issue_flags);
+-
+       ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
+       if (ret)
+               return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
diff --git a/queue-6.12/io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch b/queue-6.12/io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch
new file mode 100644 (file)
index 0000000..e868993
--- /dev/null
@@ -0,0 +1,52 @@
+From fcc91899be4ad1e71f729a11d3939497bb0612e4 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 15 Oct 2025 13:38:53 -0600
+Subject: io_uring/rw: check for NULL io_br_sel when putting a buffer
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 18d6b1743eafeb3fb1e0ea5a2b7fd0a773d525a8 upstream.
+
+Both the read and write side use kiocb_done() to finish a request, and
+kiocb_done() will call io_put_kbuf() in case a provided buffer was used
+for the request. Provided buffers are not supported for writes, hence
+NULL is being passed in. This normally works fine, as io_put_kbuf()
+won't actually use the value unless REQ_F_BUFFER_RING or
+REQ_F_BUFFER_SELECTED is set in the request flags. But depending on
+compiler (or whether or not CONFIG_CC_OPTIMIZE_FOR_SIZE is set), that
+may be done even though the value is never used. This will then cause a
+NULL pointer dereference.
+
+Make it a bit more obvious and check for a NULL io_br_sel, and don't
+even bother calling io_put_kbuf() for that case.
+
+Fixes: 5fda51255439 ("io_uring/kbuf: switch to storing struct io_buffer_list locally")
+Reported-by: David Howells <dhowells@redhat.com>
+Tested-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rw.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -598,13 +598,16 @@ static int kiocb_done(struct io_kiocb *r
+               req->file->f_pos = rw->kiocb.ki_pos;
+       if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
+               if (!__io_complete_rw_common(req, ret)) {
++                      u32 cflags = 0;
++
+                       /*
+                        * Safe to call io_end from here as we're inline
+                        * from the submission path.
+                        */
+                       io_req_io_end(req);
+-                      io_req_set_res(req, final_ret,
+-                                     io_put_kbuf(req, ret, sel->buf_list));
++                      if (sel)
++                              cflags = io_put_kbuf(req, ret, sel->buf_list);
++                      io_req_set_res(req, final_ret, cflags);
+                       io_req_rw_cleanup(req, issue_flags);
+                       return IOU_OK;
+               }
diff --git a/queue-6.12/series b/queue-6.12/series
new file mode 100644 (file)
index 0000000..3bf7613
--- /dev/null
@@ -0,0 +1,25 @@
+io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch
+io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch
+io_uring-kbuf-simplify-__io_put_kbuf.patch
+io_uring-kbuf-remove-legacy-kbuf-caching.patch
+io_uring-kbuf-open-code-__io_put_kbuf.patch
+io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch
+io_uring-kbuf-uninline-__io_put_kbufs.patch
+io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch
+io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch
+io_uring-net-clarify-io_recv_buf_select-return-value.patch
+io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch
+io_uring-kbuf-introduce-struct-io_br_sel.patch
+io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch
+io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch
+io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch
+io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch
+io_uring-remove-async-poll-related-provided-buffer-recycles.patch
+io_uring-net-correct-type-for-min_not_zero-cast.patch
+io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch
+io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch
+io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch
+io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch
+io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch
+io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch
+io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch