--- /dev/null
+From 07642299c8028add4bd03aa4f85d7bb736b865b2 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 27 Aug 2025 15:27:30 -0600
+Subject: io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 98b6fa62c84f2e129161e976a5b9b3cb4ccd117b upstream.
+
+Since the buffers are mapped from userspace, it is prudent to use
+READ_ONCE() to read the value into a local variable, and use that for
+any other actions taken. Having a stable read of the buffer length
+avoids worrying about it changing after checking, or being read multiple
+times.
+
+Similarly, the buffer may well change in between it being picked and
+being committed. Ensure the looping for incremental ring buffer commit
+stops if it hits a zero sized buffer, as no further progress can be made
+at that point.
+
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/
+Reported-by: Qingyue Zhang <chunzhennn@qq.com>
+Reported-by: Suoxing Zhang <aftern00n@qq.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 20 +++++++++++++-------
+ 1 file changed, 13 insertions(+), 7 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -36,15 +36,19 @@ static bool io_kbuf_inc_commit(struct io
+ {
+ while (len) {
+ struct io_uring_buf *buf;
+- u32 this_len;
++ u32 buf_len, this_len;
+
+ buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+- this_len = min_t(u32, len, buf->len);
+- buf->len -= this_len;
+- if (buf->len) {
++ buf_len = READ_ONCE(buf->len);
++ this_len = min_t(u32, len, buf_len);
++ buf_len -= this_len;
++ /* Stop looping for invalid buffer length of 0 */
++ if (buf_len || !this_len) {
+ buf->addr += this_len;
++ buf->len = buf_len;
+ return false;
+ }
++ buf->len = 0;
+ bl->head++;
+ len -= this_len;
+ }
+@@ -167,6 +171,7 @@ static struct io_br_sel io_ring_buffer_s
+ __u16 tail, head = bl->head;
+ struct io_br_sel sel = { };
+ struct io_uring_buf *buf;
++ u32 buf_len;
+
+ tail = smp_load_acquire(&br->tail);
+ if (unlikely(tail == head))
+@@ -176,8 +181,9 @@ static struct io_br_sel io_ring_buffer_s
+ req->flags |= REQ_F_BL_EMPTY;
+
+ buf = io_ring_head_to_buf(br, head, bl->mask);
+- if (*len == 0 || *len > buf->len)
+- *len = buf->len;
++ buf_len = READ_ONCE(buf->len);
++ if (*len == 0 || *len > buf_len)
++ *len = buf_len;
+ req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+ req->buf_index = buf->bid;
+ sel.buf_list = bl;
+@@ -274,7 +280,7 @@ static int io_ring_buffers_peek(struct i
+
+ req->buf_index = buf->bid;
+ do {
+- u32 len = buf->len;
++ u32 len = READ_ONCE(buf->len);
+
+ /* truncate end piece, if needed, for non partial buffers */
+ if (len > arg->max_len) {
--- /dev/null
+From 9f348b1b92e7a9a7e90c28e943a528e79e84cc12 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:30 -0600
+Subject: io_uring/kbuf: drop 'issue_flags' from io_put_kbuf(s)() arguments
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 5e73b402cbbea51bcab90fc5ee6c6d06af76ae1b upstream.
+
+Picking multiple buffers always requires the ring lock to be held across
+the operation, so there's no need to pass in the issue_flags to
+io_put_kbufs(). On the single buffer side, if the initial picking of a
+ring buffer was unlocked, then it will have been committed already. For
+legacy buffers, no locking is required, as they will simply be freed.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 2 +-
+ io_uring/kbuf.h | 5 ++---
+ io_uring/net.c | 14 ++++++--------
+ io_uring/rw.c | 10 +++++-----
+ 4 files changed, 14 insertions(+), 17 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb
+ lockdep_assert_held(&req->ctx->uring_lock);
+
+ req_set_fail(req);
+- io_req_set_res(req, res, io_put_kbuf(req, res, IO_URING_F_UNLOCKED));
++ io_req_set_res(req, res, io_put_kbuf(req, res));
+ if (def->fail)
+ def->fail(req);
+ io_req_complete_defer(req);
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -131,8 +131,7 @@ static inline bool io_kbuf_recycle(struc
+ /* Mapped buffer ring, return io_uring_buf from head */
+ #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
+
+-static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
+- unsigned issue_flags)
++static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len)
+ {
+ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+ return 0;
+@@ -140,7 +139,7 @@ static inline unsigned int io_put_kbuf(s
+ }
+
+ static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
+- int nbufs, unsigned issue_flags)
++ int nbufs)
+ {
+ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+ return 0;
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -508,19 +508,18 @@ static int io_net_kbuf_recyle(struct io_
+ }
+
+ static inline bool io_send_finish(struct io_kiocb *req, int *ret,
+- struct io_async_msghdr *kmsg,
+- unsigned issue_flags)
++ struct io_async_msghdr *kmsg)
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ bool bundle_finished = *ret <= 0;
+ unsigned int cflags;
+
+ if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+- cflags = io_put_kbuf(req, *ret, issue_flags);
++ cflags = io_put_kbuf(req, *ret);
+ goto finish;
+ }
+
+- cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);
++ cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret));
+
+ /*
+ * Don't start new bundles if the buffer list is empty, or if the
+@@ -687,7 +686,7 @@ retry_bundle:
+ else if (sr->done_io)
+ ret = sr->done_io;
+
+- if (!io_send_finish(req, &ret, kmsg, issue_flags))
++ if (!io_send_finish(req, &ret, kmsg))
+ goto retry_bundle;
+
+ io_req_msg_cleanup(req, issue_flags);
+@@ -870,8 +869,7 @@ static inline bool io_recv_finish(struct
+ if (sr->flags & IORING_RECVSEND_BUNDLE) {
+ size_t this_ret = *ret - sr->done_io;
+
+- cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
+- issue_flags);
++ cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret));
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
+ cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+ /* bundle with no more immediate buffers, we're done */
+@@ -890,7 +888,7 @@ static inline bool io_recv_finish(struct
+ return false;
+ }
+ } else {
+- cflags |= io_put_kbuf(req, *ret, issue_flags);
++ cflags |= io_put_kbuf(req, *ret);
+ }
+
+ /*
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -520,7 +520,7 @@ void io_req_rw_complete(struct io_kiocb
+ io_req_io_end(req);
+
+ if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+- req->cqe.flags |= io_put_kbuf(req, req->cqe.res, 0);
++ req->cqe.flags |= io_put_kbuf(req, req->cqe.res);
+
+ io_req_rw_cleanup(req, 0);
+ io_req_task_complete(req, ts);
+@@ -602,7 +602,7 @@ static int kiocb_done(struct io_kiocb *r
+ */
+ io_req_io_end(req);
+ io_req_set_res(req, final_ret,
+- io_put_kbuf(req, ret, issue_flags));
++ io_put_kbuf(req, ret));
+ io_req_rw_cleanup(req, issue_flags);
+ return IOU_OK;
+ }
+@@ -991,7 +991,7 @@ int io_read_mshot(struct io_kiocb *req,
+ if (ret < 0)
+ req_set_fail(req);
+ } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+- cflags = io_put_kbuf(req, ret, issue_flags);
++ cflags = io_put_kbuf(req, ret);
+ } else {
+ /*
+ * Any successful return value will keep the multishot read
+@@ -999,7 +999,7 @@ int io_read_mshot(struct io_kiocb *req,
+ * we fail to post a CQE, or multishot is no longer set, then
+ * jump to the termination path. This request is then done.
+ */
+- cflags = io_put_kbuf(req, ret, issue_flags);
++ cflags = io_put_kbuf(req, ret);
+ rw->len = 0; /* similarly to above, reset len to 0 */
+
+ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+@@ -1210,7 +1210,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+ if (!smp_load_acquire(&req->iopoll_completed))
+ break;
+ nr_events++;
+- req->cqe.flags = io_put_kbuf(req, req->cqe.res, 0);
++ req->cqe.flags = io_put_kbuf(req, req->cqe.res);
+ if (req->opcode != IORING_OP_URING_CMD)
+ io_req_rw_cleanup(req, 0);
+ }
--- /dev/null
+From 145c6a4a6e0d6d44b2bf75e60cb495c3d05d0461 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 10 Mar 2025 14:01:49 -0600
+Subject: io_uring/kbuf: enable bundles for incrementally consumed buffers
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit cf9536e550dd243a1681fdbf804221527da20a80 upstream.
+
+The original support for incrementally consumed buffers didn't allow it
+to be used with bundles, with the assumption being that incremental
+buffers are generally larger, and hence there's less of a need to
+support it.
+
+But that assumption may not be correct - it's perfectly viable to use
+smaller buffers with incremental consumption, and there may be valid
+reasons for an application or framework to do so.
+
+As there's really no need to explicitly disable bundles with
+incrementally consumed buffers, allow it. This actually makes the peek
+side cheaper and simpler, with the completion side basically the same,
+just needing to iterate for the consumed length.
+
+Reported-by: Norman Maurer <norman_maurer@apple.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 56 ++++++++++++++++++++++++++------------------------------
+ 1 file changed, 26 insertions(+), 30 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -32,6 +32,25 @@ struct io_provide_buf {
+ __u16 bid;
+ };
+
++static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
++{
++ while (len) {
++ struct io_uring_buf *buf;
++ u32 this_len;
++
++ buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
++ this_len = min_t(u32, len, buf->len);
++ buf->len -= this_len;
++ if (buf->len) {
++ buf->addr += this_len;
++ return false;
++ }
++ bl->head++;
++ len -= this_len;
++ }
++ return true;
++}
++
+ bool io_kbuf_commit(struct io_kiocb *req,
+ struct io_buffer_list *bl, int len, int nr)
+ {
+@@ -42,20 +61,8 @@ bool io_kbuf_commit(struct io_kiocb *req
+
+ if (unlikely(len < 0))
+ return true;
+-
+- if (bl->flags & IOBL_INC) {
+- struct io_uring_buf *buf;
+-
+- buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+- if (WARN_ON_ONCE(len > buf->len))
+- len = buf->len;
+- buf->len -= len;
+- if (buf->len) {
+- buf->addr += len;
+- return false;
+- }
+- }
+-
++ if (bl->flags & IOBL_INC)
++ return io_kbuf_inc_commit(bl, len);
+ bl->head += nr;
+ return true;
+ }
+@@ -235,25 +242,14 @@ static int io_ring_buffers_peek(struct i
+ buf = io_ring_head_to_buf(br, head, bl->mask);
+ if (arg->max_len) {
+ u32 len = READ_ONCE(buf->len);
++ size_t needed;
+
+ if (unlikely(!len))
+ return -ENOBUFS;
+- /*
+- * Limit incremental buffers to 1 segment. No point trying
+- * to peek ahead and map more than we need, when the buffers
+- * themselves should be large when setup with
+- * IOU_PBUF_RING_INC.
+- */
+- if (bl->flags & IOBL_INC) {
+- nr_avail = 1;
+- } else {
+- size_t needed;
+-
+- needed = (arg->max_len + len - 1) / len;
+- needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
+- if (nr_avail > needed)
+- nr_avail = needed;
+- }
++ needed = (arg->max_len + len - 1) / len;
++ needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
++ if (nr_avail > needed)
++ nr_avail = needed;
+ }
+
+ /*
--- /dev/null
+From 17b4417dca061d29ab2975564e1a33ce7c4fa4d5 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 19 Mar 2026 14:29:09 -0600
+Subject: io_uring/kbuf: fix missing BUF_MORE for incremental buffers at EOF
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 3ecd3e03144b38a21a3b70254f1b9d2e16629b09 upstream.
+
+For a zero length transfer, io_kbuf_inc_commit() is called with !len.
+Since we never enter the while loop to consume the buffers,
+io_kbuf_inc_commit() ends up returning true, consuming the buffer. But
+if no data was consumed, by definition it cannot have consumed the
+buffer. Return false for that case.
+
+Reported-by: Martin Michaelis <code@mgjm.de>
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1553
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -34,6 +34,10 @@ struct io_provide_buf {
+
+ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
+ {
++ /* No data consumed, return false early to avoid consuming the buffer */
++ if (!len)
++ return false;
++
+ while (len) {
+ struct io_uring_buf *buf;
+ u32 buf_len, this_len;
--- /dev/null
+From 7414a76696e4561ba2fe0b31bf66d4bdfd7641c9 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:48 +0000
+Subject: io_uring/kbuf: introduce io_kbuf_drop_legacy()
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 54e00d9a612ab93f37f612a5ccd7c0c4f8a31cea upstream.
+
+io_kbuf_drop() is only used for legacy provided buffers, and so
+__io_put_kbuf_list() is never called for REQ_F_BUFFER_RING. Remove the
+dead branch out of __io_put_kbuf_list(), rename it into
+io_kbuf_drop_legacy() and use it directly instead of io_kbuf_drop().
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/c8cc73e2272f09a86ecbdad9ebdd8304f8e583c0.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 5 ++---
+ io_uring/kbuf.c | 10 ++++++++++
+ io_uring/kbuf.h | 24 ++----------------------
+ 3 files changed, 14 insertions(+), 25 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -382,9 +382,8 @@ static bool req_need_defer(struct io_kio
+
+ static void io_clean_op(struct io_kiocb *req)
+ {
+- if (req->flags & REQ_F_BUFFER_SELECTED) {
+- io_kbuf_drop(req);
+- }
++ if (unlikely(req->flags & REQ_F_BUFFER_SELECTED))
++ io_kbuf_drop_legacy(req);
+
+ if (req->flags & REQ_F_NEED_CLEANUP) {
+ const struct io_cold_def *def = &io_cold_defs[req->opcode];
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -50,6 +50,16 @@ static int io_buffer_add_list(struct io_
+ return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
+ }
+
++void io_kbuf_drop_legacy(struct io_kiocb *req)
++{
++ if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
++ return;
++ req->buf_index = req->kbuf->bgid;
++ req->flags &= ~REQ_F_BUFFER_SELECTED;
++ kfree(req->kbuf);
++ req->kbuf = NULL;
++}
++
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
+ {
+ struct io_ring_ctx *ctx = req->ctx;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -82,6 +82,7 @@ int io_unregister_pbuf_ring(struct io_ri
+ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
+
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
++void io_kbuf_drop_legacy(struct io_kiocb *req);
+
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+@@ -169,27 +170,6 @@ static inline bool __io_put_kbuf_ring(st
+ return ret;
+ }
+
+-static inline void __io_put_kbuf_list(struct io_kiocb *req, int len)
+-{
+- if (req->flags & REQ_F_BUFFER_RING) {
+- __io_put_kbuf_ring(req, len, 1);
+- } else {
+- req->buf_index = req->kbuf->bgid;
+- req->flags &= ~REQ_F_BUFFER_SELECTED;
+- kfree(req->kbuf);
+- req->kbuf = NULL;
+- }
+-}
+-
+-static inline void io_kbuf_drop(struct io_kiocb *req)
+-{
+- if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+- return;
+-
+- /* len == 0 is fine here, non-ring will always drop all of it */
+- __io_put_kbuf_list(req, 0);
+-}
+-
+ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
+ int nbufs, unsigned issue_flags)
+ {
+@@ -203,7 +183,7 @@ static inline unsigned int __io_put_kbuf
+ if (!__io_put_kbuf_ring(req, len, nbufs))
+ ret |= IORING_CQE_F_BUF_MORE;
+ } else {
+- __io_put_kbuf_list(req, len);
++ io_kbuf_drop_legacy(req);
+ }
+ return ret;
+ }
--- /dev/null
+From 725d49239061ab49650f767bdc3c45fea32dbe4f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:34 -0600
+Subject: io_uring/kbuf: introduce struct io_br_sel
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit ab6559bdbb08f6bee606435cd014fc5ba0f7b750 upstream.
+
+Rather than return addresses directly from buffer selection, add a
+struct around it. No functional changes in this patch, it's in
+preparation for storing more buffer related information locally, rather
+than in struct io_kiocb.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-7-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 26 +++++++++++++-------------
+ io_uring/kbuf.h | 19 +++++++++++++++++--
+ io_uring/net.c | 18 +++++++++---------
+ io_uring/rw.c | 31 ++++++++++++++++++-------------
+ 4 files changed, 57 insertions(+), 37 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -152,18 +152,18 @@ static int io_provided_buffers_select(st
+ return 1;
+ }
+
+-static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
+- struct io_buffer_list *bl,
+- unsigned int issue_flags)
++static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
++ struct io_buffer_list *bl,
++ unsigned int issue_flags)
+ {
+ struct io_uring_buf_ring *br = bl->buf_ring;
+ __u16 tail, head = bl->head;
++ struct io_br_sel sel = { };
+ struct io_uring_buf *buf;
+- void __user *ret;
+
+ tail = smp_load_acquire(&br->tail);
+ if (unlikely(tail == head))
+- return NULL;
++ return sel;
+
+ if (head + 1 == tail)
+ req->flags |= REQ_F_BL_EMPTY;
+@@ -174,7 +174,7 @@ static void __user *io_ring_buffer_selec
+ req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+ req->buf_list = bl;
+ req->buf_index = buf->bid;
+- ret = u64_to_user_ptr(buf->addr);
++ sel.addr = u64_to_user_ptr(buf->addr);
+
+ if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
+ /*
+@@ -191,27 +191,27 @@ static void __user *io_ring_buffer_selec
+ req->flags |= REQ_F_BUF_MORE;
+ req->buf_list = NULL;
+ }
+- return ret;
++ return sel;
+ }
+
+-void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+- unsigned int issue_flags)
++struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
++ unsigned int issue_flags)
+ {
+ struct io_ring_ctx *ctx = req->ctx;
++ struct io_br_sel sel = { };
+ struct io_buffer_list *bl;
+- void __user *ret = NULL;
+
+ io_ring_submit_lock(req->ctx, issue_flags);
+
+ bl = io_buffer_get_list(ctx, req->buf_index);
+ if (likely(bl)) {
+ if (bl->flags & IOBL_BUF_RING)
+- ret = io_ring_buffer_select(req, len, bl, issue_flags);
++ sel = io_ring_buffer_select(req, len, bl, issue_flags);
+ else
+- ret = io_provided_buffer_select(req, len, bl);
++ sel.addr = io_provided_buffer_select(req, len, bl);
+ }
+ io_ring_submit_unlock(req->ctx, issue_flags);
+- return ret;
++ return sel;
+ }
+
+ /* cap it at a reasonable 256, will be one page even for 4K */
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -64,8 +64,23 @@ struct buf_sel_arg {
+ unsigned short partial_map;
+ };
+
+-void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+- unsigned int issue_flags);
++/*
++ * Return value from io_buffer_list selection. Just returns the error or
++ * user address for now, will be extended to return the buffer list in the
++ * future.
++ */
++struct io_br_sel {
++ /*
++ * Some selection parts return the user address, others return an error.
++ */
++ union {
++ void __user *addr;
++ ssize_t val;
++ };
++};
++
++struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
++ unsigned int issue_flags);
+ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+ unsigned int issue_flags);
+ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1035,22 +1035,22 @@ int io_recvmsg(struct io_kiocb *req, uns
+
+ retry_multishot:
+ if (io_do_buffer_select(req)) {
+- void __user *buf;
++ struct io_br_sel sel;
+ size_t len = sr->len;
+
+- buf = io_buffer_select(req, &len, issue_flags);
+- if (!buf)
++ sel = io_buffer_select(req, &len, issue_flags);
++ if (!sel.addr)
+ return -ENOBUFS;
+
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
+- ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
++ ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
+ if (ret) {
+ io_kbuf_recycle(req, req->buf_list, issue_flags);
+ return ret;
+ }
+ }
+
+- iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
++ iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len);
+ }
+
+ kmsg->msg.msg_get_inq = 1;
+@@ -1144,13 +1144,13 @@ static int io_recv_buf_select(struct io_
+ iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
+ arg.out_len);
+ } else {
+- void __user *buf;
++ struct io_br_sel sel;
+
+ *len = sr->len;
+- buf = io_buffer_select(req, len, issue_flags);
+- if (!buf)
++ sel = io_buffer_select(req, len, issue_flags);
++ if (!sel.addr)
+ return -ENOBUFS;
+- sr->buf = buf;
++ sr->buf = sel.addr;
+ sr->len = *len;
+ map_ubuf:
+ ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -88,28 +88,28 @@ static int io_iov_buffer_select_prep(str
+
+ static int __io_import_iovec(int ddir, struct io_kiocb *req,
+ struct io_async_rw *io,
++ struct io_br_sel *sel,
+ unsigned int issue_flags)
+ {
+ const struct io_issue_def *def = &io_issue_defs[req->opcode];
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ struct iovec *iov;
+- void __user *buf;
+ int nr_segs, ret;
+ size_t sqe_len;
+
+- buf = u64_to_user_ptr(rw->addr);
++ sel->addr = u64_to_user_ptr(rw->addr);
+ sqe_len = rw->len;
+
+ if (!def->vectored || req->flags & REQ_F_BUFFER_SELECT) {
+ if (io_do_buffer_select(req)) {
+- buf = io_buffer_select(req, &sqe_len, issue_flags);
+- if (!buf)
++ *sel = io_buffer_select(req, &sqe_len, issue_flags);
++ if (!sel->addr)
+ return -ENOBUFS;
+- rw->addr = (unsigned long) buf;
++ rw->addr = (unsigned long) sel->addr;
+ rw->len = sqe_len;
+ }
+
+- return import_ubuf(ddir, buf, sqe_len, &io->iter);
++ return import_ubuf(ddir, sel->addr, sqe_len, &io->iter);
+ }
+
+ if (io->free_iovec) {
+@@ -119,7 +119,7 @@ static int __io_import_iovec(int ddir, s
+ iov = &io->fast_iov;
+ nr_segs = 1;
+ }
+- ret = __import_iovec(ddir, buf, sqe_len, nr_segs, &iov, &io->iter,
++ ret = __import_iovec(ddir, sel->addr, sqe_len, nr_segs, &iov, &io->iter,
+ req->ctx->compat);
+ if (unlikely(ret < 0))
+ return ret;
+@@ -134,11 +134,12 @@ static int __io_import_iovec(int ddir, s
+
+ static inline int io_import_iovec(int rw, struct io_kiocb *req,
+ struct io_async_rw *io,
++ struct io_br_sel *sel,
+ unsigned int issue_flags)
+ {
+ int ret;
+
+- ret = __io_import_iovec(rw, req, io, issue_flags);
++ ret = __io_import_iovec(rw, req, io, sel, issue_flags);
+ if (unlikely(ret < 0))
+ return ret;
+
+@@ -240,6 +241,7 @@ done:
+ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
+ {
+ struct io_async_rw *rw;
++ struct io_br_sel sel = { };
+ int ret;
+
+ if (io_rw_alloc_async(req))
+@@ -249,7 +251,7 @@ static int io_prep_rw_setup(struct io_ki
+ return 0;
+
+ rw = req->async_data;
+- ret = io_import_iovec(ddir, req, rw, 0);
++ ret = io_import_iovec(ddir, req, rw, &sel, 0);
+ if (unlikely(ret < 0))
+ return ret;
+
+@@ -827,7 +829,8 @@ static int io_rw_init_file(struct io_kio
+ return 0;
+ }
+
+-static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
++static int __io_read(struct io_kiocb *req, struct io_br_sel *sel,
++ unsigned int issue_flags)
+ {
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+@@ -837,7 +840,7 @@ static int __io_read(struct io_kiocb *re
+ loff_t *ppos;
+
+ if (io_do_buffer_select(req)) {
+- ret = io_import_iovec(ITER_DEST, req, io, issue_flags);
++ ret = io_import_iovec(ITER_DEST, req, io, sel, issue_flags);
+ if (unlikely(ret < 0))
+ return ret;
+ }
+@@ -947,9 +950,10 @@ done:
+
+ int io_read(struct io_kiocb *req, unsigned int issue_flags)
+ {
++ struct io_br_sel sel = { };
+ int ret;
+
+- ret = __io_read(req, issue_flags);
++ ret = __io_read(req, &sel, issue_flags);
+ if (ret >= 0)
+ return kiocb_done(req, ret, issue_flags);
+
+@@ -961,6 +965,7 @@ int io_read(struct io_kiocb *req, unsign
+ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
+ {
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
++ struct io_br_sel sel = { };
+ unsigned int cflags = 0;
+ int ret;
+
+@@ -970,7 +975,7 @@ int io_read_mshot(struct io_kiocb *req,
+ if (!io_file_can_poll(req))
+ return -EBADFD;
+
+- ret = __io_read(req, issue_flags);
++ ret = __io_read(req, &sel, issue_flags);
+
+ /*
+ * If we get -EAGAIN, recycle our buffer and just let normal poll
--- /dev/null
+From 7828a049e6c26ed764dcbe0579954a43d6e44edb Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:47 +0000
+Subject: io_uring/kbuf: open code __io_put_kbuf()
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit e150e70fce425e1cdfc227974893cad9fb90a0d3 upstream.
+
+__io_put_kbuf() is a trivial wrapper, open code it into
+__io_put_kbufs().
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/9dc17380272b48d56c95992c6f9eaacd5546e1d3.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 5 -----
+ io_uring/kbuf.h | 4 +---
+ 2 files changed, 1 insertion(+), 8 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -76,11 +76,6 @@ bool io_kbuf_recycle_legacy(struct io_ki
+ return true;
+ }
+
+-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
+-{
+- __io_put_kbuf_list(req, len);
+-}
+-
+ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+ struct io_buffer_list *bl)
+ {
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -81,8 +81,6 @@ int io_register_pbuf_ring(struct io_ring
+ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
+
+-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags);
+-
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+@@ -205,7 +203,7 @@ static inline unsigned int __io_put_kbuf
+ if (!__io_put_kbuf_ring(req, len, nbufs))
+ ret |= IORING_CQE_F_BUF_MORE;
+ } else {
+- __io_put_kbuf(req, len, issue_flags);
++ __io_put_kbuf_list(req, len);
+ }
+ return ret;
+ }
--- /dev/null
+From 185e462830eaf7a6df916ba1400b46182b36ec9d Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:33 -0600
+Subject: io_uring/kbuf: pass in struct io_buffer_list to commit/recycle helpers
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 1b5add75d7c894c62506c9b55f1d9eaadae50ef1 upstream.
+
+Rather than have this implied being in the io_kiocb, pass it in directly
+so it's immediately obvious where these users of ->buf_list are coming
+from.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-6-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 6 +++---
+ io_uring/kbuf.c | 9 +++++----
+ io_uring/kbuf.h | 24 ++++++++++++++----------
+ io_uring/net.c | 30 +++++++++++++-----------------
+ io_uring/poll.c | 6 +++---
+ io_uring/rw.c | 16 ++++++++--------
+ 6 files changed, 46 insertions(+), 45 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb
+ lockdep_assert_held(&req->ctx->uring_lock);
+
+ req_set_fail(req);
+- io_req_set_res(req, res, io_put_kbuf(req, res));
++ io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list));
+ if (def->fail)
+ def->fail(req);
+ io_req_complete_defer(req);
+@@ -1921,11 +1921,11 @@ static void io_queue_async(struct io_kio
+
+ switch (io_arm_poll_handler(req, 0)) {
+ case IO_APOLL_READY:
+- io_kbuf_recycle(req, 0);
++ io_kbuf_recycle(req, req->buf_list, 0);
+ io_req_task_queue(req);
+ break;
+ case IO_APOLL_ABORTED:
+- io_kbuf_recycle(req, 0);
++ io_kbuf_recycle(req, req->buf_list, 0);
+ io_queue_iowq(req);
+ break;
+ case IO_APOLL_OK:
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -368,9 +368,9 @@ int io_buffers_peek(struct io_kiocb *req
+ return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+ }
+
+-static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
++static inline bool __io_put_kbuf_ring(struct io_kiocb *req,
++ struct io_buffer_list *bl, int len, int nr)
+ {
+- struct io_buffer_list *bl = req->buf_list;
+ bool ret = true;
+
+ if (bl) {
+@@ -381,7 +381,8 @@ static inline bool __io_put_kbuf_ring(st
+ return ret;
+ }
+
+-unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
++unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
++ int len, int nbufs)
+ {
+ unsigned int ret;
+
+@@ -392,7 +393,7 @@ unsigned int __io_put_kbufs(struct io_ki
+ return ret;
+ }
+
+- if (!__io_put_kbuf_ring(req, len, nbufs))
++ if (!__io_put_kbuf_ring(req, bl, len, nbufs))
+ ret |= IORING_CQE_F_BUF_MORE;
+ return ret;
+ }
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -84,7 +84,8 @@ int io_register_pbuf_status(struct io_ri
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+ void io_kbuf_drop_legacy(struct io_kiocb *req);
+
+-unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs);
++unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl,
++ int len, int nbufs);
+ bool io_kbuf_commit(struct io_kiocb *req,
+ struct io_buffer_list *bl, int len, int nr);
+
+@@ -93,7 +94,8 @@ struct io_buffer_list *io_pbuf_get_bl(st
+ unsigned long bgid);
+ int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
+
+-static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
++static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
++ struct io_buffer_list *bl)
+ {
+ /*
+ * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
+@@ -102,8 +104,8 @@ static inline bool io_kbuf_recycle_ring(
+ * The exception is partial io, that case we should increment bl->head
+ * to monopolize the buffer.
+ */
+- if (req->buf_list) {
+- req->buf_index = req->buf_list->bgid;
++ if (bl) {
++ req->buf_index = bl->bgid;
+ req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
+ return true;
+ }
+@@ -117,32 +119,34 @@ static inline bool io_do_buffer_select(s
+ return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING));
+ }
+
+-static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
++static inline bool io_kbuf_recycle(struct io_kiocb *req, struct io_buffer_list *bl,
++ unsigned issue_flags)
+ {
+ if (req->flags & REQ_F_BL_NO_RECYCLE)
+ return false;
+ if (req->flags & REQ_F_BUFFER_SELECTED)
+ return io_kbuf_recycle_legacy(req, issue_flags);
+ if (req->flags & REQ_F_BUFFER_RING)
+- return io_kbuf_recycle_ring(req);
++ return io_kbuf_recycle_ring(req, bl);
+ return false;
+ }
+
+ /* Mapped buffer ring, return io_uring_buf from head */
+ #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
+
+-static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len)
++static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
++ struct io_buffer_list *bl)
+ {
+ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+ return 0;
+- return __io_put_kbufs(req, len, 1);
++ return __io_put_kbufs(req, bl, len, 1);
+ }
+
+ static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
+- int nbufs)
++ struct io_buffer_list *bl, int nbufs)
+ {
+ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+ return 0;
+- return __io_put_kbufs(req, len, nbufs);
++ return __io_put_kbufs(req, bl, len, nbufs);
+ }
+ #endif
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -498,12 +498,12 @@ static int io_bundle_nbufs(struct io_asy
+ return nbufs;
+ }
+
+-static int io_net_kbuf_recyle(struct io_kiocb *req,
++static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl,
+ struct io_async_msghdr *kmsg, int len)
+ {
+ req->flags |= REQ_F_BL_NO_RECYCLE;
+ if (req->flags & REQ_F_BUFFERS_COMMIT)
+- io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len));
++ io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len));
+ return -EAGAIN;
+ }
+
+@@ -515,11 +515,11 @@ static inline bool io_send_finish(struct
+ unsigned int cflags;
+
+ if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+- cflags = io_put_kbuf(req, *ret);
++ cflags = io_put_kbuf(req, *ret, req->buf_list);
+ goto finish;
+ }
+
+- cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret));
++ cflags = io_put_kbufs(req, *ret, req->buf_list, io_bundle_nbufs(kmsg, *ret));
+
+ /*
+ * Don't start new bundles if the buffer list is empty, or if the
+@@ -675,7 +675,7 @@ retry_bundle:
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+- return io_net_kbuf_recyle(req, kmsg, ret);
++ return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+@@ -869,7 +869,7 @@ static inline bool io_recv_finish(struct
+ if (sr->flags & IORING_RECVSEND_BUNDLE) {
+ size_t this_ret = *ret - sr->done_io;
+
+- cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret));
++ cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
+ cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+ /* bundle with no more immediate buffers, we're done */
+@@ -888,7 +888,7 @@ static inline bool io_recv_finish(struct
+ return false;
+ }
+ } else {
+- cflags |= io_put_kbuf(req, *ret);
++ cflags |= io_put_kbuf(req, *ret, req->buf_list);
+ }
+
+ /*
+@@ -1045,7 +1045,7 @@ retry_multishot:
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
+ ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
+ if (ret) {
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+ return ret;
+ }
+ }
+@@ -1070,15 +1070,11 @@ retry_multishot:
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && force_nonblock) {
+ if (issue_flags & IO_URING_F_MULTISHOT) {
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+ return IOU_ISSUE_SKIP_COMPLETE;
+ }
+ return -EAGAIN;
+ }
+- if (ret > 0 && io_net_retry(sock, flags)) {
+- sr->done_io += ret;
+- return io_net_kbuf_recyle(req, kmsg, ret);
+- }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+@@ -1091,7 +1087,7 @@ retry_multishot:
+ else if (sr->done_io)
+ ret = sr->done_io;
+ else
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+
+ if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
+ goto retry_multishot;
+@@ -1209,7 +1205,7 @@ retry_multishot:
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && force_nonblock) {
+ if (issue_flags & IO_URING_F_MULTISHOT) {
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+ return IOU_ISSUE_SKIP_COMPLETE;
+ }
+
+@@ -1219,7 +1215,7 @@ retry_multishot:
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+- return io_net_kbuf_recyle(req, kmsg, ret);
++ return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+@@ -1235,7 +1231,7 @@ out_free:
+ else if (sr->done_io)
+ ret = sr->done_io;
+ else
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+
+ if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
+ goto retry_multishot;
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -356,10 +356,10 @@ void io_poll_task_func(struct io_kiocb *
+
+ ret = io_poll_check_events(req, ts);
+ if (ret == IOU_POLL_NO_ACTION) {
+- io_kbuf_recycle(req, 0);
++ io_kbuf_recycle(req, req->buf_list, 0);
+ return;
+ } else if (ret == IOU_POLL_REQUEUE) {
+- io_kbuf_recycle(req, 0);
++ io_kbuf_recycle(req, req->buf_list, 0);
+ __io_poll_execute(req, 0);
+ return;
+ }
+@@ -753,7 +753,7 @@ int io_arm_poll_handler(struct io_kiocb
+ req->flags |= REQ_F_POLLED;
+ ipt.pt._qproc = io_async_queue_proc;
+
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+
+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
+ if (ret)
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -520,7 +520,7 @@ void io_req_rw_complete(struct io_kiocb
+ io_req_io_end(req);
+
+ if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+- req->cqe.flags |= io_put_kbuf(req, req->cqe.res);
++ req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list);
+
+ io_req_rw_cleanup(req, 0);
+ io_req_task_complete(req, ts);
+@@ -602,7 +602,7 @@ static int kiocb_done(struct io_kiocb *r
+ */
+ io_req_io_end(req);
+ io_req_set_res(req, final_ret,
+- io_put_kbuf(req, ret));
++ io_put_kbuf(req, ret, req->buf_list));
+ io_req_rw_cleanup(req, issue_flags);
+ return IOU_OK;
+ }
+@@ -954,7 +954,7 @@ int io_read(struct io_kiocb *req, unsign
+ return kiocb_done(req, ret, issue_flags);
+
+ if (req->flags & REQ_F_BUFFERS_COMMIT)
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+ return ret;
+ }
+
+@@ -981,17 +981,17 @@ int io_read_mshot(struct io_kiocb *req,
+ * Reset rw->len to 0 again to avoid clamping future mshot
+ * reads, in case the buffer size varies.
+ */
+- if (io_kbuf_recycle(req, issue_flags))
++ if (io_kbuf_recycle(req, req->buf_list, issue_flags))
+ rw->len = 0;
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_ISSUE_SKIP_COMPLETE;
+ return -EAGAIN;
+ } else if (ret <= 0) {
+- io_kbuf_recycle(req, issue_flags);
++ io_kbuf_recycle(req, req->buf_list, issue_flags);
+ if (ret < 0)
+ req_set_fail(req);
+ } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+- cflags = io_put_kbuf(req, ret);
++ cflags = io_put_kbuf(req, ret, req->buf_list);
+ } else {
+ /*
+ * Any successful return value will keep the multishot read
+@@ -999,7 +999,7 @@ int io_read_mshot(struct io_kiocb *req,
+ * we fail to post a CQE, or multishot is no longer set, then
+ * jump to the termination path. This request is then done.
+ */
+- cflags = io_put_kbuf(req, ret);
++ cflags = io_put_kbuf(req, ret, req->buf_list);
+ rw->len = 0; /* similarly to above, reset len to 0 */
+
+ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+@@ -1210,7 +1210,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+ if (!smp_load_acquire(&req->iopoll_completed))
+ break;
+ nr_events++;
+- req->cqe.flags = io_put_kbuf(req, req->cqe.res);
++ req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list);
+ if (req->opcode != IORING_OP_URING_CMD)
+ io_req_rw_cleanup(req, 0);
+ }
--- /dev/null
+From aecbedeb4dd8a16964f1fc52778c421c136825f1 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 19 Mar 2026 14:29:20 -0600
+Subject: io_uring/kbuf: propagate BUF_MORE through early buffer commit path
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 418eab7a6f3c002d8e64d6e95ec27118017019af upstream.
+
+When io_should_commit() returns true (eg for non-pollable files), buffer
+commit happens at buffer selection time and sel->buf_list is set to
+NULL. When __io_put_kbufs() generates CQE flags at completion time, it
+calls __io_put_kbuf_ring() which finds a NULL buffer_list and hence
+cannot determine whether the buffer was consumed or not. This means that
+IORING_CQE_F_BUF_MORE is never set for non-pollable input with
+incrementally consumed buffers.
+
+Likewise for io_buffers_select(), which always commits upfront and
+discards the return value of io_kbuf_commit().
+
+Add REQ_F_BUF_MORE to store the result of io_kbuf_commit() during early
+commit. Then __io_put_kbuf_ring() can check this flag and set
+IORING_CQE_F_BUF_MORE accordingly.
+
+Reported-by: Martin Michaelis <code@mgjm.de>
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1553
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -387,7 +387,10 @@ static inline bool __io_put_kbuf_ring(st
+ ret = io_kbuf_commit(req, bl, len, nr);
+ req->buf_index = bl->bgid;
+ }
+- req->flags &= ~REQ_F_BUFFER_RING;
++ if (ret && (req->flags & REQ_F_BUF_MORE))
++ ret = false;
++
++ req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE);
+ return ret;
+ }
+
--- /dev/null
+From 1fc437c00b774e5b56d91a169298e558947f9e27 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:42 +0000
+Subject: io_uring/kbuf: remove legacy kbuf bulk allocation
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 7919292a961421bfdb22f83c16657684c96076b3 upstream.
+
+Legacy provided buffers are slow and discouraged in favour of the ring
+variant. Remove the bulk allocation to keep it simpler as we don't care
+about performance.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/a064d70370e590efed8076e9501ae4cfc20fe0ca.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 30 +++++-------------------------
+ 1 file changed, 5 insertions(+), 25 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -521,12 +521,9 @@ int io_provide_buffers_prep(struct io_ki
+ return 0;
+ }
+
+-#define IO_BUFFER_ALLOC_BATCH 64
+-
+ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
+ {
+- struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
+- int allocated;
++ struct io_buffer *buf;
+
+ /*
+ * Completions that don't happen inline (eg not under uring_lock) will
+@@ -544,27 +541,10 @@ static int io_refill_buffer_cache(struct
+ spin_unlock(&ctx->completion_lock);
+ }
+
+- /*
+- * No free buffers and no completion entries either. Allocate a new
+- * batch of buffer entries and add those to our freelist.
+- */
+-
+- allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
+- ARRAY_SIZE(bufs), (void **) bufs);
+- if (unlikely(!allocated)) {
+- /*
+- * Bulk alloc is all-or-nothing. If we fail to get a batch,
+- * retry single alloc to be on the safe side.
+- */
+- bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
+- if (!bufs[0])
+- return -ENOMEM;
+- allocated = 1;
+- }
+-
+- while (allocated)
+- list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);
+-
++ buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
++ if (!buf)
++ return -ENOMEM;
++ list_add_tail(&buf->list, &ctx->io_buffers_cache);
+ return 0;
+ }
+
--- /dev/null
+From 7d352330a367b477ac6c75a7dbffc850dec5a757 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:46 +0000
+Subject: io_uring/kbuf: remove legacy kbuf caching
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 13ee854e7c04236a47a5beaacdcf51eb0bc7a8fa upstream.
+
+Remove all struct io_buffer caches. It makes it a fair bit simpler.
+Apart from killing a bunch of lines and juggling between lists,
+__io_put_kbuf_list() doesn't need ->completion_lock locking now.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/18287217466ee2576ea0b1e72daccf7b22c7e856.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring_types.h | 3 --
+ io_uring/io_uring.c | 2 -
+ io_uring/kbuf.c | 58 ++++-------------------------------------
+ io_uring/kbuf.h | 5 +--
+ 4 files changed, 9 insertions(+), 59 deletions(-)
+
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -341,7 +341,6 @@ struct io_ring_ctx {
+
+ spinlock_t completion_lock;
+
+- struct list_head io_buffers_comp;
+ struct list_head cq_overflow_list;
+ struct io_hash_table cancel_table;
+
+@@ -361,8 +360,6 @@ struct io_ring_ctx {
+ unsigned int file_alloc_start;
+ unsigned int file_alloc_end;
+
+- struct list_head io_buffers_cache;
+-
+ /* Keep this last, we don't need it for the fast path */
+ struct wait_queue_head poll_wq;
+ struct io_restriction restrictions;
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -305,7 +305,6 @@ static __cold struct io_ring_ctx *io_rin
+ init_waitqueue_head(&ctx->sqo_sq_wait);
+ INIT_LIST_HEAD(&ctx->sqd_list);
+ INIT_LIST_HEAD(&ctx->cq_overflow_list);
+- INIT_LIST_HEAD(&ctx->io_buffers_cache);
+ ret = io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
+ sizeof(struct io_rsrc_node));
+ ret |= io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
+@@ -328,7 +327,6 @@ static __cold struct io_ring_ctx *io_rin
+ spin_lock_init(&ctx->completion_lock);
+ spin_lock_init(&ctx->timeout_lock);
+ INIT_WQ_LIST(&ctx->iopoll_list);
+- INIT_LIST_HEAD(&ctx->io_buffers_comp);
+ INIT_LIST_HEAD(&ctx->defer_list);
+ INIT_LIST_HEAD(&ctx->timeout_list);
+ INIT_LIST_HEAD(&ctx->ltimeout_list);
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -78,9 +78,7 @@ bool io_kbuf_recycle_legacy(struct io_ki
+
+ void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
+ {
+- spin_lock(&req->ctx->completion_lock);
+ __io_put_kbuf_list(req, len);
+- spin_unlock(&req->ctx->completion_lock);
+ }
+
+ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+@@ -362,14 +360,15 @@ static int __io_remove_buffers(struct io
+ return i;
+ }
+
+- /* protects io_buffers_cache */
+ lockdep_assert_held(&ctx->uring_lock);
+
+ while (!list_empty(&bl->buf_list)) {
+ struct io_buffer *nxt;
+
+ nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
+- list_move(&nxt->list, &ctx->io_buffers_cache);
++ list_del(&nxt->list);
++ kfree(nxt);
++
+ if (++i == nbufs)
+ return i;
+ cond_resched();
+@@ -389,27 +388,12 @@ void io_put_bl(struct io_ring_ctx *ctx,
+ void io_destroy_buffers(struct io_ring_ctx *ctx)
+ {
+ struct io_buffer_list *bl;
+- struct list_head *item, *tmp;
+- struct io_buffer *buf;
+ unsigned long index;
+
+ xa_for_each(&ctx->io_bl_xa, index, bl) {
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ io_put_bl(ctx, bl);
+ }
+-
+- /*
+- * Move deferred locked entries to cache before pruning
+- */
+- spin_lock(&ctx->completion_lock);
+- if (!list_empty(&ctx->io_buffers_comp))
+- list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
+- spin_unlock(&ctx->completion_lock);
+-
+- list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
+- buf = list_entry(item, struct io_buffer, list);
+- kfree(buf);
+- }
+ }
+
+ static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+@@ -499,33 +483,6 @@ int io_provide_buffers_prep(struct io_ki
+ return 0;
+ }
+
+-static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
+-{
+- struct io_buffer *buf;
+-
+- /*
+- * Completions that don't happen inline (eg not under uring_lock) will
+- * add to ->io_buffers_comp. If we don't have any free buffers, check
+- * the completion list and splice those entries first.
+- */
+- if (!list_empty_careful(&ctx->io_buffers_comp)) {
+- spin_lock(&ctx->completion_lock);
+- if (!list_empty(&ctx->io_buffers_comp)) {
+- list_splice_init(&ctx->io_buffers_comp,
+- &ctx->io_buffers_cache);
+- spin_unlock(&ctx->completion_lock);
+- return 0;
+- }
+- spin_unlock(&ctx->completion_lock);
+- }
+-
+- buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
+- if (!buf)
+- return -ENOMEM;
+- list_add_tail(&buf->list, &ctx->io_buffers_cache);
+- return 0;
+-}
+-
+ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
+ struct io_buffer_list *bl)
+ {
+@@ -534,12 +491,11 @@ static int io_add_buffers(struct io_ring
+ int i, bid = pbuf->bid;
+
+ for (i = 0; i < pbuf->nbufs; i++) {
+- if (list_empty(&ctx->io_buffers_cache) &&
+- io_refill_buffer_cache(ctx))
++ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
++ if (!buf)
+ break;
+- buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
+- list);
+- list_move_tail(&buf->list, &bl->buf_list);
++
++ list_add_tail(&buf->list, &bl->buf_list);
+ buf->addr = addr;
+ buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
+ buf->bid = bid;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -177,8 +177,9 @@ static inline void __io_put_kbuf_list(st
+ __io_put_kbuf_ring(req, len, 1);
+ } else {
+ req->buf_index = req->kbuf->bgid;
+- list_add(&req->kbuf->list, &req->ctx->io_buffers_comp);
+ req->flags &= ~REQ_F_BUFFER_SELECTED;
++ kfree(req->kbuf);
++ req->kbuf = NULL;
+ }
+ }
+
+@@ -187,10 +188,8 @@ static inline void io_kbuf_drop(struct i
+ if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+ return;
+
+- spin_lock(&req->ctx->completion_lock);
+ /* len == 0 is fine here, non-ring will always drop all of it */
+ __io_put_kbuf_list(req, 0);
+- spin_unlock(&req->ctx->completion_lock);
+ }
+
+ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
--- /dev/null
+From 3aa159b85a008972392a89b8a7cda51b674fc32d Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:43 +0000
+Subject: io_uring/kbuf: remove legacy kbuf kmem cache
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 9afe6847cff78e7f3aa8f4c920265cf298033251 upstream.
+
+Remove the kmem cache used by legacy provided buffers.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/8195c207d8524d94e972c0c82de99282289f7f5c.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 2 --
+ io_uring/io_uring.h | 1 -
+ io_uring/kbuf.c | 8 +++-----
+ 3 files changed, 3 insertions(+), 8 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3867,8 +3867,6 @@ static int __init io_uring_init(void)
+ req_cachep = kmem_cache_create("io_kiocb", sizeof(struct io_kiocb), &kmem_args,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT |
+ SLAB_TYPESAFE_BY_RCU);
+- io_buf_cachep = KMEM_CACHE(io_buffer,
+- SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+
+ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -389,7 +389,6 @@ static inline bool io_req_cache_empty(st
+ }
+
+ extern struct kmem_cache *req_cachep;
+-extern struct kmem_cache *io_buf_cachep;
+
+ static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx)
+ {
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -20,8 +20,6 @@
+ /* BIDs are addressed by a 16-bit field in a CQE */
+ #define MAX_BIDS_PER_BGID (1 << 16)
+
+-struct kmem_cache *io_buf_cachep;
+-
+ struct io_provide_buf {
+ struct file *file;
+ __u64 addr;
+@@ -70,7 +68,7 @@ bool io_kbuf_recycle_legacy(struct io_ki
+ if (bl && !(bl->flags & IOBL_BUF_RING))
+ list_add(&buf->list, &bl->buf_list);
+ else
+- kmem_cache_free(io_buf_cachep, buf);
++ kfree(buf);
+ req->flags &= ~REQ_F_BUFFER_SELECTED;
+ req->kbuf = NULL;
+
+@@ -430,7 +428,7 @@ void io_destroy_buffers(struct io_ring_c
+
+ list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
+ buf = list_entry(item, struct io_buffer, list);
+- kmem_cache_free(io_buf_cachep, buf);
++ kfree(buf);
+ }
+ }
+
+@@ -541,7 +539,7 @@ static int io_refill_buffer_cache(struct
+ spin_unlock(&ctx->completion_lock);
+ }
+
+- buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
++ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
+ if (!buf)
+ return -ENOMEM;
+ list_add_tail(&buf->list, &ctx->io_buffers_cache);
--- /dev/null
+From e74da9819b43033a42cfc36f58b95b3e89d85fdf Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:45 +0000
+Subject: io_uring/kbuf: simplify __io_put_kbuf
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit dc39fb1093ea33019f192c93b77b863282e10162 upstream.
+
+As a preparation step remove an optimisation from __io_put_kbuf() trying
+to use the locked cache. With that __io_put_kbuf_list() is only used
+with ->io_buffers_comp, and we remove the explicit list argument.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/1b7f1394ec4afc7f96b35a61f5992e27c49fd067.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 2 --
+ io_uring/kbuf.c | 26 +++-----------------------
+ io_uring/kbuf.h | 11 +++++------
+ 3 files changed, 8 insertions(+), 31 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -385,9 +385,7 @@ static bool req_need_defer(struct io_kio
+ static void io_clean_op(struct io_kiocb *req)
+ {
+ if (req->flags & REQ_F_BUFFER_SELECTED) {
+- spin_lock(&req->ctx->completion_lock);
+ io_kbuf_drop(req);
+- spin_unlock(&req->ctx->completion_lock);
+ }
+
+ if (req->flags & REQ_F_NEED_CLEANUP) {
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -78,29 +78,9 @@ bool io_kbuf_recycle_legacy(struct io_ki
+
+ void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
+ {
+- /*
+- * We can add this buffer back to two lists:
+- *
+- * 1) The io_buffers_cache list. This one is protected by the
+- * ctx->uring_lock. If we already hold this lock, add back to this
+- * list as we can grab it from issue as well.
+- * 2) The io_buffers_comp list. This one is protected by the
+- * ctx->completion_lock.
+- *
+- * We migrate buffers from the comp_list to the issue cache list
+- * when we need one.
+- */
+- if (issue_flags & IO_URING_F_UNLOCKED) {
+- struct io_ring_ctx *ctx = req->ctx;
+-
+- spin_lock(&ctx->completion_lock);
+- __io_put_kbuf_list(req, len, &ctx->io_buffers_comp);
+- spin_unlock(&ctx->completion_lock);
+- } else {
+- lockdep_assert_held(&req->ctx->uring_lock);
+-
+- __io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache);
+- }
++ spin_lock(&req->ctx->completion_lock);
++ __io_put_kbuf_list(req, len);
++ spin_unlock(&req->ctx->completion_lock);
+ }
+
+ static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -171,27 +171,26 @@ static inline bool __io_put_kbuf_ring(st
+ return ret;
+ }
+
+-static inline void __io_put_kbuf_list(struct io_kiocb *req, int len,
+- struct list_head *list)
++static inline void __io_put_kbuf_list(struct io_kiocb *req, int len)
+ {
+ if (req->flags & REQ_F_BUFFER_RING) {
+ __io_put_kbuf_ring(req, len, 1);
+ } else {
+ req->buf_index = req->kbuf->bgid;
+- list_add(&req->kbuf->list, list);
++ list_add(&req->kbuf->list, &req->ctx->io_buffers_comp);
+ req->flags &= ~REQ_F_BUFFER_SELECTED;
+ }
+ }
+
+ static inline void io_kbuf_drop(struct io_kiocb *req)
+ {
+- lockdep_assert_held(&req->ctx->completion_lock);
+-
+ if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
+ return;
+
++ spin_lock(&req->ctx->completion_lock);
+ /* len == 0 is fine here, non-ring will always drop all of it */
+- __io_put_kbuf_list(req, 0, &req->ctx->io_buffers_comp);
++ __io_put_kbuf_list(req, 0);
++ spin_unlock(&req->ctx->completion_lock);
+ }
+
+ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
--- /dev/null
+From 69db0294e6d11bc765f5cbee2f39602fa2b36f7b Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:39 -0600
+Subject: io_uring/kbuf: switch to storing struct io_buffer_list locally
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 5fda51255439addd1c9059098e30847a375a1008 upstream.
+
+Currently the buffer list is stored in struct io_kiocb. The buffer list
+can be of two types:
+
+1) Classic/legacy buffer list. These don't need to get referenced after
+ a buffer pick, and hence storing them in struct io_kiocb is perfectly
+ fine.
+
+2) Ring provided buffer lists. These DO need to be referenced after the
+ initial buffer pick, as they need to get consumed later on. This can
+ be either just incrementing the head of the ring, or it can be
+ consuming parts of a buffer if incremental buffer consumptions has
+ been configured.
+
+For case 2, io_uring needs to be careful not to access the buffer list
+after the initial pick-and-execute context. The core does recycling of
+these, but it's easy to make a mistake, because it's stored in the
+io_kiocb which does persist across multiple execution contexts. Either
+because it's a multishot request, or simply because it needed some kind
+of async trigger (eg poll) for retry purposes.
+
+Add a struct io_buffer_list to struct io_br_sel, which is always on
+stack for the various users of it. This prevents the buffer list from
+leaking outside of that execution context, and additionally it enables
+kbuf to not even pass back the struct io_buffer_list if the given
+context isn't appropriately locked already.
+
+This doesn't fix any bugs, it's simply a defensive measure to prevent
+any issues with reuse of a buffer list.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-12-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring_types.h | 6 ----
+ io_uring/io_uring.c | 6 ++--
+ io_uring/kbuf.c | 27 ++++++++++++---------
+ io_uring/kbuf.h | 16 ++++--------
+ io_uring/net.c | 51 +++++++++++++++++------------------------
+ io_uring/poll.c | 6 ++--
+ io_uring/rw.c | 22 ++++++++---------
+ 7 files changed, 60 insertions(+), 74 deletions(-)
+
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -624,12 +624,6 @@ struct io_kiocb {
+
+ /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
+ struct io_buffer *kbuf;
+-
+- /*
+- * stores buffer ID for ring provided buffers, valid IFF
+- * REQ_F_BUFFER_RING is set.
+- */
+- struct io_buffer_list *buf_list;
+ };
+
+ union {
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb
+ lockdep_assert_held(&req->ctx->uring_lock);
+
+ req_set_fail(req);
+- io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list));
++ io_req_set_res(req, res, io_put_kbuf(req, res, NULL));
+ if (def->fail)
+ def->fail(req);
+ io_req_complete_defer(req);
+@@ -1921,11 +1921,11 @@ static void io_queue_async(struct io_kio
+
+ switch (io_arm_poll_handler(req, 0)) {
+ case IO_APOLL_READY:
+- io_kbuf_recycle(req, req->buf_list, 0);
++ io_kbuf_recycle(req, NULL, 0);
+ io_req_task_queue(req);
+ break;
+ case IO_APOLL_ABORTED:
+- io_kbuf_recycle(req, req->buf_list, 0);
++ io_kbuf_recycle(req, NULL, 0);
+ io_queue_iowq(req);
+ break;
+ case IO_APOLL_OK:
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -172,8 +172,8 @@ static struct io_br_sel io_ring_buffer_s
+ if (*len == 0 || *len > buf->len)
+ *len = buf->len;
+ req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+- req->buf_list = bl;
+ req->buf_index = buf->bid;
++ sel.buf_list = bl;
+ sel.addr = u64_to_user_ptr(buf->addr);
+
+ if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
+@@ -187,9 +187,9 @@ static struct io_br_sel io_ring_buffer_s
+ * the transfer completes (or if we get -EAGAIN and must poll of
+ * retry).
+ */
+- if (!io_kbuf_commit(req, bl, *len, 1))
++ if (!io_kbuf_commit(req, sel.buf_list, *len, 1))
+ req->flags |= REQ_F_BUF_MORE;
+- req->buf_list = NULL;
++ sel.buf_list = NULL;
+ }
+ return sel;
+ }
+@@ -307,7 +307,6 @@ static int io_ring_buffers_peek(struct i
+ req->flags |= REQ_F_BL_EMPTY;
+
+ req->flags |= REQ_F_BUFFER_RING;
+- req->buf_list = bl;
+ return iov - arg->iovs;
+ }
+
+@@ -315,16 +314,15 @@ int io_buffers_select(struct io_kiocb *r
+ struct io_br_sel *sel, unsigned int issue_flags)
+ {
+ struct io_ring_ctx *ctx = req->ctx;
+- struct io_buffer_list *bl;
+ int ret = -ENOENT;
+
+ io_ring_submit_lock(ctx, issue_flags);
+- bl = io_buffer_get_list(ctx, req->buf_index);
+- if (unlikely(!bl))
++ sel->buf_list = io_buffer_get_list(ctx, req->buf_index);
++ if (unlikely(!sel->buf_list))
+ goto out_unlock;
+
+- if (bl->flags & IOBL_BUF_RING) {
+- ret = io_ring_buffers_peek(req, arg, bl);
++ if (sel->buf_list->flags & IOBL_BUF_RING) {
++ ret = io_ring_buffers_peek(req, arg, sel->buf_list);
+ /*
+ * Don't recycle these buffers if we need to go through poll.
+ * Nobody else can use them anyway, and holding on to provided
+@@ -334,14 +332,17 @@ int io_buffers_select(struct io_kiocb *r
+ */
+ if (ret > 0) {
+ req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE;
+- if (!io_kbuf_commit(req, bl, arg->out_len, ret))
++ if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret))
+ req->flags |= REQ_F_BUF_MORE;
+ }
+ } else {
+- ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs);
++ ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs);
+ }
+ out_unlock:
+- io_ring_submit_unlock(ctx, issue_flags);
++ if (issue_flags & IO_URING_F_UNLOCKED) {
++ sel->buf_list = NULL;
++ mutex_unlock(&ctx->uring_lock);
++ }
+ return ret;
+ }
+
+@@ -362,10 +363,12 @@ int io_buffers_peek(struct io_kiocb *req
+ ret = io_ring_buffers_peek(req, arg, bl);
+ if (ret > 0)
+ req->flags |= REQ_F_BUFFERS_COMMIT;
++ sel->buf_list = bl;
+ return ret;
+ }
+
+ /* don't support multiple buffer selections for legacy */
++ sel->buf_list = NULL;
+ return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+ }
+
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -65,11 +65,14 @@ struct buf_sel_arg {
+ };
+
+ /*
+- * Return value from io_buffer_list selection. Just returns the error or
+- * user address for now, will be extended to return the buffer list in the
+- * future.
++ * Return value from io_buffer_list selection, to avoid stashing it in
++ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
++ * across execution contexts are fine. But for ring provided buffers, the
++ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
++ * persists, it's better to just keep the buffer local for those cases.
+ */
+ struct io_br_sel {
++ struct io_buffer_list *buf_list;
+ /*
+ * Some selection parts return the user address, others return an error.
+ */
+@@ -113,13 +116,6 @@ int io_pbuf_mmap(struct file *file, stru
+ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
+ struct io_buffer_list *bl)
+ {
+- /*
+- * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
+- * the flag and hence ensure that bl->head doesn't get incremented.
+- * If the tail has already been incremented, hang on to it.
+- * The exception is partial io, that case we should increment bl->head
+- * to monopolize the buffer.
+- */
+ if (bl) {
+ req->buf_index = bl->bgid;
+ req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -442,7 +442,6 @@ int io_sendmsg_prep(struct io_kiocb *req
+ return -EINVAL;
+ sr->msg_flags |= MSG_WAITALL;
+ sr->buf_group = req->buf_index;
+- req->buf_list = NULL;
+ req->flags |= REQ_F_MULTISHOT;
+ }
+
+@@ -516,11 +515,11 @@ static inline bool io_send_finish(struct
+ unsigned int cflags;
+
+ if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+- cflags = io_put_kbuf(req, sel->val, req->buf_list);
++ cflags = io_put_kbuf(req, sel->val, sel->buf_list);
+ goto finish;
+ }
+
+- cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val));
++ cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));
+
+ /*
+ * Don't start new bundles if the buffer list is empty, or if the
+@@ -617,6 +616,7 @@ int io_send(struct io_kiocb *req, unsign
+ flags |= MSG_DONTWAIT;
+
+ retry_bundle:
++ sel.buf_list = NULL;
+ if (io_do_buffer_select(req)) {
+ struct buf_sel_arg arg = {
+ .iovs = &kmsg->fast_iov,
+@@ -677,7 +677,7 @@ retry_bundle:
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
++ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+@@ -816,18 +816,8 @@ int io_recvmsg_prep(struct io_kiocb *req
+ req->flags |= REQ_F_NOWAIT;
+ if (sr->msg_flags & MSG_ERRQUEUE)
+ req->flags |= REQ_F_CLEAR_POLLIN;
+- if (req->flags & REQ_F_BUFFER_SELECT) {
+- /*
+- * Store the buffer group for this multishot receive separately,
+- * as if we end up doing an io-wq based issue that selects a
+- * buffer, it has to be committed immediately and that will
+- * clear ->buf_list. This means we lose the link to the buffer
+- * list, and the eventual buffer put on completion then cannot
+- * restore it.
+- */
++ if (req->flags & REQ_F_BUFFER_SELECT)
+ sr->buf_group = req->buf_index;
+- req->buf_list = NULL;
+- }
+ if (sr->flags & IORING_RECV_MULTISHOT) {
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ return -EINVAL;
+@@ -873,7 +863,7 @@ static inline bool io_recv_finish(struct
+ if (sr->flags & IORING_RECVSEND_BUNDLE) {
+ size_t this_ret = sel->val - sr->done_io;
+
+- cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
++ cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
+ cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+ /* bundle with no more immediate buffers, we're done */
+@@ -892,7 +882,7 @@ static inline bool io_recv_finish(struct
+ return false;
+ }
+ } else {
+- cflags |= io_put_kbuf(req, sel->val, req->buf_list);
++ cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
+ }
+
+ /*
+@@ -1039,6 +1029,7 @@ int io_recvmsg(struct io_kiocb *req, uns
+ flags |= MSG_DONTWAIT;
+
+ retry_multishot:
++ sel.buf_list = NULL;
+ if (io_do_buffer_select(req)) {
+ size_t len = sr->len;
+
+@@ -1049,7 +1040,7 @@ retry_multishot:
+ if (req->flags & REQ_F_APOLL_MULTISHOT) {
+ ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
+ if (ret) {
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
+ return ret;
+ }
+ }
+@@ -1073,12 +1064,15 @@ retry_multishot:
+
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && force_nonblock) {
+- if (issue_flags & IO_URING_F_MULTISHOT) {
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
++ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_ISSUE_SKIP_COMPLETE;
+- }
+ return -EAGAIN;
+ }
++ if (ret > 0 && io_net_retry(sock, flags)) {
++ sr->done_io += ret;
++ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
++ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+@@ -1091,7 +1085,7 @@ retry_multishot:
+ else if (sr->done_io)
+ ret = sr->done_io;
+ else
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
+
+ sel.val = ret;
+ if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+@@ -1172,7 +1166,7 @@ int io_recv(struct io_kiocb *req, unsign
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *kmsg = req->async_data;
+- struct io_br_sel sel = { };
++ struct io_br_sel sel;
+ struct socket *sock;
+ unsigned flags;
+ int ret, min_ret = 0;
+@@ -1192,6 +1186,7 @@ int io_recv(struct io_kiocb *req, unsign
+ flags |= MSG_DONTWAIT;
+
+ retry_multishot:
++ sel.buf_list = NULL;
+ if (io_do_buffer_select(req)) {
+ sel.val = sr->len;
+ ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
+@@ -1211,18 +1206,16 @@ retry_multishot:
+ ret = sock_recvmsg(sock, &kmsg->msg, flags);
+ if (ret < min_ret) {
+ if (ret == -EAGAIN && force_nonblock) {
+- if (issue_flags & IO_URING_F_MULTISHOT) {
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
++ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_ISSUE_SKIP_COMPLETE;
+- }
+-
+ return -EAGAIN;
+ }
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->len -= ret;
+ sr->buf += ret;
+ sr->done_io += ret;
+- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
++ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+@@ -1238,7 +1231,7 @@ out_free:
+ else if (sr->done_io)
+ ret = sr->done_io;
+ else
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
+
+ sel.val = ret;
+ if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -356,10 +356,10 @@ void io_poll_task_func(struct io_kiocb *
+
+ ret = io_poll_check_events(req, ts);
+ if (ret == IOU_POLL_NO_ACTION) {
+- io_kbuf_recycle(req, req->buf_list, 0);
++ io_kbuf_recycle(req, NULL, 0);
+ return;
+ } else if (ret == IOU_POLL_REQUEUE) {
+- io_kbuf_recycle(req, req->buf_list, 0);
++ io_kbuf_recycle(req, NULL, 0);
+ __io_poll_execute(req, 0);
+ return;
+ }
+@@ -753,7 +753,7 @@ int io_arm_poll_handler(struct io_kiocb
+ req->flags |= REQ_F_POLLED;
+ ipt.pt._qproc = io_async_queue_proc;
+
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, NULL, issue_flags);
+
+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
+ if (ret)
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -522,7 +522,7 @@ void io_req_rw_complete(struct io_kiocb
+ io_req_io_end(req);
+
+ if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
+- req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list);
++ req->cqe.flags |= io_put_kbuf(req, req->cqe.res, NULL);
+
+ io_req_rw_cleanup(req, 0);
+ io_req_task_complete(req, ts);
+@@ -589,7 +589,7 @@ static inline void io_rw_done(struct kio
+ }
+
+ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
+- unsigned int issue_flags)
++ struct io_br_sel *sel, unsigned int issue_flags)
+ {
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ unsigned final_ret = io_fixup_rw_res(req, ret);
+@@ -604,7 +604,7 @@ static int kiocb_done(struct io_kiocb *r
+ */
+ io_req_io_end(req);
+ io_req_set_res(req, final_ret,
+- io_put_kbuf(req, ret, req->buf_list));
++ io_put_kbuf(req, ret, sel->buf_list));
+ io_req_rw_cleanup(req, issue_flags);
+ return IOU_OK;
+ }
+@@ -955,10 +955,10 @@ int io_read(struct io_kiocb *req, unsign
+
+ ret = __io_read(req, &sel, issue_flags);
+ if (ret >= 0)
+- return kiocb_done(req, ret, issue_flags);
++ return kiocb_done(req, ret, &sel, issue_flags);
+
+ if (req->flags & REQ_F_BUFFERS_COMMIT)
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
+ return ret;
+ }
+
+@@ -986,17 +986,17 @@ int io_read_mshot(struct io_kiocb *req,
+ * Reset rw->len to 0 again to avoid clamping future mshot
+ * reads, in case the buffer size varies.
+ */
+- if (io_kbuf_recycle(req, req->buf_list, issue_flags))
++ if (io_kbuf_recycle(req, sel.buf_list, issue_flags))
+ rw->len = 0;
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_ISSUE_SKIP_COMPLETE;
+ return -EAGAIN;
+ } else if (ret <= 0) {
+- io_kbuf_recycle(req, req->buf_list, issue_flags);
++ io_kbuf_recycle(req, sel.buf_list, issue_flags);
+ if (ret < 0)
+ req_set_fail(req);
+ } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+- cflags = io_put_kbuf(req, ret, req->buf_list);
++ cflags = io_put_kbuf(req, ret, sel.buf_list);
+ } else {
+ /*
+ * Any successful return value will keep the multishot read
+@@ -1004,7 +1004,7 @@ int io_read_mshot(struct io_kiocb *req,
+ * we fail to post a CQE, or multishot is no longer set, then
+ * jump to the termination path. This request is then done.
+ */
+- cflags = io_put_kbuf(req, ret, req->buf_list);
++ cflags = io_put_kbuf(req, ret, sel.buf_list);
+ rw->len = 0; /* similarly to above, reset len to 0 */
+
+ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+@@ -1135,7 +1135,7 @@ int io_write(struct io_kiocb *req, unsig
+ return -EAGAIN;
+ }
+ done:
+- return kiocb_done(req, ret2, issue_flags);
++ return kiocb_done(req, ret2, NULL, issue_flags);
+ } else {
+ ret_eagain:
+ iov_iter_restore(&io->iter, &io->iter_state);
+@@ -1215,7 +1215,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx
+ if (!smp_load_acquire(&req->iopoll_completed))
+ break;
+ nr_events++;
+- req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list);
++ req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL);
+ if (req->opcode != IORING_OP_URING_CMD)
+ io_req_rw_cleanup(req, 0);
+ }
--- /dev/null
+From e7f2c429fac51f341094e974e2858949f3670941 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 5 Feb 2025 11:36:49 +0000
+Subject: io_uring/kbuf: uninline __io_put_kbufs
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 5d3e51240d89678b87b5dc6987ea572048a0f0eb upstream.
+
+__io_put_kbufs() and other helper functions are too large to be inlined,
+compilers would normally refuse to do so. Uninline it and move together
+with io_kbuf_commit into kbuf.c.
+
+
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/3dade7f55ad590e811aff83b1ec55c9c04e17b2b.1738724373.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++
+ io_uring/kbuf.h | 73 +++++++-------------------------------------------------
+ 2 files changed, 70 insertions(+), 63 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -20,6 +20,9 @@
+ /* BIDs are addressed by a 16-bit field in a CQE */
+ #define MAX_BIDS_PER_BGID (1 << 16)
+
++/* Mapped buffer ring, return io_uring_buf from head */
++#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
++
+ struct io_provide_buf {
+ struct file *file;
+ __u64 addr;
+@@ -29,6 +32,34 @@ struct io_provide_buf {
+ __u16 bid;
+ };
+
++bool io_kbuf_commit(struct io_kiocb *req,
++ struct io_buffer_list *bl, int len, int nr)
++{
++ if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
++ return true;
++
++ req->flags &= ~REQ_F_BUFFERS_COMMIT;
++
++ if (unlikely(len < 0))
++ return true;
++
++ if (bl->flags & IOBL_INC) {
++ struct io_uring_buf *buf;
++
++ buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
++ if (WARN_ON_ONCE(len > buf->len))
++ len = buf->len;
++ buf->len -= len;
++ if (buf->len) {
++ buf->addr += len;
++ return false;
++ }
++ }
++
++ bl->head += nr;
++ return true;
++}
++
+ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
+ {
+@@ -337,6 +368,35 @@ int io_buffers_peek(struct io_kiocb *req
+ return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
+ }
+
++static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
++{
++ struct io_buffer_list *bl = req->buf_list;
++ bool ret = true;
++
++ if (bl) {
++ ret = io_kbuf_commit(req, bl, len, nr);
++ req->buf_index = bl->bgid;
++ }
++ req->flags &= ~REQ_F_BUFFER_RING;
++ return ret;
++}
++
++unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
++{
++ unsigned int ret;
++
++ ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
++
++ if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
++ io_kbuf_drop_legacy(req);
++ return ret;
++ }
++
++ if (!__io_put_kbuf_ring(req, len, nbufs))
++ ret |= IORING_CQE_F_BUF_MORE;
++ return ret;
++}
++
+ static int __io_remove_buffers(struct io_ring_ctx *ctx,
+ struct io_buffer_list *bl, unsigned nbufs)
+ {
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -84,6 +84,10 @@ int io_register_pbuf_status(struct io_ri
+ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+ void io_kbuf_drop_legacy(struct io_kiocb *req);
+
++unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs);
++bool io_kbuf_commit(struct io_kiocb *req,
++ struct io_buffer_list *bl, int len, int nr);
++
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid);
+@@ -127,76 +131,19 @@ static inline bool io_kbuf_recycle(struc
+ /* Mapped buffer ring, return io_uring_buf from head */
+ #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
+
+-static inline bool io_kbuf_commit(struct io_kiocb *req,
+- struct io_buffer_list *bl, int len, int nr)
+-{
+- if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
+- return true;
+-
+- req->flags &= ~REQ_F_BUFFERS_COMMIT;
+-
+- if (unlikely(len < 0))
+- return true;
+-
+- if (bl->flags & IOBL_INC) {
+- struct io_uring_buf *buf;
+-
+- buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
+- if (len > buf->len)
+- len = buf->len;
+- buf->len -= len;
+- if (buf->len) {
+- buf->addr += len;
+- return false;
+- }
+- }
+-
+- bl->head += nr;
+- return true;
+-}
+-
+-static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
+-{
+- struct io_buffer_list *bl = req->buf_list;
+- bool ret = true;
+-
+- if (bl) {
+- ret = io_kbuf_commit(req, bl, len, nr);
+- req->buf_index = bl->bgid;
+- }
+- if (ret && (req->flags & REQ_F_BUF_MORE))
+- ret = false;
+- req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE);
+- return ret;
+-}
+-
+-static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
+- int nbufs, unsigned issue_flags)
+-{
+- unsigned int ret;
+-
+- if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
+- return 0;
+-
+- ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
+- if (req->flags & REQ_F_BUFFER_RING) {
+- if (!__io_put_kbuf_ring(req, len, nbufs))
+- ret |= IORING_CQE_F_BUF_MORE;
+- } else {
+- io_kbuf_drop_legacy(req);
+- }
+- return ret;
+-}
+-
+ static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
+ unsigned issue_flags)
+ {
+- return __io_put_kbufs(req, len, 1, issue_flags);
++ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
++ return 0;
++ return __io_put_kbufs(req, len, 1);
+ }
+
+ static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
+ int nbufs, unsigned issue_flags)
+ {
+- return __io_put_kbufs(req, len, nbufs, issue_flags);
++ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
++ return 0;
++ return __io_put_kbufs(req, len, nbufs);
+ }
+ #endif
--- /dev/null
+From d888bd5f65a50de9571cfd6333050ac399b8af47 Mon Sep 17 00:00:00 2001
+From: Caleb Sander Mateos <csander@purestorage.com>
+Date: Thu, 4 Dec 2025 15:43:31 -0700
+Subject: io_uring/kbuf: use READ_ONCE() for userspace-mapped memory
+
+From: Caleb Sander Mateos <csander@purestorage.com>
+
+Commit 78385c7299f7514697d196b3233a91bd5e485591 upstream.
+
+The struct io_uring_buf elements in a buffer ring are in a memory region
+accessible from userspace. A malicious/buggy userspace program could
+therefore write to them at any time, so they should be accessed with
+READ_ONCE() in the kernel. Commit 98b6fa62c84f ("io_uring/kbuf: always
+use READ_ONCE() to read ring provided buffer lengths") already switched
+the reads of the len field to READ_ONCE(). Do the same for bid and addr.
+
+Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
+Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers")
+Cc: Joanne Koong <joannelkoong@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -44,7 +44,7 @@ static bool io_kbuf_inc_commit(struct io
+ buf_len -= this_len;
+ /* Stop looping for invalid buffer length of 0 */
+ if (buf_len || !this_len) {
+- buf->addr += this_len;
++ buf->addr = READ_ONCE(buf->addr) + this_len;
+ buf->len = buf_len;
+ return false;
+ }
+@@ -185,9 +185,9 @@ static struct io_br_sel io_ring_buffer_s
+ if (*len == 0 || *len > buf_len)
+ *len = buf_len;
+ req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
+- req->buf_index = buf->bid;
++ req->buf_index = READ_ONCE(buf->bid);
+ sel.buf_list = bl;
+- sel.addr = u64_to_user_ptr(buf->addr);
++ sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr));
+
+ if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
+ /*
+@@ -278,7 +278,7 @@ static int io_ring_buffers_peek(struct i
+ if (!arg->max_len)
+ arg->max_len = INT_MAX;
+
+- req->buf_index = buf->bid;
++ req->buf_index = READ_ONCE(buf->bid);
+ do {
+ u32 len = READ_ONCE(buf->len);
+
+@@ -293,7 +293,7 @@ static int io_ring_buffers_peek(struct i
+ }
+ }
+
+- iov->iov_base = u64_to_user_ptr(buf->addr);
++ iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr));
+ iov->iov_len = len;
+ iov++;
+
--- /dev/null
+From ae354ce85590e11d18bf61f13d951c2ee249b716 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:36 -0600
+Subject: io_uring/kbuf: use struct io_br_sel for multiple buffers picking
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 429884ff35f75a8ac3e8f822f483e220e3ea6394 upstream.
+
+The networking side uses bundles, which is picking multiple buffers at
+the same time. Pass in struct io_br_sel to those helpers.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-9-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 5 +++--
+ io_uring/kbuf.h | 5 +++--
+ io_uring/net.c | 31 +++++++++++++++++--------------
+ 3 files changed, 23 insertions(+), 18 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -312,7 +312,7 @@ static int io_ring_buffers_peek(struct i
+ }
+
+ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+- unsigned int issue_flags)
++ struct io_br_sel *sel, unsigned int issue_flags)
+ {
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer_list *bl;
+@@ -345,7 +345,8 @@ out_unlock:
+ return ret;
+ }
+
+-int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
++int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
++ struct io_br_sel *sel)
+ {
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_buffer_list *bl;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -82,8 +82,9 @@ struct io_br_sel {
+ struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len,
+ unsigned int issue_flags);
+ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
+- unsigned int issue_flags);
+-int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
++ struct io_br_sel *sel, unsigned int issue_flags);
++int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
++ struct io_br_sel *sel);
+ void io_destroy_buffers(struct io_ring_ctx *ctx);
+
+ int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -597,6 +597,7 @@ int io_send(struct io_kiocb *req, unsign
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *kmsg = req->async_data;
++ struct io_br_sel sel = { };
+ struct socket *sock;
+ unsigned flags;
+ int min_ret = 0;
+@@ -633,7 +634,7 @@ retry_bundle:
+ else
+ arg.mode |= KBUF_MODE_EXPAND;
+
+- ret = io_buffers_select(req, &arg, issue_flags);
++ ret = io_buffers_select(req, &arg, &sel, issue_flags);
+ if (unlikely(ret < 0))
+ return ret;
+
+@@ -1015,6 +1016,7 @@ int io_recvmsg(struct io_kiocb *req, uns
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *kmsg = req->async_data;
++ struct io_br_sel sel = { };
+ struct socket *sock;
+ unsigned flags;
+ int ret, min_ret = 0;
+@@ -1035,7 +1037,6 @@ int io_recvmsg(struct io_kiocb *req, uns
+
+ retry_multishot:
+ if (io_do_buffer_select(req)) {
+- struct io_br_sel sel;
+ size_t len = sr->len;
+
+ sel = io_buffer_select(req, &len, issue_flags);
+@@ -1096,7 +1097,7 @@ retry_multishot:
+ }
+
+ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+- size_t *len, unsigned int issue_flags)
++ struct io_br_sel *sel, unsigned int issue_flags)
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int ret;
+@@ -1120,10 +1121,12 @@ static int io_recv_buf_select(struct io_
+ arg.mode |= KBUF_MODE_FREE;
+ }
+
+- if (kmsg->msg.msg_inq > 1)
+- arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
++ if (sel->val)
++ arg.max_len = sel->val;
++ else if (kmsg->msg.msg_inq > 1)
++ arg.max_len = min_not_zero(sel->val, (size_t) kmsg->msg.msg_inq);
+
+- ret = io_buffers_peek(req, &arg);
++ ret = io_buffers_peek(req, &arg, sel);
+ if (unlikely(ret < 0))
+ return ret;
+
+@@ -1144,14 +1147,13 @@ static int io_recv_buf_select(struct io_
+ iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
+ arg.out_len);
+ } else {
+- struct io_br_sel sel;
++ size_t len = sel->val;
+
+- *len = sr->len;
+- sel = io_buffer_select(req, len, issue_flags);
+- if (!sel.addr)
++ *sel = io_buffer_select(req, &len, issue_flags);
++ if (!sel->addr)
+ return -ENOBUFS;
+- sr->buf = sel.addr;
+- sr->len = *len;
++ sr->buf = sel->addr;
++ sr->len = len;
+ map_ubuf:
+ ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
+ &kmsg->msg.msg_iter);
+@@ -1166,11 +1168,11 @@ int io_recv(struct io_kiocb *req, unsign
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *kmsg = req->async_data;
++ struct io_br_sel sel = { };
+ struct socket *sock;
+ unsigned flags;
+ int ret, min_ret = 0;
+ bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+- size_t len = sr->len;
+ bool mshot_finished;
+
+ if (!(req->flags & REQ_F_POLLED) &&
+@@ -1187,7 +1189,8 @@ int io_recv(struct io_kiocb *req, unsign
+
+ retry_multishot:
+ if (io_do_buffer_select(req)) {
+- ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
++ sel.val = sr->len;
++ ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
+ if (unlikely(ret < 0)) {
+ kmsg->msg.msg_inq = -1;
+ goto out_free;
--- /dev/null
+From e643249c87544d67259296480d7352d80b097d26 Mon Sep 17 00:00:00 2001
+From: Joanne Koong <joannelkoong@gmail.com>
+Date: Thu, 4 Dec 2025 15:54:50 -0800
+Subject: io_uring/kbuf: use WRITE_ONCE() for userspace-shared buffer ring fields
+
+From: Joanne Koong <joannelkoong@gmail.com>
+
+Commit a4c694bfc2455e82b7caf6045ca893d123e0ed11 upstream.
+
+buf->addr and buf->len reside in memory shared with userspace. They
+should be written with WRITE_ONCE() to guarantee atomic stores and
+prevent tearing or other unsafe compiler optimizations.
+
+Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
+Cc: Caleb Sander Mateos <csander@purestorage.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -44,11 +44,11 @@ static bool io_kbuf_inc_commit(struct io
+ buf_len -= this_len;
+ /* Stop looping for invalid buffer length of 0 */
+ if (buf_len || !this_len) {
+- buf->addr = READ_ONCE(buf->addr) + this_len;
+- buf->len = buf_len;
++ WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
++ WRITE_ONCE(buf->len, buf_len);
+ return false;
+ }
+- buf->len = 0;
++ WRITE_ONCE(buf->len, 0);
+ bl->head++;
+ len -= this_len;
+ }
+@@ -289,7 +289,7 @@ static int io_ring_buffers_peek(struct i
+ arg->partial_map = 1;
+ if (iov != arg->iovs)
+ break;
+- buf->len = len;
++ WRITE_ONCE(buf->len, len);
+ }
+ }
+
--- /dev/null
+From 3b1c89768957bd90b8427d9c51648aaa87d8d2ec Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:32 -0600
+Subject: io_uring/net: clarify io_recv_buf_select() return value
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit b22743f29b7d3dc68c68f9bd39a1b2600ec6434e upstream.
+
+It returns 0 on success, less than zero on error.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1192,7 +1192,7 @@ int io_recv(struct io_kiocb *req, unsign
+ retry_multishot:
+ if (io_do_buffer_select(req)) {
+ ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
+- if (unlikely(ret)) {
++ if (unlikely(ret < 0)) {
+ kmsg->msg.msg_inq = -1;
+ goto out_free;
+ }
--- /dev/null
+From 73284d7ab3a12d22b5a8bb32a48a507b87d01c7c Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 2 Sep 2025 05:19:42 -0600
+Subject: io_uring/net: correct type for min_not_zero() cast
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 37500634d0a8f931e15879760fb70f9b6f5d5370 upstream.
+
+The kernel test robot reports that after a recent change, the signedness
+of a min_not_zero() compare is now incorrect. Fix that up and cast to
+the right type.
+
+Fixes: 429884ff35f7 ("io_uring/kbuf: use struct io_br_sel for multiple buffers picking")
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202509020426.WJtrdwOU-lkp@intel.com/
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1122,7 +1122,7 @@ static int io_recv_buf_select(struct io_
+ if (sel->val)
+ arg.max_len = sel->val;
+ else if (kmsg->msg.msg_inq > 1)
+- arg.max_len = min_not_zero(sel->val, (size_t) kmsg->msg.msg_inq);
++ arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq);
+
+ ret = io_buffers_peek(req, &arg, sel);
+ if (unlikely(ret < 0))
--- /dev/null
+From d4f37a9a013f83b00d4dcf27a5bf2e04eec43af3 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:31 -0600
+Subject: io_uring/net: don't use io_net_kbuf_recyle() for non-provided cases
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 15ba5e51e689ceb1c2e921c5180a70c88cfdc8e9 upstream.
+
+A previous commit used io_net_kbuf_recyle() for any network helper that
+did IO and needed partial retry. However, that's only needed if the
+opcode does buffer selection, which isn't supported for sendzc, sendmsg_zc,
+or sendmsg. Just remove them - they don't do any harm, but it is a bit
+confusing when reading the code.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -578,7 +578,7 @@ int io_sendmsg(struct io_kiocb *req, uns
+ kmsg->msg.msg_controllen = 0;
+ kmsg->msg.msg_control = NULL;
+ sr->done_io += ret;
+- return io_net_kbuf_recyle(req, kmsg, ret);
++ return -EAGAIN;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+@@ -1448,7 +1448,7 @@ int io_send_zc(struct io_kiocb *req, uns
+ zc->len -= ret;
+ zc->buf += ret;
+ zc->done_io += ret;
+- return io_net_kbuf_recyle(req, kmsg, ret);
++ return -EAGAIN;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+@@ -1508,7 +1508,7 @@ int io_sendmsg_zc(struct io_kiocb *req,
+
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+- return io_net_kbuf_recyle(req, kmsg, ret);
++ return -EAGAIN;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
--- /dev/null
+From 31717e9230807656724fc50bb8f95ba51824ff2f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:37 -0600
+Subject: io_uring/net: use struct io_br_sel->val as the recv finish value
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 58d815091890e83aa2f83a9cce1fdfe3af02c7b4 upstream.
+
+Currently a pointer is passed in to the 'ret' in the receive handlers,
+but since we already have a value field in io_br_sel, just use that.
+This is also in preparation for needing to pass in struct io_br_sel
+to io_recv_finish() anyway.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-10-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c | 31 +++++++++++++++++--------------
+ 1 file changed, 17 insertions(+), 14 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -857,9 +857,10 @@ int io_recvmsg_prep(struct io_kiocb *req
+ * Returns true if it is actually finished, or false if it should run
+ * again (for multishot).
+ */
+-static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
++static inline bool io_recv_finish(struct io_kiocb *req,
+ struct io_async_msghdr *kmsg,
+- bool mshot_finished, unsigned issue_flags)
++ struct io_br_sel *sel, bool mshot_finished,
++ unsigned issue_flags)
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ unsigned int cflags = 0;
+@@ -868,7 +869,7 @@ static inline bool io_recv_finish(struct
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+
+ if (sr->flags & IORING_RECVSEND_BUNDLE) {
+- size_t this_ret = *ret - sr->done_io;
++ size_t this_ret = sel->val - sr->done_io;
+
+ cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
+ if (sr->retry_flags & IO_SR_MSG_RETRY)
+@@ -889,7 +890,7 @@ static inline bool io_recv_finish(struct
+ return false;
+ }
+ } else {
+- cflags |= io_put_kbuf(req, *ret, req->buf_list);
++ cflags |= io_put_kbuf(req, sel->val, req->buf_list);
+ }
+
+ /*
+@@ -897,7 +898,7 @@ static inline bool io_recv_finish(struct
+ * receive from this socket.
+ */
+ if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
+- io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
++ io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
+ int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
+ io_mshot_prep_retry(req, kmsg);
+@@ -910,20 +911,20 @@ static inline bool io_recv_finish(struct
+ mshot_retry_ret = IOU_REQUEUE;
+ }
+ if (issue_flags & IO_URING_F_MULTISHOT)
+- *ret = mshot_retry_ret;
++ sel->val = mshot_retry_ret;
+ else
+- *ret = -EAGAIN;
++ sel->val = -EAGAIN;
+ return true;
+ }
+
+ /* Finish the request / stop multishot. */
+ finish:
+- io_req_set_res(req, *ret, cflags);
++ io_req_set_res(req, sel->val, cflags);
+
+ if (issue_flags & IO_URING_F_MULTISHOT)
+- *ret = IOU_STOP_MULTISHOT;
++ sel->val = IOU_STOP_MULTISHOT;
+ else
+- *ret = IOU_OK;
++ sel->val = IOU_OK;
+ io_req_msg_cleanup(req, issue_flags);
+ return true;
+ }
+@@ -1090,10 +1091,11 @@ retry_multishot:
+ else
+ io_kbuf_recycle(req, req->buf_list, issue_flags);
+
+- if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
++ sel.val = ret;
++ if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+ goto retry_multishot;
+
+- return ret;
++ return sel.val;
+ }
+
+ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
+@@ -1236,10 +1238,11 @@ out_free:
+ else
+ io_kbuf_recycle(req, req->buf_list, issue_flags);
+
+- if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
++ sel.val = ret;
++ if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
+ goto retry_multishot;
+
+- return ret;
++ return sel.val;
+ }
+
+ void io_send_zc_cleanup(struct io_kiocb *req)
--- /dev/null
+From 1aebf001d6fcb771b6318bca438fd205993d2bc1 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:38 -0600
+Subject: io_uring/net: use struct io_br_sel->val as the send finish value
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 461382a51fb83a9c4b7c50e1f10d3ca94edff25e upstream.
+
+Currently a pointer is passed in to the 'ret' in the send mshot handler,
+but since we already have a value field in io_br_sel, just use that.
+This is also in preparation for needing to pass in struct io_br_sel
+to io_send_finish() anyway.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-11-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -507,19 +507,20 @@ static int io_net_kbuf_recyle(struct io_
+ return -EAGAIN;
+ }
+
+-static inline bool io_send_finish(struct io_kiocb *req, int *ret,
+- struct io_async_msghdr *kmsg)
++static inline bool io_send_finish(struct io_kiocb *req,
++ struct io_async_msghdr *kmsg,
++ struct io_br_sel *sel)
+ {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+- bool bundle_finished = *ret <= 0;
++ bool bundle_finished = sel->val <= 0;
+ unsigned int cflags;
+
+ if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+- cflags = io_put_kbuf(req, *ret, req->buf_list);
++ cflags = io_put_kbuf(req, sel->val, req->buf_list);
+ goto finish;
+ }
+
+- cflags = io_put_kbufs(req, *ret, req->buf_list, io_bundle_nbufs(kmsg, *ret));
++ cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val));
+
+ /*
+ * Don't start new bundles if the buffer list is empty, or if the
+@@ -532,15 +533,15 @@ static inline bool io_send_finish(struct
+ * Fill CQE for this receive and see if we should keep trying to
+ * receive from this socket.
+ */
+- if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
++ if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) {
+ io_mshot_prep_retry(req, kmsg);
+ return false;
+ }
+
+ /* Otherwise stop bundle and use the current result. */
+ finish:
+- io_req_set_res(req, *ret, cflags);
+- *ret = IOU_OK;
++ io_req_set_res(req, sel->val, cflags);
++ sel->val = IOU_OK;
+ return true;
+ }
+
+@@ -687,11 +688,12 @@ retry_bundle:
+ else if (sr->done_io)
+ ret = sr->done_io;
+
+- if (!io_send_finish(req, &ret, kmsg))
++ sel.val = ret;
++ if (!io_send_finish(req, kmsg, &sel))
+ goto retry_bundle;
+
+ io_req_msg_cleanup(req, issue_flags);
+- return ret;
++ return sel.val;
+ }
+
+ static int io_recvmsg_mshot_prep(struct io_kiocb *req,
--- /dev/null
+From d863f651b523f217ccdf7887364e993610f2b880 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 20 Aug 2025 20:03:40 -0600
+Subject: io_uring: remove async/poll related provided buffer recycles
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit e973837b54024f070b2b48c7ee9725548548257a upstream.
+
+These aren't necessary anymore, get rid of them.
+
+Link: https://lore.kernel.org/r/20250821020750.598432-13-axboe@kernel.dk
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 2 --
+ io_uring/poll.c | 4 ----
+ 2 files changed, 6 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1921,11 +1921,9 @@ static void io_queue_async(struct io_kio
+
+ switch (io_arm_poll_handler(req, 0)) {
+ case IO_APOLL_READY:
+- io_kbuf_recycle(req, NULL, 0);
+ io_req_task_queue(req);
+ break;
+ case IO_APOLL_ABORTED:
+- io_kbuf_recycle(req, NULL, 0);
+ io_queue_iowq(req);
+ break;
+ case IO_APOLL_OK:
+--- a/io_uring/poll.c
++++ b/io_uring/poll.c
+@@ -356,10 +356,8 @@ void io_poll_task_func(struct io_kiocb *
+
+ ret = io_poll_check_events(req, ts);
+ if (ret == IOU_POLL_NO_ACTION) {
+- io_kbuf_recycle(req, NULL, 0);
+ return;
+ } else if (ret == IOU_POLL_REQUEUE) {
+- io_kbuf_recycle(req, NULL, 0);
+ __io_poll_execute(req, 0);
+ return;
+ }
+@@ -753,8 +751,6 @@ int io_arm_poll_handler(struct io_kiocb
+ req->flags |= REQ_F_POLLED;
+ ipt.pt._qproc = io_async_queue_proc;
+
+- io_kbuf_recycle(req, NULL, issue_flags);
+-
+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
+ if (ret)
+ return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
--- /dev/null
+From fcc91899be4ad1e71f729a11d3939497bb0612e4 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 15 Oct 2025 13:38:53 -0600
+Subject: io_uring/rw: check for NULL io_br_sel when putting a buffer
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 18d6b1743eafeb3fb1e0ea5a2b7fd0a773d525a8 upstream.
+
+Both the read and write side use kiocb_done() to finish a request, and
+kiocb_done() will call io_put_kbuf() in case a provided buffer was used
+for the request. Provided buffers are not supported for writes, hence
+NULL is being passed in. This normally works fine, as io_put_kbuf()
+won't actually use the value unless REQ_F_BUFFER_RING or
+REQ_F_BUFFER_SELECTED is set in the request flags. But depending on
+compiler (or whether or not CONFIG_CC_OPTIMIZE_FOR_SIZE is set), that
+may be done even though the value is never used. This will then cause a
+NULL pointer dereference.
+
+Make it a bit more obvious and check for a NULL io_br_sel, and don't
+even bother calling io_put_kbuf() for that case.
+
+Fixes: 5fda51255439 ("io_uring/kbuf: switch to storing struct io_buffer_list locally")
+Reported-by: David Howells <dhowells@redhat.com>
+Tested-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rw.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -598,13 +598,16 @@ static int kiocb_done(struct io_kiocb *r
+ req->file->f_pos = rw->kiocb.ki_pos;
+ if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
+ if (!__io_complete_rw_common(req, ret)) {
++ u32 cflags = 0;
++
+ /*
+ * Safe to call io_end from here as we're inline
+ * from the submission path.
+ */
+ io_req_io_end(req);
+- io_req_set_res(req, final_ret,
+- io_put_kbuf(req, ret, sel->buf_list));
++ if (sel)
++ cflags = io_put_kbuf(req, ret, sel->buf_list);
++ io_req_set_res(req, final_ret, cflags);
+ io_req_rw_cleanup(req, issue_flags);
+ return IOU_OK;
+ }
--- /dev/null
+io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch
+io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch
+io_uring-kbuf-simplify-__io_put_kbuf.patch
+io_uring-kbuf-remove-legacy-kbuf-caching.patch
+io_uring-kbuf-open-code-__io_put_kbuf.patch
+io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch
+io_uring-kbuf-uninline-__io_put_kbufs.patch
+io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch
+io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch
+io_uring-net-clarify-io_recv_buf_select-return-value.patch
+io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch
+io_uring-kbuf-introduce-struct-io_br_sel.patch
+io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch
+io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch
+io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch
+io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch
+io_uring-remove-async-poll-related-provided-buffer-recycles.patch
+io_uring-net-correct-type-for-min_not_zero-cast.patch
+io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch
+io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch
+io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch
+io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch
+io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch
+io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch
+io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch