From: Greg Kroah-Hartman Date: Fri, 3 Apr 2026 11:47:33 +0000 (+0200) Subject: 6.12-stable patches X-Git-Tag: v6.6.133~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=77b45e861f42db918fe0926e8815bc5a6d9dafd6;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch io_uring-kbuf-introduce-struct-io_br_sel.patch io_uring-kbuf-open-code-__io_put_kbuf.patch io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch io_uring-kbuf-remove-legacy-kbuf-caching.patch io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch io_uring-kbuf-simplify-__io_put_kbuf.patch io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch io_uring-kbuf-uninline-__io_put_kbufs.patch io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch io_uring-net-clarify-io_recv_buf_select-return-value.patch io_uring-net-correct-type-for-min_not_zero-cast.patch io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch io_uring-remove-async-poll-related-provided-buffer-recycles.patch io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch series --- diff --git a/queue-6.12/io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch 
b/queue-6.12/io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch new file mode 100644 index 0000000000..2a75cf9fc1 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch @@ -0,0 +1,85 @@ +From 07642299c8028add4bd03aa4f85d7bb736b865b2 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 27 Aug 2025 15:27:30 -0600 +Subject: io_uring/kbuf: always use READ_ONCE() to read ring provided buffer lengths + +From: Jens Axboe + +Commit 98b6fa62c84f2e129161e976a5b9b3cb4ccd117b upstream. + +Since the buffers are mapped from userspace, it is prudent to use +READ_ONCE() to read the value into a local variable, and use that for +any other actions taken. Having a stable read of the buffer length +avoids worrying about it changing after checking, or being read multiple +times. + +Similarly, the buffer may well change in between it being picked and +being committed. Ensure the looping for incremental ring buffer commit +stops if it hits a zero sized buffer, as no further progress can be made +at that point. 
+ +Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") +Link: https://lore.kernel.org/io-uring/tencent_000C02641F6250C856D0C26228DE29A3D30A@qq.com/ +Reported-by: Qingyue Zhang +Reported-by: Suoxing Zhang +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -36,15 +36,19 @@ static bool io_kbuf_inc_commit(struct io + { + while (len) { + struct io_uring_buf *buf; +- u32 this_len; ++ u32 buf_len, this_len; + + buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); +- this_len = min_t(u32, len, buf->len); +- buf->len -= this_len; +- if (buf->len) { ++ buf_len = READ_ONCE(buf->len); ++ this_len = min_t(u32, len, buf_len); ++ buf_len -= this_len; ++ /* Stop looping for invalid buffer length of 0 */ ++ if (buf_len || !this_len) { + buf->addr += this_len; ++ buf->len = buf_len; + return false; + } ++ buf->len = 0; + bl->head++; + len -= this_len; + } +@@ -167,6 +171,7 @@ static struct io_br_sel io_ring_buffer_s + __u16 tail, head = bl->head; + struct io_br_sel sel = { }; + struct io_uring_buf *buf; ++ u32 buf_len; + + tail = smp_load_acquire(&br->tail); + if (unlikely(tail == head)) +@@ -176,8 +181,9 @@ static struct io_br_sel io_ring_buffer_s + req->flags |= REQ_F_BL_EMPTY; + + buf = io_ring_head_to_buf(br, head, bl->mask); +- if (*len == 0 || *len > buf->len) +- *len = buf->len; ++ buf_len = READ_ONCE(buf->len); ++ if (*len == 0 || *len > buf_len) ++ *len = buf_len; + req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; + req->buf_index = buf->bid; + sel.buf_list = bl; +@@ -274,7 +280,7 @@ static int io_ring_buffers_peek(struct i + + req->buf_index = buf->bid; + do { +- u32 len = buf->len; ++ u32 len = READ_ONCE(buf->len); + + /* truncate end piece, if needed, for non partial buffers */ + if (len > arg->max_len) { diff --git 
a/queue-6.12/io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch b/queue-6.12/io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch new file mode 100644 index 0000000000..214b2bb34e --- /dev/null +++ b/queue-6.12/io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch @@ -0,0 +1,156 @@ +From 9f348b1b92e7a9a7e90c28e943a528e79e84cc12 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:30 -0600 +Subject: io_uring/kbuf: drop 'issue_flags' from io_put_kbuf(s)() arguments + +From: Jens Axboe + +Commit 5e73b402cbbea51bcab90fc5ee6c6d06af76ae1b upstream. + +Picking multiple buffers always requires the ring lock to be held across +the operation, so there's no need to pass in the issue_flags to +io_put_kbufs(). On the single buffer side, if the initial picking of a +ring buffer was unlocked, then it will have been committed already. For +legacy buffers, no locking is required, as they will simply be freed. + +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 +- + io_uring/kbuf.h | 5 ++--- + io_uring/net.c | 14 ++++++-------- + io_uring/rw.c | 10 +++++----- + 4 files changed, 14 insertions(+), 17 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb + lockdep_assert_held(&req->ctx->uring_lock); + + req_set_fail(req); +- io_req_set_res(req, res, io_put_kbuf(req, res, IO_URING_F_UNLOCKED)); ++ io_req_set_res(req, res, io_put_kbuf(req, res)); + if (def->fail) + def->fail(req); + io_req_complete_defer(req); +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -131,8 +131,7 @@ static inline bool io_kbuf_recycle(struc + /* Mapped buffer ring, return io_uring_buf from head */ + #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] + +-static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len, +- unsigned issue_flags) ++static inline unsigned int io_put_kbuf(struct io_kiocb *req, int 
len) + { + if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) + return 0; +@@ -140,7 +139,7 @@ static inline unsigned int io_put_kbuf(s + } + + static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len, +- int nbufs, unsigned issue_flags) ++ int nbufs) + { + if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) + return 0; +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -508,19 +508,18 @@ static int io_net_kbuf_recyle(struct io_ + } + + static inline bool io_send_finish(struct io_kiocb *req, int *ret, +- struct io_async_msghdr *kmsg, +- unsigned issue_flags) ++ struct io_async_msghdr *kmsg) + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + bool bundle_finished = *ret <= 0; + unsigned int cflags; + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { +- cflags = io_put_kbuf(req, *ret, issue_flags); ++ cflags = io_put_kbuf(req, *ret); + goto finish; + } + +- cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags); ++ cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret)); + + /* + * Don't start new bundles if the buffer list is empty, or if the +@@ -687,7 +686,7 @@ retry_bundle: + else if (sr->done_io) + ret = sr->done_io; + +- if (!io_send_finish(req, &ret, kmsg, issue_flags)) ++ if (!io_send_finish(req, &ret, kmsg)) + goto retry_bundle; + + io_req_msg_cleanup(req, issue_flags); +@@ -870,8 +869,7 @@ static inline bool io_recv_finish(struct + if (sr->flags & IORING_RECVSEND_BUNDLE) { + size_t this_ret = *ret - sr->done_io; + +- cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), +- issue_flags); ++ cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret)); + if (sr->retry_flags & IO_SR_MSG_RETRY) + cflags = req->cqe.flags | (cflags & CQE_F_MASK); + /* bundle with no more immediate buffers, we're done */ +@@ -890,7 +888,7 @@ static inline bool io_recv_finish(struct + return false; + } + } else { +- cflags |= io_put_kbuf(req, *ret, issue_flags); ++ cflags |= 
io_put_kbuf(req, *ret); + } + + /* +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -520,7 +520,7 @@ void io_req_rw_complete(struct io_kiocb + io_req_io_end(req); + + if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) +- req->cqe.flags |= io_put_kbuf(req, req->cqe.res, 0); ++ req->cqe.flags |= io_put_kbuf(req, req->cqe.res); + + io_req_rw_cleanup(req, 0); + io_req_task_complete(req, ts); +@@ -602,7 +602,7 @@ static int kiocb_done(struct io_kiocb *r + */ + io_req_io_end(req); + io_req_set_res(req, final_ret, +- io_put_kbuf(req, ret, issue_flags)); ++ io_put_kbuf(req, ret)); + io_req_rw_cleanup(req, issue_flags); + return IOU_OK; + } +@@ -991,7 +991,7 @@ int io_read_mshot(struct io_kiocb *req, + if (ret < 0) + req_set_fail(req); + } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { +- cflags = io_put_kbuf(req, ret, issue_flags); ++ cflags = io_put_kbuf(req, ret); + } else { + /* + * Any successful return value will keep the multishot read +@@ -999,7 +999,7 @@ int io_read_mshot(struct io_kiocb *req, + * we fail to post a CQE, or multishot is no longer set, then + * jump to the termination path. This request is then done. 
+ */ +- cflags = io_put_kbuf(req, ret, issue_flags); ++ cflags = io_put_kbuf(req, ret); + rw->len = 0; /* similarly to above, reset len to 0 */ + + if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { +@@ -1210,7 +1210,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx + if (!smp_load_acquire(&req->iopoll_completed)) + break; + nr_events++; +- req->cqe.flags = io_put_kbuf(req, req->cqe.res, 0); ++ req->cqe.flags = io_put_kbuf(req, req->cqe.res); + if (req->opcode != IORING_OP_URING_CMD) + io_req_rw_cleanup(req, 0); + } diff --git a/queue-6.12/io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch b/queue-6.12/io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch new file mode 100644 index 0000000000..3d09b99548 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch @@ -0,0 +1,112 @@ +From 145c6a4a6e0d6d44b2bf75e60cb495c3d05d0461 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 10 Mar 2025 14:01:49 -0600 +Subject: io_uring/kbuf: enable bundles for incrementally consumed buffers + +From: Jens Axboe + +Commit cf9536e550dd243a1681fdbf804221527da20a80 upstream. + +The original support for incrementally consumed buffers didn't allow it +to be used with bundles, with the assumption being that incremental +buffers are generally larger, and hence there's less of a need to +support it. + +But that assumption may not be correct - it's perfectly viable to use +smaller buffers with incremental consumption, and there may be valid +reasons for an application or framework to do so. + +As there's really no need to explicitly disable bundles with +incrementally consumed buffers, allow it. This actually makes the peek +side cheaper and simpler, with the completion side basically the same, +just needing to iterate for the consumed length. 
+ +Reported-by: Norman Maurer +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 56 ++++++++++++++++++++++++++------------------------------ + 1 file changed, 26 insertions(+), 30 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -32,6 +32,25 @@ struct io_provide_buf { + __u16 bid; + }; + ++static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) ++{ ++ while (len) { ++ struct io_uring_buf *buf; ++ u32 this_len; ++ ++ buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); ++ this_len = min_t(u32, len, buf->len); ++ buf->len -= this_len; ++ if (buf->len) { ++ buf->addr += this_len; ++ return false; ++ } ++ bl->head++; ++ len -= this_len; ++ } ++ return true; ++} ++ + bool io_kbuf_commit(struct io_kiocb *req, + struct io_buffer_list *bl, int len, int nr) + { +@@ -42,20 +61,8 @@ bool io_kbuf_commit(struct io_kiocb *req + + if (unlikely(len < 0)) + return true; +- +- if (bl->flags & IOBL_INC) { +- struct io_uring_buf *buf; +- +- buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); +- if (WARN_ON_ONCE(len > buf->len)) +- len = buf->len; +- buf->len -= len; +- if (buf->len) { +- buf->addr += len; +- return false; +- } +- } +- ++ if (bl->flags & IOBL_INC) ++ return io_kbuf_inc_commit(bl, len); + bl->head += nr; + return true; + } +@@ -235,25 +242,14 @@ static int io_ring_buffers_peek(struct i + buf = io_ring_head_to_buf(br, head, bl->mask); + if (arg->max_len) { + u32 len = READ_ONCE(buf->len); ++ size_t needed; + + if (unlikely(!len)) + return -ENOBUFS; +- /* +- * Limit incremental buffers to 1 segment. No point trying +- * to peek ahead and map more than we need, when the buffers +- * themselves should be large when setup with +- * IOU_PBUF_RING_INC. 
+- */ +- if (bl->flags & IOBL_INC) { +- nr_avail = 1; +- } else { +- size_t needed; +- +- needed = (arg->max_len + len - 1) / len; +- needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); +- if (nr_avail > needed) +- nr_avail = needed; +- } ++ needed = (arg->max_len + len - 1) / len; ++ needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); ++ if (nr_avail > needed) ++ nr_avail = needed; + } + + /* diff --git a/queue-6.12/io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch b/queue-6.12/io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch new file mode 100644 index 0000000000..afae3e9cc9 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch @@ -0,0 +1,38 @@ +From 17b4417dca061d29ab2975564e1a33ce7c4fa4d5 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Thu, 19 Mar 2026 14:29:09 -0600 +Subject: io_uring/kbuf: fix missing BUF_MORE for incremental buffers at EOF + +From: Jens Axboe + +Commit 3ecd3e03144b38a21a3b70254f1b9d2e16629b09 upstream. + +For a zero length transfer, io_kbuf_inc_commit() is called with !len. +Since we never enter the while loop to consume the buffers, +io_kbuf_inc_commit() ends up returning true, consuming the buffer. But +if no data was consumed, by definition it cannot have consumed the +buffer. Return false for that case. 
+ +Reported-by: Martin Michaelis +Cc: stable@vger.kernel.org +Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") +Link: https://github.com/axboe/liburing/issues/1553 +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -34,6 +34,10 @@ struct io_provide_buf { + + static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) + { ++ /* No data consumed, return false early to avoid consuming the buffer */ ++ if (!len) ++ return false; ++ + while (len) { + struct io_uring_buf *buf; + u32 buf_len, this_len; diff --git a/queue-6.12/io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch b/queue-6.12/io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch new file mode 100644 index 0000000000..6f16ab9adc --- /dev/null +++ b/queue-6.12/io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch @@ -0,0 +1,104 @@ +From 7414a76696e4561ba2fe0b31bf66d4bdfd7641c9 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:48 +0000 +Subject: io_uring/kbuf: introduce io_kbuf_drop_legacy() + +From: Pavel Begunkov + +Commit 54e00d9a612ab93f37f612a5ccd7c0c4f8a31cea upstream. + +io_kbuf_drop() is only used for legacy provided buffers, and so +__io_put_kbuf_list() is never called for REQ_F_BUFFER_RING. Remove the +dead branch out of __io_put_kbuf_list(), rename it into +io_kbuf_drop_legacy() and use it directly instead of io_kbuf_drop(). 
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/c8cc73e2272f09a86ecbdad9ebdd8304f8e583c0.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 5 ++--- + io_uring/kbuf.c | 10 ++++++++++ + io_uring/kbuf.h | 24 ++---------------------- + 3 files changed, 14 insertions(+), 25 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -382,9 +382,8 @@ static bool req_need_defer(struct io_kio + + static void io_clean_op(struct io_kiocb *req) + { +- if (req->flags & REQ_F_BUFFER_SELECTED) { +- io_kbuf_drop(req); +- } ++ if (unlikely(req->flags & REQ_F_BUFFER_SELECTED)) ++ io_kbuf_drop_legacy(req); + + if (req->flags & REQ_F_NEED_CLEANUP) { + const struct io_cold_def *def = &io_cold_defs[req->opcode]; +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -50,6 +50,16 @@ static int io_buffer_add_list(struct io_ + return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); + } + ++void io_kbuf_drop_legacy(struct io_kiocb *req) ++{ ++ if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED))) ++ return; ++ req->buf_index = req->kbuf->bgid; ++ req->flags &= ~REQ_F_BUFFER_SELECTED; ++ kfree(req->kbuf); ++ req->kbuf = NULL; ++} ++ + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) + { + struct io_ring_ctx *ctx = req->ctx; +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -82,6 +82,7 @@ int io_unregister_pbuf_ring(struct io_ri + int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg); + + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); ++void io_kbuf_drop_legacy(struct io_kiocb *req); + + void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); + struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, +@@ -169,27 +170,6 @@ static inline bool __io_put_kbuf_ring(st + return ret; + } + +-static inline void __io_put_kbuf_list(struct io_kiocb *req, int len) +-{ +- if (req->flags & REQ_F_BUFFER_RING) { 
+- __io_put_kbuf_ring(req, len, 1); +- } else { +- req->buf_index = req->kbuf->bgid; +- req->flags &= ~REQ_F_BUFFER_SELECTED; +- kfree(req->kbuf); +- req->kbuf = NULL; +- } +-} +- +-static inline void io_kbuf_drop(struct io_kiocb *req) +-{ +- if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) +- return; +- +- /* len == 0 is fine here, non-ring will always drop all of it */ +- __io_put_kbuf_list(req, 0); +-} +- + static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, + int nbufs, unsigned issue_flags) + { +@@ -203,7 +183,7 @@ static inline unsigned int __io_put_kbuf + if (!__io_put_kbuf_ring(req, len, nbufs)) + ret |= IORING_CQE_F_BUF_MORE; + } else { +- __io_put_kbuf_list(req, len); ++ io_kbuf_drop_legacy(req); + } + return ret; + } diff --git a/queue-6.12/io_uring-kbuf-introduce-struct-io_br_sel.patch b/queue-6.12/io_uring-kbuf-introduce-struct-io_br_sel.patch new file mode 100644 index 0000000000..e579aee475 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-introduce-struct-io_br_sel.patch @@ -0,0 +1,295 @@ +From 725d49239061ab49650f767bdc3c45fea32dbe4f Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:34 -0600 +Subject: io_uring/kbuf: introduce struct io_br_sel + +From: Jens Axboe + +Commit ab6559bdbb08f6bee606435cd014fc5ba0f7b750 upstream. + +Rather than return addresses directly from buffer selection, add a +struct around it. No functional changes in this patch, it's in +preparation for storing more buffer related information locally, rather +than in struct io_kiocb. 
+ +Link: https://lore.kernel.org/r/20250821020750.598432-7-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 26 +++++++++++++------------- + io_uring/kbuf.h | 19 +++++++++++++++++-- + io_uring/net.c | 18 +++++++++--------- + io_uring/rw.c | 31 ++++++++++++++++++------------- + 4 files changed, 57 insertions(+), 37 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -152,18 +152,18 @@ static int io_provided_buffers_select(st + return 1; + } + +-static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, +- struct io_buffer_list *bl, +- unsigned int issue_flags) ++static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, ++ struct io_buffer_list *bl, ++ unsigned int issue_flags) + { + struct io_uring_buf_ring *br = bl->buf_ring; + __u16 tail, head = bl->head; ++ struct io_br_sel sel = { }; + struct io_uring_buf *buf; +- void __user *ret; + + tail = smp_load_acquire(&br->tail); + if (unlikely(tail == head)) +- return NULL; ++ return sel; + + if (head + 1 == tail) + req->flags |= REQ_F_BL_EMPTY; +@@ -174,7 +174,7 @@ static void __user *io_ring_buffer_selec + req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; + req->buf_list = bl; + req->buf_index = buf->bid; +- ret = u64_to_user_ptr(buf->addr); ++ sel.addr = u64_to_user_ptr(buf->addr); + + if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { + /* +@@ -191,27 +191,27 @@ static void __user *io_ring_buffer_selec + req->flags |= REQ_F_BUF_MORE; + req->buf_list = NULL; + } +- return ret; ++ return sel; + } + +-void __user *io_buffer_select(struct io_kiocb *req, size_t *len, +- unsigned int issue_flags) ++struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, ++ unsigned int issue_flags) + { + struct io_ring_ctx *ctx = req->ctx; ++ struct io_br_sel sel = { }; + struct io_buffer_list *bl; +- void __user *ret = NULL; + + io_ring_submit_lock(req->ctx, issue_flags); + + bl = 
io_buffer_get_list(ctx, req->buf_index); + if (likely(bl)) { + if (bl->flags & IOBL_BUF_RING) +- ret = io_ring_buffer_select(req, len, bl, issue_flags); ++ sel = io_ring_buffer_select(req, len, bl, issue_flags); + else +- ret = io_provided_buffer_select(req, len, bl); ++ sel.addr = io_provided_buffer_select(req, len, bl); + } + io_ring_submit_unlock(req->ctx, issue_flags); +- return ret; ++ return sel; + } + + /* cap it at a reasonable 256, will be one page even for 4K */ +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -64,8 +64,23 @@ struct buf_sel_arg { + unsigned short partial_map; + }; + +-void __user *io_buffer_select(struct io_kiocb *req, size_t *len, +- unsigned int issue_flags); ++/* ++ * Return value from io_buffer_list selection. Just returns the error or ++ * user address for now, will be extended to return the buffer list in the ++ * future. ++ */ ++struct io_br_sel { ++ /* ++ * Some selection parts return the user address, others return an error. ++ */ ++ union { ++ void __user *addr; ++ ssize_t val; ++ }; ++}; ++ ++struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, ++ unsigned int issue_flags); + int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, + unsigned int issue_flags); + int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg); +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -1035,22 +1035,22 @@ int io_recvmsg(struct io_kiocb *req, uns + + retry_multishot: + if (io_do_buffer_select(req)) { +- void __user *buf; ++ struct io_br_sel sel; + size_t len = sr->len; + +- buf = io_buffer_select(req, &len, issue_flags); +- if (!buf) ++ sel = io_buffer_select(req, &len, issue_flags); ++ if (!sel.addr) + return -ENOBUFS; + + if (req->flags & REQ_F_APOLL_MULTISHOT) { +- ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); ++ ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len); + if (ret) { + io_kbuf_recycle(req, req->buf_list, issue_flags); + return ret; + } + } + +- iov_iter_ubuf(&kmsg->msg.msg_iter, 
ITER_DEST, buf, len); ++ iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, sel.addr, len); + } + + kmsg->msg.msg_get_inq = 1; +@@ -1144,13 +1144,13 @@ static int io_recv_buf_select(struct io_ + iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, + arg.out_len); + } else { +- void __user *buf; ++ struct io_br_sel sel; + + *len = sr->len; +- buf = io_buffer_select(req, len, issue_flags); +- if (!buf) ++ sel = io_buffer_select(req, len, issue_flags); ++ if (!sel.addr) + return -ENOBUFS; +- sr->buf = buf; ++ sr->buf = sel.addr; + sr->len = *len; + map_ubuf: + ret = import_ubuf(ITER_DEST, sr->buf, sr->len, +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -88,28 +88,28 @@ static int io_iov_buffer_select_prep(str + + static int __io_import_iovec(int ddir, struct io_kiocb *req, + struct io_async_rw *io, ++ struct io_br_sel *sel, + unsigned int issue_flags) + { + const struct io_issue_def *def = &io_issue_defs[req->opcode]; + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + struct iovec *iov; +- void __user *buf; + int nr_segs, ret; + size_t sqe_len; + +- buf = u64_to_user_ptr(rw->addr); ++ sel->addr = u64_to_user_ptr(rw->addr); + sqe_len = rw->len; + + if (!def->vectored || req->flags & REQ_F_BUFFER_SELECT) { + if (io_do_buffer_select(req)) { +- buf = io_buffer_select(req, &sqe_len, issue_flags); +- if (!buf) ++ *sel = io_buffer_select(req, &sqe_len, issue_flags); ++ if (!sel->addr) + return -ENOBUFS; +- rw->addr = (unsigned long) buf; ++ rw->addr = (unsigned long) sel->addr; + rw->len = sqe_len; + } + +- return import_ubuf(ddir, buf, sqe_len, &io->iter); ++ return import_ubuf(ddir, sel->addr, sqe_len, &io->iter); + } + + if (io->free_iovec) { +@@ -119,7 +119,7 @@ static int __io_import_iovec(int ddir, s + iov = &io->fast_iov; + nr_segs = 1; + } +- ret = __import_iovec(ddir, buf, sqe_len, nr_segs, &iov, &io->iter, ++ ret = __import_iovec(ddir, sel->addr, sqe_len, nr_segs, &iov, &io->iter, + req->ctx->compat); + if (unlikely(ret < 0)) + return ret; +@@ -134,11 
+134,12 @@ static int __io_import_iovec(int ddir, s + + static inline int io_import_iovec(int rw, struct io_kiocb *req, + struct io_async_rw *io, ++ struct io_br_sel *sel, + unsigned int issue_flags) + { + int ret; + +- ret = __io_import_iovec(rw, req, io, issue_flags); ++ ret = __io_import_iovec(rw, req, io, sel, issue_flags); + if (unlikely(ret < 0)) + return ret; + +@@ -240,6 +241,7 @@ done: + static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import) + { + struct io_async_rw *rw; ++ struct io_br_sel sel = { }; + int ret; + + if (io_rw_alloc_async(req)) +@@ -249,7 +251,7 @@ static int io_prep_rw_setup(struct io_ki + return 0; + + rw = req->async_data; +- ret = io_import_iovec(ddir, req, rw, 0); ++ ret = io_import_iovec(ddir, req, rw, &sel, 0); + if (unlikely(ret < 0)) + return ret; + +@@ -827,7 +829,8 @@ static int io_rw_init_file(struct io_kio + return 0; + } + +-static int __io_read(struct io_kiocb *req, unsigned int issue_flags) ++static int __io_read(struct io_kiocb *req, struct io_br_sel *sel, ++ unsigned int issue_flags) + { + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); +@@ -837,7 +840,7 @@ static int __io_read(struct io_kiocb *re + loff_t *ppos; + + if (io_do_buffer_select(req)) { +- ret = io_import_iovec(ITER_DEST, req, io, issue_flags); ++ ret = io_import_iovec(ITER_DEST, req, io, sel, issue_flags); + if (unlikely(ret < 0)) + return ret; + } +@@ -947,9 +950,10 @@ done: + + int io_read(struct io_kiocb *req, unsigned int issue_flags) + { ++ struct io_br_sel sel = { }; + int ret; + +- ret = __io_read(req, issue_flags); ++ ret = __io_read(req, &sel, issue_flags); + if (ret >= 0) + return kiocb_done(req, ret, issue_flags); + +@@ -961,6 +965,7 @@ int io_read(struct io_kiocb *req, unsign + int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); ++ struct io_br_sel sel = { }; + unsigned int cflags = 0; + 
int ret; + +@@ -970,7 +975,7 @@ int io_read_mshot(struct io_kiocb *req, + if (!io_file_can_poll(req)) + return -EBADFD; + +- ret = __io_read(req, issue_flags); ++ ret = __io_read(req, &sel, issue_flags); + + /* + * If we get -EAGAIN, recycle our buffer and just let normal poll diff --git a/queue-6.12/io_uring-kbuf-open-code-__io_put_kbuf.patch b/queue-6.12/io_uring-kbuf-open-code-__io_put_kbuf.patch new file mode 100644 index 0000000000..53d2d0e701 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-open-code-__io_put_kbuf.patch @@ -0,0 +1,55 @@ +From 7828a049e6c26ed764dcbe0579954a43d6e44edb Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:47 +0000 +Subject: io_uring/kbuf: open code __io_put_kbuf() + +From: Pavel Begunkov + +Commit e150e70fce425e1cdfc227974893cad9fb90a0d3 upstream. + +__io_put_kbuf() is a trivial wrapper, open code it into +__io_put_kbufs(). + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/9dc17380272b48d56c95992c6f9eaacd5546e1d3.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 5 ----- + io_uring/kbuf.h | 4 +--- + 2 files changed, 1 insertion(+), 8 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -76,11 +76,6 @@ bool io_kbuf_recycle_legacy(struct io_ki + return true; + } + +-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) +-{ +- __io_put_kbuf_list(req, len); +-} +- + static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl) + { +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -81,8 +81,6 @@ int io_register_pbuf_ring(struct io_ring + int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); + int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg); + +-void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags); +- + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); + + void 
io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); +@@ -205,7 +203,7 @@ static inline unsigned int __io_put_kbuf + if (!__io_put_kbuf_ring(req, len, nbufs)) + ret |= IORING_CQE_F_BUF_MORE; + } else { +- __io_put_kbuf(req, len, issue_flags); ++ __io_put_kbuf_list(req, len); + } + return ret; + } diff --git a/queue-6.12/io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch b/queue-6.12/io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch new file mode 100644 index 0000000000..1ac59657aa --- /dev/null +++ b/queue-6.12/io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch @@ -0,0 +1,369 @@ +From 185e462830eaf7a6df916ba1400b46182b36ec9d Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:33 -0600 +Subject: io_uring/kbuf: pass in struct io_buffer_list to commit/recycle helpers + +From: Jens Axboe + +Commit 1b5add75d7c894c62506c9b55f1d9eaadae50ef1 upstream. + +Rather than have this implied being in the io_kiocb, pass it in directly +so it's immediately obvious where these users of ->buf_list are coming +from. 
+ +Link: https://lore.kernel.org/r/20250821020750.598432-6-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 6 +++--- + io_uring/kbuf.c | 9 +++++---- + io_uring/kbuf.h | 24 ++++++++++++++---------- + io_uring/net.c | 30 +++++++++++++----------------- + io_uring/poll.c | 6 +++--- + io_uring/rw.c | 16 ++++++++-------- + 6 files changed, 46 insertions(+), 45 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb + lockdep_assert_held(&req->ctx->uring_lock); + + req_set_fail(req); +- io_req_set_res(req, res, io_put_kbuf(req, res)); ++ io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list)); + if (def->fail) + def->fail(req); + io_req_complete_defer(req); +@@ -1921,11 +1921,11 @@ static void io_queue_async(struct io_kio + + switch (io_arm_poll_handler(req, 0)) { + case IO_APOLL_READY: +- io_kbuf_recycle(req, 0); ++ io_kbuf_recycle(req, req->buf_list, 0); + io_req_task_queue(req); + break; + case IO_APOLL_ABORTED: +- io_kbuf_recycle(req, 0); ++ io_kbuf_recycle(req, req->buf_list, 0); + io_queue_iowq(req); + break; + case IO_APOLL_OK: +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -368,9 +368,9 @@ int io_buffers_peek(struct io_kiocb *req + return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); + } + +-static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) ++static inline bool __io_put_kbuf_ring(struct io_kiocb *req, ++ struct io_buffer_list *bl, int len, int nr) + { +- struct io_buffer_list *bl = req->buf_list; + bool ret = true; + + if (bl) { +@@ -381,7 +381,8 @@ static inline bool __io_put_kbuf_ring(st + return ret; + } + +-unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs) ++unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl, ++ int len, int nbufs) + { + unsigned int ret; + +@@ -392,7 +393,7 @@ unsigned int __io_put_kbufs(struct io_ki + return ret; 
+ } + +- if (!__io_put_kbuf_ring(req, len, nbufs)) ++ if (!__io_put_kbuf_ring(req, bl, len, nbufs)) + ret |= IORING_CQE_F_BUF_MORE; + return ret; + } +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -84,7 +84,8 @@ int io_register_pbuf_status(struct io_ri + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); + void io_kbuf_drop_legacy(struct io_kiocb *req); + +-unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs); ++unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl, ++ int len, int nbufs); + bool io_kbuf_commit(struct io_kiocb *req, + struct io_buffer_list *bl, int len, int nr); + +@@ -93,7 +94,8 @@ struct io_buffer_list *io_pbuf_get_bl(st + unsigned long bgid); + int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma); + +-static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) ++static inline bool io_kbuf_recycle_ring(struct io_kiocb *req, ++ struct io_buffer_list *bl) + { + /* + * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear +@@ -102,8 +104,8 @@ static inline bool io_kbuf_recycle_ring( + * The exception is partial io, that case we should increment bl->head + * to monopolize the buffer. 
+ */ +- if (req->buf_list) { +- req->buf_index = req->buf_list->bgid; ++ if (bl) { ++ req->buf_index = bl->bgid; + req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT); + return true; + } +@@ -117,32 +119,34 @@ static inline bool io_do_buffer_select(s + return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); + } + +-static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) ++static inline bool io_kbuf_recycle(struct io_kiocb *req, struct io_buffer_list *bl, ++ unsigned issue_flags) + { + if (req->flags & REQ_F_BL_NO_RECYCLE) + return false; + if (req->flags & REQ_F_BUFFER_SELECTED) + return io_kbuf_recycle_legacy(req, issue_flags); + if (req->flags & REQ_F_BUFFER_RING) +- return io_kbuf_recycle_ring(req); ++ return io_kbuf_recycle_ring(req, bl); + return false; + } + + /* Mapped buffer ring, return io_uring_buf from head */ + #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] + +-static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len) ++static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len, ++ struct io_buffer_list *bl) + { + if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) + return 0; +- return __io_put_kbufs(req, len, 1); ++ return __io_put_kbufs(req, bl, len, 1); + } + + static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len, +- int nbufs) ++ struct io_buffer_list *bl, int nbufs) + { + if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) + return 0; +- return __io_put_kbufs(req, len, nbufs); ++ return __io_put_kbufs(req, bl, len, nbufs); + } + #endif +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -498,12 +498,12 @@ static int io_bundle_nbufs(struct io_asy + return nbufs; + } + +-static int io_net_kbuf_recyle(struct io_kiocb *req, ++static int io_net_kbuf_recyle(struct io_kiocb *req, struct io_buffer_list *bl, + struct io_async_msghdr *kmsg, int len) + { + req->flags |= REQ_F_BL_NO_RECYCLE; + if (req->flags & REQ_F_BUFFERS_COMMIT) +- 
io_kbuf_commit(req, req->buf_list, len, io_bundle_nbufs(kmsg, len)); ++ io_kbuf_commit(req, bl, len, io_bundle_nbufs(kmsg, len)); + return -EAGAIN; + } + +@@ -515,11 +515,11 @@ static inline bool io_send_finish(struct + unsigned int cflags; + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { +- cflags = io_put_kbuf(req, *ret); ++ cflags = io_put_kbuf(req, *ret, req->buf_list); + goto finish; + } + +- cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret)); ++ cflags = io_put_kbufs(req, *ret, req->buf_list, io_bundle_nbufs(kmsg, *ret)); + + /* + * Don't start new bundles if the buffer list is empty, or if the +@@ -675,7 +675,7 @@ retry_bundle: + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; +- return io_net_kbuf_recyle(req, kmsg, ret); ++ return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +@@ -869,7 +869,7 @@ static inline bool io_recv_finish(struct + if (sr->flags & IORING_RECVSEND_BUNDLE) { + size_t this_ret = *ret - sr->done_io; + +- cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret)); ++ cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret)); + if (sr->retry_flags & IO_SR_MSG_RETRY) + cflags = req->cqe.flags | (cflags & CQE_F_MASK); + /* bundle with no more immediate buffers, we're done */ +@@ -888,7 +888,7 @@ static inline bool io_recv_finish(struct + return false; + } + } else { +- cflags |= io_put_kbuf(req, *ret); ++ cflags |= io_put_kbuf(req, *ret, req->buf_list); + } + + /* +@@ -1045,7 +1045,7 @@ retry_multishot: + if (req->flags & REQ_F_APOLL_MULTISHOT) { + ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len); + if (ret) { +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, req->buf_list, issue_flags); + return ret; + } + } +@@ -1070,15 +1070,11 @@ retry_multishot: + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) { + if (issue_flags & IO_URING_F_MULTISHOT) { +- io_kbuf_recycle(req, issue_flags); ++ 
io_kbuf_recycle(req, req->buf_list, issue_flags); + return IOU_ISSUE_SKIP_COMPLETE; + } + return -EAGAIN; + } +- if (ret > 0 && io_net_retry(sock, flags)) { +- sr->done_io += ret; +- return io_net_kbuf_recyle(req, kmsg, ret); +- } + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); +@@ -1091,7 +1087,7 @@ retry_multishot: + else if (sr->done_io) + ret = sr->done_io; + else +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, req->buf_list, issue_flags); + + if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) + goto retry_multishot; +@@ -1209,7 +1205,7 @@ retry_multishot: + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) { + if (issue_flags & IO_URING_F_MULTISHOT) { +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, req->buf_list, issue_flags); + return IOU_ISSUE_SKIP_COMPLETE; + } + +@@ -1219,7 +1215,7 @@ retry_multishot: + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; +- return io_net_kbuf_recyle(req, kmsg, ret); ++ return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +@@ -1235,7 +1231,7 @@ out_free: + else if (sr->done_io) + ret = sr->done_io; + else +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, req->buf_list, issue_flags); + + if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) + goto retry_multishot; +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -356,10 +356,10 @@ void io_poll_task_func(struct io_kiocb * + + ret = io_poll_check_events(req, ts); + if (ret == IOU_POLL_NO_ACTION) { +- io_kbuf_recycle(req, 0); ++ io_kbuf_recycle(req, req->buf_list, 0); + return; + } else if (ret == IOU_POLL_REQUEUE) { +- io_kbuf_recycle(req, 0); ++ io_kbuf_recycle(req, req->buf_list, 0); + __io_poll_execute(req, 0); + return; + } +@@ -753,7 +753,7 @@ int io_arm_poll_handler(struct io_kiocb + req->flags |= REQ_F_POLLED; + ipt.pt._qproc = io_async_queue_proc; + +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, 
req->buf_list, issue_flags); + + ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags); + if (ret) +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -520,7 +520,7 @@ void io_req_rw_complete(struct io_kiocb + io_req_io_end(req); + + if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) +- req->cqe.flags |= io_put_kbuf(req, req->cqe.res); ++ req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list); + + io_req_rw_cleanup(req, 0); + io_req_task_complete(req, ts); +@@ -602,7 +602,7 @@ static int kiocb_done(struct io_kiocb *r + */ + io_req_io_end(req); + io_req_set_res(req, final_ret, +- io_put_kbuf(req, ret)); ++ io_put_kbuf(req, ret, req->buf_list)); + io_req_rw_cleanup(req, issue_flags); + return IOU_OK; + } +@@ -954,7 +954,7 @@ int io_read(struct io_kiocb *req, unsign + return kiocb_done(req, ret, issue_flags); + + if (req->flags & REQ_F_BUFFERS_COMMIT) +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, req->buf_list, issue_flags); + return ret; + } + +@@ -981,17 +981,17 @@ int io_read_mshot(struct io_kiocb *req, + * Reset rw->len to 0 again to avoid clamping future mshot + * reads, in case the buffer size varies. + */ +- if (io_kbuf_recycle(req, issue_flags)) ++ if (io_kbuf_recycle(req, req->buf_list, issue_flags)) + rw->len = 0; + if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_ISSUE_SKIP_COMPLETE; + return -EAGAIN; + } else if (ret <= 0) { +- io_kbuf_recycle(req, issue_flags); ++ io_kbuf_recycle(req, req->buf_list, issue_flags); + if (ret < 0) + req_set_fail(req); + } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { +- cflags = io_put_kbuf(req, ret); ++ cflags = io_put_kbuf(req, ret, req->buf_list); + } else { + /* + * Any successful return value will keep the multishot read +@@ -999,7 +999,7 @@ int io_read_mshot(struct io_kiocb *req, + * we fail to post a CQE, or multishot is no longer set, then + * jump to the termination path. This request is then done. 
+ */ +- cflags = io_put_kbuf(req, ret); ++ cflags = io_put_kbuf(req, ret, req->buf_list); + rw->len = 0; /* similarly to above, reset len to 0 */ + + if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { +@@ -1210,7 +1210,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx + if (!smp_load_acquire(&req->iopoll_completed)) + break; + nr_events++; +- req->cqe.flags = io_put_kbuf(req, req->cqe.res); ++ req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list); + if (req->opcode != IORING_OP_URING_CMD) + io_req_rw_cleanup(req, 0); + } diff --git a/queue-6.12/io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch b/queue-6.12/io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch new file mode 100644 index 0000000000..f372dd08fc --- /dev/null +++ b/queue-6.12/io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch @@ -0,0 +1,48 @@ +From aecbedeb4dd8a16964f1fc52778c421c136825f1 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Thu, 19 Mar 2026 14:29:20 -0600 +Subject: io_uring/kbuf: propagate BUF_MORE through early buffer commit path + +From: Jens Axboe + +Commit 418eab7a6f3c002d8e64d6e95ec27118017019af upstream. + +When io_should_commit() returns true (eg for non-pollable files), buffer +commit happens at buffer selection time and sel->buf_list is set to +NULL. When __io_put_kbufs() generates CQE flags at completion time, it +calls __io_put_kbuf_ring() which finds a NULL buffer_list and hence +cannot determine whether the buffer was consumed or not. This means that +IORING_CQE_F_BUF_MORE is never set for non-pollable input with +incrementally consumed buffers. + +Likewise for io_buffers_select(), which always commits upfront and +discards the return value of io_kbuf_commit(). + +Add REQ_F_BUF_MORE to store the result of io_kbuf_commit() during early +commit. Then __io_put_kbuf_ring() can check this flag and set +IORING_CQE_F_BUF_MORE accordingly.
+ +Reported-by: Martin Michaelis +Cc: stable@vger.kernel.org +Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") +Link: https://github.com/axboe/liburing/issues/1553 +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -387,7 +387,10 @@ static inline bool __io_put_kbuf_ring(st + ret = io_kbuf_commit(req, bl, len, nr); + req->buf_index = bl->bgid; + } +- req->flags &= ~REQ_F_BUFFER_RING; ++ if (ret && (req->flags & REQ_F_BUF_MORE)) ++ ret = false; ++ ++ req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE); + return ret; + } + diff --git a/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch new file mode 100644 index 0000000000..333ebdfc95 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch @@ -0,0 +1,69 @@ +From 1fc437c00b774e5b56d91a169298e558947f9e27 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:42 +0000 +Subject: io_uring/kbuf: remove legacy kbuf bulk allocation + +From: Pavel Begunkov + +Commit 7919292a961421bfdb22f83c16657684c96076b3 upstream. + +Legacy provided buffers are slow and discouraged in favour of the ring +variant. Remove the bulk allocation to keep it simpler as we don't care +about performance. 
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/a064d70370e590efed8076e9501ae4cfc20fe0ca.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 30 +++++------------------------- + 1 file changed, 5 insertions(+), 25 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -521,12 +521,9 @@ int io_provide_buffers_prep(struct io_ki + return 0; + } + +-#define IO_BUFFER_ALLOC_BATCH 64 +- + static int io_refill_buffer_cache(struct io_ring_ctx *ctx) + { +- struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH]; +- int allocated; ++ struct io_buffer *buf; + + /* + * Completions that don't happen inline (eg not under uring_lock) will +@@ -544,27 +541,10 @@ static int io_refill_buffer_cache(struct + spin_unlock(&ctx->completion_lock); + } + +- /* +- * No free buffers and no completion entries either. Allocate a new +- * batch of buffer entries and add those to our freelist. +- */ +- +- allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT, +- ARRAY_SIZE(bufs), (void **) bufs); +- if (unlikely(!allocated)) { +- /* +- * Bulk alloc is all-or-nothing. If we fail to get a batch, +- * retry single alloc to be on the safe side. 
+- */ +- bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); +- if (!bufs[0]) +- return -ENOMEM; +- allocated = 1; +- } +- +- while (allocated) +- list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache); +- ++ buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ list_add_tail(&buf->list, &ctx->io_buffers_cache); + return 0; + } + diff --git a/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-caching.patch b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-caching.patch new file mode 100644 index 0000000000..2b290d7b02 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-caching.patch @@ -0,0 +1,194 @@ +From 7d352330a367b477ac6c75a7dbffc850dec5a757 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:46 +0000 +Subject: io_uring/kbuf: remove legacy kbuf caching + +From: Pavel Begunkov + +Commit 13ee854e7c04236a47a5beaacdcf51eb0bc7a8fa upstream. + +Remove all struct io_buffer caches. It makes it a fair bit simpler. +Apart from killing a bunch of lines and juggling between lists, +__io_put_kbuf_list() doesn't need ->completion_lock locking now.
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/18287217466ee2576ea0b1e72daccf7b22c7e856.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/io_uring_types.h | 3 -- + io_uring/io_uring.c | 2 - + io_uring/kbuf.c | 58 ++++------------------------------------- + io_uring/kbuf.h | 5 +-- + 4 files changed, 9 insertions(+), 59 deletions(-) + +--- a/include/linux/io_uring_types.h ++++ b/include/linux/io_uring_types.h +@@ -341,7 +341,6 @@ struct io_ring_ctx { + + spinlock_t completion_lock; + +- struct list_head io_buffers_comp; + struct list_head cq_overflow_list; + struct io_hash_table cancel_table; + +@@ -361,8 +360,6 @@ struct io_ring_ctx { + unsigned int file_alloc_start; + unsigned int file_alloc_end; + +- struct list_head io_buffers_cache; +- + /* Keep this last, we don't need it for the fast path */ + struct wait_queue_head poll_wq; + struct io_restriction restrictions; +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -305,7 +305,6 @@ static __cold struct io_ring_ctx *io_rin + init_waitqueue_head(&ctx->sqo_sq_wait); + INIT_LIST_HEAD(&ctx->sqd_list); + INIT_LIST_HEAD(&ctx->cq_overflow_list); +- INIT_LIST_HEAD(&ctx->io_buffers_cache); + ret = io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX, + sizeof(struct io_rsrc_node)); + ret |= io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, +@@ -328,7 +327,6 @@ static __cold struct io_ring_ctx *io_rin + spin_lock_init(&ctx->completion_lock); + spin_lock_init(&ctx->timeout_lock); + INIT_WQ_LIST(&ctx->iopoll_list); +- INIT_LIST_HEAD(&ctx->io_buffers_comp); + INIT_LIST_HEAD(&ctx->defer_list); + INIT_LIST_HEAD(&ctx->timeout_list); + INIT_LIST_HEAD(&ctx->ltimeout_list); +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -78,9 +78,7 @@ bool io_kbuf_recycle_legacy(struct io_ki + + void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) + { +- spin_lock(&req->ctx->completion_lock); + 
__io_put_kbuf_list(req, len); +- spin_unlock(&req->ctx->completion_lock); + } + + static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, +@@ -362,14 +360,15 @@ static int __io_remove_buffers(struct io + return i; + } + +- /* protects io_buffers_cache */ + lockdep_assert_held(&ctx->uring_lock); + + while (!list_empty(&bl->buf_list)) { + struct io_buffer *nxt; + + nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); +- list_move(&nxt->list, &ctx->io_buffers_cache); ++ list_del(&nxt->list); ++ kfree(nxt); ++ + if (++i == nbufs) + return i; + cond_resched(); +@@ -389,27 +388,12 @@ void io_put_bl(struct io_ring_ctx *ctx, + void io_destroy_buffers(struct io_ring_ctx *ctx) + { + struct io_buffer_list *bl; +- struct list_head *item, *tmp; +- struct io_buffer *buf; + unsigned long index; + + xa_for_each(&ctx->io_bl_xa, index, bl) { + xa_erase(&ctx->io_bl_xa, bl->bgid); + io_put_bl(ctx, bl); + } +- +- /* +- * Move deferred locked entries to cache before pruning +- */ +- spin_lock(&ctx->completion_lock); +- if (!list_empty(&ctx->io_buffers_comp)) +- list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache); +- spin_unlock(&ctx->completion_lock); +- +- list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { +- buf = list_entry(item, struct io_buffer, list); +- kfree(buf); +- } + } + + static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +@@ -499,33 +483,6 @@ int io_provide_buffers_prep(struct io_ki + return 0; + } + +-static int io_refill_buffer_cache(struct io_ring_ctx *ctx) +-{ +- struct io_buffer *buf; +- +- /* +- * Completions that don't happen inline (eg not under uring_lock) will +- * add to ->io_buffers_comp. If we don't have any free buffers, check +- * the completion list and splice those entries first. 
+- */ +- if (!list_empty_careful(&ctx->io_buffers_comp)) { +- spin_lock(&ctx->completion_lock); +- if (!list_empty(&ctx->io_buffers_comp)) { +- list_splice_init(&ctx->io_buffers_comp, +- &ctx->io_buffers_cache); +- spin_unlock(&ctx->completion_lock); +- return 0; +- } +- spin_unlock(&ctx->completion_lock); +- } +- +- buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); +- if (!buf) +- return -ENOMEM; +- list_add_tail(&buf->list, &ctx->io_buffers_cache); +- return 0; +-} +- + static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, + struct io_buffer_list *bl) + { +@@ -534,12 +491,11 @@ static int io_add_buffers(struct io_ring + int i, bid = pbuf->bid; + + for (i = 0; i < pbuf->nbufs; i++) { +- if (list_empty(&ctx->io_buffers_cache) && +- io_refill_buffer_cache(ctx)) ++ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); ++ if (!buf) + break; +- buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer, +- list); +- list_move_tail(&buf->list, &bl->buf_list); ++ ++ list_add_tail(&buf->list, &bl->buf_list); + buf->addr = addr; + buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); + buf->bid = bid; +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -177,8 +177,9 @@ static inline void __io_put_kbuf_list(st + __io_put_kbuf_ring(req, len, 1); + } else { + req->buf_index = req->kbuf->bgid; +- list_add(&req->kbuf->list, &req->ctx->io_buffers_comp); + req->flags &= ~REQ_F_BUFFER_SELECTED; ++ kfree(req->kbuf); ++ req->kbuf = NULL; + } + } + +@@ -187,10 +188,8 @@ static inline void io_kbuf_drop(struct i + if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) + return; + +- spin_lock(&req->ctx->completion_lock); + /* len == 0 is fine here, non-ring will always drop all of it */ + __io_put_kbuf_list(req, 0); +- spin_unlock(&req->ctx->completion_lock); + } + + static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, diff --git a/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch 
b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch new file mode 100644 index 0000000000..5c9d2c734a --- /dev/null +++ b/queue-6.12/io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch @@ -0,0 +1,80 @@ +From 3aa159b85a008972392a89b8a7cda51b674fc32d Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:43 +0000 +Subject: io_uring/kbuf: remove legacy kbuf kmem cache + +From: Pavel Begunkov + +Commit 9afe6847cff78e7f3aa8f4c920265cf298033251 upstream. + +Remove the kmem cache used by legacy provided buffers. + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/8195c207d8524d94e972c0c82de99282289f7f5c.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 -- + io_uring/io_uring.h | 1 - + io_uring/kbuf.c | 8 +++----- + 3 files changed, 3 insertions(+), 8 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3867,8 +3867,6 @@ static int __init io_uring_init(void) + req_cachep = kmem_cache_create("io_kiocb", sizeof(struct io_kiocb), &kmem_args, + SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | + SLAB_TYPESAFE_BY_RCU); +- io_buf_cachep = KMEM_CACHE(io_buffer, +- SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); + + iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); + +--- a/io_uring/io_uring.h ++++ b/io_uring/io_uring.h +@@ -389,7 +389,6 @@ static inline bool io_req_cache_empty(st + } + + extern struct kmem_cache *req_cachep; +-extern struct kmem_cache *io_buf_cachep; + + static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx) + { +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -20,8 +20,6 @@ + /* BIDs are addressed by a 16-bit field in a CQE */ + #define MAX_BIDS_PER_BGID (1 << 16) + +-struct kmem_cache *io_buf_cachep; +- + struct io_provide_buf { + struct file *file; + __u64 addr; +@@ -70,7 +68,7 @@ bool io_kbuf_recycle_legacy(struct io_ki + if (bl && !(bl->flags & IOBL_BUF_RING)) + list_add(&buf->list, 
&bl->buf_list); + else +- kmem_cache_free(io_buf_cachep, buf); ++ kfree(buf); + req->flags &= ~REQ_F_BUFFER_SELECTED; + req->kbuf = NULL; + +@@ -430,7 +428,7 @@ void io_destroy_buffers(struct io_ring_c + + list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { + buf = list_entry(item, struct io_buffer, list); +- kmem_cache_free(io_buf_cachep, buf); ++ kfree(buf); + } + } + +@@ -541,7 +539,7 @@ static int io_refill_buffer_cache(struct + spin_unlock(&ctx->completion_lock); + } + +- buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); ++ buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); + if (!buf) + return -ENOMEM; + list_add_tail(&buf->list, &ctx->io_buffers_cache); diff --git a/queue-6.12/io_uring-kbuf-simplify-__io_put_kbuf.patch b/queue-6.12/io_uring-kbuf-simplify-__io_put_kbuf.patch new file mode 100644 index 0000000000..f757acb76e --- /dev/null +++ b/queue-6.12/io_uring-kbuf-simplify-__io_put_kbuf.patch @@ -0,0 +1,105 @@ +From e74da9819b43033a42cfc36f58b95b3e89d85fdf Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:45 +0000 +Subject: io_uring/kbuf: simplify __io_put_kbuf + +From: Pavel Begunkov + +Commit dc39fb1093ea33019f192c93b77b863282e10162 upstream. + +As a preparation step remove an optimisation from __io_put_kbuf() trying +to use the locked cache. With that __io_put_kbuf_list() is only used +with ->io_buffers_comp, and we remove the explicit list argument. 
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/1b7f1394ec4afc7f96b35a61f5992e27c49fd067.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 -- + io_uring/kbuf.c | 26 +++----------------------- + io_uring/kbuf.h | 11 +++++------ + 3 files changed, 8 insertions(+), 31 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -385,9 +385,7 @@ static bool req_need_defer(struct io_kio + static void io_clean_op(struct io_kiocb *req) + { + if (req->flags & REQ_F_BUFFER_SELECTED) { +- spin_lock(&req->ctx->completion_lock); + io_kbuf_drop(req); +- spin_unlock(&req->ctx->completion_lock); + } + + if (req->flags & REQ_F_NEED_CLEANUP) { +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -78,29 +78,9 @@ bool io_kbuf_recycle_legacy(struct io_ki + + void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) + { +- /* +- * We can add this buffer back to two lists: +- * +- * 1) The io_buffers_cache list. This one is protected by the +- * ctx->uring_lock. If we already hold this lock, add back to this +- * list as we can grab it from issue as well. +- * 2) The io_buffers_comp list. This one is protected by the +- * ctx->completion_lock. +- * +- * We migrate buffers from the comp_list to the issue cache list +- * when we need one. 
+- */ +- if (issue_flags & IO_URING_F_UNLOCKED) { +- struct io_ring_ctx *ctx = req->ctx; +- +- spin_lock(&ctx->completion_lock); +- __io_put_kbuf_list(req, len, &ctx->io_buffers_comp); +- spin_unlock(&ctx->completion_lock); +- } else { +- lockdep_assert_held(&req->ctx->uring_lock); +- +- __io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache); +- } ++ spin_lock(&req->ctx->completion_lock); ++ __io_put_kbuf_list(req, len); ++ spin_unlock(&req->ctx->completion_lock); + } + + static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -171,27 +171,26 @@ static inline bool __io_put_kbuf_ring(st + return ret; + } + +-static inline void __io_put_kbuf_list(struct io_kiocb *req, int len, +- struct list_head *list) ++static inline void __io_put_kbuf_list(struct io_kiocb *req, int len) + { + if (req->flags & REQ_F_BUFFER_RING) { + __io_put_kbuf_ring(req, len, 1); + } else { + req->buf_index = req->kbuf->bgid; +- list_add(&req->kbuf->list, list); ++ list_add(&req->kbuf->list, &req->ctx->io_buffers_comp); + req->flags &= ~REQ_F_BUFFER_SELECTED; + } + } + + static inline void io_kbuf_drop(struct io_kiocb *req) + { +- lockdep_assert_held(&req->ctx->completion_lock); +- + if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) + return; + ++ spin_lock(&req->ctx->completion_lock); + /* len == 0 is fine here, non-ring will always drop all of it */ +- __io_put_kbuf_list(req, 0, &req->ctx->io_buffers_comp); ++ __io_put_kbuf_list(req, 0); ++ spin_unlock(&req->ctx->completion_lock); + } + + static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, diff --git a/queue-6.12/io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch b/queue-6.12/io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch new file mode 100644 index 0000000000..049c746d8b --- /dev/null +++ b/queue-6.12/io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch @@ -0,0 +1,498 @@ +From 
69db0294e6d11bc765f5cbee2f39602fa2b36f7b Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:39 -0600 +Subject: io_uring/kbuf: switch to storing struct io_buffer_list locally + +From: Jens Axboe + +Commit 5fda51255439addd1c9059098e30847a375a1008 upstream. + +Currently the buffer list is stored in struct io_kiocb. The buffer list +can be of two types: + +1) Classic/legacy buffer list. These don't need to get referenced after + a buffer pick, and hence storing them in struct io_kiocb is perfectly + fine. + +2) Ring provided buffer lists. These DO need to be referenced after the + initial buffer pick, as they need to get consumed later on. This can + be either just incrementing the head of the ring, or it can be + consuming parts of a buffer if incremental buffer consumption has + been configured. + +For case 2, io_uring needs to be careful not to access the buffer list +after the initial pick-and-execute context. The core does recycling of +these, but it's easy to make a mistake, because it's stored in the +io_kiocb which does persist across multiple execution contexts. Either +because it's a multishot request, or simply because it needed some kind +of async trigger (eg poll) for retry purposes. + +Add a struct io_buffer_list to struct io_br_sel, which is always on +stack for the various users of it. This prevents the buffer list from +leaking outside of that execution context, and additionally it enables +kbuf to not even pass back the struct io_buffer_list if the given +context isn't appropriately locked already. + +This doesn't fix any bugs, it's simply a defensive measure to prevent +any issues with reuse of a buffer list.
+ +Link: https://lore.kernel.org/r/20250821020750.598432-12-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/io_uring_types.h | 6 ---- + io_uring/io_uring.c | 6 ++-- + io_uring/kbuf.c | 27 ++++++++++++--------- + io_uring/kbuf.h | 16 ++++-------- + io_uring/net.c | 51 +++++++++++++++++------------------------ + io_uring/poll.c | 6 ++-- + io_uring/rw.c | 22 ++++++++--------- + 7 files changed, 60 insertions(+), 74 deletions(-) + +--- a/include/linux/io_uring_types.h ++++ b/include/linux/io_uring_types.h +@@ -624,12 +624,6 @@ struct io_kiocb { + + /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ + struct io_buffer *kbuf; +- +- /* +- * stores buffer ID for ring provided buffers, valid IFF +- * REQ_F_BUFFER_RING is set. +- */ +- struct io_buffer_list *buf_list; + }; + + union { +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -921,7 +921,7 @@ void io_req_defer_failed(struct io_kiocb + lockdep_assert_held(&req->ctx->uring_lock); + + req_set_fail(req); +- io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list)); ++ io_req_set_res(req, res, io_put_kbuf(req, res, NULL)); + if (def->fail) + def->fail(req); + io_req_complete_defer(req); +@@ -1921,11 +1921,11 @@ static void io_queue_async(struct io_kio + + switch (io_arm_poll_handler(req, 0)) { + case IO_APOLL_READY: +- io_kbuf_recycle(req, req->buf_list, 0); ++ io_kbuf_recycle(req, NULL, 0); + io_req_task_queue(req); + break; + case IO_APOLL_ABORTED: +- io_kbuf_recycle(req, req->buf_list, 0); ++ io_kbuf_recycle(req, NULL, 0); + io_queue_iowq(req); + break; + case IO_APOLL_OK: +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -172,8 +172,8 @@ static struct io_br_sel io_ring_buffer_s + if (*len == 0 || *len > buf->len) + *len = buf->len; + req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; +- req->buf_list = bl; + req->buf_index = buf->bid; ++ sel.buf_list = bl; + sel.addr = u64_to_user_ptr(buf->addr); + + if (issue_flags & 
IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { +@@ -187,9 +187,9 @@ static struct io_br_sel io_ring_buffer_s + * the transfer completes (or if we get -EAGAIN and must poll of + * retry). + */ +- if (!io_kbuf_commit(req, bl, *len, 1)) ++ if (!io_kbuf_commit(req, sel.buf_list, *len, 1)) + req->flags |= REQ_F_BUF_MORE; +- req->buf_list = NULL; ++ sel.buf_list = NULL; + } + return sel; + } +@@ -307,7 +307,6 @@ static int io_ring_buffers_peek(struct i + req->flags |= REQ_F_BL_EMPTY; + + req->flags |= REQ_F_BUFFER_RING; +- req->buf_list = bl; + return iov - arg->iovs; + } + +@@ -315,16 +314,15 @@ int io_buffers_select(struct io_kiocb *r + struct io_br_sel *sel, unsigned int issue_flags) + { + struct io_ring_ctx *ctx = req->ctx; +- struct io_buffer_list *bl; + int ret = -ENOENT; + + io_ring_submit_lock(ctx, issue_flags); +- bl = io_buffer_get_list(ctx, req->buf_index); +- if (unlikely(!bl)) ++ sel->buf_list = io_buffer_get_list(ctx, req->buf_index); ++ if (unlikely(!sel->buf_list)) + goto out_unlock; + +- if (bl->flags & IOBL_BUF_RING) { +- ret = io_ring_buffers_peek(req, arg, bl); ++ if (sel->buf_list->flags & IOBL_BUF_RING) { ++ ret = io_ring_buffers_peek(req, arg, sel->buf_list); + /* + * Don't recycle these buffers if we need to go through poll. 
+ * Nobody else can use them anyway, and holding on to provided +@@ -334,14 +332,17 @@ int io_buffers_select(struct io_kiocb *r + */ + if (ret > 0) { + req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE; +- if (!io_kbuf_commit(req, bl, arg->out_len, ret)) ++ if (!io_kbuf_commit(req, sel->buf_list, arg->out_len, ret)) + req->flags |= REQ_F_BUF_MORE; + } + } else { +- ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs); ++ ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs); + } + out_unlock: +- io_ring_submit_unlock(ctx, issue_flags); ++ if (issue_flags & IO_URING_F_UNLOCKED) { ++ sel->buf_list = NULL; ++ mutex_unlock(&ctx->uring_lock); ++ } + return ret; + } + +@@ -362,10 +363,12 @@ int io_buffers_peek(struct io_kiocb *req + ret = io_ring_buffers_peek(req, arg, bl); + if (ret > 0) + req->flags |= REQ_F_BUFFERS_COMMIT; ++ sel->buf_list = bl; + return ret; + } + + /* don't support multiple buffer selections for legacy */ ++ sel->buf_list = NULL; + return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); + } + +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -65,11 +65,14 @@ struct buf_sel_arg { + }; + + /* +- * Return value from io_buffer_list selection. Just returns the error or +- * user address for now, will be extended to return the buffer list in the +- * future. ++ * Return value from io_buffer_list selection, to avoid stashing it in ++ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference ++ * across execution contexts are fine. But for ring provided buffers, the ++ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb ++ * persists, it's better to just keep the buffer local for those cases. + */ + struct io_br_sel { ++ struct io_buffer_list *buf_list; + /* + * Some selection parts return the user address, others return an error. 
+ */ +@@ -113,13 +116,6 @@ int io_pbuf_mmap(struct file *file, stru + static inline bool io_kbuf_recycle_ring(struct io_kiocb *req, + struct io_buffer_list *bl) + { +- /* +- * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear +- * the flag and hence ensure that bl->head doesn't get incremented. +- * If the tail has already been incremented, hang on to it. +- * The exception is partial io, that case we should increment bl->head +- * to monopolize the buffer. +- */ + if (bl) { + req->buf_index = bl->bgid; + req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT); +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -442,7 +442,6 @@ int io_sendmsg_prep(struct io_kiocb *req + return -EINVAL; + sr->msg_flags |= MSG_WAITALL; + sr->buf_group = req->buf_index; +- req->buf_list = NULL; + req->flags |= REQ_F_MULTISHOT; + } + +@@ -516,11 +515,11 @@ static inline bool io_send_finish(struct + unsigned int cflags; + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { +- cflags = io_put_kbuf(req, sel->val, req->buf_list); ++ cflags = io_put_kbuf(req, sel->val, sel->buf_list); + goto finish; + } + +- cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val)); ++ cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val)); + + /* + * Don't start new bundles if the buffer list is empty, or if the +@@ -617,6 +616,7 @@ int io_send(struct io_kiocb *req, unsign + flags |= MSG_DONTWAIT; + + retry_bundle: ++ sel.buf_list = NULL; + if (io_do_buffer_select(req)) { + struct buf_sel_arg arg = { + .iovs = &kmsg->fast_iov, +@@ -677,7 +677,7 @@ retry_bundle: + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; +- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); ++ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +@@ -816,18 +816,8 @@ int io_recvmsg_prep(struct io_kiocb *req + req->flags |= REQ_F_NOWAIT; + if (sr->msg_flags & MSG_ERRQUEUE) + req->flags |= 
REQ_F_CLEAR_POLLIN; +- if (req->flags & REQ_F_BUFFER_SELECT) { +- /* +- * Store the buffer group for this multishot receive separately, +- * as if we end up doing an io-wq based issue that selects a +- * buffer, it has to be committed immediately and that will +- * clear ->buf_list. This means we lose the link to the buffer +- * list, and the eventual buffer put on completion then cannot +- * restore it. +- */ ++ if (req->flags & REQ_F_BUFFER_SELECT) + sr->buf_group = req->buf_index; +- req->buf_list = NULL; +- } + if (sr->flags & IORING_RECV_MULTISHOT) { + if (!(req->flags & REQ_F_BUFFER_SELECT)) + return -EINVAL; +@@ -873,7 +863,7 @@ static inline bool io_recv_finish(struct + if (sr->flags & IORING_RECVSEND_BUNDLE) { + size_t this_ret = sel->val - sr->done_io; + +- cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret)); ++ cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret)); + if (sr->retry_flags & IO_SR_MSG_RETRY) + cflags = req->cqe.flags | (cflags & CQE_F_MASK); + /* bundle with no more immediate buffers, we're done */ +@@ -892,7 +882,7 @@ static inline bool io_recv_finish(struct + return false; + } + } else { +- cflags |= io_put_kbuf(req, sel->val, req->buf_list); ++ cflags |= io_put_kbuf(req, sel->val, sel->buf_list); + } + + /* +@@ -1039,6 +1029,7 @@ int io_recvmsg(struct io_kiocb *req, uns + flags |= MSG_DONTWAIT; + + retry_multishot: ++ sel.buf_list = NULL; + if (io_do_buffer_select(req)) { + size_t len = sr->len; + +@@ -1049,7 +1040,7 @@ retry_multishot: + if (req->flags & REQ_F_APOLL_MULTISHOT) { + ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len); + if (ret) { +- io_kbuf_recycle(req, req->buf_list, issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); + return ret; + } + } +@@ -1073,12 +1064,15 @@ retry_multishot: + + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) { +- if (issue_flags & IO_URING_F_MULTISHOT) { +- io_kbuf_recycle(req, req->buf_list, 
issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); ++ if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_ISSUE_SKIP_COMPLETE; +- } + return -EAGAIN; + } ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->done_io += ret; ++ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); ++ } + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); +@@ -1091,7 +1085,7 @@ retry_multishot: + else if (sr->done_io) + ret = sr->done_io; + else +- io_kbuf_recycle(req, req->buf_list, issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); + + sel.val = ret; + if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) +@@ -1172,7 +1166,7 @@ int io_recv(struct io_kiocb *req, unsign + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct io_async_msghdr *kmsg = req->async_data; +- struct io_br_sel sel = { }; ++ struct io_br_sel sel; + struct socket *sock; + unsigned flags; + int ret, min_ret = 0; +@@ -1192,6 +1186,7 @@ int io_recv(struct io_kiocb *req, unsign + flags |= MSG_DONTWAIT; + + retry_multishot: ++ sel.buf_list = NULL; + if (io_do_buffer_select(req)) { + sel.val = sr->len; + ret = io_recv_buf_select(req, kmsg, &sel, issue_flags); +@@ -1211,18 +1206,16 @@ retry_multishot: + ret = sock_recvmsg(sock, &kmsg->msg, flags); + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) { +- if (issue_flags & IO_URING_F_MULTISHOT) { +- io_kbuf_recycle(req, req->buf_list, issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); ++ if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_ISSUE_SKIP_COMPLETE; +- } +- + return -EAGAIN; + } + if (ret > 0 && io_net_retry(sock, flags)) { + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; +- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret); ++ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret); + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +@@ -1238,7 +1231,7 @@ out_free: + else if (sr->done_io) + ret = sr->done_io; + else +- io_kbuf_recycle(req, 
req->buf_list, issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); + + sel.val = ret; + if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -356,10 +356,10 @@ void io_poll_task_func(struct io_kiocb * + + ret = io_poll_check_events(req, ts); + if (ret == IOU_POLL_NO_ACTION) { +- io_kbuf_recycle(req, req->buf_list, 0); ++ io_kbuf_recycle(req, NULL, 0); + return; + } else if (ret == IOU_POLL_REQUEUE) { +- io_kbuf_recycle(req, req->buf_list, 0); ++ io_kbuf_recycle(req, NULL, 0); + __io_poll_execute(req, 0); + return; + } +@@ -753,7 +753,7 @@ int io_arm_poll_handler(struct io_kiocb + req->flags |= REQ_F_POLLED; + ipt.pt._qproc = io_async_queue_proc; + +- io_kbuf_recycle(req, req->buf_list, issue_flags); ++ io_kbuf_recycle(req, NULL, issue_flags); + + ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags); + if (ret) +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -522,7 +522,7 @@ void io_req_rw_complete(struct io_kiocb + io_req_io_end(req); + + if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) +- req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list); ++ req->cqe.flags |= io_put_kbuf(req, req->cqe.res, NULL); + + io_req_rw_cleanup(req, 0); + io_req_task_complete(req, ts); +@@ -589,7 +589,7 @@ static inline void io_rw_done(struct kio + } + + static int kiocb_done(struct io_kiocb *req, ssize_t ret, +- unsigned int issue_flags) ++ struct io_br_sel *sel, unsigned int issue_flags) + { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + unsigned final_ret = io_fixup_rw_res(req, ret); +@@ -604,7 +604,7 @@ static int kiocb_done(struct io_kiocb *r + */ + io_req_io_end(req); + io_req_set_res(req, final_ret, +- io_put_kbuf(req, ret, req->buf_list)); ++ io_put_kbuf(req, ret, sel->buf_list)); + io_req_rw_cleanup(req, issue_flags); + return IOU_OK; + } +@@ -955,10 +955,10 @@ int io_read(struct io_kiocb *req, unsign + + ret = __io_read(req, &sel, issue_flags); + if 
(ret >= 0) +- return kiocb_done(req, ret, issue_flags); ++ return kiocb_done(req, ret, &sel, issue_flags); + + if (req->flags & REQ_F_BUFFERS_COMMIT) +- io_kbuf_recycle(req, req->buf_list, issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); + return ret; + } + +@@ -986,17 +986,17 @@ int io_read_mshot(struct io_kiocb *req, + * Reset rw->len to 0 again to avoid clamping future mshot + * reads, in case the buffer size varies. + */ +- if (io_kbuf_recycle(req, req->buf_list, issue_flags)) ++ if (io_kbuf_recycle(req, sel.buf_list, issue_flags)) + rw->len = 0; + if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_ISSUE_SKIP_COMPLETE; + return -EAGAIN; + } else if (ret <= 0) { +- io_kbuf_recycle(req, req->buf_list, issue_flags); ++ io_kbuf_recycle(req, sel.buf_list, issue_flags); + if (ret < 0) + req_set_fail(req); + } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { +- cflags = io_put_kbuf(req, ret, req->buf_list); ++ cflags = io_put_kbuf(req, ret, sel.buf_list); + } else { + /* + * Any successful return value will keep the multishot read +@@ -1004,7 +1004,7 @@ int io_read_mshot(struct io_kiocb *req, + * we fail to post a CQE, or multishot is no longer set, then + * jump to the termination path. This request is then done. 
+ */ +- cflags = io_put_kbuf(req, ret, req->buf_list); ++ cflags = io_put_kbuf(req, ret, sel.buf_list); + rw->len = 0; /* similarly to above, reset len to 0 */ + + if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) { +@@ -1135,7 +1135,7 @@ int io_write(struct io_kiocb *req, unsig + return -EAGAIN; + } + done: +- return kiocb_done(req, ret2, issue_flags); ++ return kiocb_done(req, ret2, NULL, issue_flags); + } else { + ret_eagain: + iov_iter_restore(&io->iter, &io->iter_state); +@@ -1215,7 +1215,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx + if (!smp_load_acquire(&req->iopoll_completed)) + break; + nr_events++; +- req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list); ++ req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL); + if (req->opcode != IORING_OP_URING_CMD) + io_req_rw_cleanup(req, 0); + } diff --git a/queue-6.12/io_uring-kbuf-uninline-__io_put_kbufs.patch b/queue-6.12/io_uring-kbuf-uninline-__io_put_kbufs.patch new file mode 100644 index 0000000000..aeca0dd4a7 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-uninline-__io_put_kbufs.patch @@ -0,0 +1,203 @@ +From e7f2c429fac51f341094e974e2858949f3670941 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 5 Feb 2025 11:36:49 +0000 +Subject: io_uring/kbuf: uninline __io_put_kbufs + +From: Pavel Begunkov + +Commit 5d3e51240d89678b87b5dc6987ea572048a0f0eb upstream. + +__io_put_kbufs() and other helper functions are too large to be inlined, +compilers would normally refuse to do so. Uninline it and move together +with io_kbuf_commit into kbuf.c. 
+ +io_kbuf_commitSigned-off-by: Pavel Begunkov + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/3dade7f55ad590e811aff83b1ec55c9c04e17b2b.1738724373.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++ + io_uring/kbuf.h | 73 +++++++------------------------------------------------- + 2 files changed, 70 insertions(+), 63 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -20,6 +20,9 @@ + /* BIDs are addressed by a 16-bit field in a CQE */ + #define MAX_BIDS_PER_BGID (1 << 16) + ++/* Mapped buffer ring, return io_uring_buf from head */ ++#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] ++ + struct io_provide_buf { + struct file *file; + __u64 addr; +@@ -29,6 +32,34 @@ struct io_provide_buf { + __u16 bid; + }; + ++bool io_kbuf_commit(struct io_kiocb *req, ++ struct io_buffer_list *bl, int len, int nr) ++{ ++ if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT))) ++ return true; ++ ++ req->flags &= ~REQ_F_BUFFERS_COMMIT; ++ ++ if (unlikely(len < 0)) ++ return true; ++ ++ if (bl->flags & IOBL_INC) { ++ struct io_uring_buf *buf; ++ ++ buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); ++ if (WARN_ON_ONCE(len > buf->len)) ++ len = buf->len; ++ buf->len -= len; ++ if (buf->len) { ++ buf->addr += len; ++ return false; ++ } ++ } ++ ++ bl->head += nr; ++ return true; ++} ++ + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) + { +@@ -337,6 +368,35 @@ int io_buffers_peek(struct io_kiocb *req + return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); + } + ++static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) ++{ ++ struct io_buffer_list *bl = req->buf_list; ++ bool ret = true; ++ ++ if (bl) { ++ ret = io_kbuf_commit(req, bl, len, nr); ++ req->buf_index = bl->bgid; ++ } ++ req->flags &= ~REQ_F_BUFFER_RING; ++ return ret; 
++} ++ ++unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs) ++{ ++ unsigned int ret; ++ ++ ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); ++ ++ if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) { ++ io_kbuf_drop_legacy(req); ++ return ret; ++ } ++ ++ if (!__io_put_kbuf_ring(req, len, nbufs)) ++ ret |= IORING_CQE_F_BUF_MORE; ++ return ret; ++} ++ + static int __io_remove_buffers(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, unsigned nbufs) + { +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -84,6 +84,10 @@ int io_register_pbuf_status(struct io_ri + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); + void io_kbuf_drop_legacy(struct io_kiocb *req); + ++unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs); ++bool io_kbuf_commit(struct io_kiocb *req, ++ struct io_buffer_list *bl, int len, int nr); ++ + void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); + struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid); +@@ -127,76 +131,19 @@ static inline bool io_kbuf_recycle(struc + /* Mapped buffer ring, return io_uring_buf from head */ + #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] + +-static inline bool io_kbuf_commit(struct io_kiocb *req, +- struct io_buffer_list *bl, int len, int nr) +-{ +- if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT))) +- return true; +- +- req->flags &= ~REQ_F_BUFFERS_COMMIT; +- +- if (unlikely(len < 0)) +- return true; +- +- if (bl->flags & IOBL_INC) { +- struct io_uring_buf *buf; +- +- buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); +- if (len > buf->len) +- len = buf->len; +- buf->len -= len; +- if (buf->len) { +- buf->addr += len; +- return false; +- } +- } +- +- bl->head += nr; +- return true; +-} +- +-static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) +-{ +- struct io_buffer_list *bl = req->buf_list; +- bool ret = true; +- +- if (bl) { +- 
ret = io_kbuf_commit(req, bl, len, nr); +- req->buf_index = bl->bgid; +- } +- if (ret && (req->flags & REQ_F_BUF_MORE)) +- ret = false; +- req->flags &= ~(REQ_F_BUFFER_RING | REQ_F_BUF_MORE); +- return ret; +-} +- +-static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, +- int nbufs, unsigned issue_flags) +-{ +- unsigned int ret; +- +- if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) +- return 0; +- +- ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); +- if (req->flags & REQ_F_BUFFER_RING) { +- if (!__io_put_kbuf_ring(req, len, nbufs)) +- ret |= IORING_CQE_F_BUF_MORE; +- } else { +- io_kbuf_drop_legacy(req); +- } +- return ret; +-} +- + static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len, + unsigned issue_flags) + { +- return __io_put_kbufs(req, len, 1, issue_flags); ++ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) ++ return 0; ++ return __io_put_kbufs(req, len, 1); + } + + static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len, + int nbufs, unsigned issue_flags) + { +- return __io_put_kbufs(req, len, nbufs, issue_flags); ++ if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) ++ return 0; ++ return __io_put_kbufs(req, len, nbufs); + } + #endif diff --git a/queue-6.12/io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch b/queue-6.12/io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch new file mode 100644 index 0000000000..6f5a2f29ad --- /dev/null +++ b/queue-6.12/io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch @@ -0,0 +1,66 @@ +From d888bd5f65a50de9571cfd6333050ac399b8af47 Mon Sep 17 00:00:00 2001 +From: Caleb Sander Mateos +Date: Thu, 4 Dec 2025 15:43:31 -0700 +Subject: io_uring/kbuf: use READ_ONCE() for userspace-mapped memory + +From: Caleb Sander Mateos + +Commit 78385c7299f7514697d196b3233a91bd5e485591 upstream. 
+ +The struct io_uring_buf elements in a buffer ring are in a memory region +accessible from userspace. A malicious/buggy userspace program could +therefore write to them at any time, so they should be accessed with +READ_ONCE() in the kernel. Commit 98b6fa62c84f ("io_uring/kbuf: always +use READ_ONCE() to read ring provided buffer lengths") already switched +the reads of the len field to READ_ONCE(). Do the same for bid and addr. + +Signed-off-by: Caleb Sander Mateos +Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers") +Cc: Joanne Koong +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -44,7 +44,7 @@ static bool io_kbuf_inc_commit(struct io + buf_len -= this_len; + /* Stop looping for invalid buffer length of 0 */ + if (buf_len || !this_len) { +- buf->addr += this_len; ++ buf->addr = READ_ONCE(buf->addr) + this_len; + buf->len = buf_len; + return false; + } +@@ -185,9 +185,9 @@ static struct io_br_sel io_ring_buffer_s + if (*len == 0 || *len > buf_len) + *len = buf_len; + req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; +- req->buf_index = buf->bid; ++ req->buf_index = READ_ONCE(buf->bid); + sel.buf_list = bl; +- sel.addr = u64_to_user_ptr(buf->addr); ++ sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr)); + + if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { + /* +@@ -278,7 +278,7 @@ static int io_ring_buffers_peek(struct i + if (!arg->max_len) + arg->max_len = INT_MAX; + +- req->buf_index = buf->bid; ++ req->buf_index = READ_ONCE(buf->bid); + do { + u32 len = READ_ONCE(buf->len); + +@@ -293,7 +293,7 @@ static int io_ring_buffers_peek(struct i + } + } + +- iov->iov_base = u64_to_user_ptr(buf->addr); ++ iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr)); + iov->iov_len = len; + iov++; + diff --git 
a/queue-6.12/io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch b/queue-6.12/io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch new file mode 100644 index 0000000000..a45c2ec45b --- /dev/null +++ b/queue-6.12/io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch @@ -0,0 +1,159 @@ +From ae354ce85590e11d18bf61f13d951c2ee249b716 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:36 -0600 +Subject: io_uring/kbuf: use struct io_br_sel for multiple buffers picking + +From: Jens Axboe + +Commit 429884ff35f75a8ac3e8f822f483e220e3ea6394 upstream. + +The networking side uses bundles, which is picking multiple buffers at +the same time. Pass in struct io_br_sel to those helpers. + +Link: https://lore.kernel.org/r/20250821020750.598432-9-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 5 +++-- + io_uring/kbuf.h | 5 +++-- + io_uring/net.c | 31 +++++++++++++++++-------------- + 3 files changed, 23 insertions(+), 18 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -312,7 +312,7 @@ static int io_ring_buffers_peek(struct i + } + + int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, +- unsigned int issue_flags) ++ struct io_br_sel *sel, unsigned int issue_flags) + { + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer_list *bl; +@@ -345,7 +345,8 @@ out_unlock: + return ret; + } + +-int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg) ++int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, ++ struct io_br_sel *sel) + { + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer_list *bl; +--- a/io_uring/kbuf.h ++++ b/io_uring/kbuf.h +@@ -82,8 +82,9 @@ struct io_br_sel { + struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, + unsigned int issue_flags); + int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, +- unsigned int issue_flags); +-int io_buffers_peek(struct 
io_kiocb *req, struct buf_sel_arg *arg); ++ struct io_br_sel *sel, unsigned int issue_flags); ++int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, ++ struct io_br_sel *sel); + void io_destroy_buffers(struct io_ring_ctx *ctx); + + int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -597,6 +597,7 @@ int io_send(struct io_kiocb *req, unsign + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct io_async_msghdr *kmsg = req->async_data; ++ struct io_br_sel sel = { }; + struct socket *sock; + unsigned flags; + int min_ret = 0; +@@ -633,7 +634,7 @@ retry_bundle: + else + arg.mode |= KBUF_MODE_EXPAND; + +- ret = io_buffers_select(req, &arg, issue_flags); ++ ret = io_buffers_select(req, &arg, &sel, issue_flags); + if (unlikely(ret < 0)) + return ret; + +@@ -1015,6 +1016,7 @@ int io_recvmsg(struct io_kiocb *req, uns + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct io_async_msghdr *kmsg = req->async_data; ++ struct io_br_sel sel = { }; + struct socket *sock; + unsigned flags; + int ret, min_ret = 0; +@@ -1035,7 +1037,6 @@ int io_recvmsg(struct io_kiocb *req, uns + + retry_multishot: + if (io_do_buffer_select(req)) { +- struct io_br_sel sel; + size_t len = sr->len; + + sel = io_buffer_select(req, &len, issue_flags); +@@ -1096,7 +1097,7 @@ retry_multishot: + } + + static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg, +- size_t *len, unsigned int issue_flags) ++ struct io_br_sel *sel, unsigned int issue_flags) + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + int ret; +@@ -1120,10 +1121,12 @@ static int io_recv_buf_select(struct io_ + arg.mode |= KBUF_MODE_FREE; + } + +- if (kmsg->msg.msg_inq > 1) +- arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq); ++ if (sel->val) ++ arg.max_len = sel->val; ++ else if (kmsg->msg.msg_inq > 1) ++ arg.max_len = min_not_zero(sel->val, (size_t) 
kmsg->msg.msg_inq); + +- ret = io_buffers_peek(req, &arg); ++ ret = io_buffers_peek(req, &arg, sel); + if (unlikely(ret < 0)) + return ret; + +@@ -1144,14 +1147,13 @@ static int io_recv_buf_select(struct io_ + iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, + arg.out_len); + } else { +- struct io_br_sel sel; ++ size_t len = sel->val; + +- *len = sr->len; +- sel = io_buffer_select(req, len, issue_flags); +- if (!sel.addr) ++ *sel = io_buffer_select(req, &len, issue_flags); ++ if (!sel->addr) + return -ENOBUFS; +- sr->buf = sel.addr; +- sr->len = *len; ++ sr->buf = sel->addr; ++ sr->len = len; + map_ubuf: + ret = import_ubuf(ITER_DEST, sr->buf, sr->len, + &kmsg->msg.msg_iter); +@@ -1166,11 +1168,11 @@ int io_recv(struct io_kiocb *req, unsign + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct io_async_msghdr *kmsg = req->async_data; ++ struct io_br_sel sel = { }; + struct socket *sock; + unsigned flags; + int ret, min_ret = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; +- size_t len = sr->len; + bool mshot_finished; + + if (!(req->flags & REQ_F_POLLED) && +@@ -1187,7 +1189,8 @@ int io_recv(struct io_kiocb *req, unsign + + retry_multishot: + if (io_do_buffer_select(req)) { +- ret = io_recv_buf_select(req, kmsg, &len, issue_flags); ++ sel.val = sr->len; ++ ret = io_recv_buf_select(req, kmsg, &sel, issue_flags); + if (unlikely(ret < 0)) { + kmsg->msg.msg_inq = -1; + goto out_free; diff --git a/queue-6.12/io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch b/queue-6.12/io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch new file mode 100644 index 0000000000..7dced3859d --- /dev/null +++ b/queue-6.12/io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch @@ -0,0 +1,47 @@ +From e643249c87544d67259296480d7352d80b097d26 Mon Sep 17 00:00:00 2001 +From: Joanne Koong +Date: Thu, 4 Dec 2025 15:54:50 -0800 +Subject: io_uring/kbuf: use WRITE_ONCE() for 
userspace-shared buffer ring fields + +From: Joanne Koong + +Commit a4c694bfc2455e82b7caf6045ca893d123e0ed11 upstream. + +buf->addr and buf->len reside in memory shared with userspace. They +should be written with WRITE_ONCE() to guarantee atomic stores and +prevent tearing or other unsafe compiler optimizations. + +Signed-off-by: Joanne Koong +Cc: Caleb Sander Mateos +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -44,11 +44,11 @@ static bool io_kbuf_inc_commit(struct io + buf_len -= this_len; + /* Stop looping for invalid buffer length of 0 */ + if (buf_len || !this_len) { +- buf->addr = READ_ONCE(buf->addr) + this_len; +- buf->len = buf_len; ++ WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len); ++ WRITE_ONCE(buf->len, buf_len); + return false; + } +- buf->len = 0; ++ WRITE_ONCE(buf->len, 0); + bl->head++; + len -= this_len; + } +@@ -289,7 +289,7 @@ static int io_ring_buffers_peek(struct i + arg->partial_map = 1; + if (iov != arg->iovs) + break; +- buf->len = len; ++ WRITE_ONCE(buf->len, len); + } + } + diff --git a/queue-6.12/io_uring-net-clarify-io_recv_buf_select-return-value.patch b/queue-6.12/io_uring-net-clarify-io_recv_buf_select-return-value.patch new file mode 100644 index 0000000000..5b1ea6c678 --- /dev/null +++ b/queue-6.12/io_uring-net-clarify-io_recv_buf_select-return-value.patch @@ -0,0 +1,28 @@ +From 3b1c89768957bd90b8427d9c51648aaa87d8d2ec Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:32 -0600 +Subject: io_uring/net: clarify io_recv_buf_select() return value + +From: Jens Axboe + +Commit b22743f29b7d3dc68c68f9bd39a1b2600ec6434e upstream. + +It returns 0 on success, less than zero on error. 
+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -1192,7 +1192,7 @@ int io_recv(struct io_kiocb *req, unsign + retry_multishot: + if (io_do_buffer_select(req)) { + ret = io_recv_buf_select(req, kmsg, &len, issue_flags); +- if (unlikely(ret)) { ++ if (unlikely(ret < 0)) { + kmsg->msg.msg_inq = -1; + goto out_free; + } diff --git a/queue-6.12/io_uring-net-correct-type-for-min_not_zero-cast.patch b/queue-6.12/io_uring-net-correct-type-for-min_not_zero-cast.patch new file mode 100644 index 0000000000..7e8c24942e --- /dev/null +++ b/queue-6.12/io_uring-net-correct-type-for-min_not_zero-cast.patch @@ -0,0 +1,33 @@ +From 73284d7ab3a12d22b5a8bb32a48a507b87d01c7c Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 2 Sep 2025 05:19:42 -0600 +Subject: io_uring/net: correct type for min_not_zero() cast + +From: Jens Axboe + +Commit 37500634d0a8f931e15879760fb70f9b6f5d5370 upstream. + +The kernel test robot reports that after a recent change, the signedness +of a min_not_zero() compare is now incorrect. Fix that up and cast to +the right type. 
+ +Fixes: 429884ff35f7 ("io_uring/kbuf: use struct io_br_sel for multiple buffers picking") +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202509020426.WJtrdwOU-lkp@intel.com/ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -1122,7 +1122,7 @@ static int io_recv_buf_select(struct io_ + if (sel->val) + arg.max_len = sel->val; + else if (kmsg->msg.msg_inq > 1) +- arg.max_len = min_not_zero(sel->val, (size_t) kmsg->msg.msg_inq); ++ arg.max_len = min_not_zero(sel->val, (ssize_t) kmsg->msg.msg_inq); + + ret = io_buffers_peek(req, &arg, sel); + if (unlikely(ret < 0)) diff --git a/queue-6.12/io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch b/queue-6.12/io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch new file mode 100644 index 0000000000..09e717284e --- /dev/null +++ b/queue-6.12/io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch @@ -0,0 +1,50 @@ +From d4f37a9a013f83b00d4dcf27a5bf2e04eec43af3 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:31 -0600 +Subject: io_uring/net: don't use io_net_kbuf_recyle() for non-provided cases + +From: Jens Axboe + +Commit 15ba5e51e689ceb1c2e921c5180a70c88cfdc8e9 upstream. + +A previous commit used io_net_kbuf_recyle() for any network helper that +did IO and needed partial retry. However, that's only needed if the +opcode does buffer selection, which isn't supported for sendzc, sendmsg_zc, +or sendmsg. Just remove them - they don't do any harm, but it is a bit +confusing when reading the code. 
+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -578,7 +578,7 @@ int io_sendmsg(struct io_kiocb *req, uns + kmsg->msg.msg_controllen = 0; + kmsg->msg.msg_control = NULL; + sr->done_io += ret; +- return io_net_kbuf_recyle(req, kmsg, ret); ++ return -EAGAIN; + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +@@ -1448,7 +1448,7 @@ int io_send_zc(struct io_kiocb *req, uns + zc->len -= ret; + zc->buf += ret; + zc->done_io += ret; +- return io_net_kbuf_recyle(req, kmsg, ret); ++ return -EAGAIN; + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +@@ -1508,7 +1508,7 @@ int io_sendmsg_zc(struct io_kiocb *req, + + if (ret > 0 && io_net_retry(sock, flags)) { + sr->done_io += ret; +- return io_net_kbuf_recyle(req, kmsg, ret); ++ return -EAGAIN; + } + if (ret == -ERESTARTSYS) + ret = -EINTR; diff --git a/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch b/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch new file mode 100644 index 0000000000..1619ebf345 --- /dev/null +++ b/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch @@ -0,0 +1,117 @@ +From 31717e9230807656724fc50bb8f95ba51824ff2f Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:37 -0600 +Subject: io_uring/net: use struct io_br_sel->val as the recv finish value + +From: Jens Axboe + +Commit 58d815091890e83aa2f83a9cce1fdfe3af02c7b4 upstream. + +Currently a pointer is passed in to the 'ret' in the receive handlers, +but since we already have a value field in io_br_sel, just use that. +This is also in preparation for needing to pass in struct io_br_sel +to io_recv_finish() anyway. 
+ +Link: https://lore.kernel.org/r/20250821020750.598432-10-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 31 +++++++++++++++++-------------- + 1 file changed, 17 insertions(+), 14 deletions(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -857,9 +857,10 @@ int io_recvmsg_prep(struct io_kiocb *req + * Returns true if it is actually finished, or false if it should run + * again (for multishot). + */ +-static inline bool io_recv_finish(struct io_kiocb *req, int *ret, ++static inline bool io_recv_finish(struct io_kiocb *req, + struct io_async_msghdr *kmsg, +- bool mshot_finished, unsigned issue_flags) ++ struct io_br_sel *sel, bool mshot_finished, ++ unsigned issue_flags) + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + unsigned int cflags = 0; +@@ -868,7 +869,7 @@ static inline bool io_recv_finish(struct + cflags |= IORING_CQE_F_SOCK_NONEMPTY; + + if (sr->flags & IORING_RECVSEND_BUNDLE) { +- size_t this_ret = *ret - sr->done_io; ++ size_t this_ret = sel->val - sr->done_io; + + cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret)); + if (sr->retry_flags & IO_SR_MSG_RETRY) +@@ -889,7 +890,7 @@ static inline bool io_recv_finish(struct + return false; + } + } else { +- cflags |= io_put_kbuf(req, *ret, req->buf_list); ++ cflags |= io_put_kbuf(req, sel->val, req->buf_list); + } + + /* +@@ -897,7 +898,7 @@ static inline bool io_recv_finish(struct + * receive from this socket. 
+ */ + if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished && +- io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { ++ io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) { + int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE; + + io_mshot_prep_retry(req, kmsg); +@@ -910,20 +911,20 @@ static inline bool io_recv_finish(struct + mshot_retry_ret = IOU_REQUEUE; + } + if (issue_flags & IO_URING_F_MULTISHOT) +- *ret = mshot_retry_ret; ++ sel->val = mshot_retry_ret; + else +- *ret = -EAGAIN; ++ sel->val = -EAGAIN; + return true; + } + + /* Finish the request / stop multishot. */ + finish: +- io_req_set_res(req, *ret, cflags); ++ io_req_set_res(req, sel->val, cflags); + + if (issue_flags & IO_URING_F_MULTISHOT) +- *ret = IOU_STOP_MULTISHOT; ++ sel->val = IOU_STOP_MULTISHOT; + else +- *ret = IOU_OK; ++ sel->val = IOU_OK; + io_req_msg_cleanup(req, issue_flags); + return true; + } +@@ -1090,10 +1091,11 @@ retry_multishot: + else + io_kbuf_recycle(req, req->buf_list, issue_flags); + +- if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) ++ sel.val = ret; ++ if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) + goto retry_multishot; + +- return ret; ++ return sel.val; + } + + static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg, +@@ -1236,10 +1238,11 @@ out_free: + else + io_kbuf_recycle(req, req->buf_list, issue_flags); + +- if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags)) ++ sel.val = ret; ++ if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags)) + goto retry_multishot; + +- return ret; ++ return sel.val; + } + + void io_send_zc_cleanup(struct io_kiocb *req) diff --git a/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch b/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch new file mode 100644 index 0000000000..4cbefb7d21 --- /dev/null +++ 
b/queue-6.12/io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch @@ -0,0 +1,83 @@ +From 1aebf001d6fcb771b6318bca438fd205993d2bc1 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:38 -0600 +Subject: io_uring/net: use struct io_br_sel->val as the send finish value + +From: Jens Axboe + +Commit 461382a51fb83a9c4b7c50e1f10d3ca94edff25e upstream. + +Currently a pointer is passed in to the 'ret' in the send mshot handler, +but since we already have a value field in io_br_sel, just use that. +This is also in preparation for needing to pass in struct io_br_sel +to io_send_finish() anyway. + +Link: https://lore.kernel.org/r/20250821020750.598432-11-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -507,19 +507,20 @@ static int io_net_kbuf_recyle(struct io_ + return -EAGAIN; + } + +-static inline bool io_send_finish(struct io_kiocb *req, int *ret, +- struct io_async_msghdr *kmsg) ++static inline bool io_send_finish(struct io_kiocb *req, ++ struct io_async_msghdr *kmsg, ++ struct io_br_sel *sel) + { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); +- bool bundle_finished = *ret <= 0; ++ bool bundle_finished = sel->val <= 0; + unsigned int cflags; + + if (!(sr->flags & IORING_RECVSEND_BUNDLE)) { +- cflags = io_put_kbuf(req, *ret, req->buf_list); ++ cflags = io_put_kbuf(req, sel->val, req->buf_list); + goto finish; + } + +- cflags = io_put_kbufs(req, *ret, req->buf_list, io_bundle_nbufs(kmsg, *ret)); ++ cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val)); + + /* + * Don't start new bundles if the buffer list is empty, or if the +@@ -532,15 +533,15 @@ static inline bool io_send_finish(struct + * Fill CQE for this receive and see if we should keep trying to + * receive from this socket. 
+ */ +- if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) { ++ if (io_req_post_cqe(req, sel->val, cflags | IORING_CQE_F_MORE)) { + io_mshot_prep_retry(req, kmsg); + return false; + } + + /* Otherwise stop bundle and use the current result. */ + finish: +- io_req_set_res(req, *ret, cflags); +- *ret = IOU_OK; ++ io_req_set_res(req, sel->val, cflags); ++ sel->val = IOU_OK; + return true; + } + +@@ -687,11 +688,12 @@ retry_bundle: + else if (sr->done_io) + ret = sr->done_io; + +- if (!io_send_finish(req, &ret, kmsg)) ++ sel.val = ret; ++ if (!io_send_finish(req, kmsg, &sel)) + goto retry_bundle; + + io_req_msg_cleanup(req, issue_flags); +- return ret; ++ return sel.val; + } + + static int io_recvmsg_mshot_prep(struct io_kiocb *req, diff --git a/queue-6.12/io_uring-remove-async-poll-related-provided-buffer-recycles.patch b/queue-6.12/io_uring-remove-async-poll-related-provided-buffer-recycles.patch new file mode 100644 index 0000000000..11e0cec6c0 --- /dev/null +++ b/queue-6.12/io_uring-remove-async-poll-related-provided-buffer-recycles.patch @@ -0,0 +1,55 @@ +From d863f651b523f217ccdf7887364e993610f2b880 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 20 Aug 2025 20:03:40 -0600 +Subject: io_uring: remove async/poll related provided buffer recycles + +From: Jens Axboe + +Commit e973837b54024f070b2b48c7ee9725548548257a upstream. + +These aren't necessary anymore, get rid of them. 
+ +Link: https://lore.kernel.org/r/20250821020750.598432-13-axboe@kernel.dk +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 -- + io_uring/poll.c | 4 ---- + 2 files changed, 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -1921,11 +1921,9 @@ static void io_queue_async(struct io_kio + + switch (io_arm_poll_handler(req, 0)) { + case IO_APOLL_READY: +- io_kbuf_recycle(req, NULL, 0); + io_req_task_queue(req); + break; + case IO_APOLL_ABORTED: +- io_kbuf_recycle(req, NULL, 0); + io_queue_iowq(req); + break; + case IO_APOLL_OK: +--- a/io_uring/poll.c ++++ b/io_uring/poll.c +@@ -356,10 +356,8 @@ void io_poll_task_func(struct io_kiocb * + + ret = io_poll_check_events(req, ts); + if (ret == IOU_POLL_NO_ACTION) { +- io_kbuf_recycle(req, NULL, 0); + return; + } else if (ret == IOU_POLL_REQUEUE) { +- io_kbuf_recycle(req, NULL, 0); + __io_poll_execute(req, 0); + return; + } +@@ -753,8 +751,6 @@ int io_arm_poll_handler(struct io_kiocb + req->flags |= REQ_F_POLLED; + ipt.pt._qproc = io_async_queue_proc; + +- io_kbuf_recycle(req, NULL, issue_flags); +- + ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags); + if (ret) + return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED; diff --git a/queue-6.12/io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch b/queue-6.12/io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch new file mode 100644 index 0000000000..e868993ff9 --- /dev/null +++ b/queue-6.12/io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch @@ -0,0 +1,52 @@ +From fcc91899be4ad1e71f729a11d3939497bb0612e4 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 15 Oct 2025 13:38:53 -0600 +Subject: io_uring/rw: check for NULL io_br_sel when putting a buffer + +From: Jens Axboe + +Commit 18d6b1743eafeb3fb1e0ea5a2b7fd0a773d525a8 upstream. 
+ +Both the read and write side use kiocb_done() to finish a request, and +kiocb_done() will call io_put_kbuf() in case a provided buffer was used +for the request. Provided buffers are not supported for writes, hence +NULL is being passed in. This normally works fine, as io_put_kbuf() +won't actually use the value unless REQ_F_BUFFER_RING or +REQ_F_BUFFER_SELECTED is set in the request flags. But depending on +compiler (or whether or not CONFIG_CC_OPTIMIZE_FOR_SIZE is set), that +may be done even though the value is never used. This will then cause a +NULL pointer dereference. + +Make it a bit more obvious and check for a NULL io_br_sel, and don't +even bother calling io_put_kbuf() for that case. + +Fixes: 5fda51255439 ("io_uring/kbuf: switch to storing struct io_buffer_list locally") +Reported-by: David Howells +Tested-by: David Howells +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/rw.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -598,13 +598,16 @@ static int kiocb_done(struct io_kiocb *r + req->file->f_pos = rw->kiocb.ki_pos; + if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) { + if (!__io_complete_rw_common(req, ret)) { ++ u32 cflags = 0; ++ + /* + * Safe to call io_end from here as we're inline + * from the submission path. 
+ */ + io_req_io_end(req); +- io_req_set_res(req, final_ret, +- io_put_kbuf(req, ret, sel->buf_list)); ++ if (sel) ++ cflags = io_put_kbuf(req, ret, sel->buf_list); ++ io_req_set_res(req, final_ret, cflags); + io_req_rw_cleanup(req, issue_flags); + return IOU_OK; + } diff --git a/queue-6.12/series b/queue-6.12/series new file mode 100644 index 0000000000..3bf76134e8 --- /dev/null +++ b/queue-6.12/series @@ -0,0 +1,25 @@ +io_uring-kbuf-remove-legacy-kbuf-bulk-allocation.patch +io_uring-kbuf-remove-legacy-kbuf-kmem-cache.patch +io_uring-kbuf-simplify-__io_put_kbuf.patch +io_uring-kbuf-remove-legacy-kbuf-caching.patch +io_uring-kbuf-open-code-__io_put_kbuf.patch +io_uring-kbuf-introduce-io_kbuf_drop_legacy.patch +io_uring-kbuf-uninline-__io_put_kbufs.patch +io_uring-kbuf-drop-issue_flags-from-io_put_kbuf-s-arguments.patch +io_uring-net-don-t-use-io_net_kbuf_recyle-for-non-provided-cases.patch +io_uring-net-clarify-io_recv_buf_select-return-value.patch +io_uring-kbuf-pass-in-struct-io_buffer_list-to-commit-recycle-helpers.patch +io_uring-kbuf-introduce-struct-io_br_sel.patch +io_uring-kbuf-use-struct-io_br_sel-for-multiple-buffers-picking.patch +io_uring-net-use-struct-io_br_sel-val-as-the-recv-finish-value.patch +io_uring-net-use-struct-io_br_sel-val-as-the-send-finish-value.patch +io_uring-kbuf-switch-to-storing-struct-io_buffer_list-locally.patch +io_uring-remove-async-poll-related-provided-buffer-recycles.patch +io_uring-net-correct-type-for-min_not_zero-cast.patch +io_uring-rw-check-for-null-io_br_sel-when-putting-a-buffer.patch +io_uring-kbuf-enable-bundles-for-incrementally-consumed-buffers.patch +io_uring-kbuf-always-use-read_once-to-read-ring-provided-buffer-lengths.patch +io_uring-kbuf-use-read_once-for-userspace-mapped-memory.patch +io_uring-kbuf-use-write_once-for-userspace-shared-buffer-ring-fields.patch +io_uring-kbuf-fix-missing-buf_more-for-incremental-buffers-at-eof.patch +io_uring-kbuf-propagate-buf_more-through-early-buffer-commit-path.patch