From: Greg Kroah-Hartman
Date: Tue, 12 May 2026 14:37:43 +0000 (+0200)
Subject: 6.18-stable patches
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c01653694c0557f62e5fd23b52b4975eb352ffc5;p=thirdparty%2Fkernel%2Fstable-queue.git

6.18-stable patches

added patches:
      io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
      io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
---

diff --git a/queue-6.18/io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch b/queue-6.18/io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
new file mode 100644
index 0000000000..722c5c5ecb
--- /dev/null
+++ b/queue-6.18/io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
@@ -0,0 +1,100 @@
+From ae165fed72f9686aeb04766a15459807114a5023 Mon Sep 17 00:00:00 2001
+From: Martin Michaelis
+Date: Thu, 23 Apr 2026 15:54:11 -0600
+Subject: io_uring/kbuf: support min length left for incremental buffers
+
+From: Martin Michaelis
+
+Commit 7deba791ad495ce1d7921683f4f7d1190fa210d1 upstream.
+
+Incrementally consumed buffer rings are generally fully consumed, but
+it's quite possible that the application has a minimum size it needs to
+meet to avoid truncation. Currently that minimum limit is 1 byte, but
+this should be a setting that is in the hands of the application. For
+recvmsg multishot, a prime use case for incrementally consumed buffers,
+the application may get spurious -EFAULT returned at the end of an
+incrementally consumed buffer, as less space is available than the
+headers need.
+
+Grab a u32 field in struct io_uring_buf_reg, which the application can
+use to inform the kernel of the minimum size that should be available
+in an incrementally consumed buffer. If less than that is available,
+the current buffer is fully processed and the next one will be picked.
+
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1433
+Signed-off-by: Martin Michaelis
+[axboe: write commit message, change io_buffer_list member name]
+Reviewed-by: Gabriel Krisman Bertazi
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/uapi/linux/io_uring.h | 3 ++-
+ io_uring/kbuf.c | 12 +++++++++---
+ io_uring/kbuf.h | 7 +++++++
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+--- a/include/uapi/linux/io_uring.h
++++ b/include/uapi/linux/io_uring.h
+@@ -864,7 +864,8 @@ struct io_uring_buf_reg {
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 flags;
+- __u64 resv[3];
++ __u32 min_left;
++ __u32 resv[5];
+ };
+
+ /* argument for IORING_REGISTER_PBUF_STATUS */
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -47,9 +47,9 @@ static bool io_kbuf_inc_commit(struct io
+ this_len = min_t(u32, len, buf_len);
+ buf_len -= this_len;
+ /* Stop looping for invalid buffer length of 0 */
+- if (buf_len || !this_len) {
+- buf->addr = READ_ONCE(buf->addr) + this_len;
+- buf->len = buf_len;
++ if (buf_len > bl->min_left_sub_one || !this_len) {
++ WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
++ WRITE_ONCE(buf->len, buf_len);
+ return false;
+ }
+ buf->len = 0;
+@@ -637,6 +637,10 @@ int io_register_pbuf_ring(struct io_ring
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
++ /* minimum left byte count is a property of incremental buffers */
++ if (!(reg.flags & IOU_PBUF_RING_INC) && reg.min_left)
++ return -EINVAL;
++
+ bl = io_buffer_get_list(ctx, reg.bgid);
+ if (bl) {
+ /* if mapped buffer ring OR classic exists, don't allow */
+@@ -684,6 +688,8 @@ int io_register_pbuf_ring(struct io_ring
+ bl->mask = reg.ring_entries - 1;
+ bl->flags |= IOBL_BUF_RING;
+ bl->buf_ring = br;
++ if (reg.min_left)
++ bl->min_left_sub_one = reg.min_left - 1;
+ if (reg.flags & IOU_PBUF_RING_INC)
+ bl->flags |= IOBL_INC;
+ ret = io_buffer_add_list(ctx, bl, reg.bgid);
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -34,6 +34,13 @@ struct io_buffer_list {
+
+ __u16 flags;
+
++ /*
++ * minimum required amount to be left to reuse an incrementally
++ * consumed buffer. If less than this is left at consumption time,
++ * buffer is done and head is incremented to the next buffer.
++ */
++ __u32 min_left_sub_one;
++
+ struct io_mapped_region region;
+ };
+
diff --git a/queue-6.18/io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch b/queue-6.18/io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
new file mode 100644
index 0000000000..641e2011b1
--- /dev/null
+++ b/queue-6.18/io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
@@ -0,0 +1,54 @@
+From 8d9d53f745bd6ada526ea1d5c3fabf4fd8d756eb Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 27 Apr 2026 19:16:39 -0600
+Subject: io_uring/tw: serialize ctx->retry_llist with ->uring_lock
+
+From: Jens Axboe
+
+Commit 17666e2d7592c3e85260cafd3950121524acc2c5 upstream.
+
+The DEFER_TASKRUN local task work paths all run under ctx->uring_lock,
+which serializes them with each other and with the rest of the ring's
+hot paths. io_move_task_work_from_local() is the exception - it's called
+from io_ring_exit_work() on a kworker without holding the lock and from
+the iopoll cancelation side right after dropping it.
+
+->work_llist is fine with this, as it's only ever updated via the
+expected paths. But the ->retry_llist is updated while running, and hence
+it could potentially race between normal task_work running and the
+task-has-exited shutdown path.
+
+Simply grab ->uring_lock while moving the local work to the fallback
+list for exit purposes, which nicely serializes it across both the
+normal additions and the exit prune path.
+
+Cc: stable@vger.kernel.org
+Fixes: f46b9cdb22f7 ("io_uring: limit local tw done")
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1370,8 +1370,18 @@ void io_req_task_work_add_remote(struct
+
+ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
+ {
+- struct llist_node *node = llist_del_all(&ctx->work_llist);
++ struct llist_node *node;
+
++ /*
++ * Running the work items may utilize ->retry_llist as a means
++ * for capping the number of task_work entries run at the same
++ * time. But that list can potentially race with moving the work
++ * from here, if the task is exiting. As any normal task_work
++ * running holds ->uring_lock already, just guard this slow path
++ * with ->uring_lock to avoid racing on ->retry_llist.
++ */
++ guard(mutex)(&ctx->uring_lock);
++ node = llist_del_all(&ctx->work_llist);
+ __io_fallback_tw(node, false);
+ node = llist_del_all(&ctx->retry_llist);
+ __io_fallback_tw(node, false);
diff --git a/queue-6.18/series b/queue-6.18/series
index 2fd6fd3856..a347ef1904 100644
--- a/queue-6.18/series
+++ b/queue-6.18/series
@@ -243,3 +243,5 @@ loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patc
 loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch
 loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch
 loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch
+io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
+io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
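
Usage note for the kbuf patch: the new field is consumed at buffer ring registration time. The sketch below shows how a userspace application might opt in; it assumes a liburing build whose copy of io_uring.h already carries the min_left field from the uapi hunk above, and the buffer group id, ring size, and 64-byte minimum are arbitrary illustration values, not part of the patch.

#include <liburing.h>
#include <stdlib.h>
#include <string.h>

#define BGID     0
#define ENTRIES  8    /* must be a power of two, below 65536 */
#define MIN_LEFT 64   /* e.g. enough room for recvmsg headers */

static struct io_uring_buf_ring *setup_inc_buf_ring(struct io_uring *ring)
{
	struct io_uring_buf_reg reg;
	struct io_uring_buf_ring *br;

	/* ring memory handed to the kernel must be page aligned */
	if (posix_memalign((void **)&br, 4096,
			   ENTRIES * sizeof(struct io_uring_buf)))
		return NULL;

	memset(&reg, 0, sizeof(reg));
	reg.ring_addr = (unsigned long)br;
	reg.ring_entries = ENTRIES;
	reg.bgid = BGID;
	reg.flags = IOU_PBUF_RING_INC;	/* incremental consumption */
	reg.min_left = MIN_LEFT;	/* retire buffer once less than this is left */

	if (io_uring_register_buf_ring(ring, &reg, 0)) {
		free(br);
		return NULL;
	}
	io_uring_buf_ring_init(br);
	return br;
}

Registering with min_left set but without IOU_PBUF_RING_INC fails with -EINVAL per the io_register_pbuf_ring() check added above, so the field is only meaningful for incrementally consumed rings.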