From: Greg Kroah-Hartman
Date: Tue, 12 May 2026 14:37:43 +0000 (+0200)
Subject: 6.18-stable patches
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c01653694c0557f62e5fd23b52b4975eb352ffc5;p=thirdparty%2Fkernel%2Fstable-queue.git

6.18-stable patches

added patches:
      io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
      io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
---

diff --git a/queue-6.18/io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch b/queue-6.18/io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
new file mode 100644
index 0000000000..722c5c5ecb
--- /dev/null
+++ b/queue-6.18/io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
@@ -0,0 +1,100 @@
+From ae165fed72f9686aeb04766a15459807114a5023 Mon Sep 17 00:00:00 2001
+From: Martin Michaelis
+Date: Thu, 23 Apr 2026 15:54:11 -0600
+Subject: io_uring/kbuf: support min length left for incremental buffers
+
+From: Martin Michaelis
+
+Commit 7deba791ad495ce1d7921683f4f7d1190fa210d1 upstream.
+
+Incrementally consumed buffer rings are generally fully consumed, but
+it's quite possible that the application has a minimum size it needs to
+meet to avoid truncation. Currently that minimum limit is 1 byte, but
+this should be a setting that is in the hands of the application. For
+recvmsg multishot, a prime use case for incrementally consumed buffers,
+the application may get spurious -EFAULT returned at the end of an
+incrementally consumed buffer, as less space is available than the
+headers need.
+
+Grab a u32 field in struct io_uring_buf_reg, which the application can
+use to inform the kernel of the minimum size that should be available
+in an incrementally consumed buffer. If less than that is available,
+the current buffer is fully processed and the next one will be picked.
+
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1433
+Signed-off-by: Martin Michaelis
+[axboe: write commit message, change io_buffer_list member name]
+Reviewed-by: Gabriel Krisman Bertazi
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/uapi/linux/io_uring.h | 3 ++-
+ io_uring/kbuf.c | 12 +++++++++---
+ io_uring/kbuf.h | 7 +++++++
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+--- a/include/uapi/linux/io_uring.h
++++ b/include/uapi/linux/io_uring.h
+@@ -864,7 +864,8 @@ struct io_uring_buf_reg {
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 flags;
+- __u64 resv[3];
++ __u32 min_left;
++ __u32 resv[5];
+ };
+
+ /* argument for IORING_REGISTER_PBUF_STATUS */
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -47,9 +47,9 @@ static bool io_kbuf_inc_commit(struct io
+ this_len = min_t(u32, len, buf_len);
+ buf_len -= this_len;
+ /* Stop looping for invalid buffer length of 0 */
+- if (buf_len || !this_len) {
+- buf->addr = READ_ONCE(buf->addr) + this_len;
+- buf->len = buf_len;
++ if (buf_len > bl->min_left_sub_one || !this_len) {
++ WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
++ WRITE_ONCE(buf->len, buf_len);
+ return false;
+ }
+ buf->len = 0;
+@@ -637,6 +637,10 @@ int io_register_pbuf_ring(struct io_ring
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
++ /* minimum left byte count is a property of incremental buffers */
++ if (!(reg.flags & IOU_PBUF_RING_INC) && reg.min_left)
++ return -EINVAL;
++
+ bl = io_buffer_get_list(ctx, reg.bgid);
+ if (bl) {
+ /* if mapped buffer ring OR classic exists, don't allow */
+@@ -684,6 +688,8 @@ int io_register_pbuf_ring(struct io_ring
+ bl->mask = reg.ring_entries - 1;
+ bl->flags |= IOBL_BUF_RING;
+ bl->buf_ring = br;
++ if (reg.min_left)
++ bl->min_left_sub_one = reg.min_left - 1;
+ if (reg.flags & IOU_PBUF_RING_INC)
+ bl->flags |= IOBL_INC;
+ ret = io_buffer_add_list(ctx, bl, reg.bgid);
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -34,6 +34,13 @@ struct io_buffer_list {
+
+ __u16 flags;
+
++ /*
++ * minimum required amount to be left to reuse an incrementally
++ * consumed buffer. If less than this is left at consumption time,
++ * buffer is done and head is incremented to the next buffer.
++ */
++ __u32 min_left_sub_one;
++
+ struct io_mapped_region region;
+ };
+
diff --git a/queue-6.18/io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch b/queue-6.18/io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
new file mode 100644
index 0000000000..641e2011b1
--- /dev/null
+++ b/queue-6.18/io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
@@ -0,0 +1,54 @@
+From 8d9d53f745bd6ada526ea1d5c3fabf4fd8d756eb Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 27 Apr 2026 19:16:39 -0600
+Subject: io_uring/tw: serialize ctx->retry_llist with ->uring_lock
+
+From: Jens Axboe
+
+Commit 17666e2d7592c3e85260cafd3950121524acc2c5 upstream.
+
+The DEFER_TASKRUN local task work paths all run under ctx->uring_lock,
+which serializes them with each other and with the rest of the ring's
+hot paths. io_move_task_work_from_local() is the exception - it's called
+from io_ring_exit_work() on a kworker without holding the lock and from
+the iopoll cancelation side right after dropping it.
+
+->work_llist is fine with this, as it's only ever updated via the
+expected paths. But the ->retry_llist is updated while running, and hence
+it could potentially race between normal task_work running and the
+task-has-exited shutdown path.
+
+Simply grab ->uring_lock while moving the local work to the fallback
+list for exit purposes, which nicely serializes it across both the
+normal additions and the exit prune path.
+
+Cc: stable@vger.kernel.org
+Fixes: f46b9cdb22f7 ("io_uring: limit local tw done")
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1370,8 +1370,18 @@ void io_req_task_work_add_remote(struct
+
+ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
+ {
+- struct llist_node *node = llist_del_all(&ctx->work_llist);
++ struct llist_node *node;
+
++ /*
++ * Running the work items may utilize ->retry_llist as a means
++ * for capping the number of task_work entries run at the same
++ * time. But that list can potentially race with moving the work
++ * from here, if the task is exiting. As any normal task_work
++ * running holds ->uring_lock already, just guard this slow path
++ * with ->uring_lock to avoid racing on ->retry_llist.
++ */
++ guard(mutex)(&ctx->uring_lock);
++ node = llist_del_all(&ctx->work_llist);
+ __io_fallback_tw(node, false);
+ node = llist_del_all(&ctx->retry_llist);
+ __io_fallback_tw(node, false);
diff --git a/queue-6.18/series b/queue-6.18/series
index 2fd6fd3856..a347ef1904 100644
--- a/queue-6.18/series
+++ b/queue-6.18/series
@@ -243,3 +243,5 @@ loongarch-kvm-fix-hw-timer-interrupt-lost-when-inject-interrupt-by-software.patc
 loongarch-kvm-move-unconditional-delay-into-timer-clear-scenery.patch
 loongarch-kvm-use-kvm_set_pte-in-kvm_flush_pte.patch
 loongarch-use-per-root-bridge-pcih-flag-to-skip-mem-resource-fixup.patch
+io_uring-kbuf-support-min-length-left-for-incremental-buffers.patch
+io_uring-tw-serialize-ctx-retry_llist-with-uring_lock.patch
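
Usage note for the kbuf patch: the new field is consumed at buffer ring registration time. The sketch below shows how a userspace application might opt in; it assumes a liburing build whose copy of io_uring.h already carries the min_left field from the uapi hunk above, and the buffer group id, ring size, and 64-byte minimum are arbitrary illustration values, not part of the patch.

#include <liburing.h>
#include <stdlib.h>
#include <string.h>

#define BGID     0
#define ENTRIES  8    /* must be a power of two, below 65536 */
#define MIN_LEFT 64   /* e.g. enough room for recvmsg headers */

static struct io_uring_buf_ring *setup_inc_buf_ring(struct io_uring *ring)
{
	struct io_uring_buf_reg reg;
	struct io_uring_buf_ring *br;

	/* ring memory handed to the kernel must be page aligned */
	if (posix_memalign((void **)&br, 4096,
			   ENTRIES * sizeof(struct io_uring_buf)))
		return NULL;

	memset(&reg, 0, sizeof(reg));
	reg.ring_addr = (unsigned long)br;
	reg.ring_entries = ENTRIES;
	reg.bgid = BGID;
	reg.flags = IOU_PBUF_RING_INC;	/* incremental consumption */
	reg.min_left = MIN_LEFT;	/* retire buffer once less than this is left */

	if (io_uring_register_buf_ring(ring, &reg, 0)) {
		free(br);
		return NULL;
	}
	io_uring_buf_ring_init(br);
	return br;
}

Registering with min_left set but without IOU_PBUF_RING_INC fails with -EINVAL per the io_register_pbuf_ring() check added above, so the field is only meaningful for incrementally consumed rings.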