--- /dev/null
+From ae165fed72f9686aeb04766a15459807114a5023 Mon Sep 17 00:00:00 2001
+From: Martin Michaelis <code@mgjm.de>
+Date: Thu, 23 Apr 2026 15:54:11 -0600
+Subject: io_uring/kbuf: support min length left for incremental buffers
+
+From: Martin Michaelis <code@mgjm.de>
+
+Commit 7deba791ad495ce1d7921683f4f7d1190fa210d1 upstream.
+
+Incrementally consumed buffer rings are generally fully consumed, but
+it's quite possible that the application has a minimum size it needs to
+meet to avoid truncation. Currently that minimum limit is 1 byte, but
+this should be a setting that is in the hands of the application. For
+recvmsg multishot, a prime use case for incrementally consumed buffers,
+the application may get spurious -EFAULT returned at the end of an
+incrementally consumed buffer, as less space is available than the
+headers need.
+
+Grab a u32 field in struct io_uring_buf_reg, which the application can
+use to inform the kernel of the minimum size that should be available
+in an incrementally consumed buffer. If less than that is available,
+the current buffer is fully processed and the next one will be picked.
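+
+A rough userspace sketch of registering such a ring follows (illustrative
+only, not part of this patch; it assumes liburing's
+io_uring_register_buf_ring() helper, headers that already carry the new
+min_left field, and that ring_mem points at suitably sized, page aligned
+memory for the buffer ring):
+
+    struct io_uring_buf_reg reg = { };
+    int ret;
+
+    reg.ring_addr = (unsigned long) ring_mem;
+    reg.ring_entries = 8;
+    reg.bgid = 0;
+    reg.flags = IOU_PBUF_RING_INC;
+    /* recycle the buffer once fewer than 256 bytes are left in it */
+    reg.min_left = 256;
+
+    ret = io_uring_register_buf_ring(&ring, &reg, 0);
+
+Internally the kernel stores min_left - 1 (min_left_sub_one), hence the
+default of 0 retains the existing behavior of reusing the buffer as long
+as a single byte remains.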
+
+Cc: stable@vger.kernel.org
+Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption")
+Link: https://github.com/axboe/liburing/issues/1433
+Signed-off-by: Martin Michaelis <code@mgjm.de>
+[axboe: write commit message, change io_buffer_list member name]
+Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/io_uring.h | 3 ++-
+ io_uring/kbuf.c | 12 +++++++++---
+ io_uring/kbuf.h | 7 +++++++
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+--- a/include/uapi/linux/io_uring.h
++++ b/include/uapi/linux/io_uring.h
+@@ -864,7 +864,8 @@ struct io_uring_buf_reg {
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 flags;
+- __u64 resv[3];
++ __u32 min_left;
++ __u32 resv[5];
+ };
+
+ /* argument for IORING_REGISTER_PBUF_STATUS */
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -47,9 +47,9 @@ static bool io_kbuf_inc_commit(struct io
+ this_len = min_t(u32, len, buf_len);
+ buf_len -= this_len;
+ /* Stop looping for invalid buffer length of 0 */
+- if (buf_len || !this_len) {
+- buf->addr = READ_ONCE(buf->addr) + this_len;
+- buf->len = buf_len;
++ if (buf_len > bl->min_left_sub_one || !this_len) {
++ WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
++ WRITE_ONCE(buf->len, buf_len);
+ return false;
+ }
+ buf->len = 0;
+@@ -637,6 +637,10 @@ int io_register_pbuf_ring(struct io_ring
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
++ /* minimum left byte count is a property of incremental buffers */
++ if (!(reg.flags & IOU_PBUF_RING_INC) && reg.min_left)
++ return -EINVAL;
++
+ bl = io_buffer_get_list(ctx, reg.bgid);
+ if (bl) {
+ /* if mapped buffer ring OR classic exists, don't allow */
+@@ -684,6 +688,8 @@ int io_register_pbuf_ring(struct io_ring
+ bl->mask = reg.ring_entries - 1;
+ bl->flags |= IOBL_BUF_RING;
+ bl->buf_ring = br;
++ if (reg.min_left)
++ bl->min_left_sub_one = reg.min_left - 1;
+ if (reg.flags & IOU_PBUF_RING_INC)
+ bl->flags |= IOBL_INC;
+ ret = io_buffer_add_list(ctx, bl, reg.bgid);
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -34,6 +34,13 @@ struct io_buffer_list {
+
+ __u16 flags;
+
++ /*
++ * minimum required amount to be left to reuse an incrementally
++ * consumed buffer. If less than this is left at consumption time,
++ * buffer is done and head is incremented to the next buffer.
++ */
++ __u32 min_left_sub_one;
++
+ struct io_mapped_region region;
+ };
+
--- /dev/null
+From 8d9d53f745bd6ada526ea1d5c3fabf4fd8d756eb Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 27 Apr 2026 19:16:39 -0600
+Subject: io_uring/tw: serialize ctx->retry_llist with ->uring_lock
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 17666e2d7592c3e85260cafd3950121524acc2c5 upstream.
+
+The DEFER_TASKRUN local task work paths all run under ctx->uring_lock,
+which serializes them with each other and with the rest of the ring's
+hot paths. io_move_task_work_from_local() is the exception - it's called
+from io_ring_exit_work() on a kworker without holding the lock and from
+the iopoll cancelation side right after dropping it.
+
+->work_llist is fine with this, as it's only ever updated via the
+expected paths. But the ->retry_llist is updated while running, and hence
+it could potentially race between normal task_work running and the
+task-has-exited shutdown path.
+
+Simply grab ->uring_lock while moving the local work to the fallback
+list for exit purposes, which nicely serializes it across both the
+normal additions and the exit prune path.
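+
+For reference, guard(mutex) is the scope-based lock guard from the
+kernel's cleanup.h infrastructure: it acquires the mutex immediately and
+drops it automatically when the enclosing scope is left, so no explicit
+unlock is needed on any return path. A minimal sketch of the pattern
+(illustrative only, not part of this patch):
+
+    static void example(struct io_ring_ctx *ctx)
+    {
+        /* mutex_lock(&ctx->uring_lock) happens here */
+        guard(mutex)(&ctx->uring_lock);
+
+        /* ... everything here runs with ->uring_lock held ... */
+    }   /* mutex_unlock() runs automatically when the scope ends */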
+
+Cc: stable@vger.kernel.org
+Fixes: f46b9cdb22f7 ("io_uring: limit local tw done")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1370,8 +1370,18 @@ void io_req_task_work_add_remote(struct
+
+ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
+ {
+- struct llist_node *node = llist_del_all(&ctx->work_llist);
++ struct llist_node *node;
+
++ /*
++ * Running the work items may utilize ->retry_llist as a means
++ * for capping the number of task_work entries run at the same
++ * time. But that list can potentially race with moving the work
++ * from here, if the task is exiting. As any normal task_work
++ * running holds ->uring_lock already, just guard this slow path
++ * with ->uring_lock to avoid racing on ->retry_llist.
++ */
++ guard(mutex)(&ctx->uring_lock);
++ node = llist_del_all(&ctx->work_llist);
+ __io_fallback_tw(node, false);
+ node = llist_del_all(&ctx->retry_llist);
+ __io_fallback_tw(node, false);