git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 2 Jul 2025 10:24:00 +0000 (12:24 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 2 Jul 2025 10:24:00 +0000 (12:24 +0200)
added patches:
io_uring-fix-potential-page-leak-in-io_sqe_buffer_register.patch
io_uring-kbuf-flag-partial-buffer-mappings.patch
io_uring-net-always-use-current-transfer-count-for-buffer-put.patch
io_uring-net-improve-recv-bundles.patch
io_uring-net-mark-iov-as-dynamically-allocated-even-for-single-segments.patch
io_uring-net-only-consider-msg_inq-if-larger-than-1.patch
io_uring-net-only-retry-recv-bundle-for-a-full-transfer.patch
io_uring-rsrc-don-t-rely-on-user-vaddr-alignment.patch
io_uring-rsrc-fix-folio-unpinning.patch
mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch
net-libwx-fix-tx-l4-checksum.patch

12 files changed:
queue-6.12/io_uring-fix-potential-page-leak-in-io_sqe_buffer_register.patch [new file with mode: 0644]
queue-6.12/io_uring-kbuf-flag-partial-buffer-mappings.patch [new file with mode: 0644]
queue-6.12/io_uring-net-always-use-current-transfer-count-for-buffer-put.patch [new file with mode: 0644]
queue-6.12/io_uring-net-improve-recv-bundles.patch [new file with mode: 0644]
queue-6.12/io_uring-net-mark-iov-as-dynamically-allocated-even-for-single-segments.patch [new file with mode: 0644]
queue-6.12/io_uring-net-only-consider-msg_inq-if-larger-than-1.patch [new file with mode: 0644]
queue-6.12/io_uring-net-only-retry-recv-bundle-for-a-full-transfer.patch [new file with mode: 0644]
queue-6.12/io_uring-rsrc-don-t-rely-on-user-vaddr-alignment.patch [new file with mode: 0644]
queue-6.12/io_uring-rsrc-fix-folio-unpinning.patch [new file with mode: 0644]
queue-6.12/mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch [new file with mode: 0644]
queue-6.12/net-libwx-fix-tx-l4-checksum.patch [new file with mode: 0644]
queue-6.12/series

diff --git a/queue-6.12/io_uring-fix-potential-page-leak-in-io_sqe_buffer_register.patch b/queue-6.12/io_uring-fix-potential-page-leak-in-io_sqe_buffer_register.patch
new file mode 100644 (file)
index 0000000..532c694
--- /dev/null
@@ -0,0 +1,52 @@
+From bb71440639de0757a801ca818d5046c5ce08ced5 Mon Sep 17 00:00:00 2001
+From: Penglei Jiang <superman.xpt@gmail.com>
+Date: Tue, 17 Jun 2025 09:56:44 -0700
+Subject: io_uring: fix potential page leak in io_sqe_buffer_register()
+
+From: Penglei Jiang <superman.xpt@gmail.com>
+
+Commit e1c75831f682eef0f68b35723437146ed86070b1 upstream.
+
+If allocation of the 'imu' fails, then the existing pages aren't
+unpinned in the error path. This is mostly a theoretical issue,
+requiring fault injection to hit.
+
+Move unpin_user_pages() to unified error handling to fix the page leak
+issue.
+
+Fixes: d8c2237d0aa9 ("io_uring: add io_pin_pages() helper")
+Signed-off-by: Penglei Jiang <superman.xpt@gmail.com>
+Link: https://lore.kernel.org/r/20250617165644.79165-1-superman.xpt@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rsrc.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -983,10 +983,8 @@ static int io_sqe_buffer_register(struct
+               goto done;
+       ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
+-      if (ret) {
+-              unpin_user_pages(pages, nr_pages);
++      if (ret)
+               goto done;
+-      }
+       size = iov->iov_len;
+       /* store original address for later verification */
+@@ -1010,8 +1008,11 @@ static int io_sqe_buffer_register(struct
+               size -= vec_len;
+       }
+ done:
+-      if (ret)
++      if (ret) {
+               kvfree(imu);
++              if (pages)
++                      unpin_user_pages(pages, nr_pages);
++      }
+       kvfree(pages);
+       return ret;
+ }
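
The fix above consolidates the unpin into the single done: label so every failure path, including a failed imu allocation, releases the pinned pages. A minimal user-space sketch of that staged-acquisition/unified-cleanup shape (toy types and names, not the io_uring code itself):

#include <stdlib.h>
#include <stdio.h>

struct page { int id; };    /* toy stand-in for a pinned page */
struct imu  { int nr; };    /* toy stand-in for the imu metadata */

static int register_buffer(int nr_pages)
{
	struct page **pages = NULL;
	struct imu *imu = NULL;
	int i, ret = -1;

	pages = calloc(nr_pages, sizeof(*pages));
	if (!pages)
		goto done;
	for (i = 0; i < nr_pages; i++) {
		pages[i] = malloc(sizeof(**pages));    /* "pin" one page */
		if (!pages[i])
			goto done;
	}
	imu = malloc(sizeof(*imu));                    /* the allocation that may fail */
	if (!imu)
		goto done;
	imu->nr = nr_pages;
	ret = 0;          /* success: imu and pages stay "registered" (kept in imu->bvec in the real code) */
done:
	if (ret) {
		free(imu);
		if (pages)                             /* unified cleanup: "unpin" everything */
			for (i = 0; i < nr_pages; i++)
				free(pages[i]);
	}
	free(pages);                                   /* the page array itself is always released */
	return ret;
}

int main(void)
{
	printf("register_buffer: %d\n", register_buffer(4));
	return 0;
}
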
diff --git a/queue-6.12/io_uring-kbuf-flag-partial-buffer-mappings.patch b/queue-6.12/io_uring-kbuf-flag-partial-buffer-mappings.patch
new file mode 100644 (file)
index 0000000..e018ad1
--- /dev/null
@@ -0,0 +1,142 @@
+From 29305dedb17704599efaa5c3ee6b1bee7dc80fd8 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Thu, 26 Jun 2025 12:17:48 -0600
+Subject: io_uring/kbuf: flag partial buffer mappings
+
+From: Jens Axboe <axboe@kernel.dk>
+
+A previous commit aborted mapping more for a non-incremental ring for
+bundle peeking, but depending on where in the process this peeking
+happened, it would not necessarily prevent a retry by the user. That can
+create gaps in the received/read data.
+
+Add struct buf_sel_arg->partial_map, which can pass this information
+back. The networking side can then map that to internal state and use it
+to gate retry as well.
+
+Since this necessitates a new flag, change io_sr_msg->retry to a
+retry_flags member, and store both the retry and partial map condition
+in there.
+
+Cc: stable@vger.kernel.org
+Fixes: 26ec15e4b0c1 ("io_uring/kbuf: don't truncate end buffer for multiple buffer peeks")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+(cherry picked from commit 178b8ff66ff827c41b4fa105e9aabb99a0b5c537)
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c |    1 +
+ io_uring/kbuf.h |    1 +
+ io_uring/net.c  |   23 +++++++++++++++--------
+ 3 files changed, 17 insertions(+), 8 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -263,6 +263,7 @@ static int io_ring_buffers_peek(struct i
+               if (len > arg->max_len) {
+                       len = arg->max_len;
+                       if (!(bl->flags & IOBL_INC)) {
++                              arg->partial_map = 1;
+                               if (iov != arg->iovs)
+                                       break;
+                               buf->len = len;
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -61,6 +61,7 @@ struct buf_sel_arg {
+       size_t max_len;
+       unsigned short nr_iovs;
+       unsigned short mode;
++      unsigned short partial_map;
+ };
+ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -76,13 +76,18 @@ struct io_sr_msg {
+       /* initialised and used only by !msg send variants */
+       u16                             addr_len;
+       u16                             buf_group;
+-      bool                            retry;
++      unsigned short                  retry_flags;
+       void __user                     *addr;
+       void __user                     *msg_control;
+       /* used only for send zerocopy */
+       struct io_kiocb                 *notif;
+ };
++enum sr_retry_flags {
++      IO_SR_MSG_RETRY         = 1,
++      IO_SR_MSG_PARTIAL_MAP   = 2,
++};
++
+ /*
+  * Number of times we'll try and do receives if there's more data. If we
+  * exceed this limit, then add us to the back of the queue and retry from
+@@ -204,7 +209,7 @@ static inline void io_mshot_prep_retry(s
+       req->flags &= ~REQ_F_BL_EMPTY;
+       sr->done_io = 0;
+-      sr->retry = false;
++      sr->retry_flags = 0;
+       sr->len = 0; /* get from the provided buffer */
+       req->buf_index = sr->buf_group;
+ }
+@@ -411,7 +416,7 @@ int io_sendmsg_prep(struct io_kiocb *req
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       sr->done_io = 0;
+-      sr->retry = false;
++      sr->retry_flags = 0;
+       if (req->opcode == IORING_OP_SEND) {
+               if (READ_ONCE(sqe->__pad3[0]))
+@@ -783,7 +788,7 @@ int io_recvmsg_prep(struct io_kiocb *req
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       sr->done_io = 0;
+-      sr->retry = false;
++      sr->retry_flags = 0;
+       if (unlikely(sqe->file_index || sqe->addr2))
+               return -EINVAL;
+@@ -856,7 +861,7 @@ static inline bool io_recv_finish(struct
+               cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
+                                     issue_flags);
+-              if (sr->retry)
++              if (sr->retry_flags & IO_SR_MSG_RETRY)
+                       cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+               /* bundle with no more immediate buffers, we're done */
+               if (req->flags & REQ_F_BL_EMPTY)
+@@ -865,12 +870,12 @@ static inline bool io_recv_finish(struct
+                * If more is available AND it was a full transfer, retry and
+                * append to this one
+                */
+-              if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
++              if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+                   !iov_iter_count(&kmsg->msg.msg_iter)) {
+                       req->cqe.flags = cflags & ~CQE_F_MASK;
+                       sr->len = kmsg->msg.msg_inq;
+                       sr->done_io += this_ret;
+-                      sr->retry = true;
++                      sr->retry_flags |= IO_SR_MSG_RETRY;
+                       return false;
+               }
+       } else {
+@@ -1123,6 +1128,8 @@ static int io_recv_buf_select(struct io_
+                       kmsg->free_iov = arg.iovs;
+                       req->flags |= REQ_F_NEED_CLEANUP;
+               }
++              if (arg.partial_map)
++                      sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+               /* special case 1 vec, can be a fast path */
+               if (ret == 1) {
+@@ -1252,7 +1259,7 @@ int io_send_zc_prep(struct io_kiocb *req
+       struct io_kiocb *notif;
+       zc->done_io = 0;
+-      zc->retry = false;
++      zc->retry_flags = 0;
+       req->flags |= REQ_F_POLL_NO_LAZY;
+       if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
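
Folding the old retry bool and the new partial-map condition into one retry_flags word means a single test can gate the append path: any set bit (retry already armed, or the buffer peek only partially mapped) suppresses another pass. A tiny compile-and-run illustration of that packing; the flag names come from the patch, the surrounding harness is made up:

#include <stdio.h>

enum sr_retry_flags {
	IO_SR_MSG_RETRY       = 1,   /* an append/retry of this request is already armed */
	IO_SR_MSG_PARTIAL_MAP = 2,   /* buffer peek mapped less than was asked for */
};

int main(void)
{
	unsigned short retry_flags = 0;

	retry_flags |= IO_SR_MSG_PARTIAL_MAP;   /* set by buffer selection */

	/* the retry gate: any flag set means "post now, do not append more" */
	if (!retry_flags)
		printf("full mapping and no retry armed: append to this completion\n");
	else
		printf("post the completion as-is (flags=%u)\n", (unsigned)retry_flags);
	return 0;
}
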
diff --git a/queue-6.12/io_uring-net-always-use-current-transfer-count-for-buffer-put.patch b/queue-6.12/io_uring-net-always-use-current-transfer-count-for-buffer-put.patch
new file mode 100644 (file)
index 0000000..2bd4660
--- /dev/null
@@ -0,0 +1,35 @@
+From b66423f9c952d70f4c8130da3b9fc2be68db52cc Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 20 Jun 2025 07:41:21 -0600
+Subject: io_uring/net: always use current transfer count for buffer put
+
+From: Jens Axboe <axboe@kernel.dk>
+
+A previous fix corrected the retry condition for when to continue a
+current bundle, but it missed that the current (not the total) transfer
+count also applies to the buffer put. If not, then for incrementally
+consumed buffer rings repeated completions on the same request may end
+up over consuming.
+
+Reported-by: Roy Tang (ErgoniaTrading) <royonia@ergonia.io>
+Cc: stable@vger.kernel.org
+Fixes: 3a08988123c8 ("io_uring/net: only retry recv bundle for a full transfer")
+Link: https://github.com/axboe/liburing/issues/1423
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+(cherry picked from commit 51a4598ad5d9eb6be4ec9ba65bbfdf0ac302eb2e)
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -854,7 +854,7 @@ static inline bool io_recv_finish(struct
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+               size_t this_ret = *ret - sr->done_io;
+-              cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
++              cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
+                                     issue_flags);
+               if (sr->retry)
+                       cflags = req->cqe.flags | (cflags & CQE_F_MASK);
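
A worked example of why the current rather than the total transfer count has to feed the buffer put: with 1 KiB ring buffers, a first pass that moved 4 KiB (done_io = 4096) and a second pass ending with *ret = 6144 in total, only 2 KiB arrived this round, so only two buffers' worth should be consumed, not six. A small sketch of the arithmetic (hypothetical helper, not io_bundle_nbufs() itself):

#include <stdio.h>

#define BUF_LEN 1024u

/* how many whole-or-partial buffers a byte count spans */
static unsigned int nbufs_for(unsigned int bytes)
{
	return (bytes + BUF_LEN - 1) / BUF_LEN;
}

int main(void)
{
	unsigned int done_io = 4096, total_ret = 6144;
	unsigned int this_ret = total_ret - done_io;   /* bytes from this pass only */

	printf("buggy  (total):   put %u buffers\n", nbufs_for(total_ret)); /* 6 */
	printf("fixed  (current): put %u buffers\n", nbufs_for(this_ret));  /* 2 */
	return 0;
}
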
diff --git a/queue-6.12/io_uring-net-improve-recv-bundles.patch b/queue-6.12/io_uring-net-improve-recv-bundles.patch
new file mode 100644 (file)
index 0000000..71ad9f1
--- /dev/null
@@ -0,0 +1,128 @@
+From 3a5ac5f9a18ac9a80cdcee755a88b9ba8db90e3c Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sat, 8 Feb 2025 10:50:34 -0700
+Subject: io_uring/net: improve recv bundles
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 7c71a0af81ba72de9b2c501065e4e718aba9a271 upstream.
+
+Current recv bundles are only supported for multishot receives, and
+additionally they also always post at least 2 CQEs if more data is
+available than what a buffer will hold. This happens because the initial
+bundle recv will do a single buffer, and then do the rest of what is in
+the socket as a followup receive. As shown in a test program, if 1k
+buffers are available and 32k is available to receive in the socket,
+you'd get the following completions:
+
+bundle=1, mshot=0
+cqe res 1024
+cqe res 1024
+[...]
+cqe res 1024
+
+bundle=1, mshot=1
+cqe res 1024
+cqe res 31744
+
+where bundle=1 && mshot=0 will post 32 1k completions, and bundle=1 &&
+mshot=1 will post a 1k completion and then a 31k completion.
+
+To support bundle recv without multishot, it's possible to simply retry
+the recv immediately and post a single completion, rather than split it
+into two completions. With the below patch, the same test looks as
+follows:
+
+bundle=1, mshot=0
+cqe res 32768
+
+bundle=1, mshot=1
+cqe res 32768
+
+where mshot=0 works fine for bundles, and both of them post just a
+single 32k completion rather than split it into separate completions.
+Posting fewer completions is always a nice win, and not needing
+multishot for proper bundle efficiency is nice for cases that can't
+necessarily use multishot.
+
+Reported-by: Norman Maurer <norman_maurer@apple.com>
+Link: https://lore.kernel.org/r/184f9f92-a682-4205-a15d-89e18f664502@kernel.dk
+Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |   18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -76,6 +76,7 @@ struct io_sr_msg {
+       /* initialised and used only by !msg send variants */
+       u16                             addr_len;
+       u16                             buf_group;
++      bool                            retry;
+       void __user                     *addr;
+       void __user                     *msg_control;
+       /* used only for send zerocopy */
+@@ -203,6 +204,7 @@ static inline void io_mshot_prep_retry(s
+       req->flags &= ~REQ_F_BL_EMPTY;
+       sr->done_io = 0;
++      sr->retry = false;
+       sr->len = 0; /* get from the provided buffer */
+       req->buf_index = sr->buf_group;
+ }
+@@ -409,6 +411,7 @@ int io_sendmsg_prep(struct io_kiocb *req
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       sr->done_io = 0;
++      sr->retry = false;
+       if (req->opcode == IORING_OP_SEND) {
+               if (READ_ONCE(sqe->__pad3[0]))
+@@ -780,6 +783,7 @@ int io_recvmsg_prep(struct io_kiocb *req
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       sr->done_io = 0;
++      sr->retry = false;
+       if (unlikely(sqe->file_index || sqe->addr2))
+               return -EINVAL;
+@@ -828,6 +832,9 @@ int io_recvmsg_prep(struct io_kiocb *req
+       return io_recvmsg_prep_setup(req);
+ }
++/* bits to clear in old and inherit in new cflags on bundle retry */
++#define CQE_F_MASK    (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
++
+ /*
+  * Finishes io_recv and io_recvmsg.
+  *
+@@ -847,9 +854,19 @@ static inline bool io_recv_finish(struct
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+               cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
+                                     issue_flags);
++              if (sr->retry)
++                      cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+               /* bundle with no more immediate buffers, we're done */
+               if (req->flags & REQ_F_BL_EMPTY)
+                       goto finish;
++              /* if more is available, retry and append to this one */
++              if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
++                      req->cqe.flags = cflags & ~CQE_F_MASK;
++                      sr->len = kmsg->msg.msg_inq;
++                      sr->done_io += *ret;
++                      sr->retry = true;
++                      return false;
++              }
+       } else {
+               cflags |= io_put_kbuf(req, *ret, issue_flags);
+       }
+@@ -1228,6 +1245,7 @@ int io_send_zc_prep(struct io_kiocb *req
+       struct io_kiocb *notif;
+       zc->done_io = 0;
++      zc->retry = false;
+       req->flags |= REQ_F_POLL_NO_LAZY;
+       if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
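
The appended retry above keeps receiving while the socket still has queued data and reports one accumulated completion instead of one per pass. A rough user-space analogue of that loop, using plain recv() and FIONREAD rather than io_uring (assumptions: a stream socket, non-blocking reads):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

/* keep receiving while more data is already queued, report one total */
ssize_t recv_bundle(int fd, char *buf, size_t len)
{
	ssize_t total = 0;
	int inq = 0;

	for (;;) {
		ssize_t n = recv(fd, buf + total, len - total, MSG_DONTWAIT);
		if (n <= 0)
			break;
		total += n;
		if ((size_t)total == len)
			break;                          /* no buffer space left */
		if (ioctl(fd, FIONREAD, &inq) < 0 || inq <= 0)
			break;                          /* nothing further queued right now */
	}
	return total ? total : -1;
}

int main(void)
{
	int sv[2];
	char buf[64];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
		return 1;
	write(sv[1], "hello ", 6);
	write(sv[1], "world", 5);
	printf("got %zd bytes in one completion\n", recv_bundle(sv[0], buf, sizeof(buf)));
	return 0;
}
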
diff --git a/queue-6.12/io_uring-net-mark-iov-as-dynamically-allocated-even-for-single-segments.patch b/queue-6.12/io_uring-net-mark-iov-as-dynamically-allocated-even-for-single-segments.patch
new file mode 100644 (file)
index 0000000..6b7645c
--- /dev/null
@@ -0,0 +1,49 @@
+From c4e101eab9014e6174d9042c3dc7ff80ce22b889 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 25 Jun 2025 10:17:06 -0600
+Subject: io_uring/net: mark iov as dynamically allocated even for single segments
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 9a709b7e98e6fa51600b5f2d24c5068efa6d39de upstream.
+
+A bigger array of vecs could've been allocated, but
+io_ring_buffers_peek() still decided to cap the mapped range depending
+on how much data was available. Hence don't rely on the segment count
+to know if the request should be marked as needing cleanup, always
+check upfront if the iov array is different than the fast_iov array.
+
+Fixes: 26ec15e4b0c1 ("io_uring/kbuf: don't truncate end buffer for multiple buffer peeks")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1118,6 +1118,12 @@ static int io_recv_buf_select(struct io_
+               if (unlikely(ret < 0))
+                       return ret;
++              if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
++                      kmsg->free_iov_nr = ret;
++                      kmsg->free_iov = arg.iovs;
++                      req->flags |= REQ_F_NEED_CLEANUP;
++              }
++
+               /* special case 1 vec, can be a fast path */
+               if (ret == 1) {
+                       sr->buf = arg.iovs[0].iov_base;
+@@ -1126,11 +1132,6 @@ static int io_recv_buf_select(struct io_
+               }
+               iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
+                               arg.out_len);
+-              if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+-                      kmsg->free_iov_nr = ret;
+-                      kmsg->free_iov = arg.iovs;
+-                      req->flags |= REQ_F_NEED_CLEANUP;
+-              }
+       } else {
+               void __user *buf;
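
The bug was ordering: the early-return fast path for a single segment skipped the code that records who owns the iov array. The ownership test itself is only a pointer comparison against the inline array, and it has to run before any early return. A minimal sketch of that pattern with made-up names:

#include <sys/uio.h>

#define FAST_IOVS 8

struct msg_state {
	struct iovec  fast_iov[FAST_IOVS];
	struct iovec *iovs;           /* whatever buffer selection handed back */
	int           need_cleanup;   /* the array must be freed later */
};

/* record ownership first, before any single-segment fast path returns */
void note_iov_ownership(struct msg_state *s, struct iovec *iovs)
{
	if (iovs != s->fast_iov)      /* dynamically allocated, even if only 1 vec gets used */
		s->need_cleanup = 1;
	s->iovs = iovs;
}
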
diff --git a/queue-6.12/io_uring-net-only-consider-msg_inq-if-larger-than-1.patch b/queue-6.12/io_uring-net-only-consider-msg_inq-if-larger-than-1.patch
new file mode 100644 (file)
index 0000000..8c14a50
--- /dev/null
@@ -0,0 +1,49 @@
+From 8861bd9b328862a49e1cc613bb70eba48d6ce0c8 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 28 May 2025 13:45:44 -0600
+Subject: io_uring/net: only consider msg_inq if larger than 1
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 2c7f023219966777be0687e15b57689894304cd3 upstream.
+
+Currently retry and general validity of msg_inq is gated on it being
+larger than zero, but it's entirely possible for this to be slightly
+inaccurate. In particular, if FIN is received, it'll return 1.
+
+Just use larger than 1 as the check. This covers both the FIN case, and
+at the same time, it doesn't make much sense to retry a recv immediately
+if there's even just a single 1 byte of valid data in the socket.
+
+Leave the SOCK_NONEMPTY flagging when larger than 0 still, as an app may
+use that for the final receive.
+
+Cc: stable@vger.kernel.org
+Reported-by: Christian Mazakas <christian.mazakas@gmail.com>
+Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -865,7 +865,7 @@ static inline bool io_recv_finish(struct
+                * If more is available AND it was a full transfer, retry and
+                * append to this one
+                */
+-              if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
++              if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+                   !iov_iter_count(&kmsg->msg.msg_iter)) {
+                       req->cqe.flags = cflags & ~CQE_F_MASK;
+                       sr->len = kmsg->msg.msg_inq;
+@@ -1111,7 +1111,7 @@ static int io_recv_buf_select(struct io_
+                       arg.mode |= KBUF_MODE_FREE;
+               }
+-              if (kmsg->msg.msg_inq > 0)
++              if (kmsg->msg.msg_inq > 1)
+                       arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
+               ret = io_buffers_peek(req, &arg);
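
The same queued-byte count is visible from user space via SIOCINQ (alias FIONREAD); the point of the patch is that a reading of 0 or 1 is not worth an immediate extra receive, hence the "> 1" threshold. A hedged user-space sketch of that gate (the FIN-related value of 1 described above is kernel-internal behaviour; this only shows the threshold):

#include <sys/ioctl.h>
#include <linux/sockios.h>    /* SIOCINQ */

/* decide whether an immediate re-receive is worthwhile */
int worth_retrying(int fd)
{
	int inq = 0;

	if (ioctl(fd, SIOCINQ, &inq) < 0)
		return 0;
	return inq > 1;           /* 0 or 1 byte queued: don't bother; matches the "> 1" gate */
}
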
diff --git a/queue-6.12/io_uring-net-only-retry-recv-bundle-for-a-full-transfer.patch b/queue-6.12/io_uring-net-only-retry-recv-bundle-for-a-full-transfer.patch
new file mode 100644 (file)
index 0000000..1843e74
--- /dev/null
@@ -0,0 +1,60 @@
+From 010c5e8c46373dfba92fbe264ac0dc407fcf38db Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 21 May 2025 18:51:49 -0600
+Subject: io_uring/net: only retry recv bundle for a full transfer
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 3a08988123c868dbfdd054541b1090fb891fa49e upstream.
+
+If a shorter than assumed transfer was seen, a partial buffer will have
+been filled. For that case it isn't sane to attempt to fill more into
+the bundle before posting a completion, as that will cause a gap in
+the received data.
+
+Check if the iterator has hit zero and only allow to continue a bundle
+operation if that is the case.
+
+Also ensure that for putting finished buffers, only the current transfer
+is accounted. Otherwise too many buffers may be put for a short transfer.
+
+Link: https://github.com/axboe/liburing/issues/1409
+Cc: stable@vger.kernel.org
+Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -852,18 +852,24 @@ static inline bool io_recv_finish(struct
+               cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+       if (sr->flags & IORING_RECVSEND_BUNDLE) {
+-              cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
++              size_t this_ret = *ret - sr->done_io;
++
++              cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret),
+                                     issue_flags);
+               if (sr->retry)
+                       cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+               /* bundle with no more immediate buffers, we're done */
+               if (req->flags & REQ_F_BL_EMPTY)
+                       goto finish;
+-              /* if more is available, retry and append to this one */
+-              if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
++              /*
++               * If more is available AND it was a full transfer, retry and
++               * append to this one
++               */
++              if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 &&
++                  !iov_iter_count(&kmsg->msg.msg_iter)) {
+                       req->cqe.flags = cflags & ~CQE_F_MASK;
+                       sr->len = kmsg->msg.msg_inq;
+-                      sr->done_io += *ret;
++                      sr->done_io += this_ret;
+                       sr->retry = true;
+                       return false;
+               }
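
The guard added here continues a bundle only when the previous pass consumed everything that was asked for (the iterator count reached zero); a short transfer leaves a partially filled buffer, and appending more data after it would create a gap. The same "full transfer?" condition for a plain readv()-style call might look like this (a sketch, not the kernel check):

#include <stdbool.h>
#include <sys/types.h>
#include <sys/uio.h>

/* true only if the last transfer filled the entire requested length */
bool was_full_transfer(const struct iovec *iov, int iovcnt, ssize_t got)
{
	size_t asked = 0;
	int i;

	for (i = 0; i < iovcnt; i++)
		asked += iov[i].iov_len;
	return got > 0 && (size_t)got == asked;   /* short transfer => stop the bundle here */
}
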
diff --git a/queue-6.12/io_uring-rsrc-don-t-rely-on-user-vaddr-alignment.patch b/queue-6.12/io_uring-rsrc-don-t-rely-on-user-vaddr-alignment.patch
new file mode 100644 (file)
index 0000000..bb1ac22
--- /dev/null
@@ -0,0 +1,56 @@
+From a2b1e9553839f0d0524f9a68239ca215e87586bd Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 24 Jun 2025 14:40:34 +0100
+Subject: io_uring/rsrc: don't rely on user vaddr alignment
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 3a3c6d61577dbb23c09df3e21f6f9eda1ecd634b upstream.
+
+There is no guaranteed alignment for user pointers, however the
+calculation of an offset of the first page into a folio after coalescing
+uses some weird bit mask logic, get rid of it.
+
+Cc: stable@vger.kernel.org
+Reported-by: David Hildenbrand <david@redhat.com>
+Fixes: a8edbb424b139 ("io_uring/rsrc: enable multi-hugepage buffer coalescing")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/io-uring/e387b4c78b33f231105a601d84eefd8301f57954.1750771718.git.asml.silence@gmail.com/
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rsrc.c |    5 ++++-
+ io_uring/rsrc.h |    1 +
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -918,6 +918,7 @@ static bool io_try_coalesce_buffer(struc
+               return false;
+       data->folio_shift = folio_shift(folio);
++      data->first_folio_page_idx = folio_page_idx(folio, page_array[0]);
+       /*
+        * Check if pages are contiguous inside a folio, and all folios have
+        * the same page count except for the head and tail.
+@@ -998,7 +999,9 @@ static int io_sqe_buffer_register(struct
+       if (coalesced)
+               imu->folio_shift = data.folio_shift;
+       refcount_set(&imu->refs, 1);
+-      off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
++      off = (unsigned long)iov->iov_base & ~PAGE_MASK;
++      if (coalesced)
++              off += data.first_folio_page_idx << PAGE_SHIFT;
+       *pimu = imu;
+       ret = 0;
+--- a/io_uring/rsrc.h
++++ b/io_uring/rsrc.h
+@@ -56,6 +56,7 @@ struct io_imu_folio_data {
+       /* For non-head/tail folios, has to be fully included */
+       unsigned int    nr_pages_mid;
+       unsigned int    folio_shift;
++      unsigned long   first_folio_page_idx;
+ };
+ void io_rsrc_node_ref_zero(struct io_rsrc_node *node);
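
The offset change is easiest to see with numbers. Assume PAGE_SIZE = 4096 and a 16 KiB folio (folio_shift = 14), a user address of 0x7f0000003080, and suppose the first pinned page turns out to be page 2 of its folio. The old expression trusted the low folio bits of the user address (giving 0x3080), while the new one builds the offset from the in-page offset plus the recorded first_folio_page_idx (giving 0x2080). A standalone check with these made-up values:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long vaddr = 0x7f0000003080UL;   /* arbitrary, not folio aligned */
	unsigned int folio_shift = 14;            /* 16 KiB folio */
	unsigned long first_folio_page_idx = 2;   /* first pinned page is folio page 2 */

	unsigned long old_off = vaddr & ((1UL << folio_shift) - 1);
	unsigned long new_off = (vaddr & ~PAGE_MASK) +
				(first_folio_page_idx << PAGE_SHIFT);

	printf("old: 0x%lx  new: 0x%lx\n", old_off, new_off);  /* 0x3080 vs 0x2080 */
	return 0;
}
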
diff --git a/queue-6.12/io_uring-rsrc-fix-folio-unpinning.patch b/queue-6.12/io_uring-rsrc-fix-folio-unpinning.patch
new file mode 100644 (file)
index 0000000..0c0c825
--- /dev/null
@@ -0,0 +1,77 @@
+From e33b8b1df1133d03c7b3581e666430446e017016 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 24 Jun 2025 14:40:33 +0100
+Subject: io_uring/rsrc: fix folio unpinning
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 5afb4bf9fc62d828647647ec31745083637132e4 upstream.
+
+syzbot complains about an unmapping failure:
+
+[  108.070381][   T14] kernel BUG at mm/gup.c:71!
+[  108.070502][   T14] Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
+[  108.123672][   T14] Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20250221-8.fc42 02/21/2025
+[  108.127458][   T14] Workqueue: iou_exit io_ring_exit_work
+[  108.174205][   T14] Call trace:
+[  108.175649][   T14]  sanity_check_pinned_pages+0x7cc/0x7d0 (P)
+[  108.178138][   T14]  unpin_user_page+0x80/0x10c
+[  108.180189][   T14]  io_release_ubuf+0x84/0xf8
+[  108.182196][   T14]  io_free_rsrc_node+0x250/0x57c
+[  108.184345][   T14]  io_rsrc_data_free+0x148/0x298
+[  108.186493][   T14]  io_sqe_buffers_unregister+0x84/0xa0
+[  108.188991][   T14]  io_ring_ctx_free+0x48/0x480
+[  108.191057][   T14]  io_ring_exit_work+0x764/0x7d8
+[  108.193207][   T14]  process_one_work+0x7e8/0x155c
+[  108.195431][   T14]  worker_thread+0x958/0xed8
+[  108.197561][   T14]  kthread+0x5fc/0x75c
+[  108.199362][   T14]  ret_from_fork+0x10/0x20
+
+We can pin a tail page of a folio, but then io_uring will try to unpin
+the head page of the folio. While it should be fine in terms of keeping
+the page actually alive, mm folks say it's wrong and triggers a debug
+warning. Use unpin_user_folio() instead of unpin_user_page*.
+
+Cc: stable@vger.kernel.org
+Debugged-by: David Hildenbrand <david@redhat.com>
+Reported-by: syzbot+1d335893772467199ab6@syzkaller.appspotmail.com
+Closes: https://lkml.kernel.org/r/683f1551.050a0220.55ceb.0017.GAE@google.com
+Fixes: a8edbb424b139 ("io_uring/rsrc: enable multi-hugepage buffer coalescing")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/io-uring/a28b0f87339ac2acf14a645dad1e95bbcbf18acd.1750771718.git.asml.silence@gmail.com/
+[axboe: adapt to current tree, massage commit message]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rsrc.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -119,8 +119,11 @@ static void io_buffer_unmap(struct io_ri
+       if (imu != &dummy_ubuf) {
+               if (!refcount_dec_and_test(&imu->refs))
+                       return;
+-              for (i = 0; i < imu->nr_bvecs; i++)
+-                      unpin_user_page(imu->bvec[i].bv_page);
++              for (i = 0; i < imu->nr_bvecs; i++) {
++                      struct folio *folio = page_folio(imu->bvec[i].bv_page);
++
++                      unpin_user_folio(folio, 1);
++              }
+               if (imu->acct_pages)
+                       io_unaccount_mem(ctx, imu->acct_pages);
+               kvfree(imu);
+@@ -1010,8 +1013,10 @@ static int io_sqe_buffer_register(struct
+ done:
+       if (ret) {
+               kvfree(imu);
+-              if (pages)
+-                      unpin_user_pages(pages, nr_pages);
++              if (pages) {
++                      for (i = 0; i < nr_pages; i++)
++                              unpin_user_folio(page_folio(pages[i]), 1);
++              }
+       }
+       kvfree(pages);
+       return ret;
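
The rule being enforced is that pin counts live on the folio, so the release must target the folio owning whichever page (head or tail) was pinned. A toy model of "pages sharing a folio, released through the folio" (none of these types are the real mm API):

#include <stdio.h>

struct folio { int pincount; };
struct page  { struct folio *owner; };    /* head and tail pages share one owner */

static struct folio *owning_folio(struct page *p) { return p->owner; }

static void unpin_folio_ref(struct folio *f, int refs)
{
	f->pincount -= refs;                  /* release against the folio, not the page */
}

int main(void)
{
	struct folio f = { .pincount = 3 };
	struct page head = { &f }, tail1 = { &f }, tail2 = { &f };
	struct page *pinned[] = { &head, &tail1, &tail2 };
	unsigned i;

	for (i = 0; i < 3; i++)
		unpin_folio_ref(owning_folio(pinned[i]), 1);

	printf("remaining pins: %d\n", f.pincount);   /* 0 */
	return 0;
}
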
diff --git a/queue-6.12/mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch b/queue-6.12/mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch
new file mode 100644 (file)
index 0000000..0fbe96e
--- /dev/null
@@ -0,0 +1,109 @@
+From 0cf4b1687a187ba9247c71721d8b064634eda1f7 Mon Sep 17 00:00:00 2001
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Date: Fri, 6 Jun 2025 13:50:32 +0100
+Subject: mm/vma: reset VMA iterator on commit_merge() OOM failure
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+commit 0cf4b1687a187ba9247c71721d8b064634eda1f7 upstream.
+
+While an OOM failure in commit_merge() isn't really feasible due to the
+allocation which might fail (a maple tree pre-allocation) being 'too small
+to fail', we do need to handle this case correctly regardless.
+
+In vma_merge_existing_range(), we can theoretically encounter failures
+which result in an OOM error in two ways - firstly dup_anon_vma() might
+fail with an OOM error, and secondly commit_merge() failing, ultimately,
+to pre-allocate a maple tree node.
+
+The abort logic for dup_anon_vma() resets the VMA iterator to the initial
+range, ensuring that any logic looping on this iterator will correctly
+proceed to the next VMA.
+
+However the commit_merge() abort logic does not do the same thing.  This
+resulted in a syzbot report occurring because mlockall() iterates through
+VMAs, is tolerant of errors, but ended up with an incorrect previous VMA
+being specified due to incorrect iterator state.
+
+While making this change, it became apparent we are duplicating logic -
+the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom
+option on modify/merge, use in uffd release") duplicates the
+vmg->give_up_on_oom check in both abort branches.
+
+Additionally, we observe that we can perform the anon_dup check safely on
+dup_anon_vma() failure, as this will not be modified should this call
+fail.
+
+Finally, we need to reset the iterator in both cases, so now we can simply
+use the exact same code to abort for both.
+
+We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to
+be otherwise and it allows us to implement the abort check more neatly.
+
+Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com
+Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure")
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vma.c |   27 ++++++++-------------------
+ 1 file changed, 8 insertions(+), 19 deletions(-)
+
+--- a/mm/vma.c
++++ b/mm/vma.c
+@@ -836,9 +836,6 @@ static struct vm_area_struct *vma_merge_
+               err = dup_anon_vma(next, vma, &anon_dup);
+       }
+-      if (err)
+-              goto abort;
+-
+       /*
+        * In nearly all cases, we expand vmg->vma. There is one exception -
+        * merge_right where we partially span the VMA. In this case we shrink
+@@ -846,22 +843,11 @@ static struct vm_area_struct *vma_merge_
+        */
+       expanded = !merge_right || merge_will_delete_vma;
+-      if (commit_merge(vmg, adjust,
+-                       merge_will_delete_vma ? vma : NULL,
+-                       merge_will_delete_next ? next : NULL,
+-                       adj_start, expanded)) {
+-              if (anon_dup)
+-                      unlink_anon_vmas(anon_dup);
+-
+-              /*
+-               * We've cleaned up any cloned anon_vma's, no VMAs have been
+-               * modified, no harm no foul if the user requests that we not
+-               * report this and just give up, leaving the VMAs unmerged.
+-               */
+-              if (!vmg->give_up_on_oom)
+-                      vmg->state = VMA_MERGE_ERROR_NOMEM;
+-              return NULL;
+-      }
++      if (err || commit_merge(vmg, adjust,
++                      merge_will_delete_vma ? vma : NULL,
++                      merge_will_delete_next ? next : NULL,
++                      adj_start, expanded))
++              goto abort;
+       res = merge_left ? prev : next;
+       khugepaged_enter_vma(res, vmg->flags);
+@@ -873,6 +859,9 @@ abort:
+       vma_iter_set(vmg->vmi, start);
+       vma_iter_load(vmg->vmi);
++      if (anon_dup)
++              unlink_anon_vmas(anon_dup);
++
+       /*
+        * This means we have failed to clone anon_vma's correctly, but no
+        * actual changes to VMAs have occurred, so no harm no foul - if the
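
The part that matters for callers like mlockall() is that the single abort path rewinds the iterator to the range it started from, whichever of the two allocations failed, so a loop that tolerates the error continues from a consistent position. A small model of "reset the cursor on any failure" (an array index standing in for the VMA iterator):

#include <stdio.h>
#include <stdbool.h>

struct iter { int pos; };

/* try to merge the element at it->pos with its neighbour; on any failure,
 * rewind the iterator so the caller's loop continues from where it started */
static bool try_merge(struct iter *it, bool alloc_fails)
{
	int start = it->pos;

	it->pos++;                    /* merging tentatively advances the cursor */
	if (alloc_fails)
		goto abort;
	return true;

abort:
	it->pos = start;              /* single abort path: restore caller-visible state */
	return false;
}

int main(void)
{
	struct iter it = { .pos = 3 };

	if (!try_merge(&it, true))
		printf("merge failed, iterator back at %d\n", it.pos);   /* 3 */
	return 0;
}
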
diff --git a/queue-6.12/net-libwx-fix-tx-l4-checksum.patch b/queue-6.12/net-libwx-fix-tx-l4-checksum.patch
new file mode 100644 (file)
index 0000000..d534c02
--- /dev/null
@@ -0,0 +1,43 @@
+From c7d82913d5f9e97860772ee4051eaa66b56a6273 Mon Sep 17 00:00:00 2001
+From: Jiawen Wu <jiawenwu@trustnetic.com>
+Date: Mon, 24 Mar 2025 18:32:35 +0800
+Subject: net: libwx: fix Tx L4 checksum
+
+From: Jiawen Wu <jiawenwu@trustnetic.com>
+
+commit c7d82913d5f9e97860772ee4051eaa66b56a6273 upstream.
+
+The hardware only supports L4 checksum offload for TCP/UDP/SCTP protocol.
+There was a bug that set the Tx checksum flag for other protocols, which
+results in a Tx ring hang. Fix it by computing a software checksum for these packets.
+
+Fixes: 3403960cdf86 ("net: wangxun: libwx add tx offload functions")
+Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
+Link: https://patch.msgid.link/20250324103235.823096-2-jiawenwu@trustnetic.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Wenshan Lan <jetlan9@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/wangxun/libwx/wx_lib.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
++++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+@@ -1336,6 +1336,7 @@ static void wx_tx_csum(struct wx_ring *t
+       u8 tun_prot = 0;
+       if (skb->ip_summed != CHECKSUM_PARTIAL) {
++csum_failed:
+               if (!(first->tx_flags & WX_TX_FLAGS_HW_VLAN) &&
+                   !(first->tx_flags & WX_TX_FLAGS_CC))
+                       return;
+@@ -1429,7 +1430,8 @@ static void wx_tx_csum(struct wx_ring *t
+                                       WX_TXD_L4LEN_SHIFT;
+                       break;
+               default:
+-                      break;
++                      skb_checksum_help(skb);
++                      goto csum_failed;
+               }
+               /* update TX checksum flag */
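
For protocols the hardware cannot offload, the fix punts to a software checksum (skb_checksum_help()) rather than letting the bogus offload request hang the Tx ring. The software path boils down to the standard Internet checksum; a user-space version of that ones'-complement sum, for illustration only, not the driver code:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* RFC 1071 ones'-complement checksum over a byte buffer */
static uint16_t inet_checksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;            /* pad the odd trailing byte */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);    /* fold carries back in */
	return (uint16_t)~sum;
}

int main(void)
{
	const uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c };

	printf("checksum: 0x%04x\n", inet_checksum(pkt, sizeof(pkt)));
	return 0;
}
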
diff --git a/queue-6.12/series b/queue-6.12/series
index 40855055eed4291d94352c5c23c0014a32305401..d4c030ed650981562e819b34120ad0e89c325128 100644 (file)
@@ -187,3 +187,14 @@ drm-amd-display-fix-mpv-playback-corruption-on-weston.patch
 media-uvcvideo-rollback-non-processed-entities-on-error.patch
 x86-fpu-refactor-xfeature-bitmask-update-code-for-sigframe-xsave.patch
 x86-pkeys-simplify-pkru-update-in-signal-frame.patch
+net-libwx-fix-tx-l4-checksum.patch
+io_uring-fix-potential-page-leak-in-io_sqe_buffer_register.patch
+io_uring-rsrc-fix-folio-unpinning.patch
+io_uring-rsrc-don-t-rely-on-user-vaddr-alignment.patch
+io_uring-net-improve-recv-bundles.patch
+io_uring-net-only-retry-recv-bundle-for-a-full-transfer.patch
+io_uring-net-only-consider-msg_inq-if-larger-than-1.patch
+io_uring-net-always-use-current-transfer-count-for-buffer-put.patch
+io_uring-net-mark-iov-as-dynamically-allocated-even-for-single-segments.patch
+io_uring-kbuf-flag-partial-buffer-mappings.patch
+mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch