6.0-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 16 Oct 2022 07:10:08 +0000 (09:10 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 16 Oct 2022 07:10:08 +0000 (09:10 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 16 Oct 2022 07:10:08 +0000 (09:10 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 16 Oct 2022 07:10:08 +0000 (09:10 +0200)
diff --git a/queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch b/queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch

new file mode 100644 (file)

index 0000000..e8d8915
--- /dev/null
+++ b/queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch
@@ -0,0 +1,47 @@
+From a47b255e90395bdb481975ab3d9e96fcf8b3165f Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 21 Sep 2022 12:17:46 +0100
+Subject: io_uring: add custom opcode hooks on fail
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit a47b255e90395bdb481975ab3d9e96fcf8b3165f upstream.
+
+Sometimes we have to do a little bit of a fixup on a request failure in
+io_req_complete_failed(). Add a callback in opdef for that.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/b734cff4e67cb30cca976b9face321023f37549a.1663668091.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    4 ++++
+ io_uring/opdef.h    |    1 +
+ 2 files changed, 5 insertions(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -823,8 +823,12 @@ inline void __io_req_complete(struct io_
+ 
+ void io_req_complete_failed(struct io_kiocb *req, s32 res)
+ {
++      const struct io_op_def *def = &io_op_defs[req->opcode];
++
+       req_set_fail(req);
+       io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
++      if (def->fail)
++              def->fail(req);
+       io_req_complete_post(req);
+ }
+ 
+--- a/io_uring/opdef.h
++++ b/io_uring/opdef.h
+@@ -36,6 +36,7 @@ struct io_op_def {
+       int (*issue)(struct io_kiocb *, unsigned int);
+       int (*prep_async)(struct io_kiocb *);
+       void (*cleanup)(struct io_kiocb *);
++      void (*fail)(struct io_kiocb *);
+ };
+ 
+ extern const struct io_op_def io_op_defs[];
diff --git a/queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch b/queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch

new file mode 100644 (file)

index 0000000..be69db8
--- /dev/null
+++ b/queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch
@@ -0,0 +1,99 @@
+From 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 3 Oct 2022 13:59:47 +0100
+Subject: io_uring/af_unix: defer registered files gc to io_uring release
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 upstream.
+
+Instead of putting io_uring's registered files in unix_gc() we want it
+to be done by io_uring itself. The trick here is to consider io_uring
+registered files for cycle detection but not actually putting them down.
+Because io_uring can't register other ring instances, this will remove
+all refs to the ring file triggering the ->release path and clean up
+with io_ring_ctx_free().
+
+Cc: stable@vger.kernel.org
+Fixes: 6b06314c47e1 ("io_uring: add file set registration")
+Reported-and-tested-by: David Bouman <dbouman03@gmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[axboe: add kerneldoc comment to skb, fold in skb leak fix]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h |    2 ++
+ io_uring/rsrc.c        |    1 +
+ net/unix/garbage.c     |   20 ++++++++++++++++++++
+ 3 files changed, 23 insertions(+)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -796,6 +796,7 @@ typedef unsigned char *sk_buff_data_t;
+  *    @csum_level: indicates the number of consecutive checksums found in
+  *            the packet minus one that have been verified as
+  *            CHECKSUM_UNNECESSARY (max 3)
++ *    @scm_io_uring: SKB holds io_uring registered files
+  *    @dst_pending_confirm: need to confirm neighbour
+  *    @decrypted: Decrypted SKB
+  *    @slow_gro: state present at GRO time, slower prepare step required
+@@ -975,6 +976,7 @@ struct sk_buff {
+ #endif
+       __u8                    slow_gro:1;
+       __u8                    csum_not_inet:1;
++      __u8                    scm_io_uring:1;
+ 
+ #ifdef CONFIG_NET_SCHED
+       __u16                   tc_index;       /* traffic control index */
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring
+ 
+               UNIXCB(skb).fp = fpl;
+               skb->sk = sk;
++              skb->scm_io_uring = 1;
+               skb->destructor = unix_destruct_scm;
+               refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+       }
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -204,6 +204,7 @@ void wait_for_unix_gc(void)
+ /* The external entry point: unix_gc() */
+ void unix_gc(void)
+ {
++      struct sk_buff *next_skb, *skb;
+       struct unix_sock *u;
+       struct unix_sock *next;
+       struct sk_buff_head hitlist;
+@@ -297,11 +298,30 @@ void unix_gc(void)
+ 
+       spin_unlock(&unix_gc_lock);
+ 
++      /* We need io_uring to clean its registered files, ignore all io_uring
++       * originated skbs. It's fine as io_uring doesn't keep references to
++       * other io_uring instances and so killing all other files in the cycle
++       * will put all io_uring references forcing it to go through normal
++       * release.path eventually putting registered files.
++       */
++      skb_queue_walk_safe(&hitlist, skb, next_skb) {
++              if (skb->scm_io_uring) {
++                      __skb_unlink(skb, &hitlist);
++                      skb_queue_tail(&skb->sk->sk_receive_queue, skb);
++              }
++      }
++
+       /* Here we are. Hitlist is filled. Die. */
+       __skb_queue_purge(&hitlist);
+ 
+       spin_lock(&unix_gc_lock);
+ 
++      /* There could be io_uring registered files, just push them back to
++       * the inflight list
++       */
++      list_for_each_entry_safe(u, next, &gc_candidates, link)
++              list_move_tail(&u->link, &gc_inflight_list);
++
+       /* All candidates should have been detached by now. */
+       BUG_ON(!list_empty(&gc_candidates));
+ 
diff --git a/queue-6.0/io_uring-correct-pinned_vm-accounting.patch b/queue-6.0/io_uring-correct-pinned_vm-accounting.patch

new file mode 100644 (file)

index 0000000..a2ab359
--- /dev/null
+++ b/queue-6.0/io_uring-correct-pinned_vm-accounting.patch
@@ -0,0 +1,48 @@
+From 42b6419d0aba47c5d8644cdc0b68502254671de5 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 4 Oct 2022 03:19:08 +0100
+Subject: io_uring: correct pinned_vm accounting
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 42b6419d0aba47c5d8644cdc0b68502254671de5 upstream.
+
+->mm_account should be released only after we free all registered
+buffers, otherwise __io_sqe_buffers_unregister() will see a NULL
+->mm_account and skip locked_vm accounting.
+
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |   10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2422,12 +2422,6 @@ static void io_req_caches_free(struct io
+ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
+ {
+       io_sq_thread_finish(ctx);
+-
+-      if (ctx->mm_account) {
+-              mmdrop(ctx->mm_account);
+-              ctx->mm_account = NULL;
+-      }
+-
+       io_rsrc_refs_drop(ctx);
+       /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
+       io_wait_rsrc_data(ctx->buf_data);
+@@ -2470,6 +2464,10 @@ static __cold void io_ring_ctx_free(stru
+       WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
+       WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
+ 
++      if (ctx->mm_account) {
++              mmdrop(ctx->mm_account);
++              ctx->mm_account = NULL;
++      }
+       io_mem_free(ctx->rings);
+       io_mem_free(ctx->sq_sqes);
+ 
diff --git a/queue-6.0/io_uring-limit-registration-w-single_issuer.patch b/queue-6.0/io_uring-limit-registration-w-single_issuer.patch

new file mode 100644 (file)

index 0000000..0d69f1e
--- /dev/null
+++ b/queue-6.0/io_uring-limit-registration-w-single_issuer.patch
@@ -0,0 +1,36 @@
+From d7cce96c449e35bbfd41e830b341b95973891eed Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 27 Sep 2022 01:13:30 +0100
+Subject: io_uring: limit registration w/ SINGLE_ISSUER
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit d7cce96c449e35bbfd41e830b341b95973891eed upstream.
+
+IORING_SETUP_SINGLE_ISSUER restricts what tasks can submit requests.
+Extend it to registration as well, so non-owning task can't do
+registrations. It's not necessary at the moment but might be useful in
+the future.
+
+Cc: <stable@vger.kernel.org> # 6.0
+Fixes: 97bbdc06a444 ("io_uring: add IORING_SETUP_SINGLE_ISSUER")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/f52a6a9c8a8990d4a831f73c0571e7406aac2bba.1664237592.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3710,6 +3710,9 @@ static int __io_uring_register(struct io
+       if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
+               return -ENXIO;
+ 
++      if (ctx->submitter_task && ctx->submitter_task != current)
++              return -EEXIST;
++
+       if (ctx->restricted) {
+               if (opcode >= IORING_REGISTER_LAST)
+                       return -EINVAL;
diff --git a/queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch b/queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch

new file mode 100644 (file)

index 0000000..57a62ac
--- /dev/null
+++ b/queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch
@@ -0,0 +1,88 @@
+From 7e6b638ed501cced4e472298d6b08dd16346f3a6 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 21 Sep 2022 12:17:48 +0100
+Subject: io_uring/net: don't lose partial send/recv on fail
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 7e6b638ed501cced4e472298d6b08dd16346f3a6 upstream.
+
+Just as with rw, partial send/recv may end up in
+io_req_complete_failed() and lose the result, make sure we return the
+number of bytes processed.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/a4ff95897b5419356fca9ea55db91ac15b2975f9.1663668091.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c   |   10 ++++++++++
+ io_uring/net.h   |    2 ++
+ io_uring/opdef.c |    4 ++++
+ 3 files changed, 16 insertions(+)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1087,6 +1087,16 @@ int io_sendzc(struct io_kiocb *req, unsi
+       return IOU_OK;
+ }
+ 
++void io_sendrecv_fail(struct io_kiocb *req)
++{
++      struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
++      int res = req->cqe.res;
++
++      if (req->flags & REQ_F_PARTIAL_IO)
++              res = sr->done_io;
++      io_req_set_res(req, res, req->cqe.flags);
++}
++
+ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ {
+       struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
+--- a/io_uring/net.h
++++ b/io_uring/net.h
+@@ -43,6 +43,8 @@ int io_recvmsg_prep(struct io_kiocb *req
+ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags);
+ int io_recv(struct io_kiocb *req, unsigned int issue_flags);
+ 
++void io_sendrecv_fail(struct io_kiocb *req);
++
+ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+ int io_accept(struct io_kiocb *req, unsigned int issue_flags);
+ 
+--- a/io_uring/opdef.c
++++ b/io_uring/opdef.c
+@@ -157,6 +157,7 @@ const struct io_op_def io_op_defs[] = {
+               .issue                  = io_sendmsg,
+               .prep_async             = io_sendmsg_prep_async,
+               .cleanup                = io_sendmsg_recvmsg_cleanup,
++              .fail                   = io_sendrecv_fail,
+ #else
+               .prep                   = io_eopnotsupp_prep,
+ #endif
+@@ -174,6 +175,7 @@ const struct io_op_def io_op_defs[] = {
+               .issue                  = io_recvmsg,
+               .prep_async             = io_recvmsg_prep_async,
+               .cleanup                = io_sendmsg_recvmsg_cleanup,
++              .fail                   = io_sendrecv_fail,
+ #else
+               .prep                   = io_eopnotsupp_prep,
+ #endif
+@@ -316,6 +318,7 @@ const struct io_op_def io_op_defs[] = {
+ #if defined(CONFIG_NET)
+               .prep                   = io_sendmsg_prep,
+               .issue                  = io_send,
++              .fail                   = io_sendrecv_fail,
+ #else
+               .prep                   = io_eopnotsupp_prep,
+ #endif
+@@ -331,6 +334,7 @@ const struct io_op_def io_op_defs[] = {
+ #if defined(CONFIG_NET)
+               .prep                   = io_recvmsg_prep,
+               .issue                  = io_recv,
++              .fail                   = io_sendrecv_fail,
+ #else
+               .prep                   = io_eopnotsupp_prep,
+ #endif
diff --git a/queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch b/queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch

new file mode 100644 (file)

index 0000000..5a92ef1
--- /dev/null
+++ b/queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch
@@ -0,0 +1,69 @@
+From 5693bcce892d7b8b15a7a92b011d3d40a023b53c Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 21 Sep 2022 12:17:49 +0100
+Subject: io_uring/net: don't lose partial send_zc on fail
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 5693bcce892d7b8b15a7a92b011d3d40a023b53c upstream.
+
+Partial zc send may end up in io_req_complete_failed(), which not only
+would return invalid result but also mask out the notification leading
+to lifetime issues.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/5673285b5e83e6ceca323727b4ddaa584b5cc91e.1663668091.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c   |   16 ++++++++++++++++
+ io_uring/net.h   |    1 +
+ io_uring/opdef.c |    1 +
+ 3 files changed, 18 insertions(+)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -1097,6 +1097,22 @@ void io_sendrecv_fail(struct io_kiocb *r
+       io_req_set_res(req, res, req->cqe.flags);
+ }
+ 
++void io_send_zc_fail(struct io_kiocb *req)
++{
++      struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
++      int res = req->cqe.res;
++
++      if (req->flags & REQ_F_PARTIAL_IO) {
++              if (req->flags & REQ_F_NEED_CLEANUP) {
++                      io_notif_flush(sr->notif);
++                      sr->notif = NULL;
++                      req->flags &= ~REQ_F_NEED_CLEANUP;
++              }
++              res = sr->done_io;
++      }
++      io_req_set_res(req, res, req->cqe.flags);
++}
++
+ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ {
+       struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
+--- a/io_uring/net.h
++++ b/io_uring/net.h
+@@ -58,6 +58,7 @@ int io_connect(struct io_kiocb *req, uns
+ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags);
+ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+ void io_sendzc_cleanup(struct io_kiocb *req);
++void io_send_zc_fail(struct io_kiocb *req);
+ 
+ void io_netmsg_cache_free(struct io_cache_entry *entry);
+ #else
+--- a/io_uring/opdef.c
++++ b/io_uring/opdef.c
+@@ -494,6 +494,7 @@ const struct io_op_def io_op_defs[] = {
+               .issue                  = io_sendzc,
+               .prep_async             = io_sendzc_prep_async,
+               .cleanup                = io_sendzc_cleanup,
++              .fail                   = io_send_zc_fail,
+ #else
+               .prep                   = io_eopnotsupp_prep,
+ #endif
diff --git a/queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch b/queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch

new file mode 100644 (file)

index 0000000..97fbbff
--- /dev/null
+++ b/queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch
@@ -0,0 +1,33 @@
+From 6f10ae8a155446248055c7ddd480ef40139af788 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Thu, 29 Sep 2022 22:23:18 +0100
+Subject: io_uring/net: don't update msg_name if not provided
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 6f10ae8a155446248055c7ddd480ef40139af788 upstream.
+
+io_sendmsg_copy_hdr() may clear msg->msg_name if the userspace didn't
+provide it, we should retain NULL in this case.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/97d49f61b5ec76d0900df658cfde3aa59ff22121.1664486545.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -163,7 +163,8 @@ static int io_setup_async_msg(struct io_
+       }
+       req->flags |= REQ_F_NEED_CLEANUP;
+       memcpy(async_msg, kmsg, sizeof(*kmsg));
+-      async_msg->msg.msg_name = &async_msg->addr;
++      if (async_msg->msg.msg_name)
++              async_msg->msg.msg_name = &async_msg->addr;
+       /* if were using fast_iov, set it to the new one */
+       if (!kmsg->free_iov) {
+               size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
diff --git a/queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch b/queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch

new file mode 100644 (file)

index 0000000..bc97eaa
--- /dev/null
+++ b/queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch
@@ -0,0 +1,68 @@
+From 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 Mon Sep 17 00:00:00 2001
+From: Stefan Metzmacher <metze@samba.org>
+Date: Thu, 29 Sep 2022 09:39:10 +0200
+Subject: io_uring/net: fix fast_iov assignment in io_setup_async_msg()
+
+From: Stefan Metzmacher <metze@samba.org>
+
+commit 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 upstream.
+
+I hit a very bad problem during my tests of SENDMSG_ZC.
+BUG(); in first_iovec_segment() triggered very easily.
+The problem was io_setup_async_msg() in the partial retry case,
+which seems to happen more often with _ZC.
+
+iov_iter_iovec_advance() may change i->iov in order to have i->iov_offset
+being only relative to the first element.
+
+Which means kmsg->msg.msg_iter.iov is no longer the
+same as kmsg->fast_iov.
+
+But this would rewind the copy to be the start of
+async_msg->fast_iov, which means the internal
+state of async_msg->msg.msg_iter is inconsistent.
+
+I tested with 5 vectors with length like this 4, 0, 64, 20, 8388608
+and got short writes with:
+- ret=2675244 min_ret=8388692 => remaining 5713448 sr->done_io=2675244
+- ret=-EAGAIN => io_uring_poll_arm
+- ret=4911225 min_ret=5713448 => remaining 802223  sr->done_io=7586469
+- ret=-EAGAIN => io_uring_poll_arm
+- ret=802223  min_ret=802223  => res=8388692
+
+While this was easily triggered with SENDMSG_ZC (queued for 6.1),
+it was a potential problem starting with 7ba89d2af17aa879dda30f5d5d3f152e587fc551
+in 5.18 for IORING_OP_RECVMSG.
+And also with 4c3c09439c08b03d9503df0ca4c7619c5842892e in 5.19
+for IORING_OP_SENDMSG.
+
+However 257e84a5377fbbc336ff563833a8712619acce56 introduced the critical
+code into io_setup_async_msg() in 5.11.
+
+Fixes: 7ba89d2af17aa ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly")
+Fixes: 257e84a5377fb ("io_uring: refactor sendmsg/recvmsg iov managing")
+Cc: stable@vger.kernel.org
+Signed-off-by: Stefan Metzmacher <metze@samba.org>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/b2e7be246e2fb173520862b0c7098e55767567a2.1664436949.git.metze@samba.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -165,8 +165,10 @@ static int io_setup_async_msg(struct io_
+       memcpy(async_msg, kmsg, sizeof(*kmsg));
+       async_msg->msg.msg_name = &async_msg->addr;
+       /* if were using fast_iov, set it to the new one */
+-      if (!async_msg->free_iov)
+-              async_msg->msg.msg_iter.iov = async_msg->fast_iov;
++      if (!kmsg->free_iov) {
++              size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
++              async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
++      }
+ 
+       return -EAGAIN;
+ }
diff --git a/queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch b/queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch

new file mode 100644 (file)

index 0000000..f16e1a3
--- /dev/null
+++ b/queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch
@@ -0,0 +1,87 @@
+From 3fb1bd68817288729179444caf1fd5c5c4d2d65d Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 4 Oct 2022 20:29:48 -0600
+Subject: io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 3fb1bd68817288729179444caf1fd5c5c4d2d65d upstream.
+
+We treat EINPROGRESS like EAGAIN, but if we're retrying post getting
+EINPROGRESS, then we just need to check the socket for errors and
+terminate the request.
+
+This was exposed on a bluetooth connection request which ends up
+taking a while and hitting EINPROGRESS, and yields a CQE result of
+-EBADFD because we're retrying a connect on a socket that is now
+connected.
+
+Cc: stable@vger.kernel.org
+Fixes: 87f80d623c6c ("io_uring: handle connect -EINPROGRESS like -EAGAIN")
+Link: https://github.com/axboe/liburing/issues/671
+Reported-by: Aidan Sun <aidansun05@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/net.c |   28 ++++++++++++++++++++++------
+ 1 file changed, 22 insertions(+), 6 deletions(-)
+
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -46,6 +46,7 @@ struct io_connect {
+       struct file                     *file;
+       struct sockaddr __user          *addr;
+       int                             addr_len;
++      bool                            in_progress;
+ };
+ 
+ struct io_sr_msg {
+@@ -1279,6 +1280,7 @@ int io_connect_prep(struct io_kiocb *req
+ 
+       conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+       conn->addr_len =  READ_ONCE(sqe->addr2);
++      conn->in_progress = false;
+       return 0;
+ }
+ 
+@@ -1290,6 +1292,16 @@ int io_connect(struct io_kiocb *req, uns
+       int ret;
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ 
++      if (connect->in_progress) {
++              struct socket *socket;
++
++              ret = -ENOTSOCK;
++              socket = sock_from_file(req->file);
++              if (socket)
++                      ret = sock_error(socket->sk);
++              goto out;
++      }
++
+       if (req_has_async_data(req)) {
+               io = req->async_data;
+       } else {
+@@ -1306,13 +1318,17 @@ int io_connect(struct io_kiocb *req, uns
+       ret = __sys_connect_file(req->file, &io->address,
+                                       connect->addr_len, file_flags);
+       if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
+-              if (req_has_async_data(req))
+-                      return -EAGAIN;
+-              if (io_alloc_async_data(req)) {
+-                      ret = -ENOMEM;
+-                      goto out;
++              if (ret == -EINPROGRESS) {
++                      connect->in_progress = true;
++              } else {
++                      if (req_has_async_data(req))
++                              return -EAGAIN;
++                      if (io_alloc_async_data(req)) {
++                              ret = -ENOMEM;
++                              goto out;
++                      }
++                      memcpy(req->async_data, &__io, sizeof(__io));
+               }
+-              memcpy(req->async_data, &__io, sizeof(__io));
+               return -EAGAIN;
+       }
+       if (ret == -ERESTARTSYS)
diff --git a/queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch b/queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch

new file mode 100644 (file)

index 0000000..9062f74
--- /dev/null
+++ b/queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch
@@ -0,0 +1,97 @@
+From 47b4c68660752facfa6247b1fc9ca9d722b8b601 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Wed, 21 Sep 2022 12:17:47 +0100
+Subject: io_uring/rw: don't lose partial IO result on fail
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 47b4c68660752facfa6247b1fc9ca9d722b8b601 upstream.
+
+A partially done read/write may end up in io_req_complete_failed() and
+lose the result, make sure we return the number of bytes processed.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/05e0879c226bcd53b441bf92868eadd4bf04e2fc.1663668091.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/opdef.c |    6 ++++++
+ io_uring/rw.c    |    8 ++++++++
+ io_uring/rw.h    |    1 +
+ 3 files changed, 15 insertions(+)
+
+--- a/io_uring/opdef.c
++++ b/io_uring/opdef.c
+@@ -69,6 +69,7 @@ const struct io_op_def io_op_defs[] = {
+               .issue                  = io_read,
+               .prep_async             = io_readv_prep_async,
+               .cleanup                = io_readv_writev_cleanup,
++              .fail                   = io_rw_fail,
+       },
+       [IORING_OP_WRITEV] = {
+               .needs_file             = 1,
+@@ -85,6 +86,7 @@ const struct io_op_def io_op_defs[] = {
+               .issue                  = io_write,
+               .prep_async             = io_writev_prep_async,
+               .cleanup                = io_readv_writev_cleanup,
++              .fail                   = io_rw_fail,
+       },
+       [IORING_OP_FSYNC] = {
+               .needs_file             = 1,
+@@ -105,6 +107,7 @@ const struct io_op_def io_op_defs[] = {
+               .name                   = "READ_FIXED",
+               .prep                   = io_prep_rw,
+               .issue                  = io_read,
++              .fail                   = io_rw_fail,
+       },
+       [IORING_OP_WRITE_FIXED] = {
+               .needs_file             = 1,
+@@ -119,6 +122,7 @@ const struct io_op_def io_op_defs[] = {
+               .name                   = "WRITE_FIXED",
+               .prep                   = io_prep_rw,
+               .issue                  = io_write,
++              .fail                   = io_rw_fail,
+       },
+       [IORING_OP_POLL_ADD] = {
+               .needs_file             = 1,
+@@ -273,6 +277,7 @@ const struct io_op_def io_op_defs[] = {
+               .name                   = "READ",
+               .prep                   = io_prep_rw,
+               .issue                  = io_read,
++              .fail                   = io_rw_fail,
+       },
+       [IORING_OP_WRITE] = {
+               .needs_file             = 1,
+@@ -287,6 +292,7 @@ const struct io_op_def io_op_defs[] = {
+               .name                   = "WRITE",
+               .prep                   = io_prep_rw,
+               .issue                  = io_write,
++              .fail                   = io_rw_fail,
+       },
+       [IORING_OP_FADVISE] = {
+               .needs_file             = 1,
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -984,6 +984,14 @@ static void io_cqring_ev_posted_iopoll(s
+               io_cqring_wake(ctx);
+ }
+ 
++void io_rw_fail(struct io_kiocb *req)
++{
++      int res;
++
++      res = io_fixup_rw_res(req, req->cqe.res);
++      io_req_set_res(req, res, req->cqe.flags);
++}
++
+ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
+ {
+       struct io_wq_work_node *pos, *start, *prev;
+--- a/io_uring/rw.h
++++ b/io_uring/rw.h
+@@ -21,3 +21,4 @@ int io_readv_prep_async(struct io_kiocb
+ int io_write(struct io_kiocb *req, unsigned int issue_flags);
+ int io_writev_prep_async(struct io_kiocb *req);
+ void io_readv_writev_cleanup(struct io_kiocb *req);
++void io_rw_fail(struct io_kiocb *req);
diff --git a/queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch b/queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch

new file mode 100644 (file)

index 0000000..31a02ad
--- /dev/null
+++ b/queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch
@@ -0,0 +1,39 @@
+From c278d9f8ac0db5590909e6d9e85b5ca2b786704f Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 27 Sep 2022 00:44:40 +0100
+Subject: io_uring/rw: don't lose short results on io_setup_async_rw()
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit c278d9f8ac0db5590909e6d9e85b5ca2b786704f upstream.
+
+If a retry io_setup_async_rw() fails we lose result from the first
+io_iter_do_read(), which is a problem mostly for streams/sockets.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/0e8d20cebe5fc9c96ed268463c394237daabc384.1664235732.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rw.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -794,10 +794,12 @@ int io_read(struct io_kiocb *req, unsign
+       iov_iter_restore(&s->iter, &s->iter_state);
+ 
+       ret2 = io_setup_async_rw(req, iovec, s, true);
+-      if (ret2)
+-              return ret2;
+-
+       iovec = NULL;
++      if (ret2) {
++              ret = ret > 0 ? ret : ret2;
++              goto done;
++      }
++
+       io = req->async_data;
+       s = &io->s;
+       /*
diff --git a/queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch b/queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch

new file mode 100644 (file)

index 0000000..ebcfff5
--- /dev/null
+++ b/queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch
@@ -0,0 +1,38 @@
+From bf68b5b34311ee57ed40749a1257a30b46127556 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Tue, 27 Sep 2022 00:44:39 +0100
+Subject: io_uring/rw: fix unexpected link breakage
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit bf68b5b34311ee57ed40749a1257a30b46127556 upstream.
+
+req->cqe.res is set in io_read() to the amount of bytes left to be done,
+which is used to figure out whether to fail a read or not. However,
+io_read() may do another read without returning, and we stash the previous
+value into ->bytes_done but forget to update cqe.res. Then we ask a read
+to do strictly less than cqe.res but expect the return to be exactly
+cqe.res.
+
+Fix the bug by updating cqe.res for retries.
+
+Cc: stable@vger.kernel.org
+Reported-and-Tested-by: Beld Zhang <beldzhang@gmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/3a1088440c7be98e5800267af922a67da0ef9f13.1664235732.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/rw.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -823,6 +823,7 @@ int io_read(struct io_kiocb *req, unsign
+                       return -EAGAIN;
+               }
+ 
++              req->cqe.res = iov_iter_count(&s->iter);
+               /*
+                * Now retry read with the IOCB_WAITQ parts set in the iocb. If
+                * we get -EIOCBQUEUED, then we'll get a notification when the
diff --git a/queue-6.0/series b/queue-6.0/series

index 93d5a44acd91c781000fb071d7f25d224a517445..9993679fcad5345ecf444daced2d0b14e944e285 100644 (file)
--- a/queue-6.0/series
+++ b/queue-6.0/series
@@ -7,3 +7,15 @@ alsa-hda-realtek-correct-pin-configs-for-asus-g533z.patch
  alsa-hda-realtek-add-quirk-for-asus-gv601r-laptop.patch
  alsa-hda-realtek-add-intel-reference-ssid-to-support-headset-keys.patch
  mtd-rawnand-atmel-unmap-streaming-dma-mappings.patch
+io_uring-add-custom-opcode-hooks-on-fail.patch
+io_uring-rw-don-t-lose-partial-io-result-on-fail.patch
+io_uring-net-don-t-lose-partial-send-recv-on-fail.patch
+io_uring-net-don-t-lose-partial-send_zc-on-fail.patch
+io_uring-rw-fix-unexpected-link-breakage.patch
+io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch
+io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch
+io_uring-net-don-t-update-msg_name-if-not-provided.patch
+io_uring-limit-registration-w-single_issuer.patch
+io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch
+io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch
+io_uring-correct-pinned_vm-accounting.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 16 Oct 2022 07:10:08 +0000 (09:10 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 16 Oct 2022 07:10:08 +0000 (09:10 +0200)
queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-correct-pinned_vm-accounting.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-limit-registration-w-single_issuer.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch	[new file with mode: 0644]	patch \| blob
queue-6.0/series		patch \| blob \| blame \| history