From 514d55e433dc944b6700b926da2c98e1f9debcb1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 16 Oct 2022 09:10:08 +0200 Subject: [PATCH] 6.0-stable patches added patches: io_uring-add-custom-opcode-hooks-on-fail.patch io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch io_uring-correct-pinned_vm-accounting.patch io_uring-limit-registration-w-single_issuer.patch io_uring-net-don-t-lose-partial-send-recv-on-fail.patch io_uring-net-don-t-lose-partial-send_zc-on-fail.patch io_uring-net-don-t-update-msg_name-if-not-provided.patch io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch io_uring-rw-don-t-lose-partial-io-result-on-fail.patch io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch io_uring-rw-fix-unexpected-link-breakage.patch --- ...ring-add-custom-opcode-hooks-on-fail.patch | 47 +++++++++ ...istered-files-gc-to-io_uring-release.patch | 99 +++++++++++++++++++ ...o_uring-correct-pinned_vm-accounting.patch | 48 +++++++++ ...g-limit-registration-w-single_issuer.patch | 36 +++++++ ...don-t-lose-partial-send-recv-on-fail.patch | 88 +++++++++++++++++ ...t-don-t-lose-partial-send_zc-on-fail.patch | 69 +++++++++++++ ...on-t-update-msg_name-if-not-provided.patch | 33 +++++++ ...iov-assignment-in-io_setup_async_msg.patch | 68 +++++++++++++ ...ogress-correct-for-ioring_op_connect.patch | 87 ++++++++++++++++ ...don-t-lose-partial-io-result-on-fail.patch | 97 ++++++++++++++++++ ...e-short-results-on-io_setup_async_rw.patch | 39 ++++++++ ...ring-rw-fix-unexpected-link-breakage.patch | 38 +++++++ queue-6.0/series | 12 +++ 13 files changed, 761 insertions(+) create mode 100644 queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch create mode 100644 queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch create mode 100644 queue-6.0/io_uring-correct-pinned_vm-accounting.patch create mode 100644 queue-6.0/io_uring-limit-registration-w-single_issuer.patch create mode 100644 queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch create mode 100644 queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch create mode 100644 queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch create mode 100644 queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch create mode 100644 queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch create mode 100644 queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch create mode 100644 queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch create mode 100644 queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch diff --git a/queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch b/queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch new file mode 100644 index 00000000000..e8d89155fc4 --- /dev/null +++ b/queue-6.0/io_uring-add-custom-opcode-hooks-on-fail.patch @@ -0,0 +1,47 @@ +From a47b255e90395bdb481975ab3d9e96fcf8b3165f Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 21 Sep 2022 12:17:46 +0100 +Subject: io_uring: add custom opcode hooks on fail + +From: Pavel Begunkov + +commit a47b255e90395bdb481975ab3d9e96fcf8b3165f upstream. + +Sometimes we have to do a little bit of a fixup on a request failuer in +io_req_complete_failed(). Add a callback in opdef for that. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/b734cff4e67cb30cca976b9face321023f37549a.1663668091.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 4 ++++ + io_uring/opdef.h | 1 + + 2 files changed, 5 insertions(+) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -823,8 +823,12 @@ inline void __io_req_complete(struct io_ + + void io_req_complete_failed(struct io_kiocb *req, s32 res) + { ++ const struct io_op_def *def = &io_op_defs[req->opcode]; ++ + req_set_fail(req); + io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED)); ++ if (def->fail) ++ def->fail(req); + io_req_complete_post(req); + } + +--- a/io_uring/opdef.h ++++ b/io_uring/opdef.h +@@ -36,6 +36,7 @@ struct io_op_def { + int (*issue)(struct io_kiocb *, unsigned int); + int (*prep_async)(struct io_kiocb *); + void (*cleanup)(struct io_kiocb *); ++ void (*fail)(struct io_kiocb *); + }; + + extern const struct io_op_def io_op_defs[]; diff --git a/queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch b/queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch new file mode 100644 index 00000000000..be69db8a736 --- /dev/null +++ b/queue-6.0/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch @@ -0,0 +1,99 @@ +From 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Mon, 3 Oct 2022 13:59:47 +0100 +Subject: io_uring/af_unix: defer registered files gc to io_uring release + +From: Pavel Begunkov + +commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 upstream. + +Instead of putting io_uring's registered files in unix_gc() we want it +to be done by io_uring itself. The trick here is to consider io_uring +registered files for cycle detection but not actually putting them down. +Because io_uring can't register other ring instances, this will remove +all refs to the ring file triggering the ->release path and clean up +with io_ring_ctx_free(). + +Cc: stable@vger.kernel.org +Fixes: 6b06314c47e1 ("io_uring: add file set registration") +Reported-and-tested-by: David Bouman +Signed-off-by: Pavel Begunkov +Signed-off-by: Thadeu Lima de Souza Cascardo +[axboe: add kerneldoc comment to skb, fold in skb leak fix] +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 2 ++ + io_uring/rsrc.c | 1 + + net/unix/garbage.c | 20 ++++++++++++++++++++ + 3 files changed, 23 insertions(+) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -796,6 +796,7 @@ typedef unsigned char *sk_buff_data_t; + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) ++ * @scm_io_uring: SKB holds io_uring registered files + * @dst_pending_confirm: need to confirm neighbour + * @decrypted: Decrypted SKB + * @slow_gro: state present at GRO time, slower prepare step required +@@ -975,6 +976,7 @@ struct sk_buff { + #endif + __u8 slow_gro:1; + __u8 csum_not_inet:1; ++ __u8 scm_io_uring:1; + + #ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +--- a/io_uring/rsrc.c ++++ b/io_uring/rsrc.c +@@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring + + UNIXCB(skb).fp = fpl; + skb->sk = sk; ++ skb->scm_io_uring = 1; + skb->destructor = unix_destruct_scm; + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + } +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -204,6 +204,7 @@ void wait_for_unix_gc(void) + /* The external entry point: unix_gc() */ + void unix_gc(void) + { ++ struct sk_buff *next_skb, *skb; + struct unix_sock *u; + struct unix_sock *next; + struct sk_buff_head hitlist; +@@ -297,11 +298,30 @@ void unix_gc(void) + + spin_unlock(&unix_gc_lock); + ++ /* We need io_uring to clean its registered files, ignore all io_uring ++ * originated skbs. It's fine as io_uring doesn't keep references to ++ * other io_uring instances and so killing all other files in the cycle ++ * will put all io_uring references forcing it to go through normal ++ * release.path eventually putting registered files. ++ */ ++ skb_queue_walk_safe(&hitlist, skb, next_skb) { ++ if (skb->scm_io_uring) { ++ __skb_unlink(skb, &hitlist); ++ skb_queue_tail(&skb->sk->sk_receive_queue, skb); ++ } ++ } ++ + /* Here we are. Hitlist is filled. Die. */ + __skb_queue_purge(&hitlist); + + spin_lock(&unix_gc_lock); + ++ /* There could be io_uring registered files, just push them back to ++ * the inflight list ++ */ ++ list_for_each_entry_safe(u, next, &gc_candidates, link) ++ list_move_tail(&u->link, &gc_inflight_list); ++ + /* All candidates should have been detached by now. */ + BUG_ON(!list_empty(&gc_candidates)); + diff --git a/queue-6.0/io_uring-correct-pinned_vm-accounting.patch b/queue-6.0/io_uring-correct-pinned_vm-accounting.patch new file mode 100644 index 00000000000..a2ab359ce45 --- /dev/null +++ b/queue-6.0/io_uring-correct-pinned_vm-accounting.patch @@ -0,0 +1,48 @@ +From 42b6419d0aba47c5d8644cdc0b68502254671de5 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 4 Oct 2022 03:19:08 +0100 +Subject: io_uring: correct pinned_vm accounting + +From: Pavel Begunkov + +commit 42b6419d0aba47c5d8644cdc0b68502254671de5 upstream. + +->mm_account should be released only after we free all registered +buffers, otherwise __io_sqe_buffers_unregister() will see a NULL +->mm_account and skip locked_vm accounting. + +Cc: +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2422,12 +2422,6 @@ static void io_req_caches_free(struct io + static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) + { + io_sq_thread_finish(ctx); +- +- if (ctx->mm_account) { +- mmdrop(ctx->mm_account); +- ctx->mm_account = NULL; +- } +- + io_rsrc_refs_drop(ctx); + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); +@@ -2470,6 +2464,10 @@ static __cold void io_ring_ctx_free(stru + WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); + WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots); + ++ if (ctx->mm_account) { ++ mmdrop(ctx->mm_account); ++ ctx->mm_account = NULL; ++ } + io_mem_free(ctx->rings); + io_mem_free(ctx->sq_sqes); + diff --git a/queue-6.0/io_uring-limit-registration-w-single_issuer.patch b/queue-6.0/io_uring-limit-registration-w-single_issuer.patch new file mode 100644 index 00000000000..0d69f1e677e --- /dev/null +++ b/queue-6.0/io_uring-limit-registration-w-single_issuer.patch @@ -0,0 +1,36 @@ +From d7cce96c449e35bbfd41e830b341b95973891eed Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 27 Sep 2022 01:13:30 +0100 +Subject: io_uring: limit registration w/ SINGLE_ISSUER + +From: Pavel Begunkov + +commit d7cce96c449e35bbfd41e830b341b95973891eed upstream. + +IORING_SETUP_SINGLE_ISSUER restricts what tasks can submit requests. +Extend it to registration as well, so non-owning task can't do +registrations. It's not necessary at the moment but might be useful in +the future. + +Cc: # 6.0 +Fixes: 97bbdc06a444 ("io_uring: add IORING_SETUP_SINGLE_ISSUER") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/f52a6a9c8a8990d4a831f73c0571e7406aac2bba.1664237592.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3710,6 +3710,9 @@ static int __io_uring_register(struct io + if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs))) + return -ENXIO; + ++ if (ctx->submitter_task && ctx->submitter_task != current) ++ return -EEXIST; ++ + if (ctx->restricted) { + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; diff --git a/queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch b/queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch new file mode 100644 index 00000000000..57a62ac8647 --- /dev/null +++ b/queue-6.0/io_uring-net-don-t-lose-partial-send-recv-on-fail.patch @@ -0,0 +1,88 @@ +From 7e6b638ed501cced4e472298d6b08dd16346f3a6 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 21 Sep 2022 12:17:48 +0100 +Subject: io_uring/net: don't lose partial send/recv on fail + +From: Pavel Begunkov + +commit 7e6b638ed501cced4e472298d6b08dd16346f3a6 upstream. + +Just as with rw, partial send/recv may end up in +io_req_complete_failed() and loose the result, make sure we return the +number of bytes processed. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/a4ff95897b5419356fca9ea55db91ac15b2975f9.1663668091.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 10 ++++++++++ + io_uring/net.h | 2 ++ + io_uring/opdef.c | 4 ++++ + 3 files changed, 16 insertions(+) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -1087,6 +1087,16 @@ int io_sendzc(struct io_kiocb *req, unsi + return IOU_OK; + } + ++void io_sendrecv_fail(struct io_kiocb *req) ++{ ++ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); ++ int res = req->cqe.res; ++ ++ if (req->flags & REQ_F_PARTIAL_IO) ++ res = sr->done_io; ++ io_req_set_res(req, res, req->cqe.flags); ++} ++ + int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { + struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); +--- a/io_uring/net.h ++++ b/io_uring/net.h +@@ -43,6 +43,8 @@ int io_recvmsg_prep(struct io_kiocb *req + int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags); + int io_recv(struct io_kiocb *req, unsigned int issue_flags); + ++void io_sendrecv_fail(struct io_kiocb *req); ++ + int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); + int io_accept(struct io_kiocb *req, unsigned int issue_flags); + +--- a/io_uring/opdef.c ++++ b/io_uring/opdef.c +@@ -157,6 +157,7 @@ const struct io_op_def io_op_defs[] = { + .issue = io_sendmsg, + .prep_async = io_sendmsg_prep_async, + .cleanup = io_sendmsg_recvmsg_cleanup, ++ .fail = io_sendrecv_fail, + #else + .prep = io_eopnotsupp_prep, + #endif +@@ -174,6 +175,7 @@ const struct io_op_def io_op_defs[] = { + .issue = io_recvmsg, + .prep_async = io_recvmsg_prep_async, + .cleanup = io_sendmsg_recvmsg_cleanup, ++ .fail = io_sendrecv_fail, + #else + .prep = io_eopnotsupp_prep, + #endif +@@ -316,6 +318,7 @@ const struct io_op_def io_op_defs[] = { + #if defined(CONFIG_NET) + .prep = io_sendmsg_prep, + .issue = io_send, ++ .fail = io_sendrecv_fail, + #else + .prep = io_eopnotsupp_prep, + #endif +@@ -331,6 +334,7 @@ const struct io_op_def io_op_defs[] = { + #if defined(CONFIG_NET) + .prep = io_recvmsg_prep, + .issue = io_recv, ++ .fail = io_sendrecv_fail, + #else + .prep = io_eopnotsupp_prep, + #endif diff --git a/queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch b/queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch new file mode 100644 index 00000000000..5a92ef17450 --- /dev/null +++ b/queue-6.0/io_uring-net-don-t-lose-partial-send_zc-on-fail.patch @@ -0,0 +1,69 @@ +From 5693bcce892d7b8b15a7a92b011d3d40a023b53c Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 21 Sep 2022 12:17:49 +0100 +Subject: io_uring/net: don't lose partial send_zc on fail + +From: Pavel Begunkov + +commit 5693bcce892d7b8b15a7a92b011d3d40a023b53c upstream. + +Partial zc send may end up in io_req_complete_failed(), which not only +would return invalid result but also mask out the notification leading +to lifetime issues. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/5673285b5e83e6ceca323727b4ddaa584b5cc91e.1663668091.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 16 ++++++++++++++++ + io_uring/net.h | 1 + + io_uring/opdef.c | 1 + + 3 files changed, 18 insertions(+) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -1097,6 +1097,22 @@ void io_sendrecv_fail(struct io_kiocb *r + io_req_set_res(req, res, req->cqe.flags); + } + ++void io_send_zc_fail(struct io_kiocb *req) ++{ ++ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); ++ int res = req->cqe.res; ++ ++ if (req->flags & REQ_F_PARTIAL_IO) { ++ if (req->flags & REQ_F_NEED_CLEANUP) { ++ io_notif_flush(sr->notif); ++ sr->notif = NULL; ++ req->flags &= ~REQ_F_NEED_CLEANUP; ++ } ++ res = sr->done_io; ++ } ++ io_req_set_res(req, res, req->cqe.flags); ++} ++ + int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { + struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept); +--- a/io_uring/net.h ++++ b/io_uring/net.h +@@ -58,6 +58,7 @@ int io_connect(struct io_kiocb *req, uns + int io_sendzc(struct io_kiocb *req, unsigned int issue_flags); + int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); + void io_sendzc_cleanup(struct io_kiocb *req); ++void io_send_zc_fail(struct io_kiocb *req); + + void io_netmsg_cache_free(struct io_cache_entry *entry); + #else +--- a/io_uring/opdef.c ++++ b/io_uring/opdef.c +@@ -494,6 +494,7 @@ const struct io_op_def io_op_defs[] = { + .issue = io_sendzc, + .prep_async = io_sendzc_prep_async, + .cleanup = io_sendzc_cleanup, ++ .fail = io_send_zc_fail, + #else + .prep = io_eopnotsupp_prep, + #endif diff --git a/queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch b/queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch new file mode 100644 index 00000000000..97fbbffa48a --- /dev/null +++ b/queue-6.0/io_uring-net-don-t-update-msg_name-if-not-provided.patch @@ -0,0 +1,33 @@ +From 6f10ae8a155446248055c7ddd480ef40139af788 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Thu, 29 Sep 2022 22:23:18 +0100 +Subject: io_uring/net: don't update msg_name if not provided + +From: Pavel Begunkov + +commit 6f10ae8a155446248055c7ddd480ef40139af788 upstream. + +io_sendmsg_copy_hdr() may clear msg->msg_name if the userspace didn't +provide it, we should retain NULL in this case. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/97d49f61b5ec76d0900df658cfde3aa59ff22121.1664486545.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -163,7 +163,8 @@ static int io_setup_async_msg(struct io_ + } + req->flags |= REQ_F_NEED_CLEANUP; + memcpy(async_msg, kmsg, sizeof(*kmsg)); +- async_msg->msg.msg_name = &async_msg->addr; ++ if (async_msg->msg.msg_name) ++ async_msg->msg.msg_name = &async_msg->addr; + /* if were using fast_iov, set it to the new one */ + if (!kmsg->free_iov) { + size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; diff --git a/queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch b/queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch new file mode 100644 index 00000000000..bc97eaadab9 --- /dev/null +++ b/queue-6.0/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch @@ -0,0 +1,68 @@ +From 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 Mon Sep 17 00:00:00 2001 +From: Stefan Metzmacher +Date: Thu, 29 Sep 2022 09:39:10 +0200 +Subject: io_uring/net: fix fast_iov assignment in io_setup_async_msg() + +From: Stefan Metzmacher + +commit 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 upstream. + +I hit a very bad problem during my tests of SENDMSG_ZC. +BUG(); in first_iovec_segment() triggered very easily. +The problem was io_setup_async_msg() in the partial retry case, +which seems to happen more often with _ZC. + +iov_iter_iovec_advance() may change i->iov in order to have i->iov_offset +being only relative to the first element. + +Which means kmsg->msg.msg_iter.iov is no longer the +same as kmsg->fast_iov. + +But this would rewind the copy to be the start of +async_msg->fast_iov, which means the internal +state of sync_msg->msg.msg_iter is inconsitent. + +I tested with 5 vectors with length like this 4, 0, 64, 20, 8388608 +and got a short writes with: +- ret=2675244 min_ret=8388692 => remaining 5713448 sr->done_io=2675244 +- ret=-EAGAIN => io_uring_poll_arm +- ret=4911225 min_ret=5713448 => remaining 802223 sr->done_io=7586469 +- ret=-EAGAIN => io_uring_poll_arm +- ret=802223 min_ret=802223 => res=8388692 + +While this was easily triggered with SENDMSG_ZC (queued for 6.1), +it was a potential problem starting with 7ba89d2af17aa879dda30f5d5d3f152e587fc551 +in 5.18 for IORING_OP_RECVMSG. +And also with 4c3c09439c08b03d9503df0ca4c7619c5842892e in 5.19 +for IORING_OP_SENDMSG. + +However 257e84a5377fbbc336ff563833a8712619acce56 introduced the critical +code into io_setup_async_msg() in 5.11. + +Fixes: 7ba89d2af17aa ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly") +Fixes: 257e84a5377fb ("io_uring: refactor sendmsg/recvmsg iov managing") +Cc: stable@vger.kernel.org +Signed-off-by: Stefan Metzmacher +Reviewed-by: Pavel Begunkov +Link: https://lore.kernel.org/r/b2e7be246e2fb173520862b0c7098e55767567a2.1664436949.git.metze@samba.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -165,8 +165,10 @@ static int io_setup_async_msg(struct io_ + memcpy(async_msg, kmsg, sizeof(*kmsg)); + async_msg->msg.msg_name = &async_msg->addr; + /* if were using fast_iov, set it to the new one */ +- if (!async_msg->free_iov) +- async_msg->msg.msg_iter.iov = async_msg->fast_iov; ++ if (!kmsg->free_iov) { ++ size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; ++ async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx]; ++ } + + return -EAGAIN; + } diff --git a/queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch b/queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch new file mode 100644 index 00000000000..f16e1a3925b --- /dev/null +++ b/queue-6.0/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch @@ -0,0 +1,87 @@ +From 3fb1bd68817288729179444caf1fd5c5c4d2d65d Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 4 Oct 2022 20:29:48 -0600 +Subject: io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT + +From: Jens Axboe + +commit 3fb1bd68817288729179444caf1fd5c5c4d2d65d upstream. + +We treat EINPROGRESS like EAGAIN, but if we're retrying post getting +EINPROGRESS, then we just need to check the socket for errors and +terminate the request. + +This was exposed on a bluetooth connection request which ends up +taking a while and hitting EINPROGRESS, and yields a CQE result of +-EBADFD because we're retrying a connect on a socket that is now +connected. + +Cc: stable@vger.kernel.org +Fixes: 87f80d623c6c ("io_uring: handle connect -EINPROGRESS like -EAGAIN") +Link: https://github.com/axboe/liburing/issues/671 +Reported-by: Aidan Sun +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -46,6 +46,7 @@ struct io_connect { + struct file *file; + struct sockaddr __user *addr; + int addr_len; ++ bool in_progress; + }; + + struct io_sr_msg { +@@ -1279,6 +1280,7 @@ int io_connect_prep(struct io_kiocb *req + + conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + conn->addr_len = READ_ONCE(sqe->addr2); ++ conn->in_progress = false; + return 0; + } + +@@ -1290,6 +1292,16 @@ int io_connect(struct io_kiocb *req, uns + int ret; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + ++ if (connect->in_progress) { ++ struct socket *socket; ++ ++ ret = -ENOTSOCK; ++ socket = sock_from_file(req->file); ++ if (socket) ++ ret = sock_error(socket->sk); ++ goto out; ++ } ++ + if (req_has_async_data(req)) { + io = req->async_data; + } else { +@@ -1306,13 +1318,17 @@ int io_connect(struct io_kiocb *req, uns + ret = __sys_connect_file(req->file, &io->address, + connect->addr_len, file_flags); + if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { +- if (req_has_async_data(req)) +- return -EAGAIN; +- if (io_alloc_async_data(req)) { +- ret = -ENOMEM; +- goto out; ++ if (ret == -EINPROGRESS) { ++ connect->in_progress = true; ++ } else { ++ if (req_has_async_data(req)) ++ return -EAGAIN; ++ if (io_alloc_async_data(req)) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ memcpy(req->async_data, &__io, sizeof(__io)); + } +- memcpy(req->async_data, &__io, sizeof(__io)); + return -EAGAIN; + } + if (ret == -ERESTARTSYS) diff --git a/queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch b/queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch new file mode 100644 index 00000000000..9062f740f5b --- /dev/null +++ b/queue-6.0/io_uring-rw-don-t-lose-partial-io-result-on-fail.patch @@ -0,0 +1,97 @@ +From 47b4c68660752facfa6247b1fc9ca9d722b8b601 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 21 Sep 2022 12:17:47 +0100 +Subject: io_uring/rw: don't lose partial IO result on fail + +From: Pavel Begunkov + +commit 47b4c68660752facfa6247b1fc9ca9d722b8b601 upstream. + +A partially done read/write may end up in io_req_complete_failed() and +loose the result, make sure we return the number of bytes processed. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/05e0879c226bcd53b441bf92868eadd4bf04e2fc.1663668091.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/opdef.c | 6 ++++++ + io_uring/rw.c | 8 ++++++++ + io_uring/rw.h | 1 + + 3 files changed, 15 insertions(+) + +--- a/io_uring/opdef.c ++++ b/io_uring/opdef.c +@@ -69,6 +69,7 @@ const struct io_op_def io_op_defs[] = { + .issue = io_read, + .prep_async = io_readv_prep_async, + .cleanup = io_readv_writev_cleanup, ++ .fail = io_rw_fail, + }, + [IORING_OP_WRITEV] = { + .needs_file = 1, +@@ -85,6 +86,7 @@ const struct io_op_def io_op_defs[] = { + .issue = io_write, + .prep_async = io_writev_prep_async, + .cleanup = io_readv_writev_cleanup, ++ .fail = io_rw_fail, + }, + [IORING_OP_FSYNC] = { + .needs_file = 1, +@@ -105,6 +107,7 @@ const struct io_op_def io_op_defs[] = { + .name = "READ_FIXED", + .prep = io_prep_rw, + .issue = io_read, ++ .fail = io_rw_fail, + }, + [IORING_OP_WRITE_FIXED] = { + .needs_file = 1, +@@ -119,6 +122,7 @@ const struct io_op_def io_op_defs[] = { + .name = "WRITE_FIXED", + .prep = io_prep_rw, + .issue = io_write, ++ .fail = io_rw_fail, + }, + [IORING_OP_POLL_ADD] = { + .needs_file = 1, +@@ -273,6 +277,7 @@ const struct io_op_def io_op_defs[] = { + .name = "READ", + .prep = io_prep_rw, + .issue = io_read, ++ .fail = io_rw_fail, + }, + [IORING_OP_WRITE] = { + .needs_file = 1, +@@ -287,6 +292,7 @@ const struct io_op_def io_op_defs[] = { + .name = "WRITE", + .prep = io_prep_rw, + .issue = io_write, ++ .fail = io_rw_fail, + }, + [IORING_OP_FADVISE] = { + .needs_file = 1, +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -984,6 +984,14 @@ static void io_cqring_ev_posted_iopoll(s + io_cqring_wake(ctx); + } + ++void io_rw_fail(struct io_kiocb *req) ++{ ++ int res; ++ ++ res = io_fixup_rw_res(req, req->cqe.res); ++ io_req_set_res(req, res, req->cqe.flags); ++} ++ + int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) + { + struct io_wq_work_node *pos, *start, *prev; +--- a/io_uring/rw.h ++++ b/io_uring/rw.h +@@ -21,3 +21,4 @@ int io_readv_prep_async(struct io_kiocb + int io_write(struct io_kiocb *req, unsigned int issue_flags); + int io_writev_prep_async(struct io_kiocb *req); + void io_readv_writev_cleanup(struct io_kiocb *req); ++void io_rw_fail(struct io_kiocb *req); diff --git a/queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch b/queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch new file mode 100644 index 00000000000..31a02ad6c0f --- /dev/null +++ b/queue-6.0/io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch @@ -0,0 +1,39 @@ +From c278d9f8ac0db5590909e6d9e85b5ca2b786704f Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 27 Sep 2022 00:44:40 +0100 +Subject: io_uring/rw: don't lose short results on io_setup_async_rw() + +From: Pavel Begunkov + +commit c278d9f8ac0db5590909e6d9e85b5ca2b786704f upstream. + +If a retry io_setup_async_rw() fails we lose result from the first +io_iter_do_read(), which is a problem mostly for streams/sockets. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/0e8d20cebe5fc9c96ed268463c394237daabc384.1664235732.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/rw.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -794,10 +794,12 @@ int io_read(struct io_kiocb *req, unsign + iov_iter_restore(&s->iter, &s->iter_state); + + ret2 = io_setup_async_rw(req, iovec, s, true); +- if (ret2) +- return ret2; +- + iovec = NULL; ++ if (ret2) { ++ ret = ret > 0 ? ret : ret2; ++ goto done; ++ } ++ + io = req->async_data; + s = &io->s; + /* diff --git a/queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch b/queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch new file mode 100644 index 00000000000..ebcfff5f5e9 --- /dev/null +++ b/queue-6.0/io_uring-rw-fix-unexpected-link-breakage.patch @@ -0,0 +1,38 @@ +From bf68b5b34311ee57ed40749a1257a30b46127556 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 27 Sep 2022 00:44:39 +0100 +Subject: io_uring/rw: fix unexpected link breakage + +From: Pavel Begunkov + +commit bf68b5b34311ee57ed40749a1257a30b46127556 upstream. + +req->cqe.res is set in io_read() to the amount of bytes left to be done, +which is used to figure out whether to fail a read or not. However, +io_read() may do another without returning, and we stash the previous +value into ->bytes_done but forget to update cqe.res. Then we ask a read +to do strictly less than cqe.res but expect the return to be exactly +cqe.res. + +Fix the bug by updating cqe.res for retries. + +Cc: stable@vger.kernel.org +Reported-and-Tested-by: Beld Zhang +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/3a1088440c7be98e5800267af922a67da0ef9f13.1664235732.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/rw.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/io_uring/rw.c ++++ b/io_uring/rw.c +@@ -823,6 +823,7 @@ int io_read(struct io_kiocb *req, unsign + return -EAGAIN; + } + ++ req->cqe.res = iov_iter_count(&s->iter); + /* + * Now retry read with the IOCB_WAITQ parts set in the iocb. If + * we get -EIOCBQUEUED, then we'll get a notification when the diff --git a/queue-6.0/series b/queue-6.0/series index 93d5a44acd9..9993679fcad 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -7,3 +7,15 @@ alsa-hda-realtek-correct-pin-configs-for-asus-g533z.patch alsa-hda-realtek-add-quirk-for-asus-gv601r-laptop.patch alsa-hda-realtek-add-intel-reference-ssid-to-support-headset-keys.patch mtd-rawnand-atmel-unmap-streaming-dma-mappings.patch +io_uring-add-custom-opcode-hooks-on-fail.patch +io_uring-rw-don-t-lose-partial-io-result-on-fail.patch +io_uring-net-don-t-lose-partial-send-recv-on-fail.patch +io_uring-net-don-t-lose-partial-send_zc-on-fail.patch +io_uring-rw-fix-unexpected-link-breakage.patch +io_uring-rw-don-t-lose-short-results-on-io_setup_async_rw.patch +io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch +io_uring-net-don-t-update-msg_name-if-not-provided.patch +io_uring-limit-registration-w-single_issuer.patch +io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch +io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch +io_uring-correct-pinned_vm-accounting.patch -- 2.47.3