From: Greg Kroah-Hartman Date: Sun, 16 Oct 2022 07:10:02 +0000 (+0200) Subject: 5.19-stable patches X-Git-Tag: v5.4.219~181 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d97f72f4d7997778ee155d12c29f228f55571619;p=thirdparty%2Fkernel%2Fstable-queue.git 5.19-stable patches added patches: io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch io_uring-correct-pinned_vm-accounting.patch io_uring-net-don-t-update-msg_name-if-not-provided.patch io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch io_uring-rw-fix-unexpected-link-breakage.patch --- diff --git a/queue-5.19/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch b/queue-5.19/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch new file mode 100644 index 00000000000..9cfa9c4ad35 --- /dev/null +++ b/queue-5.19/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch @@ -0,0 +1,99 @@ +From 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Mon, 3 Oct 2022 13:59:47 +0100 +Subject: io_uring/af_unix: defer registered files gc to io_uring release + +From: Pavel Begunkov + +commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 upstream. + +Instead of putting io_uring's registered files in unix_gc() we want it +to be done by io_uring itself. The trick here is to consider io_uring +registered files for cycle detection but not actually putting them down. +Because io_uring can't register other ring instances, this will remove +all refs to the ring file triggering the ->release path and clean up +with io_ring_ctx_free(). 
+ +Cc: stable@vger.kernel.org +Fixes: 6b06314c47e1 ("io_uring: add file set registration") +Reported-and-tested-by: David Bouman +Signed-off-by: Pavel Begunkov +Signed-off-by: Thadeu Lima de Souza Cascardo +[axboe: add kerneldoc comment to skb, fold in skb leak fix] +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 2 ++ + io_uring/io_uring.c | 1 + + net/unix/garbage.c | 20 ++++++++++++++++++++ + 3 files changed, 23 insertions(+) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -965,6 +965,7 @@ typedef unsigned char *sk_buff_data_t; + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) ++ * @scm_io_uring: SKB holds io_uring registered files + * @dst_pending_confirm: need to confirm neighbour + * @decrypted: Decrypted SKB + * @slow_gro: state present at GRO time, slower prepare step required +@@ -1144,6 +1145,7 @@ struct sk_buff { + #endif + __u8 slow_gro:1; + __u8 csum_not_inet:1; ++ __u8 scm_io_uring:1; + + #ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -9500,6 +9500,7 @@ static int io_scm_file_account(struct io + + UNIXCB(skb).fp = fpl; + skb->sk = sk; ++ skb->scm_io_uring = 1; + skb->destructor = unix_destruct_scm; + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + } +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -204,6 +204,7 @@ void wait_for_unix_gc(void) + /* The external entry point: unix_gc() */ + void unix_gc(void) + { ++ struct sk_buff *next_skb, *skb; + struct unix_sock *u; + struct unix_sock *next; + struct sk_buff_head hitlist; +@@ -297,11 +298,30 @@ void unix_gc(void) + + spin_unlock(&unix_gc_lock); + ++ /* We need io_uring to clean its registered files, ignore all io_uring ++ * originated skbs. 
It's fine as io_uring doesn't keep references to ++ * other io_uring instances and so killing all other files in the cycle ++ * will put all io_uring references forcing it to go through normal ++ * release.path eventually putting registered files. ++ */ ++ skb_queue_walk_safe(&hitlist, skb, next_skb) { ++ if (skb->scm_io_uring) { ++ __skb_unlink(skb, &hitlist); ++ skb_queue_tail(&skb->sk->sk_receive_queue, skb); ++ } ++ } ++ + /* Here we are. Hitlist is filled. Die. */ + __skb_queue_purge(&hitlist); + + spin_lock(&unix_gc_lock); + ++ /* There could be io_uring registered files, just push them back to ++ * the inflight list ++ */ ++ list_for_each_entry_safe(u, next, &gc_candidates, link) ++ list_move_tail(&u->link, &gc_inflight_list); ++ + /* All candidates should have been detached by now. */ + BUG_ON(!list_empty(&gc_candidates)); + diff --git a/queue-5.19/io_uring-correct-pinned_vm-accounting.patch b/queue-5.19/io_uring-correct-pinned_vm-accounting.patch new file mode 100644 index 00000000000..b3a7a7b8be2 --- /dev/null +++ b/queue-5.19/io_uring-correct-pinned_vm-accounting.patch @@ -0,0 +1,48 @@ +From 42b6419d0aba47c5d8644cdc0b68502254671de5 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 4 Oct 2022 03:19:08 +0100 +Subject: io_uring: correct pinned_vm accounting + +From: Pavel Begunkov + +commit 42b6419d0aba47c5d8644cdc0b68502254671de5 upstream. + +->mm_account should be released only after we free all registered +buffers, otherwise __io_sqe_buffers_unregister() will see a NULL +->mm_account and skip locked_vm accounting. 
+ +Cc: +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -10727,12 +10727,6 @@ static void io_flush_apoll_cache(struct + static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) + { + io_sq_thread_finish(ctx); +- +- if (ctx->mm_account) { +- mmdrop(ctx->mm_account); +- ctx->mm_account = NULL; +- } +- + io_rsrc_refs_drop(ctx); + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); +@@ -10771,6 +10765,10 @@ static __cold void io_ring_ctx_free(stru + #endif + WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); + ++ if (ctx->mm_account) { ++ mmdrop(ctx->mm_account); ++ ctx->mm_account = NULL; ++ } + io_mem_free(ctx->rings); + io_mem_free(ctx->sq_sqes); + diff --git a/queue-5.19/io_uring-net-don-t-update-msg_name-if-not-provided.patch b/queue-5.19/io_uring-net-don-t-update-msg_name-if-not-provided.patch new file mode 100644 index 00000000000..8c576355acc --- /dev/null +++ b/queue-5.19/io_uring-net-don-t-update-msg_name-if-not-provided.patch @@ -0,0 +1,33 @@ +From 6f10ae8a155446248055c7ddd480ef40139af788 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Thu, 29 Sep 2022 22:23:18 +0100 +Subject: io_uring/net: don't update msg_name if not provided + +From: Pavel Begunkov + +commit 6f10ae8a155446248055c7ddd480ef40139af788 upstream. + +io_sendmsg_copy_hdr() may clear msg->msg_name if the userspace didn't +provide it, we should retain NULL in this case. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/97d49f61b5ec76d0900df658cfde3aa59ff22121.1664486545.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -5850,7 +5850,8 @@ static int io_setup_async_msg(struct io_ + async_msg = req->async_data; + req->flags |= REQ_F_NEED_CLEANUP; + memcpy(async_msg, kmsg, sizeof(*kmsg)); +- async_msg->msg.msg_name = &async_msg->addr; ++ if (async_msg->msg.msg_name) ++ async_msg->msg.msg_name = &async_msg->addr; + /* if were using fast_iov, set it to the new one */ + if (!kmsg->free_iov) { + size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; diff --git a/queue-5.19/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch b/queue-5.19/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch new file mode 100644 index 00000000000..50eac26f9e0 --- /dev/null +++ b/queue-5.19/io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch @@ -0,0 +1,68 @@ +From 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 Mon Sep 17 00:00:00 2001 +From: Stefan Metzmacher +Date: Thu, 29 Sep 2022 09:39:10 +0200 +Subject: io_uring/net: fix fast_iov assignment in io_setup_async_msg() + +From: Stefan Metzmacher + +commit 3e4cb6ebbb2bad201c1186bc0b7e8cf41dd7f7e6 upstream. + +I hit a very bad problem during my tests of SENDMSG_ZC. +BUG(); in first_iovec_segment() triggered very easily. +The problem was io_setup_async_msg() in the partial retry case, +which seems to happen more often with _ZC. + +iov_iter_iovec_advance() may change i->iov in order to have i->iov_offset +being only relative to the first element. + +Which means kmsg->msg.msg_iter.iov is no longer the +same as kmsg->fast_iov. 
+ +But this would rewind the copy to be the start of +async_msg->fast_iov, which means the internal +state of async_msg->msg.msg_iter is inconsistent. + +I tested with 5 vectors with length like this 4, 0, 64, 20, 8388608 +and got short writes with: +- ret=2675244 min_ret=8388692 => remaining 5713448 sr->done_io=2675244 +- ret=-EAGAIN => io_uring_poll_arm +- ret=4911225 min_ret=5713448 => remaining 802223 sr->done_io=7586469 +- ret=-EAGAIN => io_uring_poll_arm +- ret=802223 min_ret=802223 => res=8388692 + +While this was easily triggered with SENDMSG_ZC (queued for 6.1), +it was a potential problem starting with 7ba89d2af17aa879dda30f5d5d3f152e587fc551 +in 5.18 for IORING_OP_RECVMSG. +And also with 4c3c09439c08b03d9503df0ca4c7619c5842892e in 5.19 +for IORING_OP_SENDMSG. + +However 257e84a5377fbbc336ff563833a8712619acce56 introduced the critical +code into io_setup_async_msg() in 5.11. + +Fixes: 7ba89d2af17aa ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly") +Fixes: 257e84a5377fb ("io_uring: refactor sendmsg/recvmsg iov managing") +Cc: stable@vger.kernel.org +Signed-off-by: Stefan Metzmacher +Reviewed-by: Pavel Begunkov +Link: https://lore.kernel.org/r/b2e7be246e2fb173520862b0c7098e55767567a2.1664436949.git.metze@samba.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -5852,8 +5852,10 @@ static int io_setup_async_msg(struct io_ + memcpy(async_msg, kmsg, sizeof(*kmsg)); + async_msg->msg.msg_name = &async_msg->addr; + /* if were using fast_iov, set it to the new one */ +- if (!async_msg->free_iov) +- async_msg->msg.msg_iter.iov = async_msg->fast_iov; ++ if (!kmsg->free_iov) { ++ size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; ++ async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx]; ++ } + + return -EAGAIN; + } diff --git
a/queue-5.19/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch b/queue-5.19/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch new file mode 100644 index 00000000000..2fd41b8e589 --- /dev/null +++ b/queue-5.19/io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch @@ -0,0 +1,87 @@ +From 3fb1bd68817288729179444caf1fd5c5c4d2d65d Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 4 Oct 2022 20:29:48 -0600 +Subject: io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT + +From: Jens Axboe + +commit 3fb1bd68817288729179444caf1fd5c5c4d2d65d upstream. + +We treat EINPROGRESS like EAGAIN, but if we're retrying post getting +EINPROGRESS, then we just need to check the socket for errors and +terminate the request. + +This was exposed on a bluetooth connection request which ends up +taking a while and hitting EINPROGRESS, and yields a CQE result of +-EBADFD because we're retrying a connect on a socket that is now +connected. + +Cc: stable@vger.kernel.org +Fixes: 87f80d623c6c ("io_uring: handle connect -EINPROGRESS like -EAGAIN") +Link: https://github.com/axboe/liburing/issues/671 +Reported-by: Aidan Sun +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -653,6 +653,7 @@ struct io_connect { + struct file *file; + struct sockaddr __user *addr; + int addr_len; ++ bool in_progress; + }; + + struct io_sr_msg { +@@ -6463,6 +6464,7 @@ static int io_connect_prep(struct io_kio + + conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + conn->addr_len = READ_ONCE(sqe->addr2); ++ conn->in_progress = false; + return 0; + } + +@@ -6473,6 +6475,16 @@ static int io_connect(struct io_kiocb *r + int ret; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + ++ if (connect->in_progress) { ++ struct socket *socket; ++ ++ ret = -ENOTSOCK; ++ socket = 
sock_from_file(req->file); ++ if (socket) ++ ret = sock_error(socket->sk); ++ goto out; ++ } ++ + if (req_has_async_data(req)) { + io = req->async_data; + } else { +@@ -6489,13 +6501,17 @@ static int io_connect(struct io_kiocb *r + ret = __sys_connect_file(req->file, &io->address, + req->connect.addr_len, file_flags); + if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { +- if (req_has_async_data(req)) +- return -EAGAIN; +- if (io_alloc_async_data(req)) { +- ret = -ENOMEM; +- goto out; ++ if (ret == -EINPROGRESS) { ++ connect->in_progress = true; ++ } else { ++ if (req_has_async_data(req)) ++ return -EAGAIN; ++ if (io_alloc_async_data(req)) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ memcpy(req->async_data, &__io, sizeof(__io)); + } +- memcpy(req->async_data, &__io, sizeof(__io)); + return -EAGAIN; + } + if (ret == -ERESTARTSYS) diff --git a/queue-5.19/io_uring-rw-fix-unexpected-link-breakage.patch b/queue-5.19/io_uring-rw-fix-unexpected-link-breakage.patch new file mode 100644 index 00000000000..fbb556aaf2a --- /dev/null +++ b/queue-5.19/io_uring-rw-fix-unexpected-link-breakage.patch @@ -0,0 +1,38 @@ +From bf68b5b34311ee57ed40749a1257a30b46127556 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Tue, 27 Sep 2022 00:44:39 +0100 +Subject: io_uring/rw: fix unexpected link breakage + +From: Pavel Begunkov + +commit bf68b5b34311ee57ed40749a1257a30b46127556 upstream. + +req->cqe.res is set in io_read() to the amount of bytes left to be done, +which is used to figure out whether to fail a read or not. However, +io_read() may do another read attempt without returning, and we stash the previous +value into ->bytes_done but forget to update cqe.res. Then we ask a read +to do strictly less than cqe.res but expect the return to be exactly +cqe.res. + +Fix the bug by updating cqe.res for retries.
+ +Cc: stable@vger.kernel.org +Reported-and-Tested-by: Beld Zhang +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/3a1088440c7be98e5800267af922a67da0ef9f13.1664235732.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -4215,6 +4215,7 @@ static int io_read(struct io_kiocb *req, + return -EAGAIN; + } + ++ req->cqe.res = iov_iter_count(&s->iter); + /* + * Now retry read with the IOCB_WAITQ parts set in the iocb. If + * we get -EIOCBQUEUED, then we'll get a notification when the diff --git a/queue-5.19/series b/queue-5.19/series index f82d22908a9..c357b890caf 100644 --- a/queue-5.19/series +++ b/queue-5.19/series @@ -8,3 +8,9 @@ alsa-hda-realtek-correct-pin-configs-for-asus-g533z.patch alsa-hda-realtek-add-quirk-for-asus-gv601r-laptop.patch alsa-hda-realtek-add-intel-reference-ssid-to-support-headset-keys.patch mtd-rawnand-atmel-unmap-streaming-dma-mappings.patch +io_uring-rw-fix-unexpected-link-breakage.patch +io_uring-net-fix-fast_iov-assignment-in-io_setup_async_msg.patch +io_uring-net-don-t-update-msg_name-if-not-provided.patch +io_uring-net-handle-einprogress-correct-for-ioring_op_connect.patch +io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch +io_uring-correct-pinned_vm-accounting.patch