From: Greg Kroah-Hartman Date: Mon, 17 Oct 2022 09:43:47 +0000 (+0200) Subject: 5.15-stable patches X-Git-Tag: v5.4.219~33 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e108a477966f9d0e1e124788d541a188f94789aa;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch io_uring-correct-pinned_vm-accounting.patch io_uring-rw-fix-error-ed-retry-return-values.patch io_uring-rw-fix-short-rw-error-handling.patch io_uring-rw-fix-unexpected-link-breakage.patch --- diff --git a/queue-5.15/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch b/queue-5.15/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch new file mode 100644 index 00000000000..c7ef3ac361a --- /dev/null +++ b/queue-5.15/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch @@ -0,0 +1,102 @@ +From foo@baz Mon Oct 17 11:43:27 AM CEST 2022 +From: Pavel Begunkov +Date: Sun, 16 Oct 2022 22:42:54 +0100 +Subject: io_uring/af_unix: defer registered files gc to io_uring release +To: stable@vger.kernel.org +Cc: Jens Axboe , asml.silence@gmail.com +Message-ID: + +From: Pavel Begunkov + +[ upstream commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 ] + +Instead of putting io_uring's registered files in unix_gc() we want it +to be done by io_uring itself. The trick here is to consider io_uring +registered files for cycle detection but not actually putting them down. +Because io_uring can't register other ring instances, this will remove +all refs to the ring file triggering the ->release path and clean up +with io_ring_ctx_free(). + +Cc: stable@vger.kernel.org +Fixes: 6b06314c47e1 ("io_uring: add file set registration") +Reported-and-tested-by: David Bouman +Signed-off-by: Pavel Begunkov +Signed-off-by: Thadeu Lima de Souza Cascardo +[axboe: add kerneldoc comment to skb, fold in skb leak fix] +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 1 + + include/linux/skbuff.h | 2 ++ + net/unix/garbage.c | 20 ++++++++++++++++++++ + 3 files changed, 23 insertions(+) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -8066,6 +8066,7 @@ static int __io_sqe_files_scm(struct io_ + } + + skb->sk = sk; ++ skb->scm_io_uring = 1; + + nr_files = 0; + fpl->user = get_uid(current_user()); +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -725,6 +725,7 @@ typedef unsigned char *sk_buff_data_t; + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) ++ * @scm_io_uring: SKB holds io_uring registered files + * @dst_pending_confirm: need to confirm neighbour + * @decrypted: Decrypted SKB + * @slow_gro: state present at GRO time, slower prepare step required +@@ -910,6 +911,7 @@ struct sk_buff { + __u8 decrypted:1; + #endif + __u8 slow_gro:1; ++ __u8 scm_io_uring:1; + + #ifdef CONFIG_NET_SCHED + __u16 tc_index; /* traffic control index */ +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -204,6 +204,7 @@ void wait_for_unix_gc(void) + /* The external entry point: unix_gc() */ + void unix_gc(void) + { ++ struct sk_buff *next_skb, *skb; + struct unix_sock *u; + struct unix_sock *next; + struct sk_buff_head hitlist; +@@ -297,11 +298,30 @@ void unix_gc(void) + + spin_unlock(&unix_gc_lock); + ++ /* We need io_uring to clean its registered files, ignore all io_uring ++ * originated skbs. It's fine as io_uring doesn't keep references to ++ * other io_uring instances and so killing all other files in the cycle ++ * will put all io_uring references forcing it to go through normal ++ * release.path eventually putting registered files. ++ */ ++ skb_queue_walk_safe(&hitlist, skb, next_skb) { ++ if (skb->scm_io_uring) { ++ __skb_unlink(skb, &hitlist); ++ skb_queue_tail(&skb->sk->sk_receive_queue, skb); ++ } ++ } ++ + /* Here we are. Hitlist is filled. Die. */ + __skb_queue_purge(&hitlist); + + spin_lock(&unix_gc_lock); + ++ /* There could be io_uring registered files, just push them back to ++ * the inflight list ++ */ ++ list_for_each_entry_safe(u, next, &gc_candidates, link) ++ list_move_tail(&u->link, &gc_inflight_list); ++ + /* All candidates should have been detached by now. */ + BUG_ON(!list_empty(&gc_candidates)); + diff --git a/queue-5.15/io_uring-correct-pinned_vm-accounting.patch b/queue-5.15/io_uring-correct-pinned_vm-accounting.patch new file mode 100644 index 00000000000..1de92e115d2 --- /dev/null +++ b/queue-5.15/io_uring-correct-pinned_vm-accounting.patch @@ -0,0 +1,51 @@ +From foo@baz Mon Oct 17 11:43:27 AM CEST 2022 +From: Pavel Begunkov +Date: Sun, 16 Oct 2022 22:42:55 +0100 +Subject: io_uring: correct pinned_vm accounting +To: stable@vger.kernel.org +Cc: Jens Axboe , asml.silence@gmail.com +Message-ID: <01e0607d0e179bae74e60809bc9e805369205132.1665954636.git.asml.silence@gmail.com> + +From: Pavel Begunkov + +[ upstream commit 42b6419d0aba47c5d8644cdc0b68502254671de5 ] + +->mm_account should be released only after we free all registered +buffers, otherwise __io_sqe_buffers_unregister() will see a NULL +->mm_account and skip locked_vm accounting. + +Cc: +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -9303,11 +9303,6 @@ static void io_ring_ctx_free(struct io_r + { + io_sq_thread_finish(ctx); + +- if (ctx->mm_account) { +- mmdrop(ctx->mm_account); +- ctx->mm_account = NULL; +- } +- + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); + io_wait_rsrc_data(ctx->file_data); +@@ -9343,6 +9338,11 @@ static void io_ring_ctx_free(struct io_r + #endif + WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); + ++ if (ctx->mm_account) { ++ mmdrop(ctx->mm_account); ++ ctx->mm_account = NULL; ++ } ++ + io_mem_free(ctx->rings); + io_mem_free(ctx->sq_sqes); + diff --git a/queue-5.15/io_uring-rw-fix-error-ed-retry-return-values.patch b/queue-5.15/io_uring-rw-fix-error-ed-retry-return-values.patch new file mode 100644 index 00000000000..207c5204745 --- /dev/null +++ b/queue-5.15/io_uring-rw-fix-error-ed-retry-return-values.patch @@ -0,0 +1,38 @@ +From foo@baz Mon Oct 17 11:43:27 AM CEST 2022 +From: Pavel Begunkov +Date: Sun, 16 Oct 2022 22:42:57 +0100 +Subject: io_uring/rw: fix error'ed retry return values +To: stable@vger.kernel.org +Cc: Jens Axboe , asml.silence@gmail.com +Message-ID: + +From: Pavel Begunkov + +[ upstream commit 62bb0647b14646fa6c9aa25ecdf67ad18f13523c ] + +Kernel test robot reports that we test negativity of an unsigned in +io_fixup_rw_res() after a recent change, which masks error codes and +messes up the return value in case I/O is re-retried and failed with +an error. + +Fixes: 4d9cb92ca41dd ("io_uring/rw: fix short rw error handling") +Reported-by: kernel test robot +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/9754a0970af1861e7865f9014f735c70dc60bf79.1663071587.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2701,7 +2701,7 @@ static bool __io_complete_rw_common(stru + return false; + } + +-static inline unsigned io_fixup_rw_res(struct io_kiocb *req, unsigned res) ++static inline int io_fixup_rw_res(struct io_kiocb *req, unsigned res) + { + struct io_async_rw *io = req->async_data; + diff --git a/queue-5.15/io_uring-rw-fix-short-rw-error-handling.patch b/queue-5.15/io_uring-rw-fix-short-rw-error-handling.patch new file mode 100644 index 00000000000..b59f113086c --- /dev/null +++ b/queue-5.15/io_uring-rw-fix-short-rw-error-handling.patch @@ -0,0 +1,102 @@ +From foo@baz Mon Oct 17 11:43:27 AM CEST 2022 +From: Pavel Begunkov +Date: Sun, 16 Oct 2022 22:42:56 +0100 +Subject: io_uring/rw: fix short rw error handling +To: stable@vger.kernel.org +Cc: Jens Axboe , asml.silence@gmail.com +Message-ID: <6592121a38f7ee5834ce0691b1f85d54fcea3cfa.1665954636.git.asml.silence@gmail.com> + +From: Pavel Begunkov + +[ upstream commit 89473c1a9205760c4fa6d158058da7b594a815f0 ] + +We have a couple of problems, first reports of unexpected link breakage +for reads when cqe->res indicates that the IO was done in full. The +reason here is partial IO with retries. + +TL;DR; we compare the result in __io_complete_rw_common() against +req->cqe.res, but req->cqe.res doesn't store the full length but rather +the length left to be done. So, when we pass the full corrected result +via kiocb_done() -> __io_complete_rw_common(), it fails. + +The second problem is that we don't try to correct res in +io_complete_rw(), which, for instance, might be a problem for O_DIRECT +but when a prefix of data was cached in the page cache. We also +definitely don't want to pass a corrected result into io_rw_done(). + +The fix here is to leave __io_complete_rw_common() alone, always pass +not corrected result into it and fix it up as the last step just before +actually finishing the I/O. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 28 +++++++++++++++++----------- + 1 file changed, 17 insertions(+), 11 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2701,6 +2701,20 @@ static bool __io_complete_rw_common(stru + return false; + } + ++static inline unsigned io_fixup_rw_res(struct io_kiocb *req, unsigned res) ++{ ++ struct io_async_rw *io = req->async_data; ++ ++ /* add previously done IO, if any */ ++ if (io && io->bytes_done > 0) { ++ if (res < 0) ++ res = io->bytes_done; ++ else ++ res += io->bytes_done; ++ } ++ return res; ++} ++ + static void io_req_task_complete(struct io_kiocb *req, bool *locked) + { + unsigned int cflags = io_put_rw_kbuf(req); +@@ -2724,7 +2738,7 @@ static void __io_complete_rw(struct io_k + { + if (__io_complete_rw_common(req, res)) + return; +- __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req)); ++ __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req)); + } + + static void io_complete_rw(struct kiocb *kiocb, long res, long res2) +@@ -2733,7 +2747,7 @@ static void io_complete_rw(struct kiocb + + if (__io_complete_rw_common(req, res)) + return; +- req->result = res; ++ req->result = io_fixup_rw_res(req, res); + req->io_task_work.func = io_req_task_complete; + io_req_task_work_add(req); + } +@@ -2979,15 +2993,6 @@ static void kiocb_done(struct kiocb *kio + unsigned int issue_flags) + { + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +- struct io_async_rw *io = req->async_data; +- +- /* add previously done IO, if any */ +- if (io && io->bytes_done > 0) { +- if (ret < 0) +- ret = io->bytes_done; +- else +- ret += io->bytes_done; +- } + + if (req->flags & REQ_F_CUR_POS) + req->file->f_pos = kiocb->ki_pos; +@@ -3004,6 +3009,7 @@ static void kiocb_done(struct kiocb *kio + unsigned int cflags = io_put_rw_kbuf(req); + struct io_ring_ctx *ctx = req->ctx; + ++ ret = io_fixup_rw_res(req, ret); + req_set_fail(req); + if (!(issue_flags & IO_URING_F_NONBLOCK)) { + mutex_lock(&ctx->uring_lock); diff --git a/queue-5.15/io_uring-rw-fix-unexpected-link-breakage.patch b/queue-5.15/io_uring-rw-fix-unexpected-link-breakage.patch new file mode 100644 index 00000000000..8da4d9287cc --- /dev/null +++ b/queue-5.15/io_uring-rw-fix-unexpected-link-breakage.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Oct 17 11:43:27 AM CEST 2022 +From: Pavel Begunkov +Date: Sun, 16 Oct 2022 22:42:58 +0100 +Subject: io_uring/rw: fix unexpected link breakage +To: stable@vger.kernel.org +Cc: Jens Axboe , asml.silence@gmail.com +Message-ID: <1b05243cdfa8135866a6ccc115e491df8d725d16.1665954636.git.asml.silence@gmail.com> + +From: Pavel Begunkov + +[ upstream commit bf68b5b34311ee57ed40749a1257a30b46127556 ] + +req->cqe.res is set in io_read() to the amount of bytes left to be done, +which is used to figure out whether to fail a read or not. However, +io_read() may do another without returning, and we stash the previous +value into ->bytes_done but forget to update cqe.res. Then we ask a read +to do strictly less than cqe.res but expect the return to be exactly +cqe.res. + +Fix the bug by updating cqe.res for retries. + +Cc: stable@vger.kernel.org +Reported-and-Tested-by: Beld Zhang +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/3a1088440c7be98e5800267af922a67da0ef9f13.1664235732.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -3606,6 +3606,7 @@ static int io_read(struct io_kiocb *req, + return -EAGAIN; + } + ++ req->result = iov_iter_count(iter); + /* + * Now retry read with the IOCB_WAITQ parts set in the iocb. If + * we get -EIOCBQUEUED, then we'll get a notification when the diff --git a/queue-5.15/series b/queue-5.15/series index 2380863122a..1a427f220ed 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -613,3 +613,8 @@ fsi-master-ast-cf-fix-missing-of_node_put-in-fsi_mas.patch clk-bcm2835-make-peripheral-pllc-critical.patch clk-bcm2835-round-uart-input-clock-up.patch perf-intel-pt-fix-segfault-in-intel_pt_print_info-with-uclibc.patch +io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch +io_uring-correct-pinned_vm-accounting.patch +io_uring-rw-fix-short-rw-error-handling.patch +io_uring-rw-fix-error-ed-retry-return-values.patch +io_uring-rw-fix-unexpected-link-breakage.patch