From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 17 Oct 2022 09:43:47 +0000 (+0200)
Subject: 5.15-stable patches
X-Git-Tag: v5.4.219~33
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e108a477966f9d0e1e124788d541a188f94789aa;p=thirdparty%2Fkernel%2Fstable-queue.git

5.15-stable patches

added patches:
	io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch
	io_uring-correct-pinned_vm-accounting.patch
	io_uring-rw-fix-error-ed-retry-return-values.patch
	io_uring-rw-fix-short-rw-error-handling.patch
	io_uring-rw-fix-unexpected-link-breakage.patch
---

diff --git a/queue-5.15/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch b/queue-5.15/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch
new file mode 100644
index 00000000000..c7ef3ac361a
--- /dev/null
+++ b/queue-5.15/io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch
@@ -0,0 +1,102 @@
+From foo@baz Mon Oct 17 11:43:27 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 16 Oct 2022 22:42:54 +0100
+Subject: io_uring/af_unix: defer registered files gc to io_uring release
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <a83df723eee66917424c39b62e5aec3871ddc481.1665954636.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 ]
+
+Instead of putting io_uring's registered files in unix_gc() we want it
+to be done by io_uring itself. The trick here is to consider io_uring
+registered files for cycle detection but not actually put them down.
+Because io_uring can't register other ring instances, this will remove
+all refs to the ring file, triggering the ->release path and cleaning up
+with io_ring_ctx_free().
+
+Cc: stable@vger.kernel.org
+Fixes: 6b06314c47e1 ("io_uring: add file set registration")
+Reported-and-tested-by: David Bouman <dbouman03@gmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+[axboe: add kerneldoc comment to skb, fold in skb leak fix]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c          |    1 +
+ include/linux/skbuff.h |    2 ++
+ net/unix/garbage.c     |   20 ++++++++++++++++++++
+ 3 files changed, 23 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -8066,6 +8066,7 @@ static int __io_sqe_files_scm(struct io_
+ 	}
+ 
+ 	skb->sk = sk;
++	skb->scm_io_uring = 1;
+ 
+ 	nr_files = 0;
+ 	fpl->user = get_uid(current_user());
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -725,6 +725,7 @@ typedef unsigned char *sk_buff_data_t;
+  *	@csum_level: indicates the number of consecutive checksums found in
+  *		the packet minus one that have been verified as
+  *		CHECKSUM_UNNECESSARY (max 3)
++ *	@scm_io_uring: SKB holds io_uring registered files
+  *	@dst_pending_confirm: need to confirm neighbour
+  *	@decrypted: Decrypted SKB
+  *	@slow_gro: state present at GRO time, slower prepare step required
+@@ -910,6 +911,7 @@ struct sk_buff {
+ 	__u8			decrypted:1;
+ #endif
+ 	__u8			slow_gro:1;
++	__u8			scm_io_uring:1;
+ 
+ #ifdef CONFIG_NET_SCHED
+ 	__u16			tc_index;	/* traffic control index */
+--- a/net/unix/garbage.c
++++ b/net/unix/garbage.c
+@@ -204,6 +204,7 @@ void wait_for_unix_gc(void)
+ /* The external entry point: unix_gc() */
+ void unix_gc(void)
+ {
++	struct sk_buff *next_skb, *skb;
+ 	struct unix_sock *u;
+ 	struct unix_sock *next;
+ 	struct sk_buff_head hitlist;
+@@ -297,11 +298,30 @@ void unix_gc(void)
+ 
+ 	spin_unlock(&unix_gc_lock);
+ 
++	/* We need io_uring to clean its registered files, ignore all io_uring
++	 * originated skbs. It's fine as io_uring doesn't keep references to
++	 * other io_uring instances and so killing all other files in the cycle
++	 * will put all io_uring references forcing it to go through normal
++	 * ->release path, eventually putting registered files.
++	 */
++	skb_queue_walk_safe(&hitlist, skb, next_skb) {
++		if (skb->scm_io_uring) {
++			__skb_unlink(skb, &hitlist);
++			skb_queue_tail(&skb->sk->sk_receive_queue, skb);
++		}
++	}
++
+ 	/* Here we are. Hitlist is filled. Die. */
+ 	__skb_queue_purge(&hitlist);
+ 
+ 	spin_lock(&unix_gc_lock);
+ 
++	/* There could be io_uring registered files, just push them back to
++	 * the inflight list
++	 */
++	list_for_each_entry_safe(u, next, &gc_candidates, link)
++		list_move_tail(&u->link, &gc_inflight_list);
++
+ 	/* All candidates should have been detached by now. */
+ 	BUG_ON(!list_empty(&gc_candidates));
+ 
diff --git a/queue-5.15/io_uring-correct-pinned_vm-accounting.patch b/queue-5.15/io_uring-correct-pinned_vm-accounting.patch
new file mode 100644
index 00000000000..1de92e115d2
--- /dev/null
+++ b/queue-5.15/io_uring-correct-pinned_vm-accounting.patch
@@ -0,0 +1,51 @@
+From foo@baz Mon Oct 17 11:43:27 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 16 Oct 2022 22:42:55 +0100
+Subject: io_uring: correct pinned_vm accounting
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <01e0607d0e179bae74e60809bc9e805369205132.1665954636.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 42b6419d0aba47c5d8644cdc0b68502254671de5 ]
+
+->mm_account should be released only after we free all registered
+buffers, otherwise __io_sqe_buffers_unregister() will see a NULL
+->mm_account and skip locked_vm accounting.
+
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -9303,11 +9303,6 @@ static void io_ring_ctx_free(struct io_r
+ {
+ 	io_sq_thread_finish(ctx);
+ 
+-	if (ctx->mm_account) {
+-		mmdrop(ctx->mm_account);
+-		ctx->mm_account = NULL;
+-	}
+-
+ 	/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
+ 	io_wait_rsrc_data(ctx->buf_data);
+ 	io_wait_rsrc_data(ctx->file_data);
+@@ -9343,6 +9338,11 @@ static void io_ring_ctx_free(struct io_r
+ #endif
+ 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
+ 
++	if (ctx->mm_account) {
++		mmdrop(ctx->mm_account);
++		ctx->mm_account = NULL;
++	}
++
+ 	io_mem_free(ctx->rings);
+ 	io_mem_free(ctx->sq_sqes);
+ 
diff --git a/queue-5.15/io_uring-rw-fix-error-ed-retry-return-values.patch b/queue-5.15/io_uring-rw-fix-error-ed-retry-return-values.patch
new file mode 100644
index 00000000000..207c5204745
--- /dev/null
+++ b/queue-5.15/io_uring-rw-fix-error-ed-retry-return-values.patch
@@ -0,0 +1,38 @@
+From foo@baz Mon Oct 17 11:43:27 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 16 Oct 2022 22:42:57 +0100
+Subject: io_uring/rw: fix error'ed retry return values
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <ebd1fd870c2f7048613f85fcdf3934f15301c1a7.1665954636.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 62bb0647b14646fa6c9aa25ecdf67ad18f13523c ]
+
+Kernel test robot reports that we test negativity of an unsigned in
+io_fixup_rw_res() after a recent change, which masks error codes and
+messes up the return value in case I/O is retried and fails with
+an error.
+
+Fixes: 4d9cb92ca41dd ("io_uring/rw: fix short rw error handling")
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/9754a0970af1861e7865f9014f735c70dc60bf79.1663071587.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2701,7 +2701,7 @@ static bool __io_complete_rw_common(stru
+ 	return false;
+ }
+ 
+-static inline unsigned io_fixup_rw_res(struct io_kiocb *req, unsigned res)
++static inline int io_fixup_rw_res(struct io_kiocb *req, unsigned res)
+ {
+ 	struct io_async_rw *io = req->async_data;
+ 
diff --git a/queue-5.15/io_uring-rw-fix-short-rw-error-handling.patch b/queue-5.15/io_uring-rw-fix-short-rw-error-handling.patch
new file mode 100644
index 00000000000..b59f113086c
--- /dev/null
+++ b/queue-5.15/io_uring-rw-fix-short-rw-error-handling.patch
@@ -0,0 +1,102 @@
+From foo@baz Mon Oct 17 11:43:27 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 16 Oct 2022 22:42:56 +0100
+Subject: io_uring/rw: fix short rw error handling
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <6592121a38f7ee5834ce0691b1f85d54fcea3cfa.1665954636.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 89473c1a9205760c4fa6d158058da7b594a815f0 ]
+
+We have a couple of problems. First, there are reports of unexpected link
+breakage for reads when cqe->res indicates that the IO was done in full.
+The reason here is partial IO with retries.
+
+TL;DR; we compare the result in __io_complete_rw_common() against
+req->cqe.res, but req->cqe.res doesn't store the full length but rather
+the length left to be done. So, when we pass the full corrected result
+via kiocb_done() -> __io_complete_rw_common(), it fails.
+
+The second problem is that we don't try to correct res in
+io_complete_rw(), which, for instance, might be a problem for O_DIRECT
+when a prefix of the data was cached in the page cache. We also
+definitely don't want to pass a corrected result into io_rw_done().
+
+The fix here is to leave __io_complete_rw_common() alone, always pass
+the uncorrected result into it and fix it up as the last step just before
+actually finishing the I/O.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -2701,6 +2701,20 @@ static bool __io_complete_rw_common(stru
+ 	return false;
+ }
+ 
++static inline unsigned io_fixup_rw_res(struct io_kiocb *req, unsigned res)
++{
++	struct io_async_rw *io = req->async_data;
++
++	/* add previously done IO, if any */
++	if (io && io->bytes_done > 0) {
++		if (res < 0)
++			res = io->bytes_done;
++		else
++			res += io->bytes_done;
++	}
++	return res;
++}
++
+ static void io_req_task_complete(struct io_kiocb *req, bool *locked)
+ {
+ 	unsigned int cflags = io_put_rw_kbuf(req);
+@@ -2724,7 +2738,7 @@ static void __io_complete_rw(struct io_k
+ {
+ 	if (__io_complete_rw_common(req, res))
+ 		return;
+-	__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
++	__io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req));
+ }
+ 
+ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+@@ -2733,7 +2747,7 @@ static void io_complete_rw(struct kiocb
+ 
+ 	if (__io_complete_rw_common(req, res))
+ 		return;
+-	req->result = res;
++	req->result = io_fixup_rw_res(req, res);
+ 	req->io_task_work.func = io_req_task_complete;
+ 	io_req_task_work_add(req);
+ }
+@@ -2979,15 +2993,6 @@ static void kiocb_done(struct kiocb *kio
+ 		       unsigned int issue_flags)
+ {
+ 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+-	struct io_async_rw *io = req->async_data;
+-
+-	/* add previously done IO, if any */
+-	if (io && io->bytes_done > 0) {
+-		if (ret < 0)
+-			ret = io->bytes_done;
+-		else
+-			ret += io->bytes_done;
+-	}
+ 
+ 	if (req->flags & REQ_F_CUR_POS)
+ 		req->file->f_pos = kiocb->ki_pos;
+@@ -3004,6 +3009,7 @@ static void kiocb_done(struct kiocb *kio
+ 			unsigned int cflags = io_put_rw_kbuf(req);
+ 			struct io_ring_ctx *ctx = req->ctx;
+ 
++			ret = io_fixup_rw_res(req, ret);
+ 			req_set_fail(req);
+ 			if (!(issue_flags & IO_URING_F_NONBLOCK)) {
+ 				mutex_lock(&ctx->uring_lock);
diff --git a/queue-5.15/io_uring-rw-fix-unexpected-link-breakage.patch b/queue-5.15/io_uring-rw-fix-unexpected-link-breakage.patch
new file mode 100644
index 00000000000..8da4d9287cc
--- /dev/null
+++ b/queue-5.15/io_uring-rw-fix-unexpected-link-breakage.patch
@@ -0,0 +1,41 @@
+From foo@baz Mon Oct 17 11:43:27 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Sun, 16 Oct 2022 22:42:58 +0100
+Subject: io_uring/rw: fix unexpected link breakage
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <1b05243cdfa8135866a6ccc115e491df8d725d16.1665954636.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit bf68b5b34311ee57ed40749a1257a30b46127556 ]
+
+req->cqe.res is set in io_read() to the amount of bytes left to be done,
+which is used to figure out whether to fail a read or not. However,
+io_read() may do another read without returning, and we stash the previous
+value into ->bytes_done but forget to update cqe.res. Then we ask a read
+to do strictly less than cqe.res but expect the return to be exactly
+cqe.res.
+
+Fix the bug by updating cqe.res for retries.
+
+Cc: stable@vger.kernel.org
+Reported-and-Tested-by: Beld Zhang <beldzhang@gmail.com>
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/3a1088440c7be98e5800267af922a67da0ef9f13.1664235732.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -3606,6 +3606,7 @@ static int io_read(struct io_kiocb *req,
+ 			return -EAGAIN;
+ 		}
+ 
++		req->result = iov_iter_count(iter);
+ 		/*
+ 		 * Now retry read with the IOCB_WAITQ parts set in the iocb. If
+ 		 * we get -EIOCBQUEUED, then we'll get a notification when the
diff --git a/queue-5.15/series b/queue-5.15/series
index 2380863122a..1a427f220ed 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -613,3 +613,8 @@ fsi-master-ast-cf-fix-missing-of_node_put-in-fsi_mas.patch
 clk-bcm2835-make-peripheral-pllc-critical.patch
 clk-bcm2835-round-uart-input-clock-up.patch
 perf-intel-pt-fix-segfault-in-intel_pt_print_info-with-uclibc.patch
+io_uring-af_unix-defer-registered-files-gc-to-io_uring-release.patch
+io_uring-correct-pinned_vm-accounting.patch
+io_uring-rw-fix-short-rw-error-handling.patch
+io_uring-rw-fix-error-ed-retry-return-values.patch
+io_uring-rw-fix-unexpected-link-breakage.patch