From: Greg Kroah-Hartman
Date: Tue, 7 Mar 2023 09:30:19 +0000 (+0100)
Subject: 5.10-stable patches
X-Git-Tag: v6.2.3~54
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=51c307e1d66deeaaa423905a649ffb8bdf7d1dae;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
	io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
	io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
	io_uring-mark-task-task_running-before-handling-resume-task-work.patch
	io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
	io_uring-remove-msg_nosignal-from-recvmsg.patch
	io_uring-rsrc-disallow-multi-source-reg-buffers.patch
---

diff --git a/queue-5.10/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch b/queue-5.10/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
new file mode 100644
index 00000000000..3baf92406b4
--- /dev/null
+++ b/queue-5.10/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
@@ -0,0 +1,81 @@
+From a81785e05c0074f1fcba1227a14e49ea046fb62d Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:18:27 -0700
+Subject: io_uring: add a conditional reschedule to the IOPOLL cancelation loop
+
+From: Jens Axboe
+
+commit fcc926bb857949dbfa51a7d95f3f5ebc657f198c upstream.
+
+If the kernel is configured with CONFIG_PREEMPT_NONE, we could be
+sitting in a tight loop reaping events but not giving them a chance to
+finish. This results in a trace ala:
+
+rcu: INFO: rcu_sched self-detected stall on CPU
+rcu: 2-...!: (5249 ticks this GP) idle=935c/1/0x4000000000000000 softirq=4265/4274 fqs=1
+ (t=5251 jiffies g=465 q=4135 ncpus=4)
+rcu: rcu_sched kthread starved for 5249 jiffies! g465 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
+rcu: Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior.
+rcu: RCU grace-period kthread stack dump:
+task:rcu_sched state:R running task stack:0 pid:12 ppid:2 flags:0x00000008
+Call trace:
+ __switch_to+0xb0/0xc8
+ __schedule+0x43c/0x520
+ schedule+0x4c/0x98
+ schedule_timeout+0xbc/0xdc
+ rcu_gp_fqs_loop+0x308/0x344
+ rcu_gp_kthread+0xd8/0xf0
+ kthread+0xb8/0xc8
+ ret_from_fork+0x10/0x20
+rcu: Stack dump where RCU GP kthread last ran:
+Task dump for CPU 0:
+task:kworker/u8:10 state:R running task stack:0 pid:89 ppid:2 flags:0x0000000a
+Workqueue: events_unbound io_ring_exit_work
+Call trace:
+ __switch_to+0xb0/0xc8
+ 0xffff0000c8fefd28
+CPU: 2 PID: 95 Comm: kworker/u8:13 Not tainted 6.2.0-rc5-00042-g40316e337c80-dirty #2759
+Hardware name: linux,dummy-virt (DT)
+Workqueue: events_unbound io_ring_exit_work
+pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+pc : io_do_iopoll+0x344/0x360
+lr : io_do_iopoll+0xb8/0x360
+sp : ffff800009bebc60
+x29: ffff800009bebc60 x28: 0000000000000000 x27: 0000000000000000
+x26: ffff0000c0f67d48 x25: ffff0000c0f67840 x24: ffff800008950024
+x23: 0000000000000001 x22: 0000000000000000 x21: ffff0000c27d3200
+x20: ffff0000c0f67840 x19: ffff0000c0f67800 x18: 0000000000000000
+x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
+x14: 0000000000000001 x13: 0000000000000001 x12: 0000000000000000
+x11: 0000000000000179 x10: 0000000000000870 x9 : ffff800009bebd60
+x8 : ffff0000c27d3ad0 x7 : fefefefefefefeff x6 : 0000646e756f626e
+x5 : ffff0000c0f67840 x4 : 0000000000000000 x3 : ffff0000c2398000
+x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
+Call trace:
+ io_do_iopoll+0x344/0x360
+ io_uring_try_cancel_requests+0x21c/0x334
+ io_ring_exit_work+0x90/0x40c
+ process_one_work+0x1a4/0x254
+ worker_thread+0x1ec/0x258
+ kthread+0xb8/0xc8
+ ret_from_fork+0x10/0x20
+
+Add a cond_resched() in the cancelation IOPOLL loop to fix this.
+
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9690,6 +9690,7 @@ static void io_uring_try_cancel_requests
+ 	while (!list_empty_careful(&ctx->iopoll_list)) {
+ 		io_iopoll_try_reap_events(ctx);
+ 		ret = true;
++		cond_resched();
+ 	}
+ }
+
diff --git a/queue-5.10/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch b/queue-5.10/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
new file mode 100644
index 00000000000..44e54c3ce2d
--- /dev/null
+++ b/queue-5.10/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
@@ -0,0 +1,38 @@
+From 6c2a4a822d8e9862615f51037ffa2ee9668a7850 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:15:06 -0700
+Subject: io_uring: handle TIF_NOTIFY_RESUME when checking for task_work
+
+From: Jens Axboe
+
+commit b5d3ae202fbfe055aa2a8ae8524531ee1dcab717 upstream.
+
+If TIF_NOTIFY_RESUME is set, then we need to call resume_user_mode_work()
+for PF_IO_WORKER threads. They never return to usermode, hence never get
+a chance to process any items that are marked by this flag. Most notably
+this includes the final put of files, but also any throttling markers set
+by block cgroups.
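+
+A minimal sketch of the 5.10 shape of this check (an illustration, not
+upstream wording): resume_user_mode_work() only exists in newer kernels,
+so the backport below uses the older tracehook_notify_resume() helper to
+the same effect:
+
+	/* sketch: 5.10 stand-in for resume_user_mode_work(NULL) */
+	if (current->flags & PF_IO_WORKER &&
+	    test_thread_flag(TIF_NOTIFY_RESUME))
+		tracehook_notify_resume(NULL);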
+
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2460,6 +2460,13 @@ static inline unsigned int io_put_rw_kbu
+
+ static inline bool io_run_task_work(void)
+ {
++	/*
++	 * PF_IO_WORKER never returns to userspace, so check here if we have
++	 * notify work that needs processing.
++	 */
++	if (current->flags & PF_IO_WORKER &&
++	    test_thread_flag(TIF_NOTIFY_RESUME))
++		tracehook_notify_resume(NULL);
+ 	if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+ 		__set_current_state(TASK_RUNNING);
+ 		tracehook_notify_signal();
diff --git a/queue-5.10/io_uring-mark-task-task_running-before-handling-resume-task-work.patch b/queue-5.10/io_uring-mark-task-task_running-before-handling-resume-task-work.patch
new file mode 100644
index 00000000000..092debd2d9b
--- /dev/null
+++ b/queue-5.10/io_uring-mark-task-task_running-before-handling-resume-task-work.patch
@@ -0,0 +1,39 @@
+From 29b1662142a87b9af6742702730f1bfa255a9dc3 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:16:38 -0700
+Subject: io_uring: mark task TASK_RUNNING before handling resume/task work
+
+From: Jens Axboe
+
+commit 2f2bb1ffc9983e227424d0787289da5483b0c74f upstream.
+
+Just like for task_work, set the task mode to TASK_RUNNING before doing
+potential resume work. We're not holding any locks at this point,
+but we may have already set the task state to TASK_INTERRUPTIBLE in
+preparation for going to sleep waiting for events. Ensure that we set it
+back to TASK_RUNNING if we have work to process, to avoid warnings on
+calling blocking operations with !TASK_RUNNING.
+
+Fixes: b5d3ae202fbf ("io_uring: handle TIF_NOTIFY_RESUME when checking for task_work")
+Reported-by: kernel test robot
+Link: https://lore.kernel.org/oe-lkp/202302062208.24d3e563-oliver.sang@intel.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2465,8 +2465,10 @@ static inline bool io_run_task_work(void
+ 	 * notify work that needs processing.
+ 	 */
+ 	if (current->flags & PF_IO_WORKER &&
+-	    test_thread_flag(TIF_NOTIFY_RESUME))
++	    test_thread_flag(TIF_NOTIFY_RESUME)) {
++		__set_current_state(TASK_RUNNING);
+ 		tracehook_notify_resume(NULL);
++	}
+ 	if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+ 		__set_current_state(TASK_RUNNING);
+ 		tracehook_notify_signal();
diff --git a/queue-5.10/io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch b/queue-5.10/io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
new file mode 100644
index 00000000000..6b619808409
--- /dev/null
+++ b/queue-5.10/io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
@@ -0,0 +1,85 @@
+From f25af723ed67f5912b7b66353ce8634c612125de Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:28:57 -0700
+Subject: io_uring/poll: allow some retries for poll triggering spuriously
+
+From: Jens Axboe
+
+commit c16bda37594f83147b167d381d54c010024efecf upstream.
+
+If we get woken spuriously when polling and fail the operation with
+-EAGAIN again, then we generally only allow polling again if data
+had been transferred at some point. This is indicated with
+REQ_F_PARTIAL_IO. However, if the spurious poll triggers when the socket
+was originally empty, then we haven't transferred data yet and we will
+fail the poll re-arm. This either punts the socket to io-wq if it's
+blocking, or it fails the request with -EAGAIN if not. Neither condition
+is desirable, as the former will slow things down, while the latter
+will confuse the application.
+
+We want to ensure that a repeated poll trigger doesn't lead to infinite
+work making no progress; that's what the REQ_F_PARTIAL_IO check was
+for. But it doesn't protect against a loop post the first receive, and
+it's unnecessarily strict if we started out with an empty socket.
+
+Add a somewhat random retry count, just to put an upper limit on the
+potential number of retries that will be done. This should be high enough
+that we won't really hit it in practice, unless something needs to be
+aborted anyway.
+
+Cc: stable@vger.kernel.org # v5.10+
+Link: https://github.com/axboe/liburing/issues/364
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -486,6 +486,7 @@ struct io_poll_iocb {
+ 	struct file *file;
+ 	struct wait_queue_head *head;
+ 	__poll_t events;
++	int retries;
+ 	struct wait_queue_entry wait;
+ };
+
+@@ -5749,6 +5750,14 @@ enum {
+ 	IO_APOLL_READY
+ };
+
++/*
++ * We can't reliably detect loops in repeated poll triggers and issue
++ * subsequently failing. But rather than fail these immediately, allow a
++ * certain amount of retries before we give up. Given that this condition
++ * should _rarely_ trigger even once, we should be fine with a larger value.
++ */
++#define APOLL_MAX_RETRY 128
++
+ static int io_arm_poll_handler(struct io_kiocb *req)
+ {
+ 	const struct io_op_def *def = &io_op_defs[req->opcode];
+
+@@ -5760,8 +5769,6 @@ static int io_arm_poll_handler(struct io
+
+ 	if (!req->file || !file_can_poll(req->file))
+ 		return IO_APOLL_ABORTED;
+-	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
+-		return IO_APOLL_ABORTED;
+ 	if (!def->pollin && !def->pollout)
+ 		return IO_APOLL_ABORTED;
+
+@@ -5779,8 +5786,13 @@ static int io_arm_poll_handler(struct io
+ 	if (req->flags & REQ_F_POLLED) {
+ 		apoll = req->apoll;
+ 		kfree(apoll->double_poll);
++		if (unlikely(!--apoll->poll.retries)) {
++			apoll->double_poll = NULL;
++			return IO_APOLL_ABORTED;
++		}
+ 	} else {
+ 		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
++		apoll->poll.retries = APOLL_MAX_RETRY;
+ 	}
+ 	if (unlikely(!apoll))
+ 		return IO_APOLL_ABORTED;
diff --git a/queue-5.10/io_uring-remove-msg_nosignal-from-recvmsg.patch b/queue-5.10/io_uring-remove-msg_nosignal-from-recvmsg.patch
new file mode 100644
index 00000000000..c37bd8beae0
--- /dev/null
+++ b/queue-5.10/io_uring-remove-msg_nosignal-from-recvmsg.patch
@@ -0,0 +1,39 @@
+From 2378bb220f59f57b73dbcbc835b2f8d7acbec382 Mon Sep 17 00:00:00 2001
+From: David Lamparter
+Date: Mon, 6 Mar 2023 13:23:06 -0700
+Subject: io_uring: remove MSG_NOSIGNAL from recvmsg
+
+From: David Lamparter
+
+commit 7605c43d67face310b4b87dee1a28bc0c8cd8c0f upstream.
+
+MSG_NOSIGNAL is not applicable for the receiving side, SIGPIPE is
+generated when trying to write to a "broken pipe". AF_PACKET's
+packet_recvmsg() does enforce this, giving back EINVAL when MSG_NOSIGNAL
+is set - making it unusable in io_uring's recvmsg.
+
+Remove MSG_NOSIGNAL from io_recvmsg_prep().
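+
+A minimal sketch of the user-visible effect, assuming liburing, an
+already set up ring and msghdr, and a hypothetical AF_PACKET socket
+packet_fd (illustration only, not part of the change):
+
+	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+	/* the application requests no flags at all */
+	io_uring_prep_recvmsg(sqe, packet_fd, &msg, 0);
+	io_uring_submit(&ring);
+	/* without this patch, the CQE completes with -EINVAL, because
+	 * io_recvmsg_prep() OR'ed MSG_NOSIGNAL into sr->msg_flags and
+	 * packet_recvmsg() rejects that flag */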
+
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: David Lamparter
+Cc: Eric Dumazet
+Cc: Jens Axboe
+Reviewed-by: Eric Dumazet
+Link: https://lore.kernel.org/r/20230224150123.128346-1-equinox@diac24.net
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4995,7 +4995,7 @@ static int io_recvmsg_prep(struct io_kio
+ 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ 	sr->len = READ_ONCE(sqe->len);
+ 	sr->bgid = READ_ONCE(sqe->buf_group);
+-	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
++	sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ 	if (sr->msg_flags & MSG_DONTWAIT)
+ 		req->flags |= REQ_F_NOWAIT;
+
diff --git a/queue-5.10/io_uring-rsrc-disallow-multi-source-reg-buffers.patch b/queue-5.10/io_uring-rsrc-disallow-multi-source-reg-buffers.patch
new file mode 100644
index 00000000000..5d42292609e
--- /dev/null
+++ b/queue-5.10/io_uring-rsrc-disallow-multi-source-reg-buffers.patch
@@ -0,0 +1,48 @@
+From 8443c73c6ce543ebb30e2a432b211c015dfb4949 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Mon, 6 Mar 2023 13:21:40 -0700
+Subject: io_uring/rsrc: disallow multi-source reg buffers
+
+From: Pavel Begunkov
+
+commit edd478269640b360c6f301f2baa04abdda563ef3 upstream.
+
+If two or more mappings go back to back to each other they can be passed
+into io_uring to be registered as a single registered buffer. That would
+even work if mappings came from different sources, e.g. it's possible to
+mix in this way anon pages and pages from shmem or hugetlb. That is not
+a problem but it'd be less error-prone if we forbid such mixing.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Pavel Begunkov
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9057,14 +9057,17 @@ static int io_sqe_buffer_register(struct
+ 	pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+ 			      pages, vmas);
+ 	if (pret == nr_pages) {
++		struct file *file = vmas[0]->vm_file;
++
+ 		/* don't support file backed memory */
+ 		for (i = 0; i < nr_pages; i++) {
+-			struct vm_area_struct *vma = vmas[i];
+-
+-			if (vma_is_shmem(vma))
++			if (vmas[i]->vm_file != file) {
++				ret = -EINVAL;
++				break;
++			}
++			if (!file)
+ 				continue;
+-			if (vma->vm_file &&
+-			    !is_file_hugepages(vma->vm_file)) {
++			if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) {
+ 				ret = -EOPNOTSUPP;
+ 				break;
+ 			}
diff --git a/queue-5.10/series b/queue-5.10/series
index 0a92f8e2ef1..4bc1197ea68 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -373,3 +373,9 @@ ima-align-ima_file_mmap-parameters-with-mmap_file-lsm-hook.patch
 irqdomain-fix-association-race.patch
 irqdomain-fix-disassociation-race.patch
 irqdomain-drop-bogus-fwspec-mapping-error-handling.patch
+io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
+io_uring-mark-task-task_running-before-handling-resume-task-work.patch
+io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
+io_uring-rsrc-disallow-multi-source-reg-buffers.patch
+io_uring-remove-msg_nosignal-from-recvmsg.patch
+io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
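
For the rsrc change above, a minimal userspace sketch of what is now
rejected (assuming liburing, an initialized ring, a memfd_create() fd in
memfd, and pgsz equal to the page size; illustration only, error handling
elided):

	/* two adjacent pages: page 0 anonymous, page 1 shmem-backed */
	char *base = mmap(NULL, 2 * pgsz, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	mmap(base + pgsz, pgsz, PROT_READ | PROT_WRITE,
	     MAP_SHARED | MAP_FIXED, memfd, 0);

	struct iovec iov = { .iov_base = base, .iov_len = 2 * pgsz };
	/* one registered buffer spanning both mappings: previously
	 * accepted, now fails with -EINVAL as a mixed-source buffer */
	int ret = io_uring_register_buffers(&ring, &iov, 1);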