From: Greg Kroah-Hartman
Date: Tue, 7 Mar 2023 09:30:19 +0000 (+0100)
Subject: 5.10-stable patches
X-Git-Tag: v6.2.3~54
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=51c307e1d66deeaaa423905a649ffb8bdf7d1dae;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
	io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
	io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
	io_uring-mark-task-task_running-before-handling-resume-task-work.patch
	io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
	io_uring-remove-msg_nosignal-from-recvmsg.patch
	io_uring-rsrc-disallow-multi-source-reg-buffers.patch
---

diff --git a/queue-5.10/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch b/queue-5.10/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
new file mode 100644
index 00000000000..3baf92406b4
--- /dev/null
+++ b/queue-5.10/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
@@ -0,0 +1,81 @@
+From a81785e05c0074f1fcba1227a14e49ea046fb62d Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:18:27 -0700
+Subject: io_uring: add a conditional reschedule to the IOPOLL cancelation loop
+
+From: Jens Axboe
+
+commit fcc926bb857949dbfa51a7d95f3f5ebc657f198c upstream.
+
+If the kernel is configured with CONFIG_PREEMPT_NONE, we could be
+sitting in a tight loop reaping events but not giving them a chance to
+finish. This results in a trace ala:
+
+rcu: INFO: rcu_sched self-detected stall on CPU
+rcu: 2-...!: (5249 ticks this GP) idle=935c/1/0x4000000000000000 softirq=4265/4274 fqs=1
+ (t=5251 jiffies g=465 q=4135 ncpus=4)
+rcu: rcu_sched kthread starved for 5249 jiffies! g465 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
+rcu: Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior.
+rcu: RCU grace-period kthread stack dump:
+task:rcu_sched state:R running task stack:0 pid:12 ppid:2 flags:0x00000008
+Call trace:
+ __switch_to+0xb0/0xc8
+ __schedule+0x43c/0x520
+ schedule+0x4c/0x98
+ schedule_timeout+0xbc/0xdc
+ rcu_gp_fqs_loop+0x308/0x344
+ rcu_gp_kthread+0xd8/0xf0
+ kthread+0xb8/0xc8
+ ret_from_fork+0x10/0x20
+rcu: Stack dump where RCU GP kthread last ran:
+Task dump for CPU 0:
+task:kworker/u8:10 state:R running task stack:0 pid:89 ppid:2 flags:0x0000000a
+Workqueue: events_unbound io_ring_exit_work
+Call trace:
+ __switch_to+0xb0/0xc8
+ 0xffff0000c8fefd28
+CPU: 2 PID: 95 Comm: kworker/u8:13 Not tainted 6.2.0-rc5-00042-g40316e337c80-dirty #2759
+Hardware name: linux,dummy-virt (DT)
+Workqueue: events_unbound io_ring_exit_work
+pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+pc : io_do_iopoll+0x344/0x360
+lr : io_do_iopoll+0xb8/0x360
+sp : ffff800009bebc60
+x29: ffff800009bebc60 x28: 0000000000000000 x27: 0000000000000000
+x26: ffff0000c0f67d48 x25: ffff0000c0f67840 x24: ffff800008950024
+x23: 0000000000000001 x22: 0000000000000000 x21: ffff0000c27d3200
+x20: ffff0000c0f67840 x19: ffff0000c0f67800 x18: 0000000000000000
+x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
+x14: 0000000000000001 x13: 0000000000000001 x12: 0000000000000000
+x11: 0000000000000179 x10: 0000000000000870 x9 : ffff800009bebd60
+x8 : ffff0000c27d3ad0 x7 : fefefefefefefeff x6 : 0000646e756f626e
+x5 : ffff0000c0f67840 x4 : 0000000000000000 x3 : ffff0000c2398000
+x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
+Call trace:
+ io_do_iopoll+0x344/0x360
+ io_uring_try_cancel_requests+0x21c/0x334
+ io_ring_exit_work+0x90/0x40c
+ process_one_work+0x1a4/0x254
+ worker_thread+0x1ec/0x258
+ kthread+0xb8/0xc8
+ ret_from_fork+0x10/0x20
+
+Add a cond_resched() in the cancelation IOPOLL loop to fix this.
+
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9690,6 +9690,7 @@ static void io_uring_try_cancel_requests
+ 	while (!list_empty_careful(&ctx->iopoll_list)) {
+ 		io_iopoll_try_reap_events(ctx);
+ 		ret = true;
++		cond_resched();
+ 	}
+ }
+
diff --git a/queue-5.10/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch b/queue-5.10/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
new file mode 100644
index 00000000000..44e54c3ce2d
--- /dev/null
+++ b/queue-5.10/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
@@ -0,0 +1,38 @@
+From 6c2a4a822d8e9862615f51037ffa2ee9668a7850 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:15:06 -0700
+Subject: io_uring: handle TIF_NOTIFY_RESUME when checking for task_work
+
+From: Jens Axboe
+
+commit b5d3ae202fbfe055aa2a8ae8524531ee1dcab717 upstream.
+
+If TIF_NOTIFY_RESUME is set, then we need to call resume_user_mode_work()
+for PF_IO_WORKER threads. They never return to usermode, hence never get
+a chance to process any items that are marked by this flag. Most notably
+this includes the final put of files, but also any throttling markers set
+by block cgroups.
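+
+A minimal sketch of the 5.10 shape of this check (an illustration, not
+upstream wording): resume_user_mode_work() only exists in newer kernels,
+so the backport below uses the older tracehook_notify_resume() helper to
+the same effect:
+
+	/* sketch: 5.10 stand-in for resume_user_mode_work(NULL) */
+	if (current->flags & PF_IO_WORKER &&
+	    test_thread_flag(TIF_NOTIFY_RESUME))
+		tracehook_notify_resume(NULL);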
+
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2460,6 +2460,13 @@ static inline unsigned int io_put_rw_kbu
+
+ static inline bool io_run_task_work(void)
+ {
++	/*
++	 * PF_IO_WORKER never returns to userspace, so check here if we have
++	 * notify work that needs processing.
++	 */
++	if (current->flags & PF_IO_WORKER &&
++	    test_thread_flag(TIF_NOTIFY_RESUME))
++		tracehook_notify_resume(NULL);
+ 	if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+ 		__set_current_state(TASK_RUNNING);
+ 		tracehook_notify_signal();
diff --git a/queue-5.10/io_uring-mark-task-task_running-before-handling-resume-task-work.patch b/queue-5.10/io_uring-mark-task-task_running-before-handling-resume-task-work.patch
new file mode 100644
index 00000000000..092debd2d9b
--- /dev/null
+++ b/queue-5.10/io_uring-mark-task-task_running-before-handling-resume-task-work.patch
@@ -0,0 +1,39 @@
+From 29b1662142a87b9af6742702730f1bfa255a9dc3 Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:16:38 -0700
+Subject: io_uring: mark task TASK_RUNNING before handling resume/task work
+
+From: Jens Axboe
+
+commit 2f2bb1ffc9983e227424d0787289da5483b0c74f upstream.
+
+Just like for task_work, set the task mode to TASK_RUNNING before doing
+potential resume work. We're not holding any locks at this point,
+but we may have already set the task state to TASK_INTERRUPTIBLE in
+preparation for going to sleep waiting for events. Ensure that we set it
+back to TASK_RUNNING if we have work to process, to avoid warnings on
+calling blocking operations with !TASK_RUNNING.
+
+Fixes: b5d3ae202fbf ("io_uring: handle TIF_NOTIFY_RESUME when checking for task_work")
+Reported-by: kernel test robot
+Link: https://lore.kernel.org/oe-lkp/202302062208.24d3e563-oliver.sang@intel.com
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2465,8 +2465,10 @@ static inline bool io_run_task_work(void
+ 	 * notify work that needs processing.
+ 	 */
+ 	if (current->flags & PF_IO_WORKER &&
+-	    test_thread_flag(TIF_NOTIFY_RESUME))
++	    test_thread_flag(TIF_NOTIFY_RESUME)) {
++		__set_current_state(TASK_RUNNING);
+ 		tracehook_notify_resume(NULL);
++	}
+ 	if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+ 		__set_current_state(TASK_RUNNING);
+ 		tracehook_notify_signal();
diff --git a/queue-5.10/io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch b/queue-5.10/io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
new file mode 100644
index 00000000000..6b619808409
--- /dev/null
+++ b/queue-5.10/io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
@@ -0,0 +1,85 @@
+From f25af723ed67f5912b7b66353ce8634c612125de Mon Sep 17 00:00:00 2001
+From: Jens Axboe
+Date: Mon, 6 Mar 2023 13:28:57 -0700
+Subject: io_uring/poll: allow some retries for poll triggering spuriously
+
+From: Jens Axboe
+
+commit c16bda37594f83147b167d381d54c010024efecf upstream.
+
+If we get woken spuriously when polling and fail the operation with
+-EAGAIN again, then we generally only allow polling again if data
+had been transferred at some point. This is indicated with
+REQ_F_PARTIAL_IO. However, if the spurious poll triggers when the socket
+was originally empty, then we haven't transferred data yet and we will
+fail the poll re-arm. This either punts the socket to io-wq if it's
+blocking, or it fails the request with -EAGAIN if not. Neither condition
+is desirable, as the former will slow things down, while the latter
+will confuse the application.
+
+We want to ensure that a repeated poll trigger doesn't lead to infinite
+work making no progress; that's what the REQ_F_PARTIAL_IO check was
+for. But it doesn't protect against a loop post the first receive, and
+it's unnecessarily strict if we started out with an empty socket.
+
+Add a somewhat random retry count, just to put an upper limit on the
+potential number of retries that will be done. This should be high enough
+that we won't really hit it in practice, unless something needs to be
+aborted anyway.
+
+Cc: stable@vger.kernel.org # v5.10+
+Link: https://github.com/axboe/liburing/issues/364
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -486,6 +486,7 @@ struct io_poll_iocb {
+ 	struct file *file;
+ 	struct wait_queue_head *head;
+ 	__poll_t events;
++	int retries;
+ 	struct wait_queue_entry wait;
+ };
+
+@@ -5749,6 +5750,14 @@ enum {
+ 	IO_APOLL_READY
+ };
+
++/*
++ * We can't reliably detect loops in repeated poll triggers and issue
++ * subsequently failing. But rather than fail these immediately, allow a
++ * certain amount of retries before we give up. Given that this condition
++ * should _rarely_ trigger even once, we should be fine with a larger value.
++ */
++#define APOLL_MAX_RETRY 128
++
+ static int io_arm_poll_handler(struct io_kiocb *req)
+ {
+ 	const struct io_op_def *def = &io_op_defs[req->opcode];
+
+@@ -5760,8 +5769,6 @@ static int io_arm_poll_handler(struct io
+
+ 	if (!req->file || !file_can_poll(req->file))
+ 		return IO_APOLL_ABORTED;
+-	if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
+-		return IO_APOLL_ABORTED;
+ 	if (!def->pollin && !def->pollout)
+ 		return IO_APOLL_ABORTED;
+
+@@ -5779,8 +5786,13 @@ static int io_arm_poll_handler(struct io
+ 	if (req->flags & REQ_F_POLLED) {
+ 		apoll = req->apoll;
+ 		kfree(apoll->double_poll);
++		if (unlikely(!--apoll->poll.retries)) {
++			apoll->double_poll = NULL;
++			return IO_APOLL_ABORTED;
++		}
+ 	} else {
+ 		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
++		apoll->poll.retries = APOLL_MAX_RETRY;
+ 	}
+ 	if (unlikely(!apoll))
+ 		return IO_APOLL_ABORTED;
diff --git a/queue-5.10/io_uring-remove-msg_nosignal-from-recvmsg.patch b/queue-5.10/io_uring-remove-msg_nosignal-from-recvmsg.patch
new file mode 100644
index 00000000000..c37bd8beae0
--- /dev/null
+++ b/queue-5.10/io_uring-remove-msg_nosignal-from-recvmsg.patch
@@ -0,0 +1,39 @@
+From 2378bb220f59f57b73dbcbc835b2f8d7acbec382 Mon Sep 17 00:00:00 2001
+From: David Lamparter
+Date: Mon, 6 Mar 2023 13:23:06 -0700
+Subject: io_uring: remove MSG_NOSIGNAL from recvmsg
+
+From: David Lamparter
+
+commit 7605c43d67face310b4b87dee1a28bc0c8cd8c0f upstream.
+
+MSG_NOSIGNAL is not applicable for the receiving side, SIGPIPE is
+generated when trying to write to a "broken pipe". AF_PACKET's
+packet_recvmsg() does enforce this, giving back EINVAL when MSG_NOSIGNAL
+is set - making it unusable in io_uring's recvmsg.
+
+Remove MSG_NOSIGNAL from io_recvmsg_prep().
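+
+A minimal sketch of the user-visible effect, assuming liburing, an
+already set up ring and msghdr, and a hypothetical AF_PACKET socket
+packet_fd (illustration only, not part of the change):
+
+	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+
+	/* the application requests no flags at all */
+	io_uring_prep_recvmsg(sqe, packet_fd, &msg, 0);
+	io_uring_submit(&ring);
+	/* without this patch, the CQE completes with -EINVAL, because
+	 * io_recvmsg_prep() OR'ed MSG_NOSIGNAL into sr->msg_flags and
+	 * packet_recvmsg() rejects that flag */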
+
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: David Lamparter
+Cc: Eric Dumazet
+Cc: Jens Axboe
+Reviewed-by: Eric Dumazet
+Link: https://lore.kernel.org/r/20230224150123.128346-1-equinox@diac24.net
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4995,7 +4995,7 @@ static int io_recvmsg_prep(struct io_kio
+ 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ 	sr->len = READ_ONCE(sqe->len);
+ 	sr->bgid = READ_ONCE(sqe->buf_group);
+-	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
++	sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ 	if (sr->msg_flags & MSG_DONTWAIT)
+ 		req->flags |= REQ_F_NOWAIT;
+
diff --git a/queue-5.10/io_uring-rsrc-disallow-multi-source-reg-buffers.patch b/queue-5.10/io_uring-rsrc-disallow-multi-source-reg-buffers.patch
new file mode 100644
index 00000000000..5d42292609e
--- /dev/null
+++ b/queue-5.10/io_uring-rsrc-disallow-multi-source-reg-buffers.patch
@@ -0,0 +1,48 @@
+From 8443c73c6ce543ebb30e2a432b211c015dfb4949 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Mon, 6 Mar 2023 13:21:40 -0700
+Subject: io_uring/rsrc: disallow multi-source reg buffers
+
+From: Pavel Begunkov
+
+commit edd478269640b360c6f301f2baa04abdda563ef3 upstream.
+
+If two or more mappings go back to back to each other they can be passed
+into io_uring to be registered as a single registered buffer. That would
+even work if mappings came from different sources, e.g. it's possible to
+mix in this way anon pages and pages from shmem or hugetlb. That is not
+a problem but it'd be less error-prone if we forbid such mixing.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Pavel Begunkov
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9057,14 +9057,17 @@ static int io_sqe_buffer_register(struct
+ 	pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+ 			      pages, vmas);
+ 	if (pret == nr_pages) {
++		struct file *file = vmas[0]->vm_file;
++
+ 		/* don't support file backed memory */
+ 		for (i = 0; i < nr_pages; i++) {
+-			struct vm_area_struct *vma = vmas[i];
+-
+-			if (vma_is_shmem(vma))
++			if (vmas[i]->vm_file != file) {
++				ret = -EINVAL;
++				break;
++			}
++			if (!file)
+ 				continue;
+-			if (vma->vm_file &&
+-			    !is_file_hugepages(vma->vm_file)) {
++			if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) {
+ 				ret = -EOPNOTSUPP;
+ 				break;
+ 			}
diff --git a/queue-5.10/series b/queue-5.10/series
index 0a92f8e2ef1..4bc1197ea68 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -373,3 +373,9 @@ ima-align-ima_file_mmap-parameters-with-mmap_file-lsm-hook.patch
 irqdomain-fix-association-race.patch
 irqdomain-fix-disassociation-race.patch
 irqdomain-drop-bogus-fwspec-mapping-error-handling.patch
+io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch
+io_uring-mark-task-task_running-before-handling-resume-task-work.patch
+io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch
+io_uring-rsrc-disallow-multi-source-reg-buffers.patch
+io_uring-remove-msg_nosignal-from-recvmsg.patch
+io_uring-poll-allow-some-retries-for-poll-triggering-spuriously.patch
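
For the rsrc change above, a minimal userspace sketch of what is now
rejected (assuming liburing, an initialized ring, a memfd_create() fd in
memfd, and pgsz equal to the page size; illustration only, error handling
elided):

	/* two adjacent pages: page 0 anonymous, page 1 shmem-backed */
	char *base = mmap(NULL, 2 * pgsz, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	mmap(base + pgsz, pgsz, PROT_READ | PROT_WRITE,
	     MAP_SHARED | MAP_FIXED, memfd, 0);

	struct iovec iov = { .iov_base = base, .iov_len = 2 * pgsz };
	/* one registered buffer spanning both mappings: previously
	 * accepted, now fails with -EINVAL as a mixed-source buffer */
	int ret = io_uring_register_buffers(&ring, &iov, 1);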