From: Greg Kroah-Hartman Date: Mon, 6 Mar 2023 10:50:39 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.2.3~110 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=140623e5b525bc34b13471691e8e816dfe9b717e;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch io_uring-add-reschedule-point-to-handle_tw_list.patch io_uring-fix-fget-leak-when-fs-don-t-support-nowait-buffered-read.patch io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch io_uring-remove-msg_nosignal-from-recvmsg.patch io_uring-replace-0-length-array-with-flexible-array.patch io_uring-rsrc-disallow-multi-source-reg-buffers.patch io_uring-use-user-visible-tail-in-io_uring_poll.patch --- diff --git a/queue-6.1/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch b/queue-6.1/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch new file mode 100644 index 00000000000..7a2d87741c6 --- /dev/null +++ b/queue-6.1/io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch @@ -0,0 +1,81 @@ +From fcc926bb857949dbfa51a7d95f3f5ebc657f198c Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 27 Jan 2023 09:28:13 -0700 +Subject: io_uring: add a conditional reschedule to the IOPOLL cancelation loop + +From: Jens Axboe + +commit fcc926bb857949dbfa51a7d95f3f5ebc657f198c upstream. + +If the kernel is configured with CONFIG_PREEMPT_NONE, we could be +sitting in a tight loop reaping events but not giving them a chance to +finish. This results in a trace ala: + +rcu: INFO: rcu_sched self-detected stall on CPU +rcu: 2-...!: (5249 ticks this GP) idle=935c/1/0x4000000000000000 softirq=4265/4274 fqs=1 + (t=5251 jiffies g=465 q=4135 ncpus=4) +rcu: rcu_sched kthread starved for 5249 jiffies! g465 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0 +rcu: Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior. 
+rcu: RCU grace-period kthread stack dump: +task:rcu_sched state:R running task stack:0 pid:12 ppid:2 flags:0x00000008 +Call trace: + __switch_to+0xb0/0xc8 + __schedule+0x43c/0x520 + schedule+0x4c/0x98 + schedule_timeout+0xbc/0xdc + rcu_gp_fqs_loop+0x308/0x344 + rcu_gp_kthread+0xd8/0xf0 + kthread+0xb8/0xc8 + ret_from_fork+0x10/0x20 +rcu: Stack dump where RCU GP kthread last ran: +Task dump for CPU 0: +task:kworker/u8:10 state:R running task stack:0 pid:89 ppid:2 flags:0x0000000a +Workqueue: events_unbound io_ring_exit_work +Call trace: + __switch_to+0xb0/0xc8 + 0xffff0000c8fefd28 +CPU: 2 PID: 95 Comm: kworker/u8:13 Not tainted 6.2.0-rc5-00042-g40316e337c80-dirty #2759 +Hardware name: linux,dummy-virt (DT) +Workqueue: events_unbound io_ring_exit_work +pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) +pc : io_do_iopoll+0x344/0x360 +lr : io_do_iopoll+0xb8/0x360 +sp : ffff800009bebc60 +x29: ffff800009bebc60 x28: 0000000000000000 x27: 0000000000000000 +x26: ffff0000c0f67d48 x25: ffff0000c0f67840 x24: ffff800008950024 +x23: 0000000000000001 x22: 0000000000000000 x21: ffff0000c27d3200 +x20: ffff0000c0f67840 x19: ffff0000c0f67800 x18: 0000000000000000 +x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 +x14: 0000000000000001 x13: 0000000000000001 x12: 0000000000000000 +x11: 0000000000000179 x10: 0000000000000870 x9 : ffff800009bebd60 +x8 : ffff0000c27d3ad0 x7 : fefefefefefefeff x6 : 0000646e756f626e +x5 : ffff0000c0f67840 x4 : 0000000000000000 x3 : ffff0000c2398000 +x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 +Call trace: + io_do_iopoll+0x344/0x360 + io_uring_try_cancel_requests+0x21c/0x334 + io_ring_exit_work+0x90/0x40c + process_one_work+0x1a4/0x254 + worker_thread+0x1ec/0x258 + kthread+0xb8/0xc8 + ret_from_fork+0x10/0x20 + +Add a cond_resched() in the cancelation IOPOLL loop to fix this. 
+ +Cc: stable@vger.kernel.org # 5.10+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2912,6 +2912,7 @@ static __cold bool io_uring_try_cancel_r + while (!wq_list_empty(&ctx->iopoll_list)) { + io_iopoll_try_reap_events(ctx); + ret = true; ++ cond_resched(); + } + } + diff --git a/queue-6.1/io_uring-add-reschedule-point-to-handle_tw_list.patch b/queue-6.1/io_uring-add-reschedule-point-to-handle_tw_list.patch new file mode 100644 index 00000000000..1c29b1b5394 --- /dev/null +++ b/queue-6.1/io_uring-add-reschedule-point-to-handle_tw_list.patch @@ -0,0 +1,41 @@ +From f58680085478dd292435727210122960d38e8014 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 27 Jan 2023 09:50:31 -0700 +Subject: io_uring: add reschedule point to handle_tw_list() + +From: Jens Axboe + +commit f58680085478dd292435727210122960d38e8014 upstream. + +If CONFIG_PREEMPT_NONE is set and the task_work chains are long, we +could be running into issues blocking others for too long. Add a +reschedule check in handle_tw_list(), and flush the ctx if we need to +reschedule. 
+ +Cc: stable@vger.kernel.org # 5.10+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -1030,10 +1030,16 @@ static unsigned int handle_tw_list(struc + /* if not contended, grab and improve batching */ + *locked = mutex_trylock(&(*ctx)->uring_lock); + percpu_ref_get(&(*ctx)->refs); +- } ++ } else if (!*locked) ++ *locked = mutex_trylock(&(*ctx)->uring_lock); + req->io_task_work.func(req, locked); + node = next; + count++; ++ if (unlikely(need_resched())) { ++ ctx_flush_and_put(*ctx, locked); ++ *ctx = NULL; ++ cond_resched(); ++ } + } + + return count; diff --git a/queue-6.1/io_uring-fix-fget-leak-when-fs-don-t-support-nowait-buffered-read.patch b/queue-6.1/io_uring-fix-fget-leak-when-fs-don-t-support-nowait-buffered-read.patch new file mode 100644 index 00000000000..dfb78eaa4ac --- /dev/null +++ b/queue-6.1/io_uring-fix-fget-leak-when-fs-don-t-support-nowait-buffered-read.patch @@ -0,0 +1,64 @@ +From 54aa7f2330b82884f4a1afce0220add6e8312f8b Mon Sep 17 00:00:00 2001 +From: Joseph Qi +Date: Tue, 28 Feb 2023 12:54:59 +0800 +Subject: io_uring: fix fget leak when fs don't support nowait buffered read + +From: Joseph Qi + +commit 54aa7f2330b82884f4a1afce0220add6e8312f8b upstream. + +Heming reported a BUG when using io_uring doing link-cp on ocfs2. [1] + +The following steps reproduce this BUG: +mount -t ocfs2 /dev/vdc /mnt/ocfs2 +cp testfile /mnt/ocfs2/ +./link-cp /mnt/ocfs2/testfile /mnt/ocfs2/testfile.1 +umount /mnt/ocfs2 + +Then umount will fail, and it outputs: +umount: /mnt/ocfs2: target is busy. + +Tracing umount shows mnt_get_count() does not return the expected value. +After a deep investigation of fget()/fput() in the related code flow, I +finally found that fget() leaks since ocfs2 doesn't support nowait +buffered read. 
+ +io_issue_sqe +|-io_assign_file // do fget() first + |-io_read + |-io_iter_do_read + |-ocfs2_file_read_iter // return -EOPNOTSUPP + |-kiocb_done + |-io_rw_done + |-__io_complete_rw_common // set REQ_F_REISSUE + |-io_resubmit_prep + |-io_req_prep_async // override req->file, leak happens + +This was introduced by commit a196c78b5443 in v5.18. Fix it by not +re-assigning req->file if it has already been assigned. + +[1] https://lore.kernel.org/ocfs2-devel/ab580a75-91c8-d68a-3455-40361be1bfa8@linux.alibaba.com/T/#t + +Fixes: a196c78b5443 ("io_uring: assign non-fixed early for async work") +Cc: +Reported-by: Heming Zhao +Signed-off-by: Joseph Qi +Cc: Xiaoguang Wang +Link: https://lore.kernel.org/r/20230228045459.13524-1-joseph.qi@linux.alibaba.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -1597,7 +1597,7 @@ int io_req_prep_async(struct io_kiocb *r + const struct io_op_def *def = &io_op_defs[req->opcode]; + + /* assign early for deferred execution for non-fixed file */ +- if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE)) ++ if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE) && !req->file) + req->file = io_file_get_normal(req, req->cqe.fd); + if (!def->prep_async) + return 0; diff --git a/queue-6.1/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch b/queue-6.1/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch new file mode 100644 index 00000000000..dd18ef71296 --- /dev/null +++ b/queue-6.1/io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch @@ -0,0 +1,46 @@ +From b5d3ae202fbfe055aa2a8ae8524531ee1dcab717 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Tue, 24 Jan 2023 08:24:25 -0700 +Subject: io_uring: handle TIF_NOTIFY_RESUME when checking for task_work + +From: Jens Axboe + +commit b5d3ae202fbfe055aa2a8ae8524531ee1dcab717 upstream. 
+ +If TIF_NOTIFY_RESUME is set, then we need to call resume_user_mode_work() +for PF_IO_WORKER threads. They never return to usermode, hence never get +a chance to process any items that are marked by this flag. Most notably +this includes the final put of files, but also any throttling markers set +by block cgroups. + +Cc: stable@vger.kernel.org # 5.10+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.h | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/io_uring/io_uring.h ++++ b/io_uring/io_uring.h +@@ -3,6 +3,7 @@ + + #include <linux/errno.h> + #include <linux/lockdep.h> ++#include <linux/resume_user_mode.h> + #include <linux/io_uring_types.h> + #include <uapi/linux/eventpoll.h> + #include "io-wq.h" +@@ -255,6 +256,13 @@ static inline int io_run_task_work(void) + */ + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + clear_notify_signal(); ++ /* ++ * PF_IO_WORKER never returns to userspace, so check here if we have ++ * notify work that needs processing. ++ */ ++ if (current->flags & PF_IO_WORKER && ++ test_thread_flag(TIF_NOTIFY_RESUME)) ++ resume_user_mode_work(NULL); + if (task_work_pending(current)) { + __set_current_state(TASK_RUNNING); + task_work_run(); diff --git a/queue-6.1/io_uring-remove-msg_nosignal-from-recvmsg.patch b/queue-6.1/io_uring-remove-msg_nosignal-from-recvmsg.patch new file mode 100644 index 00000000000..20c31f51680 --- /dev/null +++ b/queue-6.1/io_uring-remove-msg_nosignal-from-recvmsg.patch @@ -0,0 +1,39 @@ +From 7605c43d67face310b4b87dee1a28bc0c8cd8c0f Mon Sep 17 00:00:00 2001 +From: David Lamparter +Date: Fri, 24 Feb 2023 16:01:24 +0100 +Subject: io_uring: remove MSG_NOSIGNAL from recvmsg + +From: David Lamparter + +commit 7605c43d67face310b4b87dee1a28bc0c8cd8c0f upstream. + +MSG_NOSIGNAL is not applicable for the receiving side, SIGPIPE is +generated when trying to write to a "broken pipe". AF_PACKET's +packet_recvmsg() does enforce this, giving back EINVAL when MSG_NOSIGNAL +is set - making it unusable in io_uring's recvmsg. + +Remove MSG_NOSIGNAL from io_recvmsg_prep(). 
+ +Cc: stable@vger.kernel.org # v5.10+ +Signed-off-by: David Lamparter +Cc: Eric Dumazet +Cc: Jens Axboe +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230224150123.128346-1-equinox@diac24.net +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -553,7 +553,7 @@ int io_recvmsg_prep(struct io_kiocb *req + sr->flags = READ_ONCE(sqe->ioprio); + if (sr->flags & ~(RECVMSG_FLAGS)) + return -EINVAL; +- sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; ++ sr->msg_flags = READ_ONCE(sqe->msg_flags); + if (sr->msg_flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + if (sr->msg_flags & MSG_ERRQUEUE) diff --git a/queue-6.1/io_uring-replace-0-length-array-with-flexible-array.patch b/queue-6.1/io_uring-replace-0-length-array-with-flexible-array.patch new file mode 100644 index 00000000000..3ff3316a574 --- /dev/null +++ b/queue-6.1/io_uring-replace-0-length-array-with-flexible-array.patch @@ -0,0 +1,52 @@ +From 36632d062975a9ff4410c90dd6d37922b68d0920 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 5 Jan 2023 11:05:11 -0800 +Subject: io_uring: Replace 0-length array with flexible array + +From: Kees Cook + +commit 36632d062975a9ff4410c90dd6d37922b68d0920 upstream. + +Zero-length arrays are deprecated[1]. Replace struct io_uring_buf_ring's +"bufs" with a flexible array member. (How is the size of this array +verified?) 
Detected with GCC 13, using -fstrict-flex-arrays=3: + +In function 'io_ring_buffer_select', + inlined from 'io_buffer_select' at io_uring/kbuf.c:183:10: +io_uring/kbuf.c:141:23: warning: array subscript 255 is outside the bounds of an interior zero-length array 'struct io_uring_buf[0]' [-Wzero-length-bounds] + 141 | buf = &br->bufs[head]; + | ^~~~~~~~~~~~~~~ +In file included from include/linux/io_uring.h:7, + from io_uring/kbuf.c:10: +include/uapi/linux/io_uring.h: In function 'io_buffer_select': +include/uapi/linux/io_uring.h:628:41: note: while referencing 'bufs' + 628 | struct io_uring_buf bufs[0]; + | ^~~~ + +[1] https://www.kernel.org/doc/html/latest/process/deprecated.html#zero-length-and-one-element-arrays + +Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers") +Cc: Jens Axboe +Cc: Pavel Begunkov +Cc: "Gustavo A. R. Silva" +Cc: stable@vger.kernel.org +Cc: io-uring@vger.kernel.org +Signed-off-by: Kees Cook +Reviewed-by: Gustavo A. R. Silva +Link: https://lore.kernel.org/r/20230105190507.gonna.131-kees@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/io_uring.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/uapi/linux/io_uring.h ++++ b/include/uapi/linux/io_uring.h +@@ -617,7 +617,7 @@ struct io_uring_buf_ring { + __u16 resv3; + __u16 tail; + }; +- struct io_uring_buf bufs[0]; ++ __DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs); + }; + }; + diff --git a/queue-6.1/io_uring-rsrc-disallow-multi-source-reg-buffers.patch b/queue-6.1/io_uring-rsrc-disallow-multi-source-reg-buffers.patch new file mode 100644 index 00000000000..50a947875e7 --- /dev/null +++ b/queue-6.1/io_uring-rsrc-disallow-multi-source-reg-buffers.patch @@ -0,0 +1,48 @@ +From edd478269640b360c6f301f2baa04abdda563ef3 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Wed, 22 Feb 2023 14:36:48 +0000 +Subject: io_uring/rsrc: disallow multi-source reg buffers + +From: Pavel Begunkov + +commit 
edd478269640b360c6f301f2baa04abdda563ef3 upstream. + +If two or more mappings are placed back to back, they can be passed +into io_uring to be registered as a single registered buffer. That would +even work if mappings came from different sources, e.g. it's possible to +mix in this way anon pages and pages from shmem or hugetlb. That is not +a problem, but it would be less error-prone if we forbid such mixing. + +Cc: +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/rsrc.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/io_uring/rsrc.c ++++ b/io_uring/rsrc.c +@@ -1147,14 +1147,17 @@ struct page **io_pin_pages(unsigned long + pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, + pages, vmas); + if (pret == nr_pages) { ++ struct file *file = vmas[0]->vm_file; ++ + /* don't support file backed memory */ + for (i = 0; i < nr_pages; i++) { +- struct vm_area_struct *vma = vmas[i]; +- +- if (vma_is_shmem(vma)) ++ if (vmas[i]->vm_file != file) { ++ ret = -EINVAL; ++ break; ++ } ++ if (!file) + continue; +- if (vma->vm_file && +- !is_file_hugepages(vma->vm_file)) { ++ if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) { + ret = -EOPNOTSUPP; + break; + } diff --git a/queue-6.1/io_uring-use-user-visible-tail-in-io_uring_poll.patch b/queue-6.1/io_uring-use-user-visible-tail-in-io_uring_poll.patch new file mode 100644 index 00000000000..c85e8930927 --- /dev/null +++ b/queue-6.1/io_uring-use-user-visible-tail-in-io_uring_poll.patch @@ -0,0 +1,33 @@ +From c10bb64684813a326174c3eebcafb3ee5af52ca3 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Mon, 23 Jan 2023 14:37:13 +0000 +Subject: io_uring: use user visible tail in io_uring_poll() + +From: Pavel Begunkov + +commit c10bb64684813a326174c3eebcafb3ee5af52ca3 upstream. 
+ +We return POLLIN from io_uring_poll() depending on whether there are +CQEs for the userspace, and so we should use the user visible tail +pointer instead of a transient cached value. + +Cc: stable@vger.kernel.org +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/228ffcbf30ba98856f66ffdb9a6a60ead1dd96c0.1674484266.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2653,7 +2653,7 @@ static __poll_t io_uring_poll(struct fil + * pushs them to do the flush. + */ + +- if (io_cqring_events(ctx) || io_has_work(ctx)) ++ if (__io_cqring_events_user(ctx) || io_has_work(ctx)) + mask |= EPOLLIN | EPOLLRDNORM; + + return mask; diff --git a/queue-6.1/series b/queue-6.1/series index 0a5ef020239..3ff52428dbf 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -658,3 +658,11 @@ rtc-pm8xxx-fix-set-alarm-race.patch ipmi-ipmb-fix-the-module_parm_desc-associated-to-retry_time_ms.patch ipmi-ssif-resend_msg-cannot-fail.patch ipmi_ssif-rename-idle-state-and-check.patch +io_uring-replace-0-length-array-with-flexible-array.patch +io_uring-use-user-visible-tail-in-io_uring_poll.patch +io_uring-handle-tif_notify_resume-when-checking-for-task_work.patch +io_uring-add-a-conditional-reschedule-to-the-iopoll-cancelation-loop.patch +io_uring-add-reschedule-point-to-handle_tw_list.patch +io_uring-rsrc-disallow-multi-source-reg-buffers.patch +io_uring-remove-msg_nosignal-from-recvmsg.patch +io_uring-fix-fget-leak-when-fs-don-t-support-nowait-buffered-read.patch