From bac09f0958749a5c969ed2fafe40e9d712bf4e6a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 17 Jul 2023 22:12:10 +0200
Subject: [PATCH] 5.10-stable patches

added patches:
	io_uring-add-reschedule-point-to-handle_tw_list.patch
	io_uring-use-io_schedule-in-cqring-wait.patch

---
 ...d-reschedule-point-to-handle_tw_list.patch | 38 +++++++++
 ...uring-use-io_schedule-in-cqring-wait.patch | 78 +++++++++++++++++++
 queue-5.10/series                             |  2 +
 3 files changed, 118 insertions(+)
 create mode 100644 queue-5.10/io_uring-add-reschedule-point-to-handle_tw_list.patch
 create mode 100644 queue-5.10/io_uring-use-io_schedule-in-cqring-wait.patch

diff --git a/queue-5.10/io_uring-add-reschedule-point-to-handle_tw_list.patch b/queue-5.10/io_uring-add-reschedule-point-to-handle_tw_list.patch
new file mode 100644
index 00000000000..83cf6781b1f
--- /dev/null
+++ b/queue-5.10/io_uring-add-reschedule-point-to-handle_tw_list.patch
@@ -0,0 +1,38 @@
+From 4e214e7e01158a87308a17766706159bca472855 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Mon, 17 Jul 2023 10:27:20 -0600
+Subject: io_uring: add reschedule point to handle_tw_list()
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit f58680085478dd292435727210122960d38e8014 upstream.
+
+If CONFIG_PREEMPT_NONE is set and the task_work chains are long, we
+could be running into issues blocking others for too long. Add a
+reschedule check in handle_tw_list(), and flush the ctx if we need to
+reschedule.
+
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2214,9 +2214,12 @@ static void tctx_task_work(struct callba
+ 			}
+ 			req->io_task_work.func(req, &locked);
+ 			node = next;
++			if (unlikely(need_resched())) {
++				ctx_flush_and_put(ctx, &locked);
++				ctx = NULL;
++				cond_resched();
++			}
+ 		} while (node);
+-
+-		cond_resched();
+ 	}
+ 
+ 	ctx_flush_and_put(ctx, &locked);
diff --git a/queue-5.10/io_uring-use-io_schedule-in-cqring-wait.patch b/queue-5.10/io_uring-use-io_schedule-in-cqring-wait.patch
new file mode 100644
index 00000000000..2330ab23fa2
--- /dev/null
+++ b/queue-5.10/io_uring-use-io_schedule-in-cqring-wait.patch
@@ -0,0 +1,78 @@
+From c8c88d523c89e0ac8affbf2fd57def82e0d5d4bf Mon Sep 17 00:00:00 2001
+From: Andres Freund <andres@anarazel.de>
+Date: Sun, 16 Jul 2023 12:07:03 -0600
+Subject: io_uring: Use io_schedule* in cqring wait
+
+From: Andres Freund <andres@anarazel.de>
+
+Commit 8a796565cec3601071cbbd27d6304e202019d014 upstream.
+
+I observed poor performance of io_uring compared to synchronous IO. That
+turns out to be caused by deeper CPU idle states entered with io_uring,
+due to io_uring using plain schedule(), whereas synchronous IO uses
+io_schedule().
+
+The losses due to this are substantial. On my cascade lake workstation,
+t/io_uring from the fio repository e.g. yields regressions between 20%
+and 40% with the following command:
+./t/io_uring -r 5 -X0 -d 1 -s 1 -c 1 -p 0 -S$use_sync -R 0 /mnt/t2/fio/write.0.0
+
+This is repeatable with different filesystems, using raw block devices
+and using different block devices.
+
+Use io_schedule_prepare() / io_schedule_finish() in
+io_cqring_wait_schedule() to address the difference.
+
+After that using io_uring is on par or surpassing synchronous IO (using
+registered files etc makes it reliably win, but arguably is a less fair
+comparison).
+
+There are other calls to schedule() in io_uring/, but none immediately
+jump out to be similarly situated, so I did not touch them. Similarly,
+it's possible that mutex_lock_io() should be used, but it's not clear if
+there are cases where that matters.
+
+Cc: stable@vger.kernel.org # 5.10+
+Cc: Pavel Begunkov <asml.silence@gmail.com>
+Cc: io-uring@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Andres Freund <andres@anarazel.de>
+Link: https://lore.kernel.org/r/20230707162007.194068-1-andres@anarazel.de
+[axboe: minor style fixup]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -7625,7 +7625,7 @@ static inline int io_cqring_wait_schedul
+ 					  struct io_wait_queue *iowq,
+ 					  ktime_t *timeout)
+ {
+-	int ret;
++	int token, ret;
+ 
+ 	/* make sure we run task_work before checking for signals */
+ 	ret = io_run_task_work_sig();
+@@ -7635,9 +7635,17 @@ static inline int io_cqring_wait_schedul
+ 	if (test_bit(0, &ctx->check_cq_overflow))
+ 		return 1;
+ 
++	/*
++	 * Use io_schedule_prepare/finish, so cpufreq can take into account
++	 * that the task is waiting for IO - turns out to be important for low
++	 * QD IO.
++	 */
++	token = io_schedule_prepare();
++	ret = 1;
+ 	if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
+-		return -ETIME;
+-	return 1;
++		ret = -ETIME;
++	io_schedule_finish(token);
++	return ret;
+ }
+ 
+ /*
diff --git a/queue-5.10/series b/queue-5.10/series
index 744b077b3e4..f82a50ab21b 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -331,3 +331,5 @@ rcu-tasks-mark-trc_reader_nesting-data-races.patch
 rcu-tasks-mark-trc_reader_special.b.need_qs-data-races.patch
 rcu-tasks-simplify-trc_read_check_handler-atomic-operations.patch
 block-partition-fix-signedness-issue-for-amiga-partitions.patch
+io_uring-use-io_schedule-in-cqring-wait.patch
+io_uring-add-reschedule-point-to-handle_tw_list.patch
-- 
2.47.3
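
For reference, a minimal standalone sketch of the reschedule pattern the
first backport applies, outside the io_uring context. run_one_work() here is
a hypothetical stand-in for req->io_task_work.func(); need_resched() and
cond_resched() are the real scheduler primitives, and the real patch
additionally drops its per-ctx state (ctx_flush_and_put()) before yielding:

/*
 * Editor's sketch, not part of the patch: the reschedule pattern from
 * io_uring-add-reschedule-point-to-handle_tw_list.patch in isolation.
 * run_one_work() stands in for req->io_task_work.func().
 */
#include <linux/llist.h>
#include <linux/sched.h>

static void run_work_list(struct llist_node *node,
			  void (*run_one_work)(struct llist_node *node))
{
	while (node) {
		struct llist_node *next = node->next;

		run_one_work(node);
		node = next;

		/*
		 * With CONFIG_PREEMPT_NONE nothing interrupts a task running
		 * in the kernel, so a long chain must yield voluntarily once
		 * the scheduler signals need_resched().
		 */
		if (unlikely(need_resched()))
			cond_resched();
	}
}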
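
Likewise, a minimal sketch of the IO-wait accounting pattern from the second
backport, assuming only the core scheduler primitives io_schedule_prepare(),
io_schedule_finish() and schedule_hrtimeout(); wait_event_iotime() is a
hypothetical helper, not io_uring code:

/*
 * Editor's sketch, not part of the patch: the IO-wait accounting pattern
 * from io_uring-use-io_schedule-in-cqring-wait.patch.
 */
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>

static int wait_event_iotime(ktime_t *timeout)
{
	int token, ret = 1;

	/* Mark this task as blocked on IO, not merely sleeping. */
	token = io_schedule_prepare();
	if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
		ret = -ETIME;		/* timer expired before a wakeup */
	io_schedule_finish(token);	/* restore previous in_iowait state */
	return ret;
}

io_schedule_prepare() sets current->in_iowait and flushes any plugged block
requests, so cpufreq governors such as schedutil see the task as blocked on
IO and keep the CPU clocked accordingly; io_schedule_finish() restores the
previous state. That is the whole difference between schedule() and the
io_schedule*() variants that the commit message measures.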