From d76e56ec047c08fd028e7eb97ccc3f479288ebfd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 26 Oct 2025 15:56:55 +0100
Subject: [PATCH] 6.12-stable patches

added patches:
	io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
	io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch
---
 ...er-on-when-to-update-the-stime-usage.patch | 150 ++++++++++++++++++
 ...ay-from-getrusage-for-cpu-accounting.patch | 140 ++++++++++++++++
 queue-6.12/series                             |   2 +
 3 files changed, 292 insertions(+)
 create mode 100644 queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
 create mode 100644 queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch

diff --git a/queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch b/queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
new file mode 100644
index 0000000000..69b8ab933e
--- /dev/null
+++ b/queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
@@ -0,0 +1,150 @@
+From bfe554574c418c0ef57cd234bed1bf31e9bb4f00 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 21 Oct 2025 11:44:39 -0600
+Subject: io_uring/sqpoll: be smarter on when to update the stime usage
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit a94e0657269c5b8e1a90b17aa2c048b3d276e16d upstream.
+
+The current approach is a bit naive, and hence calls the time querying
+way too often. Only start the "doing work" timer when there's actual
+work to do, and then use that information to terminate (and account) the
+work time once done. This greatly reduces the frequency of these calls,
+when they cannot have changed anyway.
+
+Running a basic random reader that is setup to use SQPOLL, a profile
+before this change shows these as the top cycle consumers:
+
++ 32.60% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime_adjusted
++ 19.97% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime
++ 12.20% io_uring io_uring [.] submitter_uring_fn
++  4.13% iou-sqp-1074 [kernel.kallsyms] [k] getrusage
++  2.45% iou-sqp-1074 [kernel.kallsyms] [k] io_submit_sqes
++  2.18% iou-sqp-1074 [kernel.kallsyms] [k] __pi_memset_generic
++  2.09% iou-sqp-1074 [kernel.kallsyms] [k] cputime_adjust
+
+and after this change, top of profile looks as follows:
+
++ 36.23% io_uring io_uring [.] submitter_uring_fn
++ 23.26% iou-sqp-819 [kernel.kallsyms] [k] io_sq_thread
++ 10.14% iou-sqp-819 [kernel.kallsyms] [k] io_sq_tw
++  6.52% iou-sqp-819 [kernel.kallsyms] [k] tctx_task_work_run
++  4.82% iou-sqp-819 [kernel.kallsyms] [k] nvme_submit_cmds.part.0
++  2.91% iou-sqp-819 [kernel.kallsyms] [k] io_submit_sqes
+[...]
+   0.02% iou-sqp-819 [kernel.kallsyms] [k] cputime_adjust
+
+where it's spending the cycles on things that actually matter.
+
+Reported-by: Fengnan Chang <changfengnan@bytedance.com>
+Cc: stable@vger.kernel.org
+Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/sqpoll.c | 43 ++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 32 insertions(+), 11 deletions(-)
+
+--- a/io_uring/sqpoll.c
++++ b/io_uring/sqpoll.c
+@@ -176,6 +176,11 @@ static inline bool io_sqd_events_pending
+ 	return READ_ONCE(sqd->state);
+ }
+ 
++struct io_sq_time {
++	bool started;
++	u64 usec;
++};
++
+ u64 io_sq_cpu_usec(struct task_struct *tsk)
+ {
+ 	u64 utime, stime;
+@@ -185,12 +190,24 @@ u64 io_sq_cpu_usec(struct task_struct *t
+ 	return stime;
+ }
+ 
+-static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec)
++static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist)
+ {
+-	sqd->work_time += io_sq_cpu_usec(current) - usec;
++	if (!ist->started)
++		return;
++	ist->started = false;
++	sqd->work_time += io_sq_cpu_usec(current) - ist->usec;
+ }
+ 
+-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
++static void io_sq_start_worktime(struct io_sq_time *ist)
++{
++	if (ist->started)
++		return;
++	ist->started = true;
++	ist->usec = io_sq_cpu_usec(current);
++}
++
++static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
++			  bool cap_entries, struct io_sq_time *ist)
+ {
+ 	unsigned int to_submit;
+ 	int ret = 0;
+@@ -203,6 +220,8 @@ static int __io_sq_thread(struct io_ring
+ 	if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
+ 		const struct cred *creds = NULL;
+ 
++		io_sq_start_worktime(ist);
++
+ 		if (ctx->sq_creds != current_cred())
+ 			creds = override_creds(ctx->sq_creds);
+ 
+@@ -284,7 +303,6 @@ static int io_sq_thread(void *data)
+ 	unsigned long timeout = 0;
+ 	char buf[TASK_COMM_LEN];
+ 	DEFINE_WAIT(wait);
+-	u64 start;
+ 
+ 	/* offload context creation failed, just exit */
+ 	if (!current->io_uring) {
+@@ -319,6 +337,7 @@ static int io_sq_thread(void *data)
+ 	mutex_lock(&sqd->lock);
+ 	while (1) {
+ 		bool cap_entries, sqt_spin = false;
++		struct io_sq_time ist = { };
+ 
+ 		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
+ 			if (io_sqd_handle_event(sqd))
+@@ -327,9 +346,8 @@ static int io_sq_thread(void *data)
+ 		}
+ 
+ 		cap_entries = !list_is_singular(&sqd->ctx_list);
+-		start = io_sq_cpu_usec(current);
+ 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+-			int ret = __io_sq_thread(ctx, cap_entries);
++			int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
+ 
+ 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
+ 				sqt_spin = true;
+@@ -337,15 +355,18 @@ static int io_sq_thread(void *data)
+ 		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
+ 			sqt_spin = true;
+ 
+-		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+-			if (io_napi(ctx))
++		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
++			if (io_napi(ctx)) {
++				io_sq_start_worktime(&ist);
+ 				io_napi_sqpoll_busy_poll(ctx);
++			}
++		}
++
++		io_sq_update_worktime(sqd, &ist);
+ 
+ 		if (sqt_spin || !time_after(jiffies, timeout)) {
+-			if (sqt_spin) {
+-				io_sq_update_worktime(sqd, start);
++			if (sqt_spin)
+ 				timeout = jiffies + sqd->sq_thread_idle;
+-			}
+ 			if (unlikely(need_resched())) {
+ 				mutex_unlock(&sqd->lock);
+ 				cond_resched();
diff --git a/queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch b/queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch
new file mode 100644
index 0000000000..4cc61e531b
--- /dev/null
+++ b/queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch
@@ -0,0 +1,140 @@
+From b9c7da23ba07c6781e13f97398b2979d2ea6230f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 21 Oct 2025 07:16:08 -0600
+Subject: io_uring/sqpoll: switch away from getrusage() for CPU accounting
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 8ac9b0d33e5c0a995338ee5f25fe1b6ff7d97f65 upstream.
+
+getrusage() does a lot more than what the SQPOLL accounting needs, the
+latter only cares about (and uses) the stime. Rather than do a full
+RUSAGE_SELF summation, just query the used stime instead.
+
+Cc: stable@vger.kernel.org
+Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads")
+Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/fdinfo.c | 8 ++++----
+ io_uring/sqpoll.c | 32 ++++++++++++++++++--------------
+ io_uring/sqpoll.h | 1 +
+ 3 files changed, 23 insertions(+), 18 deletions(-)
+
+--- a/io_uring/fdinfo.c
++++ b/io_uring/fdinfo.c
+@@ -55,7 +55,6 @@ __cold void io_uring_show_fdinfo(struct
+ 	struct io_ring_ctx *ctx = file->private_data;
+ 	struct io_overflow_cqe *ocqe;
+ 	struct io_rings *r = ctx->rings;
+-	struct rusage sq_usage;
+ 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
+ 	unsigned int sq_head = READ_ONCE(r->sq.head);
+ 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
+@@ -155,14 +154,15 @@ __cold void io_uring_show_fdinfo(struct
+ 	 * thread termination.
+ 	 */
+ 	if (tsk) {
++		u64 usec;
++
+ 		get_task_struct(tsk);
+ 		rcu_read_unlock();
+-		getrusage(tsk, RUSAGE_SELF, &sq_usage);
++		usec = io_sq_cpu_usec(tsk);
+ 		put_task_struct(tsk);
+ 		sq_pid = sq->task_pid;
+ 		sq_cpu = sq->sq_cpu;
+-		sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
+-				 + sq_usage.ru_stime.tv_usec);
++		sq_total_time = usec;
+ 		sq_work_time = sq->work_time;
+ 	} else {
+ 		rcu_read_unlock();
+--- a/io_uring/sqpoll.c
++++ b/io_uring/sqpoll.c
+@@ -11,6 +11,7 @@
+ #include <linux/audit.h>
+ #include <linux/security.h>
+ #include <linux/cpuset.h>
++#include <linux/sched/cputime.h>
+ #include <linux/io_uring.h>
+ 
+ #include <uapi/linux/io_uring.h>
+@@ -175,6 +176,20 @@ static inline bool io_sqd_events_pending
+ 	return READ_ONCE(sqd->state);
+ }
+ 
++u64 io_sq_cpu_usec(struct task_struct *tsk)
++{
++	u64 utime, stime;
++
++	task_cputime_adjusted(tsk, &utime, &stime);
++	do_div(stime, 1000);
++	return stime;
++}
++
++static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec)
++{
++	sqd->work_time += io_sq_cpu_usec(current) - usec;
++}
++
+ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+ {
+ 	unsigned int to_submit;
+@@ -261,26 +276,15 @@ static bool io_sq_tw_pending(struct llis
+ 	return retry_list || !llist_empty(&tctx->task_list);
+ }
+ 
+-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
+-{
+-	struct rusage end;
+-
+-	getrusage(current, RUSAGE_SELF, &end);
+-	end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
+-	end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
+-
+-	sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
+-}
+-
+ static int io_sq_thread(void *data)
+ {
+ 	struct llist_node *retry_list = NULL;
+ 	struct io_sq_data *sqd = data;
+ 	struct io_ring_ctx *ctx;
+-	struct rusage start;
+ 	unsigned long timeout = 0;
+ 	char buf[TASK_COMM_LEN];
+ 	DEFINE_WAIT(wait);
++	u64 start;
+ 
+ 	/* offload context creation failed, just exit */
+ 	if (!current->io_uring) {
+@@ -323,7 +327,7 @@ static int io_sq_thread(void *data)
+ 		}
+ 
+ 		cap_entries = !list_is_singular(&sqd->ctx_list);
+-		getrusage(current, RUSAGE_SELF, &start);
++		start = io_sq_cpu_usec(current);
+ 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+ 			int ret = __io_sq_thread(ctx, cap_entries);
+ 
+@@ -339,7 +343,7 @@ static int io_sq_thread(void *data)
+ 
+ 		if (sqt_spin || !time_after(jiffies, timeout)) {
+ 			if (sqt_spin) {
+-				io_sq_update_worktime(sqd, &start);
++				io_sq_update_worktime(sqd, start);
+ 				timeout = jiffies + sqd->sq_thread_idle;
+ 			}
+ 			if (unlikely(need_resched())) {
+--- a/io_uring/sqpoll.h
++++ b/io_uring/sqpoll.h
+@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_da
+ void io_put_sq_data(struct io_sq_data *sqd);
+ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
+ int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
++u64 io_sq_cpu_usec(struct task_struct *tsk);
+ 
+ static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
+ {
diff --git a/queue-6.12/series b/queue-6.12/series
index 855c601c5f..2f7e4eaa26 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -88,3 +88,5 @@ drm-panic-fix-qr_code-ensure-vmargin-is-positive.patch
 gpio-ljca-fix-duplicated-irq-mapping.patch
 io_uring-correct-__must_hold-annotation-in-io_instal.patch
 sched-remove-never-used-code-in-mm_cid_get.patch
+io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch
+io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
-- 
2.47.3
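
[Editor's appendix] For readers following the two backports above, the accounting pattern they converge on is easier to see outside of the nested diff context. The sketch below is a userspace model, not kernel code: getrusage(RUSAGE_THREAD) stands in for the kernel-internal task_cputime_adjusted() that io_sq_cpu_usec() wraps, and the names sq_time, thread_stime_usec, and the toy main() loop are invented for illustration only. It combines what the two patches do: query just the system time, arm the clock lazily when the first piece of real work shows up, and settle the account at most once per loop pass.

/*
 * Userspace model of the sqpoll worktime accounting after both patches.
 * Assumptions: getrusage(RUSAGE_THREAD) (Linux-specific, needs _GNU_SOURCE)
 * stands in for task_cputime_adjusted(); all identifiers are invented.
 */
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/resource.h>

struct sq_time {
	bool started;
	uint64_t usec;
};

/* System CPU time consumed by the calling thread, in microseconds. */
static uint64_t thread_stime_usec(void)
{
	struct rusage ru;

	getrusage(RUSAGE_THREAD, &ru);
	return (uint64_t)ru.ru_stime.tv_sec * 1000000 + ru.ru_stime.tv_usec;
}

/* Arm the clock only when real work shows up; repeat calls are no-ops. */
static void sq_start_worktime(struct sq_time *ist)
{
	if (ist->started)
		return;
	ist->started = true;
	ist->usec = thread_stime_usec();
}

/* Settle the account once per loop pass, and only if the clock ran. */
static void sq_update_worktime(uint64_t *work_time, struct sq_time *ist)
{
	if (!ist->started)
		return;
	ist->started = false;
	*work_time += thread_stime_usec() - ist->usec;
}

int main(void)
{
	uint64_t work_time = 0;

	for (int iter = 0; iter < 8; iter++) {
		struct sq_time ist = { 0 };

		if (iter & 1) {	/* pretend every other pass has work */
			sq_start_worktime(&ist);
			/* submission/polling work would run here */
		}
		/* idle passes never armed the clock, so this returns early */
		sq_update_worktime(&work_time, &ist);
	}
	printf("accounted work time: %llu usec\n",
	       (unsigned long long)work_time);
	return 0;
}

An idle pass through the loop never queries the clock at all, which is what drops thread_group_cputime_adjusted() from the top of the "before" profile to the noise floor in the "after" profile quoted in the first patch.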