git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 26 Oct 2025 14:56:55 +0000 (15:56 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 26 Oct 2025 14:56:55 +0000 (15:56 +0100)
added patches:
io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch

queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch [new file with mode: 0644]
queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch [new file with mode: 0644]
queue-6.12/series

diff --git a/queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch b/queue-6.12/io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch
new file mode 100644 (file)
index 0000000..69b8ab9
--- /dev/null
@@ -0,0 +1,150 @@
+From bfe554574c418c0ef57cd234bed1bf31e9bb4f00 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 21 Oct 2025 11:44:39 -0600
+Subject: io_uring/sqpoll: be smarter on when to update the stime usage
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit a94e0657269c5b8e1a90b17aa2c048b3d276e16d upstream.
+
+The current approach is a bit naive, and hence queries the CPU time far
+too often. Only start the "doing work" timer when there's actual work to
+do, and then use that information to terminate (and account) the work
+time once done. This greatly reduces the frequency of these calls, for
+the cases where the accounted time cannot have changed anyway.
+
+Running a basic random reader that is set up to use SQPOLL, a profile
+before this change shows these as the top cycle consumers:
+
++   32.60%  iou-sqp-1074  [kernel.kallsyms]  [k] thread_group_cputime_adjusted
++   19.97%  iou-sqp-1074  [kernel.kallsyms]  [k] thread_group_cputime
++   12.20%  io_uring      io_uring           [.] submitter_uring_fn
++    4.13%  iou-sqp-1074  [kernel.kallsyms]  [k] getrusage
++    2.45%  iou-sqp-1074  [kernel.kallsyms]  [k] io_submit_sqes
++    2.18%  iou-sqp-1074  [kernel.kallsyms]  [k] __pi_memset_generic
++    2.09%  iou-sqp-1074  [kernel.kallsyms]  [k] cputime_adjust
+
+and after this change, the top of the profile looks as follows:
+
++   36.23%  io_uring     io_uring           [.] submitter_uring_fn
++   23.26%  iou-sqp-819  [kernel.kallsyms]  [k] io_sq_thread
++   10.14%  iou-sqp-819  [kernel.kallsyms]  [k] io_sq_tw
++    6.52%  iou-sqp-819  [kernel.kallsyms]  [k] tctx_task_work_run
++    4.82%  iou-sqp-819  [kernel.kallsyms]  [k] nvme_submit_cmds.part.0
++    2.91%  iou-sqp-819  [kernel.kallsyms]  [k] io_submit_sqes
+[...]
+     0.02%  iou-sqp-819  [kernel.kallsyms]  [k] cputime_adjust
+
+where it's spending the cycles on things that actually matter.
+
+Reported-by: Fengnan Chang <changfengnan@bytedance.com>
+Cc: stable@vger.kernel.org
+Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/sqpoll.c |   43 ++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 32 insertions(+), 11 deletions(-)
+
+--- a/io_uring/sqpoll.c
++++ b/io_uring/sqpoll.c
+@@ -176,6 +176,11 @@ static inline bool io_sqd_events_pending
+       return READ_ONCE(sqd->state);
+ }
++struct io_sq_time {
++      bool started;
++      u64 usec;
++};
++
+ u64 io_sq_cpu_usec(struct task_struct *tsk)
+ {
+       u64 utime, stime;
+@@ -185,12 +190,24 @@ u64 io_sq_cpu_usec(struct task_struct *t
+       return stime;
+ }
+-static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec)
++static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist)
+ {
+-      sqd->work_time += io_sq_cpu_usec(current) - usec;
++      if (!ist->started)
++              return;
++      ist->started = false;
++      sqd->work_time += io_sq_cpu_usec(current) - ist->usec;
+ }
+-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
++static void io_sq_start_worktime(struct io_sq_time *ist)
++{
++      if (ist->started)
++              return;
++      ist->started = true;
++      ist->usec = io_sq_cpu_usec(current);
++}
++
++static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
++                        bool cap_entries, struct io_sq_time *ist)
+ {
+       unsigned int to_submit;
+       int ret = 0;
+@@ -203,6 +220,8 @@ static int __io_sq_thread(struct io_ring
+       if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
+               const struct cred *creds = NULL;
++              io_sq_start_worktime(ist);
++
+               if (ctx->sq_creds != current_cred())
+                       creds = override_creds(ctx->sq_creds);
+@@ -284,7 +303,6 @@ static int io_sq_thread(void *data)
+       unsigned long timeout = 0;
+       char buf[TASK_COMM_LEN];
+       DEFINE_WAIT(wait);
+-      u64 start;
+       /* offload context creation failed, just exit */
+       if (!current->io_uring) {
+@@ -319,6 +337,7 @@ static int io_sq_thread(void *data)
+       mutex_lock(&sqd->lock);
+       while (1) {
+               bool cap_entries, sqt_spin = false;
++              struct io_sq_time ist = { };
+               if (io_sqd_events_pending(sqd) || signal_pending(current)) {
+                       if (io_sqd_handle_event(sqd))
+@@ -327,9 +346,8 @@ static int io_sq_thread(void *data)
+               }
+               cap_entries = !list_is_singular(&sqd->ctx_list);
+-              start = io_sq_cpu_usec(current);
+               list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+-                      int ret = __io_sq_thread(ctx, cap_entries);
++                      int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
+                       if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
+                               sqt_spin = true;
+@@ -337,15 +355,18 @@ static int io_sq_thread(void *data)
+               if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
+                       sqt_spin = true;
+-              list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+-                      if (io_napi(ctx))
++              list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
++                      if (io_napi(ctx)) {
++                              io_sq_start_worktime(&ist);
+                               io_napi_sqpoll_busy_poll(ctx);
++                      }
++              }
++
++              io_sq_update_worktime(sqd, &ist);
+               if (sqt_spin || !time_after(jiffies, timeout)) {
+-                      if (sqt_spin) {
+-                              io_sq_update_worktime(sqd, start);
++                      if (sqt_spin)
+                               timeout = jiffies + sqd->sq_thread_idle;
+-                      }
+                       if (unlikely(need_resched())) {
+                               mutex_unlock(&sqd->lock);
+                               cond_resched();
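
What the patch above buys is that the stime clock is now read at most once at the start and once at the end of a busy loop iteration, and not at all on idle iterations. A minimal userspace sketch of the same lazy start/stop accounting pattern (illustrative only: clock_gettime(CLOCK_MONOTONIC) stands in for io_sq_cpu_usec(current), and the work_time/have_work names are invented for this example, not taken from the kernel):

/* Userspace sketch of the lazy start/stop accounting pattern used above.
 * clock_gettime(CLOCK_MONOTONIC) stands in for io_sq_cpu_usec(current);
 * the surrounding loop mimics the shape of io_sq_thread()'s main loop. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

struct work_time {
	bool started;
	uint64_t usec;
};

static uint64_t now_usec(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

/* Arm the timer at most once per iteration, and only when work shows up. */
static void work_time_start(struct work_time *wt)
{
	if (wt->started)
		return;
	wt->started = true;
	wt->usec = now_usec();
}

/* Account elapsed time only if the timer was armed during this iteration. */
static void work_time_update(uint64_t *total, struct work_time *wt)
{
	if (!wt->started)
		return;
	wt->started = false;
	*total += now_usec() - wt->usec;
}

int main(void)
{
	uint64_t total = 0;

	for (int iter = 0; iter < 4; iter++) {
		struct work_time wt = { 0 };
		bool have_work = iter & 1;	/* pretend every other pass is busy */

		if (have_work) {
			work_time_start(&wt);
			/* ... submit SQEs / poll for completions here ... */
		}
		/* Idle passes never touch the clock at all. */
		work_time_update(&total, &wt);
	}
	printf("accounted %llu usec of work\n", (unsigned long long)total);
	return 0;
}

An idle pass pays nothing for accounting, which is why thread_group_cputime_adjusted() and friends all but disappear from the profile in the commit message.
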
diff --git a/queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch b/queue-6.12/io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch
new file mode 100644 (file)
index 0000000..4cc61e5
--- /dev/null
@@ -0,0 +1,140 @@
+From b9c7da23ba07c6781e13f97398b2979d2ea6230f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 21 Oct 2025 07:16:08 -0600
+Subject: io_uring/sqpoll: switch away from getrusage() for CPU accounting
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 8ac9b0d33e5c0a995338ee5f25fe1b6ff7d97f65 upstream.
+
+getrusage() does a lot more than what the SQPOLL accounting needs; the
+latter only cares about (and uses) the stime. Rather than do a full
+RUSAGE_SELF summation, just query the used stime instead.
+
+Cc: stable@vger.kernel.org
+Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads")
+Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/fdinfo.c |    8 ++++----
+ io_uring/sqpoll.c |   32 ++++++++++++++++++--------------
+ io_uring/sqpoll.h |    1 +
+ 3 files changed, 23 insertions(+), 18 deletions(-)
+
+--- a/io_uring/fdinfo.c
++++ b/io_uring/fdinfo.c
+@@ -55,7 +55,6 @@ __cold void io_uring_show_fdinfo(struct
+       struct io_ring_ctx *ctx = file->private_data;
+       struct io_overflow_cqe *ocqe;
+       struct io_rings *r = ctx->rings;
+-      struct rusage sq_usage;
+       unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
+       unsigned int sq_head = READ_ONCE(r->sq.head);
+       unsigned int sq_tail = READ_ONCE(r->sq.tail);
+@@ -155,14 +154,15 @@ __cold void io_uring_show_fdinfo(struct
+                * thread termination.
+                */
+               if (tsk) {
++                      u64 usec;
++
+                       get_task_struct(tsk);
+                       rcu_read_unlock();
+-                      getrusage(tsk, RUSAGE_SELF, &sq_usage);
++                      usec = io_sq_cpu_usec(tsk);
+                       put_task_struct(tsk);
+                       sq_pid = sq->task_pid;
+                       sq_cpu = sq->sq_cpu;
+-                      sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
+-                                       + sq_usage.ru_stime.tv_usec);
++                      sq_total_time = usec;
+                       sq_work_time = sq->work_time;
+               } else {
+                       rcu_read_unlock();
+--- a/io_uring/sqpoll.c
++++ b/io_uring/sqpoll.c
+@@ -11,6 +11,7 @@
+ #include <linux/audit.h>
+ #include <linux/security.h>
+ #include <linux/cpuset.h>
++#include <linux/sched/cputime.h>
+ #include <linux/io_uring.h>
+ #include <uapi/linux/io_uring.h>
+@@ -175,6 +176,20 @@ static inline bool io_sqd_events_pending
+       return READ_ONCE(sqd->state);
+ }
++u64 io_sq_cpu_usec(struct task_struct *tsk)
++{
++      u64 utime, stime;
++
++      task_cputime_adjusted(tsk, &utime, &stime);
++      do_div(stime, 1000);
++      return stime;
++}
++
++static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec)
++{
++      sqd->work_time += io_sq_cpu_usec(current) - usec;
++}
++
+ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+ {
+       unsigned int to_submit;
+@@ -261,26 +276,15 @@ static bool io_sq_tw_pending(struct llis
+       return retry_list || !llist_empty(&tctx->task_list);
+ }
+-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
+-{
+-      struct rusage end;
+-
+-      getrusage(current, RUSAGE_SELF, &end);
+-      end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
+-      end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
+-
+-      sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
+-}
+-
+ static int io_sq_thread(void *data)
+ {
+       struct llist_node *retry_list = NULL;
+       struct io_sq_data *sqd = data;
+       struct io_ring_ctx *ctx;
+-      struct rusage start;
+       unsigned long timeout = 0;
+       char buf[TASK_COMM_LEN];
+       DEFINE_WAIT(wait);
++      u64 start;
+       /* offload context creation failed, just exit */
+       if (!current->io_uring) {
+@@ -323,7 +327,7 @@ static int io_sq_thread(void *data)
+               }
+               cap_entries = !list_is_singular(&sqd->ctx_list);
+-              getrusage(current, RUSAGE_SELF, &start);
++              start = io_sq_cpu_usec(current);
+               list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+                       int ret = __io_sq_thread(ctx, cap_entries);
+@@ -339,7 +343,7 @@ static int io_sq_thread(void *data)
+               if (sqt_spin || !time_after(jiffies, timeout)) {
+                       if (sqt_spin) {
+-                              io_sq_update_worktime(sqd, &start);
++                              io_sq_update_worktime(sqd, start);
+                               timeout = jiffies + sqd->sq_thread_idle;
+                       }
+                       if (unlikely(need_resched())) {
+--- a/io_uring/sqpoll.h
++++ b/io_uring/sqpoll.h
+@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_da
+ void io_put_sq_data(struct io_sq_data *sqd);
+ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
+ int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
++u64 io_sq_cpu_usec(struct task_struct *tsk);
+ static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
+ {
diff --git a/queue-6.12/series b/queue-6.12/series
index 855c601c5f1b4e6e8aaeaa09d5d7b82d126e9b67..2f7e4eaa26328bd84e0dc208f80d14341eefcd91 100644 (file)
@@ -88,3 +88,5 @@ drm-panic-fix-qr_code-ensure-vmargin-is-positive.patch
 gpio-ljca-fix-duplicated-irq-mapping.patch
 io_uring-correct-__must_hold-annotation-in-io_instal.patch
 sched-remove-never-used-code-in-mm_cid_get.patch
+io_uring-sqpoll-switch-away-from-getrusage-for-cpu-accounting.patch
+io_uring-sqpoll-be-smarter-on-when-to-update-the-stime-usage.patch