git.ipfire.org Git - thirdparty/linux.git/commitdiff
sched: Fix runtime accounting w/ split exec & sched contexts
author John Stultz <jstultz@google.com>
Sat, 12 Jul 2025 03:33:46 +0000 (03:33 +0000)
committer Peter Zijlstra <peterz@infradead.org>
Mon, 14 Jul 2025 15:16:32 +0000 (17:16 +0200)
Without proxy-exec, we normally charge the "current" task for
both its vruntime as well as its sum_exec_runtime.

With proxy, however, we have two "current" contexts: the
scheduler context and the execution context. We want to charge
the execution context rq->curr (ie: proxy/lock holder) execution
time to its sum_exec_runtime (so it's clear to userland the
rq->curr task *is* running), as well as its thread group.

However, for the rest of the time accounting (such as vruntime and
cgroup accounting), we charge the scheduler context
(rq->donor) task, because it is from that task that the time
is being "donated".

If the donor and curr tasks are the same, then it's the same as
without proxy.

Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://lkml.kernel.org/r/20250712033407.2383110-6-jstultz@google.com
kernel/sched/fair.c

index 8334580ed3a30a79326b476697050b4babf527d4..97176458f2be5160a60f47a599658bf5f9de02db 100644 (file)
@@ -1152,30 +1152,40 @@ void post_init_entity_util_avg(struct task_struct *p)
        sa->runnable_avg = sa->util_avg;
 }
 
-static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
+static s64 update_se(struct rq *rq, struct sched_entity *se)
 {
        u64 now = rq_clock_task(rq);
        s64 delta_exec;
 
-       delta_exec = now - curr->exec_start;
+       delta_exec = now - se->exec_start;
        if (unlikely(delta_exec <= 0))
                return delta_exec;
 
-       curr->exec_start = now;
-       curr->sum_exec_runtime += delta_exec;
+       se->exec_start = now;
+       if (entity_is_task(se)) {
+               struct task_struct *donor = task_of(se);
+               struct task_struct *running = rq->curr;
+               /*
+                * If se is a task, we account the time against the running
+                * task, as w/ proxy-exec they may not be the same.
+                */
+               running->se.exec_start = now;
+               running->se.sum_exec_runtime += delta_exec;
 
-       if (entity_is_task(curr)) {
-               struct task_struct *p = task_of(curr);
+               trace_sched_stat_runtime(running, delta_exec);
+               account_group_exec_runtime(running, delta_exec);
 
-               trace_sched_stat_runtime(p, delta_exec);
-               account_group_exec_runtime(p, delta_exec);
-               cgroup_account_cputime(p, delta_exec);
+               /* cgroup time is always accounted against the donor */
+               cgroup_account_cputime(donor, delta_exec);
+       } else {
+               /* If not task, account the time against donor se  */
+               se->sum_exec_runtime += delta_exec;
        }
 
        if (schedstat_enabled()) {
                struct sched_statistics *stats;
 
-               stats = __schedstats_from_se(curr);
+               stats = __schedstats_from_se(se);
                __schedstat_set(stats->exec_max,
                                max(delta_exec, stats->exec_max));
        }
@@ -1188,9 +1198,7 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
  */
 s64 update_curr_common(struct rq *rq)
 {
-       struct task_struct *donor = rq->donor;
-
-       return update_curr_se(rq, &donor->se);
+       return update_se(rq, &rq->donor->se);
 }
 
 /*
@@ -1198,6 +1206,12 @@ s64 update_curr_common(struct rq *rq)
  */
 static void update_curr(struct cfs_rq *cfs_rq)
 {
+       /*
+        * Note: cfs_rq->curr corresponds to the task picked to
+        * run (ie: rq->donor.se) which due to proxy-exec may
+        * not necessarily be the actual task running
+        * (rq->curr.se). This is easy to confuse!
+        */
        struct sched_entity *curr = cfs_rq->curr;
        struct rq *rq = rq_of(cfs_rq);
        s64 delta_exec;
@@ -1206,7 +1220,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
        if (unlikely(!curr))
                return;
 
-       delta_exec = update_curr_se(rq, curr);
+       delta_exec = update_se(rq, curr);
        if (unlikely(delta_exec <= 0))
                return;