]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched/cputime: Handle dyntick-idle steal time correctly
author: Frederic Weisbecker <frederic@kernel.org>
Fri, 8 May 2026 13:16:47 +0000 (15:16 +0200)
committer: Thomas Gleixner <tglx@kernel.org>
Tue, 2 Jun 2026 19:27:26 +0000 (21:27 +0200)
The dyntick-idle steal time is currently accounted when the tick restarts
but the stolen idle time is not subtracted from the idle time that was
already accounted. This is to avoid observing the idle time going backward
as the dyntick-idle cputime accessors can't reliably know in advance the
stolen idle time.

In order to maintain a forward progressing idle cputime while subtracting
idle steal time from it, keep track of the previously accounted idle stolen
time and subtract it from _later_ idle cputime accounting.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Link: https://patch.msgid.link/20260508131647.43868-16-frederic@kernel.org
include/linux/kernel_stat.h
kernel/sched/cputime.c

index 512104b0ff49ba7d1271a72f7c2f2ade8ea61d7b..fce1392e21403fe859e9924c73c77d7c7e0995ff 100644 (file)
@@ -39,6 +39,7 @@ struct kernel_cpustat {
        bool            idle_elapse;
        seqcount_t      idle_sleeptime_seq;
        u64             idle_entrytime;
+       u64             idle_stealtime[2];
 #endif
        u64             cpustat[NR_STATS];
 };
index 94be22aa5cb693ace109cf05c0deb254b5a16bd9..244b57417240203543110f2c4c79bcb86f7fb97b 100644 (file)
@@ -425,19 +425,32 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 static void kcpustat_idle_stop(struct kernel_cpustat *kc, u64 now)
 {
        u64 *cpustat = kc->cpustat;
-       u64 delta;
+       u64 delta, steal, steal_delta;
+       int iowait;
 
        if (!kc->idle_elapse)
                return;
 
+       iowait = nr_iowait_cpu(smp_processor_id()) > 0;
        delta = now - kc->idle_entrytime;
+       steal = steal_account_process_time(delta);
 
+       /*
+        * Record the idle time after subtracting the steal time from
+        * previous update sequence. Don't subtract the steal time from
+        * the current update sequence to avoid readers moving backward.
+        */
        write_seqcount_begin(&kc->idle_sleeptime_seq);
-       if (nr_iowait_cpu(smp_processor_id()) > 0)
+       steal_delta = min_t(u64, kc->idle_stealtime[iowait], delta);
+       delta -= steal_delta;
+       kc->idle_stealtime[iowait] -= steal_delta;
+
+       if (iowait)
                cpustat[CPUTIME_IOWAIT] += delta;
        else
                cpustat[CPUTIME_IDLE] += delta;
 
+       kc->idle_stealtime[iowait] += steal;
        kc->idle_entrytime = now;
        kc->idle_elapse = false;
        write_seqcount_end(&kc->idle_sleeptime_seq);
@@ -464,7 +477,6 @@ void kcpustat_dyntick_stop(u64 now)
                kcpustat_idle_stop(kc, now);
                kc->idle_dyntick = false;
                vtime_dyntick_stop();
-               steal_account_process_time(ULONG_MAX);
        }
 }
 
@@ -508,6 +520,7 @@ static u64 kcpustat_field_dyntick(int cpu, enum cpu_usage_stat idx,
                                  bool compute_delta, u64 now)
 {
        struct kernel_cpustat *kc = &kcpustat_cpu(cpu);
+       int iowait = idx == CPUTIME_IOWAIT;
        u64 *cpustat = kc->cpustat;
        unsigned int seq;
        u64 idle;
@@ -516,8 +529,13 @@ static u64 kcpustat_field_dyntick(int cpu, enum cpu_usage_stat idx,
                seq = read_seqcount_begin(&kc->idle_sleeptime_seq);
 
                idle = cpustat[idx];
-               if (kc->idle_elapse && compute_delta && now > kc->idle_entrytime)
-                       idle += (now - kc->idle_entrytime);
+
+               if (kc->idle_elapse && compute_delta && now > kc->idle_entrytime) {
+                       u64 delta = now - kc->idle_entrytime;
+
+                       delta -= min_t(u64, kc->idle_stealtime[iowait], delta);
+                       idle += delta;
+               }
        } while (read_seqcount_retry(&kc->idle_sleeptime_seq, seq));
 
        return idle;