From: Linus Torvalds Date: Mon, 15 Jun 2026 08:18:52 +0000 (+0530) Subject: Merge tag 'timers-nohz-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git... X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=a53fcff8fc7530f59a8171824ed586200df724a0;p=thirdparty%2Fkernel%2Flinux.git Merge tag 'timers-nohz-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip Pull NOHZ updates from Thomas Gleixner: - Fix a long standing TOCTOU in get_cpu_sleep_time_us() - Make the CPU offline NOHZ handling more robust by disabling NOHZ on the outgoing CPU early instead of creating unneeded state which needs to be undone. - Unify idle CPU time accounting instead of having two different accounting mechanisms. These two different mechanisms are not really independent, but the different properties can in the worst case cause that gloabl idle time can be observed going backwards. - Consolidate the idle/iowait time retrieval interfaces instead of converting back and forth between them. - Make idle interrupt time accounting more robust. The original code assumes that interrupt time accouting is enabled and therefore stops elapsing idle time while an interrupt is handled in NOHZ dyntick state. That assumption is not correct as interrupt time accounting can be disabled at compile and runtime. - Fix an accounting error between dyntick idle time and dyntick idle steal time. The stolen time is not accounted and therefore idle time becomes inaccurate. The stolen time is now accounted after the fact as there is no way to predict the steal time upfront. * tag 'timers-nohz-2026-06-13' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip: sched/cputime: Handle dyntick-idle steal time correctly sched/cputime: Handle idle irqtime gracefully sched/cputime: Provide get_cpu_[idle|iowait]_time_us() off-case tick/sched: Consolidate idle time fetching APIs tick/sched: Account tickless idle cputime only when tick is stopped tick/sched: Remove unused fields tick/sched: Move dyntick-idle cputime accounting to cputime code tick/sched: Remove nohz disabled special case in cputime fetch tick/sched: Unify idle cputime accounting s390/time: Prepare to stop elapsing in dynticks-idle powerpc/time: Prepare to stop elapsing in dynticks-idle sched/cputime: Correctly support generic vtime idle time sched/cputime: Remove superfluous and error prone kcpustat_field() parameter sched/idle: Handle offlining first in idle loop tick/sched: Fix TOCTOU in nohz idle time fetch --- a53fcff8fc7530f59a8171824ed586200df724a0 diff --cc drivers/cpufreq/cpufreq_governor.c index 8a85bd32defe7,3c4a1f9af3ae8..710d93ec89b53 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@@ -112,9 -104,9 +112,9 @@@ void gov_update_cpu_data(struct dbs_dat j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, dbs_data->io_is_busy); - j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j); - if (dbs_data->ignore_nice_load) - j_cdbs->prev_cpu_nice = kcpustat_field(CPUTIME_NICE, j); ++ j_cdbs->prev_cpu_nice = kcpustat_field(CPUTIME_NICE, j); } + mutex_unlock(&policy_dbs->update_mutex); } } EXPORT_SYMBOL_GPL(gov_update_cpu_data); @@@ -173,18 -164,12 +173,18 @@@ unsigned int dbs_update(struct cpufreq_ j_cdbs->prev_cpu_idle = cur_idle_time; - if (ignore_nice) { - u64 cur_nice = kcpustat_field(CPUTIME_NICE, j); - + /* + * Always sample cur_nice and advance prev_cpu_nice, regardless + * of ignore_nice. This keeps prev_cpu_nice current so that + * enabling ignore_nice_load via sysfs never produces a + * stale-baseline spike (the delta will be at most one sampling + * interval of accumulated nice time, not since boot). + */ - cur_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j); ++ cur_nice = kcpustat_field(CPUTIME_NICE, j); + if (ignore_nice) idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC); - j_cdbs->prev_cpu_nice = cur_nice; - } + + j_cdbs->prev_cpu_nice = cur_nice; if (unlikely(!time_elapsed)) { /* @@@ -552,9 -537,10 +552,9 @@@ int cpufreq_dbs_governor_start(struct c * Make the first invocation of dbs_update() compute the load. */ j_cdbs->prev_load = 0; - j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j); - - if (ignore_nice) - j_cdbs->prev_cpu_nice = kcpustat_field(CPUTIME_NICE, j); ++ j_cdbs->prev_cpu_nice = kcpustat_field(CPUTIME_NICE, j); } + mutex_unlock(&policy_dbs->update_mutex); gov->start(policy); diff --cc fs/proc/stat.c index 20c3df9a9b800,c00468a83f64b..79b42f2bfd000 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@@ -18,39 -18,10 +18,7 @@@ #ifndef arch_irq_stat_cpu #define arch_irq_stat_cpu(cpu) 0 #endif -#ifndef arch_irq_stat -#define arch_irq_stat() 0 -#endif - u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) - { - u64 idle, idle_usecs = -1ULL; - - if (cpu_online(cpu)) - idle_usecs = get_cpu_idle_time_us(cpu, NULL); - - if (idle_usecs == -1ULL) - /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ - idle = kcs->cpustat[CPUTIME_IDLE]; - else - idle = idle_usecs * NSEC_PER_USEC; - - return idle; - } - - static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) - { - u64 iowait, iowait_usecs = -1ULL; - - if (cpu_online(cpu)) - iowait_usecs = get_cpu_iowait_time_us(cpu, NULL); - - if (iowait_usecs == -1ULL) - /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ - iowait = kcs->cpustat[CPUTIME_IOWAIT]; - else - iowait = iowait_usecs * NSEC_PER_USEC; - - return iowait; - } - static void show_irq_gap(struct seq_file *p, unsigned int gap) { static const char zeros[] = " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0";