git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
cpuidle: governors: teo: Rework the handling of tick wakeups
author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 13 Nov 2025 16:56:27 +0000 (17:56 +0100)
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 20 Nov 2025 13:49:57 +0000 (14:49 +0100)
If the wakeup pattern is clearly dominated by tick wakeups, count those
wakeups as hits on the deepest available idle state to increase the
likelihood of stopping the tick, especially on systems where there are
only 2 usable idle states and the tick can only be stopped when the
deeper state is selected.

This change is expected to reduce power on some systems where state 0 is
selected relatively often even though they are almost idle.  Without it,
the governor may end up selecting the shallowest idle state all the time
even if the system is almost completely idle due to all tick wakeups being
counted as hits on that state and preventing the tick from being stopped
at all.

Fixes: 4b20b07ce72f ("cpuidle: teo: Don't count non-existent intercepts")
Reported-by: Reka Norman <rekanorman@chromium.org>
Closes: https://lore.kernel.org/linux-pm/CAEmPcwsNMNnNXuxgvHTQ93Mx-q3Oz9U57THQsU_qdcCx1m4w5g@mail.gmail.com/
Tested-by: Reka Norman <rekanorman@chromium.org>
Tested-by: Christian Loehle <christian.loehle@arm.com>
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: 92ce5c07b7a1: cpuidle: teo: Reorder candidate state index checks
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: ea185406d1ed: cpuidle: teo: Combine candidate state index checks against 0
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: b9a6af26bd83: cpuidle: teo: Drop local variable prev_intercept_idx
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: e24f8a55de50: cpuidle: teo: Clarify two code comments
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: d619b5cc6780: cpuidle: teo: Simplify counting events used for tick management
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: 13ed5c4a6d9c: cpuidle: teo: Skip getting the sleep length if wakeups are very frequent
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: ddcfa7964677: cpuidle: teo: Simplify handling of total events count
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: 65e18e654475: cpuidle: teo: Replace time_span_ns with a flag
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: 0796ddf4a7f0: cpuidle: teo: Use this_cpu_ptr() where possible
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: 8f3f01082d7a: cpuidle: governors: teo: Use s64 consistently in teo_update()
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+: b54df61c7428: cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Cc: 6.11+ <stable@vger.kernel.org> # 6.11+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[ rjw: Rebase on commit 0796ddf4a7f0, changelog update ]
Link: https://patch.msgid.link/6228387.lOV4Wx5bFT@rafael.j.wysocki
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/cpuidle/governors/teo.c

index 8b80d73e518ed15e3646217b8465176aae2c3834..94ba00b7617d731613203c83863806f13d762f07 100644 (file)
@@ -133,17 +133,19 @@ struct teo_bin {
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @state_bins: Idle state data bins for this CPU.
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
+ * @total_tick: Wakeups by the scheduler tick.
  * @tick_intercepts: "Intercepts" before TICK_NSEC.
  * @short_idles: Wakeups after short idle periods.
- * @artificial_wakeup: Set if the wakeup has been triggered by a safety net.
+ * @tick_wakeup: Set if the last wakeup was by the scheduler tick.
  */
 struct teo_cpu {
        s64 sleep_length_ns;
        struct teo_bin state_bins[CPUIDLE_STATE_MAX];
        unsigned int total;
+       unsigned int total_tick;
        unsigned int tick_intercepts;
        unsigned int short_idles;
-       bool artificial_wakeup;
+       bool tick_wakeup;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -172,9 +174,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
        teo_decay(&cpu_data->short_idles);
 
-       if (cpu_data->artificial_wakeup) {
+       if (dev->poll_time_limit) {
+               dev->poll_time_limit = false;
                /*
-                * If one of the safety nets has triggered, assume that this
+                * Polling state timeout has triggered, so assume that this
                 * might have been a long sleep.
                 */
                measured_ns = S64_MAX;
@@ -223,6 +226,21 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        cpu_data->total = total + PULSE;
 
        teo_decay(&cpu_data->tick_intercepts);
+
+       teo_decay(&cpu_data->total_tick);
+       if (cpu_data->tick_wakeup) {
+               cpu_data->total_tick += PULSE;
+               /*
+                * If tick wakeups dominate the wakeup pattern, count this one
+                * as a hit on the deepest available idle state to increase the
+                * likelihood of stopping the tick.
+                */
+               if (3 * cpu_data->total_tick > 2 * cpu_data->total) {
+                       cpu_data->state_bins[drv->state_count-1].hits += PULSE;
+                       return;
+               }
+       }
+
        /*
         * If the measured idle duration falls into the same bin as the sleep
         * length, this is a "hit", so update the "hits" metric for that bin.
@@ -512,18 +530,9 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
 {
        struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);
 
+       cpu_data->tick_wakeup = tick_nohz_idle_got_tick();
+
        dev->last_state_idx = state;
-       if (dev->poll_time_limit ||
-           (tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
-               /*
-                * The wakeup was not "genuine", but triggered by one of the
-                * safety nets.
-                */
-               dev->poll_time_limit = false;
-               cpu_data->artificial_wakeup = true;
-       } else {
-               cpu_data->artificial_wakeup = false;
-       }
 }
 
 /**