cpufreq: governor: Fix negative 'idle_time' handling in dbs_update()

author Jie Zhan <zhanjie9@hisilicon.com>

Thu, 13 Feb 2025 03:55:10 +0000 (11:55 +0800)

committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Thu, 20 Feb 2025 19:27:19 +0000 (20:27 +0100)
author Jie Zhan <zhanjie9@hisilicon.com>
Thu, 13 Feb 2025 03:55:10 +0000 (11:55 +0800)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 20 Feb 2025 19:27:19 +0000 (20:27 +0100)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c

index af44ee6a64304f0e56265b9bbf5c00a60285726d..1a7fcaf39cc9b5e33b6140bc053e8c3143feb057 100644 (file)
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -145,7 +145,23 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
                 time_elapsed = update_time - j_cdbs->prev_update_time;
                 j_cdbs->prev_update_time = update_time;
  
-               idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
+               /*
+                * cur_idle_time could be smaller than j_cdbs->prev_cpu_idle if
+                * it's obtained from get_cpu_idle_time_jiffy() when NOHZ is
+                * off, where idle_time is calculated by the difference between
+                * time elapsed in jiffies and "busy time" obtained from CPU
+                * statistics.  If a CPU is 100% busy, the time elapsed and busy
+                * time should grow with the same amount in two consecutive
+                * samples, but in practice there could be a tiny difference,
+                * making the accumulated idle time decrease sometimes.  Hence,
+                * in this case, idle_time should be regarded as 0 in order to
+                * make the further process correct.
+                */
+               if (cur_idle_time > j_cdbs->prev_cpu_idle)
+                       idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
+               else
+                       idle_time = 0;
+
                 j_cdbs->prev_cpu_idle = cur_idle_time;
  
                 if (ignore_nice) {
@@ -162,7 +178,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
                          * calls, so the previous load value can be used then.
                          */
                         load = j_cdbs->prev_load;
-               } else if (unlikely((int)idle_time > 2 * sampling_rate &&
+               } else if (unlikely(idle_time > 2 * sampling_rate &&
                                     j_cdbs->prev_load)) {
                         /*
                          * If the CPU had gone completely idle and a task has
@@ -189,30 +205,15 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
                         load = j_cdbs->prev_load;
                         j_cdbs->prev_load = 0;
                 } else {
-                       if (time_elapsed >= idle_time) {
+                       if (time_elapsed > idle_time)
                                 load = 100 * (time_elapsed - idle_time) / time_elapsed;
-                       } else {
-                               /*
-                                * That can happen if idle_time is returned by
-                                * get_cpu_idle_time_jiffy().  In that case
-                                * idle_time is roughly equal to the difference
-                                * between time_elapsed and "busy time" obtained
-                                * from CPU statistics.  Then, the "busy time"
-                                * can end up being greater than time_elapsed
-                                * (for example, if jiffies_64 and the CPU
-                                * statistics are updated by different CPUs),
-                                * so idle_time may in fact be negative.  That
-                                * means, though, that the CPU was busy all
-                                * the time (on the rough average) during the
-                                * last sampling interval and 100 can be
-                                * returned as the load.
-                                */
-                               load = (int)idle_time < 0 ? 100 : 0;
-                       }
+                       else
+                               load = 0;
+
                         j_cdbs->prev_load = load;
                 }
  
-               if (unlikely((int)idle_time > 2 * sampling_rate)) {
+               if (unlikely(idle_time > 2 * sampling_rate)) {
                         unsigned int periods = idle_time / sampling_rate;
  
                         if (periods < idle_periods)
author	Jie Zhan <zhanjie9@hisilicon.com>
	Thu, 13 Feb 2025 03:55:10 +0000 (11:55 +0800)
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
	Thu, 20 Feb 2025 19:27:19 +0000 (20:27 +0100)