]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
cpufreq: governor: Fix data races on per-CPU idle/nice baselines
authorZhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Sun, 19 Apr 2026 13:26:53 +0000 (21:26 +0800)
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>
Tue, 26 May 2026 17:06:14 +0000 (19:06 +0200)
gov_update_cpu_data() resets per-CPU prev_cpu_idle for every CPU in the
governed domain, and conditionally resets prev_cpu_nice when
ignore_nice_load is set. It is called from sysfs store callbacks
(e.g. ignore_nice_load_store) which run under attr_set->update_lock,
held by the surrounding governor_store().

Concurrently, dbs_work_handler() calls gov->gov_dbs_update() (which calls
dbs_update()) under policy_dbs->update_mutex. dbs_update() both reads and
writes the same prev_cpu_idle / prev_cpu_nice fields. The potential race
path is:

Path A (sysfs write, holds attr_set->update_lock only):

  governor_store()
    mutex_lock(&attr_set->update_lock)
    ignore_nice_load_store()
      dbs_data->ignore_nice_load = input
      gov_update_cpu_data(dbs_data)
        list_for_each_entry(policy_dbs, ...)
          for_each_cpu(j, ...)
            j_cdbs->prev_cpu_idle = get_cpu_idle_time(...)  /* write */
            j_cdbs->prev_cpu_nice = kcpustat_field(...)     /* write */
    mutex_unlock(&attr_set->update_lock)

Path B (work queue, holds policy_dbs->update_mutex only):

  dbs_work_handler()
    mutex_lock(&policy_dbs->update_mutex)
    gov->gov_dbs_update(policy)
      dbs_update()
        for_each_cpu(j, policy->cpus)
          idle_time = cur - j_cdbs->prev_cpu_idle           /* read  */
          j_cdbs->prev_cpu_idle = cur_idle_time             /* write */
          idle_time += cur_nice - j_cdbs->prev_cpu_nice     /* read  */
          j_cdbs->prev_cpu_nice = cur_nice                  /* write */
    mutex_unlock(&policy_dbs->update_mutex)

Because attr_set->update_lock and policy_dbs->update_mutex are two
completely independent locks, the two paths are not mutually exclusive.
This results in a data race on cpu_dbs_info.prev_cpu_idle and
cpu_dbs_info.prev_cpu_nice.

Fix this by also acquiring policy_dbs->update_mutex in
gov_update_cpu_data() for each policy, so that path A participates in
the mutual exclusion already established by dbs_work_handler(). Also
update the function comment to accurately reflect the two-level locking
contract.

Additionally, cpufreq_dbs_governor_start() initializes prev_cpu_idle
using io_busy read from dbs_data->io_is_busy without holding
policy_dbs->update_mutex.  A concurrent io_is_busy_store() can update
io_is_busy and call gov_update_cpu_data(), which writes prev_cpu_idle
with the new value under the mutex.  cpufreq_dbs_governor_start() then
overwrites prev_cpu_idle with the stale io_busy value, leaving the
baseline inconsistent with the tunable.  Fix this by reading io_busy
inside the mutex.

The root of this race dates back to the original ondemand/conservative
governors. Before commit ee88415caf73 ("[CPUFREQ] Cleanup locking in
conservative governor") and commit 5a75c82828e7 ("[CPUFREQ] Cleanup
locking in ondemand governor"), all accesses to prev_cpu_idle and
prev_cpu_nice in cpufreq_governor_dbs() (path X), store_ignore_nice_load()
/io_is_busy_store() (path Y), and do_dbs_timer() (path Z) were serialised
by the same dbs_mutex, so no race existed. Those two commits switched
do_dbs_timer() from dbs_mutex to a per-policy/per-cpu timer_mutex to
reduce lock contention, but left path Y (store) still holding dbs_mutex.
As a result, path Y (store) and path Z (do_dbs_timer) no longer shared a
common lock, introducing a potential race on prev_cpu_idle/prev_cpu_nice
between path Y (store) and dbs_check_cpu().

Commit 326c86deaed54a ("[CPUFREQ] Remove unneeded locks") then removed
dbs_mutex from store_ignore_nice_load()/io_is_busy_store() entirely,
introducing an additional potential race between path Y (now lockless)
and cpufreq_governor_dbs() (path X, still holding dbs_mutex), while the
race between path Y and path Z remained.

Fixes: ee88415caf736b ("[CPUFREQ] Cleanup locking in conservative governor")
Fixes: 5a75c82828e7c0 ("[CPUFREQ] Cleanup locking in ondemand governor")
Fixes: 326c86deaed54a ("[CPUFREQ] Remove unneeded locks")
Signed-off-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Link: https://patch.msgid.link/20260419132655.3800673-2-zhongqiu.han@oss.qualcomm.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/cpufreq/cpufreq_governor.c

index 86f35e45191423ebcfe22a463ccc57bc5031eec7..fc6f705c5a9c706f19f1da97d3da56ea2d57447d 100644 (file)
@@ -90,7 +90,8 @@ EXPORT_SYMBOL_GPL(sampling_rate_store);
  * (that may be a single policy or a bunch of them if governor tunables are
  * system-wide).
  *
- * Call under the @dbs_data mutex.
+ * Call under the @dbs_data->attr_set.update_lock. The per-policy
+ * update_mutex is acquired and released internally for each policy.
  */
 void gov_update_cpu_data(struct dbs_data *dbs_data)
 {
@@ -99,6 +100,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
        list_for_each_entry(policy_dbs, &dbs_data->attr_set.policy_list, list) {
                unsigned int j;
 
+               mutex_lock(&policy_dbs->update_mutex);
                for_each_cpu(j, policy_dbs->policy->cpus) {
                        struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
@@ -107,6 +109,7 @@ void gov_update_cpu_data(struct dbs_data *dbs_data)
                        if (dbs_data->ignore_nice_load)
                                j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
                }
+               mutex_unlock(&policy_dbs->update_mutex);
        }
 }
 EXPORT_SYMBOL_GPL(gov_update_cpu_data);
@@ -527,8 +530,9 @@ int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
 
        sampling_rate = dbs_data->sampling_rate;
        ignore_nice = dbs_data->ignore_nice_load;
-       io_busy = dbs_data->io_is_busy;
 
+       mutex_lock(&policy_dbs->update_mutex);
+       io_busy = dbs_data->io_is_busy;
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
@@ -541,6 +545,7 @@ int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
                if (ignore_nice)
                        j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
        }
+       mutex_unlock(&policy_dbs->update_mutex);
 
        gov->start(policy);