sched/deadline: Rebuild root domain accounting after every update
author Juri Lelli <juri.lelli@redhat.com>
Thu, 13 Mar 2025 17:10:21 +0000 (18:10 +0100)
committer Peter Zijlstra <peterz@infradead.org>
Mon, 17 Mar 2025 10:23:42 +0000 (11:23 +0100)
Rebuilding of root domain accounting information (total_bw) is
currently broken in some cases, e.g. suspend/resume on aarch64. The
problem is that the way we keep track of domain changes and try to add
bandwidth back is convoluted and fragile.

Fix this by simplifying things: make sure bandwidth accounting is
cleared and completely restored after root domain changes (once root
domains are stable again).
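
A condensed sketch of the resulting flow, pieced together from the hunks
below (the body of the cpuset walk is untouched by this patch and only
summarized here):

  /* kernel/sched/topology.c: every locked rebuild now ends with a full
   * DL accounting rebuild, once the new root domains are in place. */
  void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
                                      struct sched_domain_attr *dattr_new)
  {
          /* ... build and attach the new root domains ... */
          update_sched_domain_debugfs();
          dl_rebuild_rd_accounting();
  }

  /* kernel/cgroup/cpuset.c: clear every root domain's DL accounting once
   * (the cookie avoids clearing a shared root domain twice), then walk the
   * cpuset hierarchy and add each DEADLINE task's bandwidth back. */
  void dl_rebuild_rd_accounting(void)
  {
          struct cgroup_subsys_state *pos_css;
          struct cpuset *cs = NULL;
          u64 cookie = ++dl_cookie;
          int cpu;

          for_each_possible_cpu(cpu) {
                  if (dl_bw_visited(cpu, cookie))
                          continue;
                  dl_clear_root_domain_cpu(cpu);
          }

          cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset)
                  dl_update_tasks_root_domain(cs);
  }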

To make sure dl_rebuild_rd_accounting() is always called while holding
cpuset_mutex, also add a cpuset_reset_sched_domains() wrapper.
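
For reference, the CONFIG_CPUSETS variant of the wrapper (taken verbatim
from the cpuset.c hunk below) just takes cpuset_mutex around the
single-domain rebuild, which satisfies the lockdep_assert_held(&cpuset_mutex)
in dl_rebuild_rd_accounting() on the hotplug/suspend paths in
kernel/sched/core.c:

  void cpuset_reset_sched_domains(void)
  {
          mutex_lock(&cpuset_mutex);
          partition_sched_domains(1, NULL, NULL);
          mutex_unlock(&cpuset_mutex);
  }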

Fixes: 53916d5fd3c0 ("sched/deadline: Check bandwidth overflow earlier for hotplug")
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Co-developed-by: Waiman Long <llong@redhat.com>
Signed-off-by: Waiman Long <llong@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/Z9MRfeJKJUOyUSto@jlelli-thinkpadt14gen4.remote.csb
include/linux/cpuset.h
include/linux/sched/deadline.h
include/linux/sched/topology.h
kernel/cgroup/cpuset.c
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/topology.c

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 835e7b793f6a3c3fcbf7fc213e545e3b5cc91b1c..17cc90d900f966c46521820fca8f557bc4f97396 100644
@@ -128,6 +128,7 @@ extern bool current_cpuset_is_being_rebound(void);
 extern void rebuild_sched_domains(void);
 
 extern void cpuset_print_current_mems_allowed(void);
+extern void cpuset_reset_sched_domains(void);
 
 /*
  * read_mems_allowed_begin is required when making decisions involving
@@ -264,6 +265,11 @@ static inline void rebuild_sched_domains(void)
        partition_sched_domains(1, NULL, NULL);
 }
 
+static inline void cpuset_reset_sched_domains(void)
+{
+       partition_sched_domains(1, NULL, NULL);
+}
+
 static inline void cpuset_print_current_mems_allowed(void)
 {
 }
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 6ec578600b24ca4185180a1b4f3c4af33bb93e57..f9aabbc9d22ef87011cf966906eff3b77944e867 100644
@@ -34,6 +34,7 @@ static inline bool dl_time_before(u64 a, u64 b)
 struct root_domain;
 extern void dl_add_task_root_domain(struct task_struct *p);
 extern void dl_clear_root_domain(struct root_domain *rd);
+extern void dl_clear_root_domain_cpu(int cpu);
 
 #endif /* CONFIG_SMP */
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 7f3dbafe18177c0680b5b503e6d3b5b524d3592f..1622232bd08b9c02f2d4091b9a055c58ac03bd07 100644
@@ -166,6 +166,8 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
        return to_cpumask(sd->span);
 }
 
+extern void dl_rebuild_rd_accounting(void);
+
 extern void partition_sched_domains_locked(int ndoms_new,
                                           cpumask_var_t doms_new[],
                                           struct sched_domain_attr *dattr_new);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f87526edb2a46bc25eb95fc6ef7eb978b09a1bbf..1892dc8cd2119180100a005019c40e5c21edf636 100644
@@ -954,10 +954,12 @@ static void dl_update_tasks_root_domain(struct cpuset *cs)
        css_task_iter_end(&it);
 }
 
-static void dl_rebuild_rd_accounting(void)
+void dl_rebuild_rd_accounting(void)
 {
        struct cpuset *cs = NULL;
        struct cgroup_subsys_state *pos_css;
+       int cpu;
+       u64 cookie = ++dl_cookie;
 
        lockdep_assert_held(&cpuset_mutex);
        lockdep_assert_cpus_held();
@@ -965,11 +967,12 @@ static void dl_rebuild_rd_accounting(void)
 
        rcu_read_lock();
 
-       /*
-        * Clear default root domain DL accounting, it will be computed again
-        * if a task belongs to it.
-        */
-       dl_clear_root_domain(&def_root_domain);
+       for_each_possible_cpu(cpu) {
+               if (dl_bw_visited(cpu, cookie))
+                       continue;
+
+               dl_clear_root_domain_cpu(cpu);
+       }
 
        cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
 
@@ -996,7 +999,6 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 {
        sched_domains_mutex_lock();
        partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
-       dl_rebuild_rd_accounting();
        sched_domains_mutex_unlock();
 }
 
@@ -1083,6 +1085,13 @@ void rebuild_sched_domains(void)
        cpus_read_unlock();
 }
 
+void cpuset_reset_sched_domains(void)
+{
+       mutex_lock(&cpuset_mutex);
+       partition_sched_domains(1, NULL, NULL);
+       mutex_unlock(&cpuset_mutex);
+}
+
 /**
  * cpuset_update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
  * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84f68007e08f0bb2de176502db7ee3b9afd447de..affa99f56f65371f355050f7560622154ce9c21f 100644
@@ -8229,7 +8229,7 @@ static void cpuset_cpu_active(void)
                 * operation in the resume sequence, just build a single sched
                 * domain, ignoring cpusets.
                 */
-               partition_sched_domains(1, NULL, NULL);
+               cpuset_reset_sched_domains();
                if (--num_cpus_frozen)
                        return;
                /*
@@ -8248,7 +8248,7 @@ static void cpuset_cpu_inactive(unsigned int cpu)
                cpuset_update_active_cpus();
        } else {
                num_cpus_frozen++;
-               partition_sched_domains(1, NULL, NULL);
+               cpuset_reset_sched_domains();
        }
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3e05032e9e0ebf4a6214d0a15aa5c3a1c2ba019e..5dca336cdd7ca5b4824edbfc87459068e4f37fb5 100644
@@ -166,7 +166,7 @@ static inline unsigned long dl_bw_capacity(int i)
        }
 }
 
-static inline bool dl_bw_visited(int cpu, u64 cookie)
+bool dl_bw_visited(int cpu, u64 cookie)
 {
        struct root_domain *rd = cpu_rq(cpu)->rd;
 
@@ -207,7 +207,7 @@ static inline unsigned long dl_bw_capacity(int i)
        return SCHED_CAPACITY_SCALE;
 }
 
-static inline bool dl_bw_visited(int cpu, u64 cookie)
+bool dl_bw_visited(int cpu, u64 cookie)
 {
        return false;
 }
@@ -2981,18 +2981,22 @@ void dl_clear_root_domain(struct root_domain *rd)
        rd->dl_bw.total_bw = 0;
 
        /*
-        * dl_server bandwidth is only restored when CPUs are attached to root
-        * domains (after domains are created or CPUs moved back to the
-        * default root doamin).
+        * dl_servers are not tasks. Since dl_add_task_root_domain ignores
+        * them, we need to account for them here explicitly.
         */
        for_each_cpu(i, rd->span) {
                struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
 
                if (dl_server(dl_se) && cpu_active(i))
-                       rd->dl_bw.total_bw += dl_se->dl_bw;
+                       __dl_add(&rd->dl_bw, dl_se->dl_bw, dl_bw_cpus(i));
        }
 }
 
+void dl_clear_root_domain_cpu(int cpu)
+{
+       dl_clear_root_domain(cpu_rq(cpu)->rd);
+}
+
 #endif /* CONFIG_SMP */
 
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 44093339761c91cdb27c0cf3e3fd044a85d680f3..363ad268a25b0f52bd29320fe213b7e38a642a18 100644
@@ -2791,6 +2791,7 @@ match3:
        ndoms_cur = ndoms_new;
 
        update_sched_domain_debugfs();
+       dl_rebuild_rd_accounting();
 }
 
 /*