sched/fair: Attach sched_domain_shared to sd_asym_cpucapacity

author K Prateek Nayak <kprateek.nayak@amd.com>

Sat, 16 May 2026 05:58:50 +0000 (07:58 +0200)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 19 May 2026 10:17:38 +0000 (12:17 +0200)
author K Prateek Nayak <kprateek.nayak@amd.com>
Sat, 16 May 2026 05:58:50 +0000 (07:58 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 19 May 2026 10:17:38 +0000 (12:17 +0200)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 03f63b094ff984090a47cf73b51b744e266bfc9f..2637a6fe9a87edd58d61077d9be251d68a8878cd 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7773,7 +7773,7 @@ static inline void set_idle_cores(int cpu, int val)
  {
         struct sched_domain_shared *sds;
  
-       sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
+       sds = rcu_dereference_all(per_cpu(sd_balance_shared, cpu));
         if (sds)
                 WRITE_ONCE(sds->has_idle_cores, val);
  }
@@ -7782,7 +7782,7 @@ static inline bool test_idle_cores(int cpu)
  {
         struct sched_domain_shared *sds;
  
-       sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
+       sds = rcu_dereference_all(per_cpu(sd_balance_shared, cpu));
         if (sds)
                 return READ_ONCE(sds->has_idle_cores);
  
@@ -7791,7 +7791,7 @@ static inline bool test_idle_cores(int cpu)
  
  /*
   * Scans the local SMT mask to see if the entire core is idle, and records this
- * information in sd_llc_shared->has_idle_cores.
+ * information in sd_balance_shared->has_idle_cores.
   *
   * Since SMT siblings share all cache levels, inspecting this limited remote
   * state should be fairly cheap.
@@ -7821,7 +7821,8 @@ unlock:
  /*
   * Scan the entire LLC domain for idle cores; this dynamically switches off if
   * there are no idle cores left in the system; tracked through
- * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
+ * sd_balance_shared->has_idle_cores and enabled through update_idle_core()
+ * above.
   */
  static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
  {
@@ -7885,7 +7886,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
         struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
         int i, cpu, idle_cpu = -1, nr = INT_MAX;
  
-       if (sched_feat(SIS_UTIL)) {
+       if (sched_feat(SIS_UTIL) && sd->shared) {
                 /*
                  * Increment because !--nr is the condition to stop scan.
                  *
@@ -12764,7 +12765,7 @@ static void nohz_balancer_kick(struct rq *rq)
                 goto out;
         }
  
-       sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
+       sds = rcu_dereference_all(per_cpu(sd_balance_shared, cpu));
         if (sds) {
                 /*
                  * If there is an imbalance between LLC domains (IOW we could
@@ -12792,7 +12793,11 @@ static void set_cpu_sd_state_busy(int cpu)
         struct sched_domain *sd;
         sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
  
-       if (!sd || !sd->nohz_idle)
+       /*
+        * sd->nohz_idle only pairs with nr_busy_cpus on sd->shared; if this
+        * domain has no shared object there is nothing to clear or account.
+        */
+       if (!sd || !sd->shared || !sd->nohz_idle)
                 return;
         sd->nohz_idle = 0;
  
@@ -12817,7 +12822,8 @@ static void set_cpu_sd_state_idle(int cpu)
         struct sched_domain *sd;
         sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
  
-       if (!sd || sd->nohz_idle)
+       /* See set_cpu_sd_state_busy(): nohz_idle is only used with sd->shared. */
+       if (!sd || !sd->shared || sd->nohz_idle)
                 return;
         sd->nohz_idle = 1;
  
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index ffe77b2b6296ce1d9cbc11034a7ec4a388b1e78f..bfb4b47c021b2a929dd23503408f148ea8df3868 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2164,7 +2164,7 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
  DECLARE_PER_CPU(int, sd_llc_size);
  DECLARE_PER_CPU(int, sd_llc_id);
  DECLARE_PER_CPU(int, sd_share_id);
-DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
+DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_balance_shared);
  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c

index a1f46e3f4edea3686c64157fe32a7511c93cf9ec..f96d50131495abf99b80fa5817e2bbf380505da4 100644 (file)
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -665,7 +665,7 @@ DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
  DEFINE_PER_CPU(int, sd_llc_size);
  DEFINE_PER_CPU(int, sd_llc_id);
  DEFINE_PER_CPU(int, sd_share_id);
-DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
+DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_balance_shared);
  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
@@ -680,20 +680,38 @@ static void update_top_cache_domain(int cpu)
         int id = cpu;
         int size = 1;
  
+       sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
+       /*
+        * The shared object is attached to sd_asym_cpucapacity only when the
+        * asym domain is non-overlapping (i.e., not built from SD_NUMA).
+        * On overlapping (NUMA) asym domains we fall back to letting the
+        * SD_SHARE_LLC path own the shared object, so sd->shared may be NULL
+        * here.
+        */
+       if (sd && sd->shared)
+               sds = sd->shared;
+
+       rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
+
         sd = highest_flag_domain(cpu, SD_SHARE_LLC);
         if (sd) {
                 id = cpumask_first(sched_domain_span(sd));
                 size = cpumask_weight(sched_domain_span(sd));
  
-               /* If sd_llc exists, sd_llc_shared should exist too. */
-               WARN_ON_ONCE(!sd->shared);
-               sds = sd->shared;
+               /*
+                * If sd_asym_cpucapacity didn't claim the shared object,
+                * sd_llc must have one linked.
+                */
+               if (!sds) {
+                       WARN_ON_ONCE(!sd->shared);
+                       sds = sd->shared;
+               }
         }
  
         rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
         per_cpu(sd_llc_size, cpu) = size;
         per_cpu(sd_llc_id, cpu) = id;
-       rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
+       rcu_assign_pointer(per_cpu(sd_balance_shared, cpu), sds);
  
         sd = lowest_flag_domain(cpu, SD_CLUSTER);
         if (sd)
@@ -711,9 +729,6 @@ static void update_top_cache_domain(int cpu)
  
         sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
         rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
-
-       sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
-       rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
  }
  
  /*
@@ -2648,6 +2663,54 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
         }
  }
  
+static void init_sched_domain_shared(struct s_data *d, struct sched_domain *sd)
+{
+       int sd_id = cpumask_first(sched_domain_span(sd));
+
+       sd->shared = *per_cpu_ptr(d->sds, sd_id);
+       /*
+        * nr_busy_cpus is consumed only by the NOHZ kick path via
+        * sd_balance_shared; on the asym-capacity path it is initialized but
+        * never read.
+        */
+       atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
+       atomic_inc(&sd->shared->ref);
+}
+
+/*
+ * For asymmetric CPU capacity, attach sched_domain_shared to the innermost
+ * SD_ASYM_CPUCAPACITY_FULL domain at or above @cpu's base domain, unless that
+ * domain is an overlapping NUMA-built one, in which case LLC claims shared.
+ *
+ * A CPU may lack any FULL domain (e.g., exclusive cpuset symmetric island);
+ * LLC must claim shared in that case too.
+ *
+ * Note: SD_ASYM_CPUCAPACITY_FULL is only set when all CPU capacity values
+ * are present in the domain span, so the asym domain we attach to cannot
+ * degenerate into a single-capacity group. The relevant edge cases are instead
+ * covered by the caveats above.
+ *
+ * Return true if this CPU's asym path claimed sd->shared, false otherwise.
+ */
+static bool claim_asym_sched_domain_shared(struct s_data *d, int cpu)
+{
+       struct sched_domain *sd = *per_cpu_ptr(d->sd, cpu);
+       struct sched_domain *sd_asym;
+
+       if (!sd)
+               return false;
+
+       sd_asym = sd;
+       while (sd_asym && !(sd_asym->flags & SD_ASYM_CPUCAPACITY_FULL))
+               sd_asym = sd_asym->parent;
+
+       if (!sd_asym || (sd_asym->flags & SD_NUMA))
+               return false;
+
+       init_sched_domain_shared(d, sd_asym);
+       return true;
+}
+
  /*
   * Build sched domains for a given set of CPUs and attach the sched domains
   * to the individual CPUs
@@ -2706,20 +2769,26 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
         }
  
         for_each_cpu(i, cpu_map) {
+               bool asym_claimed = false;
+
                 sd = *per_cpu_ptr(d.sd, i);
                 if (!sd)
                         continue;
  
+               if (has_asym)
+                       asym_claimed = claim_asym_sched_domain_shared(&d, i);
+
                 /* First, find the topmost SD_SHARE_LLC domain */
                 while (sd->parent && (sd->parent->flags & SD_SHARE_LLC))
                         sd = sd->parent;
  
                 if (sd->flags & SD_SHARE_LLC) {
-                       int sd_id = cpumask_first(sched_domain_span(sd));
-
-                       sd->shared = *per_cpu_ptr(d.sds, sd_id);
-                       atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
-                       atomic_inc(&sd->shared->ref);
+                       /*
+                        * Initialize the sd->shared for SD_SHARE_LLC unless
+                        * the asym path above already claimed it.
+                        */
+                       if (!asym_claimed)
+                               init_sched_domain_shared(&d, sd);
  
                         /*
                          * In presence of higher domains, adjust the
author	K Prateek Nayak <kprateek.nayak@amd.com>
	Sat, 16 May 2026 05:58:50 +0000 (07:58 +0200)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 19 May 2026 10:17:38 +0000 (12:17 +0200)
kernel/sched/fair.c		patch \| blob \| blame \| history
kernel/sched/sched.h		patch \| blob \| blame \| history
kernel/sched/topology.c		patch \| blob \| blame \| history