From: Chen Yu Date: Wed, 13 May 2026 20:39:25 +0000 (-0700) Subject: sched/cache: Fix has_multi_llcs iff at least one partition has multiple LLCs X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=a7660ce1590fc1316a44cc2af53a07a21dfc25da;p=thirdparty%2Flinux.git sched/cache: Fix has_multi_llcs iff at least one partition has multiple LLCs sched_cache_present is a global static key, but build_sched_domains() is called per partition from the "Build new domains" loop in partition_sched_domains_locked(). Each call unconditionally sets the key based solely on the has_multi_llcs local variable for that partition. The call to the last partition set the value even when there are previous partitions with multiple LLCs. If partition A (multi-LLC) is built first, the key is enabled. Then when partition B (single-LLC) is built, the key is disabled. The multi-LLC partition A is still active but the key is now off. Fix it by doing a similar thing as sched_energy_present: check the multi-LLCs during the iteration over all the partitions rather than checking it on a single partition. This bug was reported by sashiko. Fixes: d59f4fd1d303 ("sched/cache: Enable cache aware scheduling for multi LLCs NUMA node") Signed-off-by: Chen Yu Co-developed-by: Tim Chen Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/c541af2547d54509fbfd3b3a1e8072e2e5c7ff68.1778703694.git.tim.c.chen@linux.intel.com --- diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 4b7c64cbe854..e47a3f72eb72 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -951,6 +951,7 @@ static void _sched_cache_active_set(void) } } +/* used by debugfs */ void sched_cache_active_set(void) { cpus_read_lock(); @@ -1000,12 +1001,27 @@ void sched_update_llc_bytes(unsigned int cpu) unlock: sched_domains_mutex_unlock(); } + +static void sched_cache_set(bool has_multi_llcs) +{ + /* + * TBD: check before writing to it. sched domain rebuild + * is not in the critical path, leave as-is for now. + */ + if (has_multi_llcs) + static_branch_enable_cpuslocked(&sched_cache_present); + else + static_branch_disable_cpuslocked(&sched_cache_present); + + _sched_cache_active_set(); +} #else static bool alloc_sd_llc(const struct cpumask *cpu_map, struct s_data *d) { return false; } +static inline void sched_cache_set(bool has_multi_llcs) { } #endif /* @@ -2950,7 +2966,8 @@ void sched_domains_free_llc_id(int cpu) * to the individual CPUs */ static int -build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr) +build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr, + bool *multi_llcs) { enum s_alloc alloc_state = sa_none; bool has_multi_llcs = false; @@ -3094,18 +3111,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att ret = 0; error: -#ifdef CONFIG_SCHED_CACHE - /* - * TBD: check before writing to it. sched domain rebuild - * is not in the critical path, leave as-is for now. - */ - if (!ret && has_multi_llcs) - static_branch_enable_cpuslocked(&sched_cache_present); - else - static_branch_disable_cpuslocked(&sched_cache_present); - - _sched_cache_active_set(); -#endif + *multi_llcs = has_multi_llcs; __free_domain_allocs(&d, alloc_state, cpu_map); return ret; @@ -3168,6 +3174,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) */ int __init sched_init_domains(const struct cpumask *cpu_map) { + bool multi_llcs; int err; zalloc_cpumask_var(&sched_domains_llc_id_allocmask, GFP_KERNEL); @@ -3182,7 +3189,9 @@ int __init sched_init_domains(const struct cpumask *cpu_map) if (!doms_cur) doms_cur = &fallback_doms; cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_TYPE_DOMAIN)); - err = build_sched_domains(doms_cur[0], NULL); + err = build_sched_domains(doms_cur[0], NULL, &multi_llcs); + if (!err) + sched_cache_set(multi_llcs); return err; } @@ -3255,6 +3264,7 @@ static void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new struct sched_domain_attr *dattr_new) { bool __maybe_unused has_eas = false; + bool has_multi_llcs = false, multi_llcs; int i, j, n; int new_topology; @@ -3304,14 +3314,41 @@ match1: for (i = 0; i < ndoms_new; i++) { for (j = 0; j < n && !new_topology; j++) { if (cpumask_equal(doms_new[i], doms_cur[j]) && - dattrs_equal(dattr_new, i, dattr_cur, j)) + dattrs_equal(dattr_new, i, dattr_cur, j)) { + /* + * Reused partition has to be taken care + * of here, because there could be a corner + * case that if the reused partition is skipped + * and only new partition is considered, an + * incorrect has_multi_llcs would be set. For + * example: + * If the only multi-LLC partition is reused + * and a new single-LLC partition is built, + * sched_cache_set(false) disables cache-aware + * scheduling globally despite the reused + * multi-LLC partition still being active. + */ + struct sched_domain *sd; + int cpu = cpumask_first(doms_cur[j]); + + guard(rcu)(); + sd = rcu_dereference(cpu_rq(cpu)->sd); + while (sd && sd->parent && (sd->parent->flags & SD_SHARE_LLC)) + sd = sd->parent; + if (sd && (sd->flags & SD_SHARE_LLC) && sd->parent && + sd_in_multi_llcs(sd)) + has_multi_llcs = true; goto match2; + } } /* No match - add a new doms_new */ - build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); + build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL, + &multi_llcs); + has_multi_llcs |= multi_llcs; match2: ; } + sched_cache_set(has_multi_llcs); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) /* Build perf domains: */