From: Greg Kroah-Hartman Date: Mon, 21 Aug 2023 13:09:08 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.4.12~44 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=eabb5d96ceb5a60b8542e60c8c93ba8d76cb1488;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: sched-fair-remove-capacity-inversion-detection.patch sched-fair-unlink-misfit-task-from-cpu-overutilized.patch --- diff --git a/queue-6.1/sched-fair-remove-capacity-inversion-detection.patch b/queue-6.1/sched-fair-remove-capacity-inversion-detection.patch new file mode 100644 index 00000000000..e7eb5f1afef --- /dev/null +++ b/queue-6.1/sched-fair-remove-capacity-inversion-detection.patch @@ -0,0 +1,171 @@ +From a2e90611b9f425adbbfcdaa5b5e49958ddf6f61b Mon Sep 17 00:00:00 2001 +From: Vincent Guittot +Date: Wed, 1 Feb 2023 15:36:28 +0100 +Subject: sched/fair: Remove capacity inversion detection + +From: Vincent Guittot + +commit a2e90611b9f425adbbfcdaa5b5e49958ddf6f61b upstream. + +Remove the capacity inversion detection which is now handled by +util_fits_cpu() returning -1 when we need to continue to look for a +potential CPU with better performance. + +This ends up almost reverting patches below except for some comments: +commit da07d2f9c153 ("sched/fair: Fixes for capacity inversion detection") +commit aa69c36f31aa ("sched/fair: Consider capacity inversion in util_fits_cpu()") +commit 44c7b80bffc3 ("sched/fair: Detect capacity inversion") + +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20230201143628.270912-3-vincent.guittot@linaro.org +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 84 +++------------------------------------------------ + kernel/sched/sched.h | 19 ----------- + 2 files changed, 5 insertions(+), 98 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4464,17 +4464,9 @@ static inline int util_fits_cpu(unsigned + * + * For uclamp_max, we can tolerate a drop in performance level as the + * goal is to cap the task. So it's okay if it's getting less. +- * +- * In case of capacity inversion we should honour the inverted capacity +- * for both uclamp_min and uclamp_max all the time. + */ +- capacity_orig = cpu_in_capacity_inversion(cpu); +- if (capacity_orig) { +- capacity_orig_thermal = capacity_orig; +- } else { +- capacity_orig = capacity_orig_of(cpu); +- capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); +- } ++ capacity_orig = capacity_orig_of(cpu); ++ capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); + + /* + * We want to force a task to fit a cpu as implied by uclamp_max. +@@ -8929,82 +8921,16 @@ static unsigned long scale_rt_capacity(i + + static void update_cpu_capacity(struct sched_domain *sd, int cpu) + { +- unsigned long capacity_orig = arch_scale_cpu_capacity(cpu); + unsigned long capacity = scale_rt_capacity(cpu); + struct sched_group *sdg = sd->groups; +- struct rq *rq = cpu_rq(cpu); + +- rq->cpu_capacity_orig = capacity_orig; ++ cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu); + + if (!capacity) + capacity = 1; + +- rq->cpu_capacity = capacity; +- +- /* +- * Detect if the performance domain is in capacity inversion state. +- * +- * Capacity inversion happens when another perf domain with equal or +- * lower capacity_orig_of() ends up having higher capacity than this +- * domain after subtracting thermal pressure. 
+- * +- * We only take into account thermal pressure in this detection as it's +- * the only metric that actually results in *real* reduction of +- * capacity due to performance points (OPPs) being dropped/become +- * unreachable due to thermal throttling. +- * +- * We assume: +- * * That all cpus in a perf domain have the same capacity_orig +- * (same uArch). +- * * Thermal pressure will impact all cpus in this perf domain +- * equally. +- */ +- if (sched_energy_enabled()) { +- unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); +- struct perf_domain *pd; +- +- rcu_read_lock(); +- +- pd = rcu_dereference(rq->rd->pd); +- rq->cpu_capacity_inverted = 0; +- +- for (; pd; pd = pd->next) { +- struct cpumask *pd_span = perf_domain_span(pd); +- unsigned long pd_cap_orig, pd_cap; +- +- /* We can't be inverted against our own pd */ +- if (cpumask_test_cpu(cpu_of(rq), pd_span)) +- continue; +- +- cpu = cpumask_any(pd_span); +- pd_cap_orig = arch_scale_cpu_capacity(cpu); +- +- if (capacity_orig < pd_cap_orig) +- continue; +- +- /* +- * handle the case of multiple perf domains have the +- * same capacity_orig but one of them is under higher +- * thermal pressure. We record it as capacity +- * inversion. +- */ +- if (capacity_orig == pd_cap_orig) { +- pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu)); +- +- if (pd_cap > inv_cap) { +- rq->cpu_capacity_inverted = inv_cap; +- break; +- } +- } else if (pd_cap_orig > inv_cap) { +- rq->cpu_capacity_inverted = inv_cap; +- break; +- } +- } +- +- rcu_read_unlock(); +- } +- +- trace_sched_cpu_capacity_tp(rq); ++ cpu_rq(cpu)->cpu_capacity = capacity; ++ trace_sched_cpu_capacity_tp(cpu_rq(cpu)); + + sdg->sgc->capacity = capacity; + sdg->sgc->min_capacity = capacity; +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1041,7 +1041,6 @@ struct rq { + + unsigned long cpu_capacity; + unsigned long cpu_capacity_orig; +- unsigned long cpu_capacity_inverted; + + struct balance_callback *balance_callback; + +@@ -2879,24 +2878,6 @@ static inline unsigned long capacity_ori + return cpu_rq(cpu)->cpu_capacity_orig; + } + +-/* +- * Returns inverted capacity if the CPU is in capacity inversion state. +- * 0 otherwise. +- * +- * Capacity inversion detection only considers thermal impact where actual +- * performance points (OPPs) gets dropped. +- * +- * Capacity inversion state happens when another performance domain that has +- * equal or lower capacity_orig_of() becomes effectively larger than the perf +- * domain this CPU belongs to due to thermal pressure throttling it hard. +- * +- * See comment in update_cpu_capacity(). +- */ +-static inline unsigned long cpu_in_capacity_inversion(int cpu) +-{ +- return cpu_rq(cpu)->cpu_capacity_inverted; +-} +- + /** + * enum cpu_util_type - CPU utilization type + * @FREQUENCY_UTIL: Utilization used to select frequency diff --git a/queue-6.1/sched-fair-unlink-misfit-task-from-cpu-overutilized.patch b/queue-6.1/sched-fair-unlink-misfit-task-from-cpu-overutilized.patch new file mode 100644 index 00000000000..dc36fe3ddec --- /dev/null +++ b/queue-6.1/sched-fair-unlink-misfit-task-from-cpu-overutilized.patch @@ -0,0 +1,268 @@ +From e5ed0550c04c5469ecdc1634d8aa18c8609590f0 Mon Sep 17 00:00:00 2001 +From: Vincent Guittot +Date: Wed, 1 Feb 2023 15:36:27 +0100 +Subject: sched/fair: unlink misfit task from cpu overutilized + +From: Vincent Guittot + +commit e5ed0550c04c5469ecdc1634d8aa18c8609590f0 upstream. 
+ +By taking into account uclamp_min, the 1:1 relation between task misfit +and cpu overutilized is no more true as a task with a small util_avg may +not fit a high capacity cpu because of uclamp_min constraint. + +Add a new state in util_fits_cpu() to reflect the case that task would fit +a CPU except for the uclamp_min hint which is a performance requirement. + +Use -1 to reflect that a CPU doesn't fit only because of uclamp_min so we +can use this new value to take additional action to select the best CPU +that doesn't match uclamp_min hint. + +When util_fits_cpu() returns -1, we will continue to look for a possible +CPU with better performance, which replaces Capacity Inversion detection +with capacity_orig_of() - thermal_load_avg to detect a capacity inversion. + +Signed-off-by: Vincent Guittot +Reviewed-and-tested-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Tested-by: Kajetan Puchalski +Link: https://lore.kernel.org/r/20230201143628.270912-2-vincent.guittot@linaro.org +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 105 ++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 82 insertions(+), 23 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4549,8 +4549,8 @@ static inline int util_fits_cpu(unsigned + * handle the case uclamp_min > uclamp_max. + */ + uclamp_min = min(uclamp_min, uclamp_max); +- if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE) +- fits = fits && (uclamp_min <= capacity_orig_thermal); ++ if (fits && (util < uclamp_min) && (uclamp_min > capacity_orig_thermal)) ++ return -1; + + return fits; + } +@@ -4560,7 +4560,11 @@ static inline int task_fits_cpu(struct t + unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN); + unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX); + unsigned long util = task_util_est(p); +- return util_fits_cpu(util, uclamp_min, uclamp_max, cpu); ++ /* ++ * Return true only if the cpu fully fits the task requirements, which ++ * include the utilization but also the performance hints. ++ */ ++ return (util_fits_cpu(util, uclamp_min, uclamp_max, cpu) > 0); + } + + static inline void update_misfit_status(struct task_struct *p, struct rq *rq) +@@ -6043,6 +6047,7 @@ static inline bool cpu_overutilized(int + unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); + unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); + ++ /* Return true only if the utilization doesn't fit CPU's capacity */ + return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu); + } + +@@ -6836,6 +6841,7 @@ static int + select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) + { + unsigned long task_util, util_min, util_max, best_cap = 0; ++ int fits, best_fits = 0; + int cpu, best_cpu = -1; + struct cpumask *cpus; + +@@ -6851,12 +6857,28 @@ select_idle_capacity(struct task_struct + + if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) + continue; +- if (util_fits_cpu(task_util, util_min, util_max, cpu)) ++ ++ fits = util_fits_cpu(task_util, util_min, util_max, cpu); ++ ++ /* This CPU fits with all requirements */ ++ if (fits > 0) + return cpu; ++ /* ++ * Only the min performance hint (i.e. uclamp_min) doesn't fit. ++ * Look for the CPU with best capacity. ++ */ ++ else if (fits < 0) ++ cpu_cap = capacity_orig_of(cpu) - thermal_load_avg(cpu_rq(cpu)); + +- if (cpu_cap > best_cap) { ++ /* ++ * First, select CPU which fits better (-1 being better than 0). 
++ * Then, select the one with best capacity at same level. ++ */ ++ if ((fits < best_fits) || ++ ((fits == best_fits) && (cpu_cap > best_cap))) { + best_cap = cpu_cap; + best_cpu = cpu; ++ best_fits = fits; + } + } + +@@ -6869,7 +6891,11 @@ static inline bool asym_fits_cpu(unsigne + int cpu) + { + if (sched_asym_cpucap_active()) +- return util_fits_cpu(util, util_min, util_max, cpu); ++ /* ++ * Return true only if the cpu fully fits the task requirements ++ * which include the utilization and the performance hints. ++ */ ++ return (util_fits_cpu(util, util_min, util_max, cpu) > 0); + + return true; + } +@@ -7236,6 +7262,9 @@ static int find_energy_efficient_cpu(str + unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024; + struct root_domain *rd = this_rq()->rd; + int cpu, best_energy_cpu, target = -1; ++ int prev_fits = -1, best_fits = -1; ++ unsigned long best_thermal_cap = 0; ++ unsigned long prev_thermal_cap = 0; + struct sched_domain *sd; + struct perf_domain *pd; + struct energy_env eenv; +@@ -7271,6 +7300,7 @@ static int find_energy_efficient_cpu(str + unsigned long prev_spare_cap = 0; + int max_spare_cap_cpu = -1; + unsigned long base_energy; ++ int fits, max_fits = -1; + + cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask); + +@@ -7320,7 +7350,9 @@ static int find_energy_efficient_cpu(str + util_min = max(rq_util_min, p_util_min); + util_max = max(rq_util_max, p_util_max); + } +- if (!util_fits_cpu(util, util_min, util_max, cpu)) ++ ++ fits = util_fits_cpu(util, util_min, util_max, cpu); ++ if (!fits) + continue; + + lsub_positive(&cpu_cap, util); +@@ -7328,7 +7360,9 @@ static int find_energy_efficient_cpu(str + if (cpu == prev_cpu) { + /* Always use prev_cpu as a candidate. */ + prev_spare_cap = cpu_cap; +- } else if (cpu_cap > max_spare_cap) { ++ prev_fits = fits; ++ } else if ((fits > max_fits) || ++ ((fits == max_fits) && (cpu_cap > max_spare_cap))) { + /* + * Find the CPU with the maximum spare capacity + * among the remaining CPUs in the performance +@@ -7336,6 +7370,7 @@ static int find_energy_efficient_cpu(str + */ + max_spare_cap = cpu_cap; + max_spare_cap_cpu = cpu; ++ max_fits = fits; + } + } + +@@ -7354,26 +7389,50 @@ static int find_energy_efficient_cpu(str + if (prev_delta < base_energy) + goto unlock; + prev_delta -= base_energy; ++ prev_thermal_cap = cpu_thermal_cap; + best_delta = min(best_delta, prev_delta); + } + + /* Evaluate the energy impact of using max_spare_cap_cpu. */ + if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) { ++ /* Current best energy cpu fits better */ ++ if (max_fits < best_fits) ++ continue; ++ ++ /* ++ * Both don't fit performance hint (i.e. uclamp_min) ++ * but best energy cpu has better capacity. ++ */ ++ if ((max_fits < 0) && ++ (cpu_thermal_cap <= best_thermal_cap)) ++ continue; ++ + cur_delta = compute_energy(&eenv, pd, cpus, p, + max_spare_cap_cpu); + /* CPU utilization has changed */ + if (cur_delta < base_energy) + goto unlock; + cur_delta -= base_energy; +- if (cur_delta < best_delta) { +- best_delta = cur_delta; +- best_energy_cpu = max_spare_cap_cpu; +- } ++ ++ /* ++ * Both fit for the task but best energy cpu has lower ++ * energy impact. 
++ */ ++ if ((max_fits > 0) && (best_fits > 0) && ++ (cur_delta >= best_delta)) ++ continue; ++ ++ best_delta = cur_delta; ++ best_energy_cpu = max_spare_cap_cpu; ++ best_fits = max_fits; ++ best_thermal_cap = cpu_thermal_cap; + } + } + rcu_read_unlock(); + +- if (best_delta < prev_delta) ++ if ((best_fits > prev_fits) || ++ ((best_fits > 0) && (best_delta < prev_delta)) || ++ ((best_fits < 0) && (best_thermal_cap > prev_thermal_cap))) + target = best_energy_cpu; + + return target; +@@ -10183,24 +10242,23 @@ static struct sched_group *find_busiest_ + */ + update_sd_lb_stats(env, &sds); + +- if (sched_energy_enabled()) { +- struct root_domain *rd = env->dst_rq->rd; +- +- if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized)) +- goto out_balanced; +- } +- +- local = &sds.local_stat; +- busiest = &sds.busiest_stat; +- + /* There is no busy sibling group to pull tasks from */ + if (!sds.busiest) + goto out_balanced; + ++ busiest = &sds.busiest_stat; ++ + /* Misfit tasks should be dealt with regardless of the avg load */ + if (busiest->group_type == group_misfit_task) + goto force_balance; + ++ if (sched_energy_enabled()) { ++ struct root_domain *rd = env->dst_rq->rd; ++ ++ if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized)) ++ goto out_balanced; ++ } ++ + /* ASYM feature bypasses nice load balance check */ + if (busiest->group_type == group_asym_packing) + goto force_balance; +@@ -10213,6 +10271,7 @@ static struct sched_group *find_busiest_ + if (busiest->group_type == group_imbalanced) + goto force_balance; + ++ local = &sds.local_stat; + /* + * If the local group is busier than the selected busiest group + * don't try and pull any tasks. diff --git a/queue-6.1/series b/queue-6.1/series index 89b96f87c9a..69824bff91f 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -171,3 +171,5 @@ drm-amdgpu-pm-fix-throttle_status-for-other-than-mp1-11.0.7.patch asoc-amd-vangogh-select-config_snd_amd_acp_config.patch drm-amd-display-disable-rco-for-dcn314.patch zsmalloc-allow-only-one-active-pool-compaction-context.patch +sched-fair-unlink-misfit-task-from-cpu-overutilized.patch +sched-fair-remove-capacity-inversion-detection.patch
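
The scheduling change both patches build on is that util_fits_cpu() becomes tri-state: a positive return means the CPU satisfies the utilization and both uclamp hints, 0 means the utilization itself does not fit, and -1 means everything fits except the uclamp_min performance hint, so callers keep searching for a CPU with better capacity. Below is a minimal standalone C sketch of that convention, not the kernel code; the flattened signature, the stand-in helper names and the example numbers are illustrative assumptions, and the kernel's fits_capacity() headroom and uclamp_max capping of runaway utilization are omitted.

    /*
     * Illustrative sketch only -- not the kernel implementation.  It mirrors
     * the tri-state convention the patches give util_fits_cpu():
     *    1  -> utilization and both uclamp hints fit the CPU
     *    0  -> the utilization itself does not fit
     *   -1  -> only the uclamp_min performance hint is unmet, so callers
     *          keep searching for a CPU with better capacity
     * All parameters below are simplified stand-ins for the real helpers.
     */
    #include <stdio.h>

    static int util_fits_cpu_sketch(unsigned long util,
                                    unsigned long uclamp_min,
                                    unsigned long uclamp_max,
                                    unsigned long capacity,          /* capacity_orig_of(cpu) */
                                    unsigned long thermal_pressure)  /* arch_scale_thermal_pressure(cpu) */
    {
            unsigned long capacity_thermal = capacity - thermal_pressure;
            int fits;

            /* Handle the corner case uclamp_min > uclamp_max, as the patch does. */
            if (uclamp_min > uclamp_max)
                    uclamp_min = uclamp_max;

            /* Basic fit check (the kernel adds ~20% headroom via fits_capacity()). */
            fits = util <= capacity;
            if (!fits)
                    return 0;

            /* Fits, except the min performance hint cannot be met: report -1. */
            if (util < uclamp_min && uclamp_min > capacity_thermal)
                    return -1;

            return 1;
    }

    int main(void)
    {
            /* A big.LITTLE-style example: little CPU with capacity 512. */
            printf("%d\n", util_fits_cpu_sketch(100, 0,   1024, 512, 0));  /* fully fits        */
            printf("%d\n", util_fits_cpu_sketch(100, 768, 1024, 512, 0));  /* uclamp_min unmet  */
            printf("%d\n", util_fits_cpu_sketch(700, 0,   1024, 512, 0));  /* util doesn't fit  */
            return 0;
    }

Compiled on its own this prints 1, -1 and 0 for the three calls, matching the three cases the patches distinguish.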
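
On the caller side, select_idle_capacity() and find_energy_efficient_cpu() rank candidates by that return value: a CPU that fully fits wins outright, a -1 CPU ("fits except uclamp_min") is preferred over a 0 one, and ties among -1 candidates are broken by the thermally adjusted capacity capacity_orig_of(cpu) - thermal_load_avg(). The sketch below mirrors only the select_idle_capacity() ordering, with made-up candidate data; struct candidate and pick_cpu() are hypothetical names, not kernel symbols.

    /*
     * Illustrative ranking only -- mirrors the preference order the patch
     * gives select_idle_capacity(): fits > 0 wins immediately, fits == -1
     * beats fits == 0, and equal fits values are tie-broken by the larger
     * (thermally adjusted) capacity.  Candidate data here is made up.
     */
    #include <stdio.h>

    struct candidate {
            int cpu;
            int fits;           /* result of util_fits_cpu() for this CPU */
            unsigned long cap;  /* capacity used to break ties            */
    };

    static int pick_cpu(const struct candidate *c, int n)
    {
            unsigned long best_cap = 0;
            int best_fits = 0;
            int best_cpu = -1;

            for (int i = 0; i < n; i++) {
                    if (c[i].fits > 0)              /* fully fits: stop searching */
                            return c[i].cpu;
                    if (c[i].fits < best_fits ||    /* -1 is "better" than 0 ...  */
                        (c[i].fits == best_fits && c[i].cap > best_cap)) {
                            best_fits = c[i].fits;  /* ... then prefer capacity   */
                            best_cap = c[i].cap;
                            best_cpu = c[i].cpu;
                    }
            }
            return best_cpu;
    }

    int main(void)
    {
            struct candidate cpus[] = {
                    { .cpu = 0, .fits =  0, .cap = 430 },  /* util doesn't fit        */
                    { .cpu = 1, .fits = -1, .cap = 380 },  /* only uclamp_min unmet   */
                    { .cpu = 2, .fits = -1, .cap = 460 },  /* same, but more headroom */
            };

            printf("picked CPU %d\n", pick_cpu(cpus, 3));  /* -> CPU 2 */
            return 0;
    }

find_energy_efficient_cpu() applies the same preference before comparing energy deltas, additionally tracking best_thermal_cap so that among CPUs which only miss the uclamp_min hint the one with the highest thermally adjusted capacity is kept.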