From: Greg Kroah-Hartman Date: Sat, 22 Apr 2023 17:28:03 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v4.14.314~50 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bf6cfdad7fef696f097de24c69b6ae6fb87337a0;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch sched-fair-detect-capacity-inversion.patch sched-fair-fixes-for-capacity-inversion-detection.patch sched-uclamp-cater-for-uclamp-in-find_energy_efficient_cpu-s-early-exit-condition.patch sched-uclamp-fix-a-uninitialized-variable-warnings.patch sched-uclamp-fix-fits_capacity-check-in-feec.patch sched-uclamp-make-asym_fits_capacity-use-util_fits_cpu.patch sched-uclamp-make-cpu_overutilized-use-util_fits_cpu.patch sched-uclamp-make-select_idle_capacity-use-util_fits_cpu.patch sched-uclamp-make-task_fits_capacity-use-util_fits_cpu.patch --- diff --git a/queue-5.10/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch b/queue-5.10/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch new file mode 100644 index 00000000000..f4d03a61f8b --- /dev/null +++ b/queue-5.10/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch @@ -0,0 +1,60 @@ +From stable-owner@vger.kernel.org Tue Apr 18 16:10:15 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:41 +0100 +Subject: sched/fair: Consider capacity inversion in util_fits_cpu() +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-9-qyousef@layalina.io> + +From: Qais Yousef + +commit: aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8 upstream. + +We do consider thermal pressure in util_fits_cpu() for uclamp_min only. +With the exception of the biggest cores which by definition are the max +performance point of the system and all tasks by definition should fit. + +Even under thermal pressure, the capacity of the biggest CPU is the +highest in the system and should still fit every task. Except when it +reaches capacity inversion point, then this is no longer true. + +We can handle this by using the inverted capacity as capacity_orig in +util_fits_cpu(). Which not only addresses the problem above, but also +ensure uclamp_max now considers the inverted capacity. Force fitting +a task when a CPU is in this adverse state will contribute to making the +thermal throttling last longer. + +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-10-qais.yousef@arm.com +(cherry picked from commit aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8) +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4113,12 +4113,16 @@ static inline int util_fits_cpu(unsigned + * For uclamp_max, we can tolerate a drop in performance level as the + * goal is to cap the task. So it's okay if it's getting less. + * +- * In case of capacity inversion, which is not handled yet, we should +- * honour the inverted capacity for both uclamp_min and uclamp_max all +- * the time. ++ * In case of capacity inversion we should honour the inverted capacity ++ * for both uclamp_min and uclamp_max all the time. 
+ */ +- capacity_orig = capacity_orig_of(cpu); +- capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); ++ capacity_orig = cpu_in_capacity_inversion(cpu); ++ if (capacity_orig) { ++ capacity_orig_thermal = capacity_orig; ++ } else { ++ capacity_orig = capacity_orig_of(cpu); ++ capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); ++ } + + /* + * We want to force a task to fit a cpu as implied by uclamp_max. diff --git a/queue-5.10/sched-fair-detect-capacity-inversion.patch b/queue-5.10/sched-fair-detect-capacity-inversion.patch new file mode 100644 index 00000000000..343fab1496d --- /dev/null +++ b/queue-5.10/sched-fair-detect-capacity-inversion.patch @@ -0,0 +1,155 @@ +From stable-owner@vger.kernel.org Tue Apr 18 16:10:14 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:40 +0100 +Subject: sched/fair: Detect capacity inversion +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-8-qyousef@layalina.io> + +From: Qais Yousef + +commit: 44c7b80bffc3a657a36857098d5d9c49d94e652b upstream. + +Check each performance domain to see if thermal pressure is causing its +capacity to be lower than another performance domain. + +We assume that each performance domain has CPUs with the same +capacities, which is similar to an assumption made in energy_model.c + +We also assume that thermal pressure impacts all CPUs in a performance +domain equally. + +If there're multiple performance domains with the same capacity_orig, we +will trigger a capacity inversion if the domain is under thermal +pressure. + +The new cpu_in_capacity_inversion() should help users to know when +information about capacity_orig are not reliable and can opt in to use +the inverted capacity as the 'actual' capacity_orig. + +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-9-qais.yousef@arm.com +(cherry picked from commit 44c7b80bffc3a657a36857098d5d9c49d94e652b) +[Trivial conflict in kernel/sched/fair.c and sched.h due to code shuffling] +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++--- + kernel/sched/sched.h | 19 +++++++++++++++ + 2 files changed, 79 insertions(+), 3 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -8376,16 +8376,73 @@ static unsigned long scale_rt_capacity(i + + static void update_cpu_capacity(struct sched_domain *sd, int cpu) + { ++ unsigned long capacity_orig = arch_scale_cpu_capacity(cpu); + unsigned long capacity = scale_rt_capacity(cpu); + struct sched_group *sdg = sd->groups; ++ struct rq *rq = cpu_rq(cpu); + +- cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu); ++ rq->cpu_capacity_orig = capacity_orig; + + if (!capacity) + capacity = 1; + +- cpu_rq(cpu)->cpu_capacity = capacity; +- trace_sched_cpu_capacity_tp(cpu_rq(cpu)); ++ rq->cpu_capacity = capacity; ++ ++ /* ++ * Detect if the performance domain is in capacity inversion state. ++ * ++ * Capacity inversion happens when another perf domain with equal or ++ * lower capacity_orig_of() ends up having higher capacity than this ++ * domain after subtracting thermal pressure. 
++ * ++ * We only take into account thermal pressure in this detection as it's ++ * the only metric that actually results in *real* reduction of ++ * capacity due to performance points (OPPs) being dropped/become ++ * unreachable due to thermal throttling. ++ * ++ * We assume: ++ * * That all cpus in a perf domain have the same capacity_orig ++ * (same uArch). ++ * * Thermal pressure will impact all cpus in this perf domain ++ * equally. ++ */ ++ if (static_branch_unlikely(&sched_asym_cpucapacity)) { ++ unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); ++ struct perf_domain *pd = rcu_dereference(rq->rd->pd); ++ ++ rq->cpu_capacity_inverted = 0; ++ ++ for (; pd; pd = pd->next) { ++ struct cpumask *pd_span = perf_domain_span(pd); ++ unsigned long pd_cap_orig, pd_cap; ++ ++ cpu = cpumask_any(pd_span); ++ pd_cap_orig = arch_scale_cpu_capacity(cpu); ++ ++ if (capacity_orig < pd_cap_orig) ++ continue; ++ ++ /* ++ * handle the case of multiple perf domains have the ++ * same capacity_orig but one of them is under higher ++ * thermal pressure. We record it as capacity ++ * inversion. ++ */ ++ if (capacity_orig == pd_cap_orig) { ++ pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu)); ++ ++ if (pd_cap > inv_cap) { ++ rq->cpu_capacity_inverted = inv_cap; ++ break; ++ } ++ } else if (pd_cap_orig > inv_cap) { ++ rq->cpu_capacity_inverted = inv_cap; ++ break; ++ } ++ } ++ } ++ ++ trace_sched_cpu_capacity_tp(rq); + + sdg->sgc->capacity = capacity; + sdg->sgc->min_capacity = capacity; +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -973,6 +973,7 @@ struct rq { + + unsigned long cpu_capacity; + unsigned long cpu_capacity_orig; ++ unsigned long cpu_capacity_inverted; + + struct callback_head *balance_callback; + +@@ -2539,6 +2540,24 @@ static inline unsigned long capacity_ori + { + return cpu_rq(cpu)->cpu_capacity_orig; + } ++ ++/* ++ * Returns inverted capacity if the CPU is in capacity inversion state. ++ * 0 otherwise. ++ * ++ * Capacity inversion detection only considers thermal impact where actual ++ * performance points (OPPs) gets dropped. ++ * ++ * Capacity inversion state happens when another performance domain that has ++ * equal or lower capacity_orig_of() becomes effectively larger than the perf ++ * domain this CPU belongs to due to thermal pressure throttling it hard. ++ * ++ * See comment in update_cpu_capacity(). ++ */ ++static inline unsigned long cpu_in_capacity_inversion(int cpu) ++{ ++ return cpu_rq(cpu)->cpu_capacity_inverted; ++} + #endif + + /** diff --git a/queue-5.10/sched-fair-fixes-for-capacity-inversion-detection.patch b/queue-5.10/sched-fair-fixes-for-capacity-inversion-detection.patch new file mode 100644 index 00000000000..e5c668d5d98 --- /dev/null +++ b/queue-5.10/sched-fair-fixes-for-capacity-inversion-detection.patch @@ -0,0 +1,68 @@ +From stable-owner@vger.kernel.org Tue Apr 18 16:10:18 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:43 +0100 +Subject: sched/fair: Fixes for capacity inversion detection +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-11-qyousef@layalina.io> + +From: Qais Yousef + +commit da07d2f9c153e457e845d4dcfdd13568d71d18a4 upstream. + +Traversing the Perf Domains requires rcu_read_lock() to be held and is +conditional on sched_energy_enabled(). Ensure right protections applied. + +Also skip capacity inversion detection for our own pd; which was an +error. 
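+
+As a rough sketch (illustrative only; the exact change is in the hunk
+below), the detection loop ends up shaped like this:
+
+	if (sched_energy_enabled()) {
+		struct perf_domain *pd;
+
+		rcu_read_lock();
+
+		pd = rcu_dereference(rq->rd->pd);
+		rq->cpu_capacity_inverted = 0;
+
+		for (; pd; pd = pd->next) {
+			/* We can't be inverted against our own pd */
+			if (cpumask_test_cpu(cpu_of(rq), perf_domain_span(pd)))
+				continue;
+
+			/* ... thermal pressure comparison as before ... */
+		}
+
+		rcu_read_unlock();
+	}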
+ +Fixes: 44c7b80bffc3 ("sched/fair: Detect capacity inversion") +Reported-by: Dietmar Eggemann +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Link: https://lore.kernel.org/r/20230112122708.330667-3-qyousef@layalina.io +(cherry picked from commit da07d2f9c153e457e845d4dcfdd13568d71d18a4) +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -8407,16 +8407,23 @@ static void update_cpu_capacity(struct s + * * Thermal pressure will impact all cpus in this perf domain + * equally. + */ +- if (static_branch_unlikely(&sched_asym_cpucapacity)) { ++ if (sched_energy_enabled()) { + unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); +- struct perf_domain *pd = rcu_dereference(rq->rd->pd); ++ struct perf_domain *pd; + ++ rcu_read_lock(); ++ ++ pd = rcu_dereference(rq->rd->pd); + rq->cpu_capacity_inverted = 0; + + for (; pd; pd = pd->next) { + struct cpumask *pd_span = perf_domain_span(pd); + unsigned long pd_cap_orig, pd_cap; + ++ /* We can't be inverted against our own pd */ ++ if (cpumask_test_cpu(cpu_of(rq), pd_span)) ++ continue; ++ + cpu = cpumask_any(pd_span); + pd_cap_orig = arch_scale_cpu_capacity(cpu); + +@@ -8441,6 +8448,8 @@ static void update_cpu_capacity(struct s + break; + } + } ++ ++ rcu_read_unlock(); + } + + trace_sched_cpu_capacity_tp(rq); diff --git a/queue-5.10/sched-uclamp-cater-for-uclamp-in-find_energy_efficient_cpu-s-early-exit-condition.patch b/queue-5.10/sched-uclamp-cater-for-uclamp-in-find_energy_efficient_cpu-s-early-exit-condition.patch new file mode 100644 index 00000000000..2ca6999f969 --- /dev/null +++ b/queue-5.10/sched-uclamp-cater-for-uclamp-in-find_energy_efficient_cpu-s-early-exit-condition.patch @@ -0,0 +1,66 @@ +From stable-owner@vger.kernel.org Tue Apr 18 16:10:12 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:39 +0100 +Subject: sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early exit condition +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-7-qyousef@layalina.io> + +From: Qais Yousef + +commit d81304bc6193554014d4372a01debdf65e1e9a4d upstream. + +If the utilization of the woken up task is 0, we skip the energy +calculation because it has no impact. + +But if the task is boosted (uclamp_min != 0) will have an impact on task +placement and frequency selection. Only skip if the util is truly +0 after applying uclamp values. + +Change uclamp_task_cpu() signature to avoid unnecessary additional calls +to uclamp_eff_get(). feec() is the only user now. 
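+
+As an illustrative example (numbers made up), a freshly woken task with
+
+	task_util_est(p)                == 0
+	uclamp_eff_value(p, UCLAMP_MIN) == 512
+
+used to hit the "!task_util_est(p)" early exit even though its boost must
+influence CPU and OPP selection. With this change the check becomes
+
+	uclamp_task_util(p, 512, 1024) == clamp(0, 512, 1024) == 512
+
+which is non-zero, so feec() goes on to evaluate candidate CPUs for it.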
+ +Fixes: 732cd75b8c920 ("sched/fair: Select an energy-efficient CPU on task wake-up") +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-8-qais.yousef@arm.com +(cherry picked from commit d81304bc6193554014d4372a01debdf65e1e9a4d) +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3928,14 +3928,16 @@ static inline unsigned long task_util_es + } + + #ifdef CONFIG_UCLAMP_TASK +-static inline unsigned long uclamp_task_util(struct task_struct *p) ++static inline unsigned long uclamp_task_util(struct task_struct *p, ++ unsigned long uclamp_min, ++ unsigned long uclamp_max) + { +- return clamp(task_util_est(p), +- uclamp_eff_value(p, UCLAMP_MIN), +- uclamp_eff_value(p, UCLAMP_MAX)); ++ return clamp(task_util_est(p), uclamp_min, uclamp_max); + } + #else +-static inline unsigned long uclamp_task_util(struct task_struct *p) ++static inline unsigned long uclamp_task_util(struct task_struct *p, ++ unsigned long uclamp_min, ++ unsigned long uclamp_max) + { + return task_util_est(p); + } +@@ -6836,7 +6838,7 @@ static int find_energy_efficient_cpu(str + goto fail; + + sync_entity_load_avg(&p->se); +- if (!task_util_est(p)) ++ if (!uclamp_task_util(p, p_util_min, p_util_max)) + goto unlock; + + for (; pd; pd = pd->next) { diff --git a/queue-5.10/sched-uclamp-fix-a-uninitialized-variable-warnings.patch b/queue-5.10/sched-uclamp-fix-a-uninitialized-variable-warnings.patch new file mode 100644 index 00000000000..94d266ba649 --- /dev/null +++ b/queue-5.10/sched-uclamp-fix-a-uninitialized-variable-warnings.patch @@ -0,0 +1,97 @@ +From stable-owner@vger.kernel.org Tue Apr 18 16:10:16 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:42 +0100 +Subject: sched/uclamp: Fix a uninitialized variable warnings +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef , kernel test robot , Dan Carpenter +Message-ID: <20230418140943.90621-10-qyousef@layalina.io> + +From: Qais Yousef + +commit e26fd28db82899be71b4b949527373d0a6be1e65 upstream. + +Addresses the following warnings: + +> config: riscv-randconfig-m031-20221111 +> compiler: riscv64-linux-gcc (GCC) 12.1.0 +> +> smatch warnings: +> kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_min'. +> kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_max'. 
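+
+The warnings are genuine for the !uclamp_is_used() case: util_min and
+util_max were only assigned inside the "if (uclamp_is_used())" branch,
+yet were still passed to util_fits_cpu() afterwards. The fix (sketched
+here; the full hunk is below) initialises them at declaration time,
+
+	unsigned long util_min = p_util_min, util_max = p_util_max;
+
+which also lets the uclamp_rq_is_idle() special case fold into the
+initialiser, since an idle rq contributes no rq-level clamps to the max
+aggregation.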
+ +Fixes: 244226035a1f ("sched/uclamp: Fix fits_capacity() check in feec()") +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Link: https://lore.kernel.org/r/20230112122708.330667-2-qyousef@layalina.io +(cherry picked from commit e26fd28db82899be71b4b949527373d0a6be1e65) +[Conflict in kernel/sched/fair.c due to new automatic variable in +master vs 5.10 and new code around for loop] +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 35 ++++++++++++++++------------------- + 1 file changed, 16 insertions(+), 19 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6846,9 +6846,9 @@ static int find_energy_efficient_cpu(str + goto unlock; + + for (; pd; pd = pd->next) { ++ unsigned long util_min = p_util_min, util_max = p_util_max; + unsigned long cur_delta, spare_cap, max_spare_cap = 0; + unsigned long rq_util_min, rq_util_max; +- unsigned long util_min, util_max; + unsigned long base_energy_pd; + int max_spare_cap_cpu = -1; + +@@ -6857,6 +6857,8 @@ static int find_energy_efficient_cpu(str + base_energy += base_energy_pd; + + for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { ++ struct rq *rq = cpu_rq(cpu); ++ + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + continue; + +@@ -6872,24 +6874,19 @@ static int find_energy_efficient_cpu(str + * much capacity we can get out of the CPU; this is + * aligned with schedutil_cpu_util(). + */ +- if (uclamp_is_used()) { +- if (uclamp_rq_is_idle(cpu_rq(cpu))) { +- util_min = p_util_min; +- util_max = p_util_max; +- } else { +- /* +- * Open code uclamp_rq_util_with() except for +- * the clamp() part. Ie: apply max aggregation +- * only. util_fits_cpu() logic requires to +- * operate on non clamped util but must use the +- * max-aggregated uclamp_{min, max}. +- */ +- rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); +- rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); +- +- util_min = max(rq_util_min, p_util_min); +- util_max = max(rq_util_max, p_util_max); +- } ++ if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) { ++ /* ++ * Open code uclamp_rq_util_with() except for ++ * the clamp() part. Ie: apply max aggregation ++ * only. util_fits_cpu() logic requires to ++ * operate on non clamped util but must use the ++ * max-aggregated uclamp_{min, max}. ++ */ ++ rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN); ++ rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX); ++ ++ util_min = max(rq_util_min, p_util_min); ++ util_max = max(rq_util_max, p_util_max); + } + if (!util_fits_cpu(util, util_min, util_max, cpu)) + continue; diff --git a/queue-5.10/sched-uclamp-fix-fits_capacity-check-in-feec.patch b/queue-5.10/sched-uclamp-fix-fits_capacity-check-in-feec.patch new file mode 100644 index 00000000000..eb6993617c7 --- /dev/null +++ b/queue-5.10/sched-uclamp-fix-fits_capacity-check-in-feec.patch @@ -0,0 +1,202 @@ +From qyousef@layalina.io Tue Apr 18 16:10:02 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:35 +0100 +Subject: sched/uclamp: Fix fits_capacity() check in feec() +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef , Yun Hsiang +Message-ID: <20230418140943.90621-3-qyousef@layalina.io> + +From: Qais Yousef + +commit 244226035a1f9b2b6c326e55ae5188fab4f428cb upstream. 
+ +As reported by Yun Hsiang [1], if a task has its uclamp_min >= 0.8 * 1024, +it'll always pick the previous CPU because fits_capacity() will always +return false in this case. + +The new util_fits_cpu() logic should handle this correctly for us beside +more corner cases where similar failures could occur, like when using +UCLAMP_MAX. + +We open code uclamp_rq_util_with() except for the clamp() part, +util_fits_cpu() needs the 'raw' values to be passed to it. + +Also introduce uclamp_rq_{set, get}() shorthand accessors to get uclamp +value for the rq. Makes the code more readable and ensures the right +rules (use READ_ONCE/WRITE_ONCE) are respected transparently. + +[1] https://lists.linaro.org/pipermail/eas-dev/2020-July/001488.html + +Fixes: 1d42509e475c ("sched/fair: Make EAS wakeup placement consider uclamp restrictions") +Reported-by: Yun Hsiang +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-4-qais.yousef@arm.com +(cherry picked from commit 244226035a1f9b2b6c326e55ae5188fab4f428cb) +[Fix trivial conflict in kernel/sched/fair.c due to new automatic +variables in master vs 5.10] +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/core.c | 10 +++++----- + kernel/sched/fair.c | 26 ++++++++++++++++++++++++-- + kernel/sched/sched.h | 42 +++++++++++++++++++++++++++++++++++++++--- + 3 files changed, 68 insertions(+), 10 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -980,7 +980,7 @@ static inline void uclamp_idle_reset(str + if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE)) + return; + +- WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value); ++ uclamp_rq_set(rq, clamp_id, clamp_value); + } + + static inline +@@ -1158,8 +1158,8 @@ static inline void uclamp_rq_inc_id(stru + if (bucket->tasks == 1 || uc_se->value > bucket->value) + bucket->value = uc_se->value; + +- if (uc_se->value > READ_ONCE(uc_rq->value)) +- WRITE_ONCE(uc_rq->value, uc_se->value); ++ if (uc_se->value > uclamp_rq_get(rq, clamp_id)) ++ uclamp_rq_set(rq, clamp_id, uc_se->value); + } + + /* +@@ -1225,7 +1225,7 @@ static inline void uclamp_rq_dec_id(stru + if (likely(bucket->tasks)) + return; + +- rq_clamp = READ_ONCE(uc_rq->value); ++ rq_clamp = uclamp_rq_get(rq, clamp_id); + /* + * Defensive programming: this should never happen. If it happens, + * e.g. due to future modification, warn and fixup the expected value. +@@ -1233,7 +1233,7 @@ static inline void uclamp_rq_dec_id(stru + SCHED_WARN_ON(bucket->value > rq_clamp); + if (bucket->value >= rq_clamp) { + bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value); +- WRITE_ONCE(uc_rq->value, bkt_clamp); ++ uclamp_rq_set(rq, clamp_id, bkt_clamp); + } + } + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6802,6 +6802,8 @@ compute_energy(struct task_struct *p, in + static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) + { + unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; ++ unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0; ++ unsigned long p_util_max = uclamp_is_used() ? 
uclamp_eff_value(p, UCLAMP_MAX) : 1024; + struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + unsigned long cpu_cap, util, base_energy = 0; + int cpu, best_energy_cpu = prev_cpu; +@@ -6829,6 +6831,8 @@ static int find_energy_efficient_cpu(str + + for (; pd; pd = pd->next) { + unsigned long cur_delta, spare_cap, max_spare_cap = 0; ++ unsigned long rq_util_min, rq_util_max; ++ unsigned long util_min, util_max; + unsigned long base_energy_pd; + int max_spare_cap_cpu = -1; + +@@ -6852,8 +6856,26 @@ static int find_energy_efficient_cpu(str + * much capacity we can get out of the CPU; this is + * aligned with schedutil_cpu_util(). + */ +- util = uclamp_rq_util_with(cpu_rq(cpu), util, p); +- if (!fits_capacity(util, cpu_cap)) ++ if (uclamp_is_used()) { ++ if (uclamp_rq_is_idle(cpu_rq(cpu))) { ++ util_min = p_util_min; ++ util_max = p_util_max; ++ } else { ++ /* ++ * Open code uclamp_rq_util_with() except for ++ * the clamp() part. Ie: apply max aggregation ++ * only. util_fits_cpu() logic requires to ++ * operate on non clamped util but must use the ++ * max-aggregated uclamp_{min, max}. ++ */ ++ rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); ++ rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); ++ ++ util_min = max(rq_util_min, p_util_min); ++ util_max = max(rq_util_max, p_util_max); ++ } ++ } ++ if (!util_fits_cpu(util, util_min, util_max, cpu)) + continue; + + /* Always use prev_cpu as a candidate. */ +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2402,6 +2402,23 @@ static inline void cpufreq_update_util(s + #ifdef CONFIG_UCLAMP_TASK + unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); + ++static inline unsigned long uclamp_rq_get(struct rq *rq, ++ enum uclamp_id clamp_id) ++{ ++ return READ_ONCE(rq->uclamp[clamp_id].value); ++} ++ ++static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, ++ unsigned int value) ++{ ++ WRITE_ONCE(rq->uclamp[clamp_id].value, value); ++} ++ ++static inline bool uclamp_rq_is_idle(struct rq *rq) ++{ ++ return rq->uclamp_flags & UCLAMP_FLAG_IDLE; ++} ++ + /** + * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. + * @rq: The rq to clamp against. Must not be NULL. +@@ -2437,12 +2454,12 @@ unsigned long uclamp_rq_util_with(struct + * Ignore last runnable task's max clamp, as this task will + * reset it. Similarly, no need to read the rq's min clamp. 
+ */ +- if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) ++ if (uclamp_rq_is_idle(rq)) + goto out; + } + +- min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); +- max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); ++ min_util = max_t(unsigned long, min_util, uclamp_rq_get(rq, UCLAMP_MIN)); ++ max_util = max_t(unsigned long, max_util, uclamp_rq_get(rq, UCLAMP_MAX)); + out: + /* + * Since CPU's {min,max}_util clamps are MAX aggregated considering +@@ -2488,6 +2505,25 @@ static inline bool uclamp_is_used(void) + { + return false; + } ++ ++static inline unsigned long uclamp_rq_get(struct rq *rq, ++ enum uclamp_id clamp_id) ++{ ++ if (clamp_id == UCLAMP_MIN) ++ return 0; ++ ++ return SCHED_CAPACITY_SCALE; ++} ++ ++static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, ++ unsigned int value) ++{ ++} ++ ++static inline bool uclamp_rq_is_idle(struct rq *rq) ++{ ++ return false; ++} + #endif /* CONFIG_UCLAMP_TASK */ + + #ifdef arch_scale_freq_capacity diff --git a/queue-5.10/sched-uclamp-make-asym_fits_capacity-use-util_fits_cpu.patch b/queue-5.10/sched-uclamp-make-asym_fits_capacity-use-util_fits_cpu.patch new file mode 100644 index 00000000000..9bf39e53bf4 --- /dev/null +++ b/queue-5.10/sched-uclamp-make-asym_fits_capacity-use-util_fits_cpu.patch @@ -0,0 +1,103 @@ +From qyousef@layalina.io Tue Apr 18 16:10:04 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:37 +0100 +Subject: sched/uclamp: Make asym_fits_capacity() use util_fits_cpu() +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-5-qyousef@layalina.io> + +From: Qais Yousef + +commit a2e7f03ed28fce26c78b985f87913b6ce3accf9d upstream. + +Use the new util_fits_cpu() to ensure migration margin and capacity +pressure are taken into account correctly when uclamp is being used +otherwise we will fail to consider CPUs as fitting in scenarios where +they should. + +s/asym_fits_capacity/asym_fits_cpu/ to better reflect what it does now. 
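+
+A made-up example of the kind of case this fixes: a task with
+
+	task_util_est(p) == 700, UCLAMP_MAX == 300
+
+on a little CPU with capacity_of() == 400 never passes
+fits_capacity(700, 400) (700 * 1280 > 400 * 1024), yet the task is
+explicitly capped to 300 and should be allowed to run there. Passing the
+raw util plus both uclamp bounds to util_fits_cpu() lets the uclamp_max
+cap be honoured in the asymmetric wakeup path too.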
+ +Fixes: b4c9c9f15649 ("sched/fair: Prefer prev cpu in asymmetric wakeup path") +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-6-qais.yousef@arm.com +(cherry picked from commit a2e7f03ed28fce26c78b985f87913b6ce3accf9d) +[Conflict in kernel/sched/fair.c due different name of static key +wrapper function and slightly different if condition block in one of the +asym_fits_cpu() call sites] +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6422,10 +6422,13 @@ select_idle_capacity(struct task_struct + return best_cpu; + } + +-static inline bool asym_fits_capacity(unsigned long task_util, int cpu) ++static inline bool asym_fits_cpu(unsigned long util, ++ unsigned long util_min, ++ unsigned long util_max, ++ int cpu) + { + if (static_branch_unlikely(&sched_asym_cpucapacity)) +- return fits_capacity(task_util, capacity_of(cpu)); ++ return util_fits_cpu(util, util_min, util_max, cpu); + + return true; + } +@@ -6436,7 +6439,7 @@ static inline bool asym_fits_capacity(un + static int select_idle_sibling(struct task_struct *p, int prev, int target) + { + struct sched_domain *sd; +- unsigned long task_util; ++ unsigned long task_util, util_min, util_max; + int i, recent_used_cpu; + + /* +@@ -6445,11 +6448,13 @@ static int select_idle_sibling(struct ta + */ + if (static_branch_unlikely(&sched_asym_cpucapacity)) { + sync_entity_load_avg(&p->se); +- task_util = uclamp_task_util(p); ++ task_util = task_util_est(p); ++ util_min = uclamp_eff_value(p, UCLAMP_MIN); ++ util_max = uclamp_eff_value(p, UCLAMP_MAX); + } + + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && +- asym_fits_capacity(task_util, target)) ++ asym_fits_cpu(task_util, util_min, util_max, target)) + return target; + + /* +@@ -6457,7 +6462,7 @@ static int select_idle_sibling(struct ta + */ + if (prev != target && cpus_share_cache(prev, target) && + (available_idle_cpu(prev) || sched_idle_cpu(prev)) && +- asym_fits_capacity(task_util, prev)) ++ asym_fits_cpu(task_util, util_min, util_max, prev)) + return prev; + + /* +@@ -6472,7 +6477,7 @@ static int select_idle_sibling(struct ta + in_task() && + prev == smp_processor_id() && + this_rq()->nr_running <= 1 && +- asym_fits_capacity(task_util, prev)) { ++ asym_fits_cpu(task_util, util_min, util_max, prev)) { + return prev; + } + +@@ -6483,7 +6488,7 @@ static int select_idle_sibling(struct ta + cpus_share_cache(recent_used_cpu, target) && + (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && +- asym_fits_capacity(task_util, recent_used_cpu)) { ++ asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { + /* + * Replace recent_used_cpu with prev as it is a potential + * candidate for the next wake: diff --git a/queue-5.10/sched-uclamp-make-cpu_overutilized-use-util_fits_cpu.patch b/queue-5.10/sched-uclamp-make-cpu_overutilized-use-util_fits_cpu.patch new file mode 100644 index 00000000000..1e96961ab2f --- /dev/null +++ b/queue-5.10/sched-uclamp-make-cpu_overutilized-use-util_fits_cpu.patch @@ -0,0 +1,58 @@ +From qyousef@layalina.io Tue Apr 18 16:10:06 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:38 +0100 +Subject: sched/uclamp: Make cpu_overutilized() use util_fits_cpu() +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra 
, Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-6-qyousef@layalina.io> + +From: Qais Yousef + +commit c56ab1b3506ba0e7a872509964b100912bde165d upstream. + +So that it is now uclamp aware. + +This fixes a major problem of busy tasks capped with UCLAMP_MAX keeping +the system in overutilized state which disables EAS and leads to wasting +energy in the long run. + +Without this patch running a busy background activity like JIT +compilation on Pixel 6 causes the system to be in overutilized state +74.5% of the time. + +With this patch this goes down to 9.79%. + +It also fixes another problem when long running tasks that have their +UCLAMP_MIN changed while running such that they need to upmigrate to +honour the new UCLAMP_MIN value. The upmigration doesn't get triggered +because overutilized state never gets set in this state, hence misfit +migration never happens at tick in this case until the task wakes up +again. + +Fixes: af24bde8df202 ("sched/uclamp: Add uclamp support to energy_compute()") +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-7-qais.yousef@arm.com +(cherry picked from commit c56ab1b3506ba0e7a872509964b100912bde165d) +[Conflict in kernel/sched/fair.c: use cpu_util() instead of +cpu_util_cfs()] +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5657,7 +5657,10 @@ static inline unsigned long cpu_util(int + + static inline bool cpu_overutilized(int cpu) + { +- return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); ++ unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); ++ unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); ++ ++ return !util_fits_cpu(cpu_util(cpu), rq_util_min, rq_util_max, cpu); + } + + static inline void update_overutilized_status(struct rq *rq) diff --git a/queue-5.10/sched-uclamp-make-select_idle_capacity-use-util_fits_cpu.patch b/queue-5.10/sched-uclamp-make-select_idle_capacity-use-util_fits_cpu.patch new file mode 100644 index 00000000000..426c17eca18 --- /dev/null +++ b/queue-5.10/sched-uclamp-make-select_idle_capacity-use-util_fits_cpu.patch @@ -0,0 +1,57 @@ +From qyousef@layalina.io Tue Apr 18 16:10:03 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:36 +0100 +Subject: sched/uclamp: Make select_idle_capacity() use util_fits_cpu() +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-4-qyousef@layalina.io> + +From: Qais Yousef + +commit b759caa1d9f667b94727b2ad12589cbc4ce13a82 upstream. + +Use the new util_fits_cpu() to ensure migration margin and capacity +pressure are taken into account correctly when uclamp is being used +otherwise we will fail to consider CPUs as fitting in scenarios where +they should. 
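+
+The noteworthy detail (sketched below; the hunk has the exact change) is
+that util_fits_cpu() wants the raw utilization and the two uclamp bounds
+separately, rather than a pre-clamped uclamp_task_util() value:
+
+	task_util = task_util_est(p);
+	util_min  = uclamp_eff_value(p, UCLAMP_MIN);
+	util_max  = uclamp_eff_value(p, UCLAMP_MAX);
+	...
+	if (util_fits_cpu(task_util, util_min, util_max, cpu))
+		return cpu;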
+ +Fixes: b4c9c9f15649 ("sched/fair: Prefer prev cpu in asymmetric wakeup path") +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-5-qais.yousef@arm.com +(cherry picked from commit b759caa1d9f667b94727b2ad12589cbc4ce13a82) +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -6394,21 +6394,23 @@ static int select_idle_cpu(struct task_s + static int + select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) + { +- unsigned long task_util, best_cap = 0; ++ unsigned long task_util, util_min, util_max, best_cap = 0; + int cpu, best_cpu = -1; + struct cpumask *cpus; + + cpus = this_cpu_cpumask_var_ptr(select_idle_mask); + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); + +- task_util = uclamp_task_util(p); ++ task_util = task_util_est(p); ++ util_min = uclamp_eff_value(p, UCLAMP_MIN); ++ util_max = uclamp_eff_value(p, UCLAMP_MAX); + + for_each_cpu_wrap(cpu, cpus, target) { + unsigned long cpu_cap = capacity_of(cpu); + + if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) + continue; +- if (fits_capacity(task_util, cpu_cap)) ++ if (util_fits_cpu(task_util, util_min, util_max, cpu)) + return cpu; + + if (cpu_cap > best_cap) { diff --git a/queue-5.10/sched-uclamp-make-task_fits_capacity-use-util_fits_cpu.patch b/queue-5.10/sched-uclamp-make-task_fits_capacity-use-util_fits_cpu.patch new file mode 100644 index 00000000000..d2e680ab77a --- /dev/null +++ b/queue-5.10/sched-uclamp-make-task_fits_capacity-use-util_fits_cpu.patch @@ -0,0 +1,111 @@ +From qyousef@layalina.io Tue Apr 18 16:09:59 2023 +From: Qais Yousef +Date: Tue, 18 Apr 2023 15:09:34 +0100 +Subject: sched/uclamp: Make task_fits_capacity() use util_fits_cpu() +To: stable@vger.kernel.org, Greg Kroah-Hartman +Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef +Message-ID: <20230418140943.90621-2-qyousef@layalina.io> + +From: Qais Yousef + +commit b48e16a69792b5dc4a09d6807369d11b2970cc36 upstream. + +So that the new uclamp rules in regard to migration margin and capacity +pressure are taken into account correctly. 
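+
+One part of the change that is easy to miss: task_fits_cpu() needs a
+specific CPU (its original capacity and thermal pressure feed the check),
+so the wakeup group stats can no longer be judged against the group's
+max_capacity alone. update_sg_wakeup_stats() therefore starts from
+"misfit" and clears it as soon as any CPU in the group fits the task,
+roughly (sketch of the hunk below):
+
+	if (sd->flags & SD_ASYM_CPUCAPACITY)
+		sgs->group_misfit_task_load = 1;
+
+	for_each_cpu(i, sched_group_span(group)) {
+		...
+		if (sd->flags & SD_ASYM_CPUCAPACITY &&
+		    sgs->group_misfit_task_load &&
+		    task_fits_cpu(p, i))
+			sgs->group_misfit_task_load = 0;
+	}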
+ +Fixes: a7008c07a568 ("sched/fair: Make task_fits_capacity() consider uclamp restrictions") +Co-developed-by: Vincent Guittot +Signed-off-by: Qais Yousef +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lore.kernel.org/r/20220804143609.515789-3-qais.yousef@arm.com +(cherry picked from commit b48e16a69792b5dc4a09d6807369d11b2970cc36) +Signed-off-by: Qais Yousef (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 26 ++++++++++++++++---------- + kernel/sched/sched.h | 9 +++++++++ + 2 files changed, 25 insertions(+), 10 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4197,10 +4197,12 @@ static inline int util_fits_cpu(unsigned + return fits; + } + +-static inline int task_fits_capacity(struct task_struct *p, +- unsigned long capacity) ++static inline int task_fits_cpu(struct task_struct *p, int cpu) + { +- return fits_capacity(uclamp_task_util(p), capacity); ++ unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN); ++ unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX); ++ unsigned long util = task_util_est(p); ++ return util_fits_cpu(util, uclamp_min, uclamp_max, cpu); + } + + static inline void update_misfit_status(struct task_struct *p, struct rq *rq) +@@ -4213,7 +4215,7 @@ static inline void update_misfit_status( + return; + } + +- if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) { ++ if (task_fits_cpu(p, cpu_of(rq))) { + rq->misfit_task_load = 0; + return; + } +@@ -7942,7 +7944,7 @@ static int detach_tasks(struct lb_env *e + + case migrate_misfit: + /* This is not a misfit task */ +- if (task_fits_capacity(p, capacity_of(env->src_cpu))) ++ if (task_fits_cpu(p, env->src_cpu)) + goto next; + + env->imbalance = 0; +@@ -8884,6 +8886,10 @@ static inline void update_sg_wakeup_stat + + memset(sgs, 0, sizeof(*sgs)); + ++ /* Assume that task can't fit any CPU of the group */ ++ if (sd->flags & SD_ASYM_CPUCAPACITY) ++ sgs->group_misfit_task_load = 1; ++ + for_each_cpu(i, sched_group_span(group)) { + struct rq *rq = cpu_rq(i); + unsigned int local; +@@ -8903,12 +8909,12 @@ static inline void update_sg_wakeup_stat + if (!nr_running && idle_cpu_without(i, p)) + sgs->idle_cpus++; + +- } ++ /* Check if task fits in the CPU */ ++ if (sd->flags & SD_ASYM_CPUCAPACITY && ++ sgs->group_misfit_task_load && ++ task_fits_cpu(p, i)) ++ sgs->group_misfit_task_load = 0; + +- /* Check if task fits in the group */ +- if (sd->flags & SD_ASYM_CPUCAPACITY && +- !task_fits_capacity(p, group->sgc->max_capacity)) { +- sgs->group_misfit_task_load = 1; + } + + sgs->group_capacity = group->sgc->capacity; +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2468,6 +2468,15 @@ static inline bool uclamp_is_used(void) + return static_branch_likely(&sched_uclamp_used); + } + #else /* CONFIG_UCLAMP_TASK */ ++static inline unsigned long uclamp_eff_value(struct task_struct *p, ++ enum uclamp_id clamp_id) ++{ ++ if (clamp_id == UCLAMP_MIN) ++ return 0; ++ ++ return SCHED_CAPACITY_SCALE; ++} ++ + static inline + unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, + struct task_struct *p) diff --git a/queue-5.10/series b/queue-5.10/series index 9810ca5644e..c727640ca25 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -33,3 +33,13 @@ memstick-fix-memory-leak-if-card-device-is-never-registered.patch kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch mmc-sdhci_am654-set-high_speed_ena-for-sdr12-and-sdr25.patch mm-khugepaged-check-again-on-anon-uffd-wp-during-isolation.patch 
+sched-uclamp-make-task_fits_capacity-use-util_fits_cpu.patch +sched-uclamp-fix-fits_capacity-check-in-feec.patch +sched-uclamp-make-select_idle_capacity-use-util_fits_cpu.patch +sched-uclamp-make-asym_fits_capacity-use-util_fits_cpu.patch +sched-uclamp-make-cpu_overutilized-use-util_fits_cpu.patch +sched-uclamp-cater-for-uclamp-in-find_energy_efficient_cpu-s-early-exit-condition.patch +sched-fair-detect-capacity-inversion.patch +sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch +sched-uclamp-fix-a-uninitialized-variable-warnings.patch +sched-fair-fixes-for-capacity-inversion-detection.patch