From: Greg Kroah-Hartman
Date: Sat, 22 Apr 2023 17:17:40 +0000 (+0200)
Subject: 6.1-stable patches
X-Git-Tag: v4.14.314~52
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f4a0c3b1bd23d0845a5c932ee6174adc9664f012;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
	sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
	sched-fair-detect-capacity-inversion.patch
	sched-fair-fixes-for-capacity-inversion-detection.patch
---

diff --git a/queue-6.1/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch b/queue-6.1/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
new file mode 100644
index 00000000000..1901ebcaa5f
--- /dev/null
+++ b/queue-6.1/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
@@ -0,0 +1,60 @@
+From stable-owner@vger.kernel.org Tue Apr 18 16:05:21 2023
+From: Qais Yousef
+Date: Tue, 18 Apr 2023 15:04:53 +0100
+Subject: sched/fair: Consider capacity inversion in util_fits_cpu()
+To: stable@vger.kernel.org, Greg Kroah-Hartman
+Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef , Qais Yousef
+Message-ID: <20230418140454.87367-3-qyousef@layalina.io>
+
+From: Qais Yousef
+
+commit: aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8 upstream.
+
+We do consider thermal pressure in util_fits_cpu() for uclamp_min only.
+With the exception of the biggest cores which by definition are the max
+performance point of the system and all tasks by definition should fit.
+
+Even under thermal pressure, the capacity of the biggest CPU is the
+highest in the system and should still fit every task. Except when it
+reaches capacity inversion point, then this is no longer true.
+
+We can handle this by using the inverted capacity as capacity_orig in
+util_fits_cpu(). Which not only addresses the problem above, but also
+ensure uclamp_max now considers the inverted capacity. Force fitting
+a task when a CPU is in this adverse state will contribute to making the
+thermal throttling last longer.
+
+Signed-off-by: Qais Yousef
+Signed-off-by: Peter Zijlstra (Intel)
+Link: https://lore.kernel.org/r/20220804143609.515789-10-qais.yousef@arm.com
+(cherry picked from commit aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8)
+Signed-off-by: Qais Yousef (Google)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched/fair.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -4465,12 +4465,16 @@ static inline int util_fits_cpu(unsigned
+ 	 * For uclamp_max, we can tolerate a drop in performance level as the
+ 	 * goal is to cap the task. So it's okay if it's getting less.
+ 	 *
+-	 * In case of capacity inversion, which is not handled yet, we should
+-	 * honour the inverted capacity for both uclamp_min and uclamp_max all
+-	 * the time.
++	 * In case of capacity inversion we should honour the inverted capacity
++	 * for both uclamp_min and uclamp_max all the time.
+ 	 */
+-	capacity_orig = capacity_orig_of(cpu);
+-	capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu);
++	capacity_orig = cpu_in_capacity_inversion(cpu);
++	if (capacity_orig) {
++		capacity_orig_thermal = capacity_orig;
++	} else {
++		capacity_orig = capacity_orig_of(cpu);
++		capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu);
++	}
+ 
+ 	/*
+ 	 * We want to force a task to fit a cpu as implied by uclamp_max.
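
For readers following the logic rather than the diff, here is a rough, self-contained sketch (plain C, userspace, with made-up toy numbers) of the capacity selection the patch above adds to util_fits_cpu(): when a CPU reports an inverted capacity, that value is used as both capacity_orig and capacity_orig_thermal; otherwise thermal pressure is only subtracted from the thermal copy. The struct cpu_state type and pick_capacities() helper are illustrative stand-ins, not kernel API.

/* Illustrative only -- mirrors the capacity_orig selection added above, outside the kernel. */
#include <stdio.h>

struct cpu_state {
	unsigned long capacity_orig;		/* stand-in for arch_scale_cpu_capacity() */
	unsigned long thermal_pressure;		/* stand-in for arch_scale_thermal_pressure() */
	unsigned long capacity_inverted;	/* stand-in for cpu_in_capacity_inversion(), 0 if none */
};

static void pick_capacities(const struct cpu_state *c,
			    unsigned long *cap, unsigned long *cap_thermal)
{
	if (c->capacity_inverted) {
		/* in capacity inversion: honour the inverted capacity everywhere */
		*cap = c->capacity_inverted;
		*cap_thermal = c->capacity_inverted;
	} else {
		/* normal case: only the thermal copy loses thermal pressure */
		*cap = c->capacity_orig;
		*cap_thermal = c->capacity_orig - c->thermal_pressure;
	}
}

int main(void)
{
	/* a throttled big CPU whose effective capacity (600) fell below a mid CPU (768) */
	struct cpu_state big = {
		.capacity_orig = 1024,
		.thermal_pressure = 424,
		.capacity_inverted = 600,
	};
	unsigned long cap, cap_thermal;

	pick_capacities(&big, &cap, &cap_thermal);
	/* prints: capacity_orig=600 capacity_orig_thermal=600 */
	printf("capacity_orig=%lu capacity_orig_thermal=%lu\n", cap, cap_thermal);
	return 0;
}
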
diff --git a/queue-6.1/sched-fair-detect-capacity-inversion.patch b/queue-6.1/sched-fair-detect-capacity-inversion.patch
new file mode 100644
index 00000000000..11b527c860e
--- /dev/null
+++ b/queue-6.1/sched-fair-detect-capacity-inversion.patch
@@ -0,0 +1,154 @@
+From stable-owner@vger.kernel.org Tue Apr 18 16:05:20 2023
+From: Qais Yousef
+Date: Tue, 18 Apr 2023 15:04:52 +0100
+Subject: sched/fair: Detect capacity inversion
+To: stable@vger.kernel.org, Greg Kroah-Hartman
+Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef , Qais Yousef
+Message-ID: <20230418140454.87367-2-qyousef@layalina.io>
+
+From: Qais Yousef
+
+commit: 44c7b80bffc3a657a36857098d5d9c49d94e652b upstream.
+
+Check each performance domain to see if thermal pressure is causing its
+capacity to be lower than another performance domain.
+
+We assume that each performance domain has CPUs with the same
+capacities, which is similar to an assumption made in energy_model.c
+
+We also assume that thermal pressure impacts all CPUs in a performance
+domain equally.
+
+If there're multiple performance domains with the same capacity_orig, we
+will trigger a capacity inversion if the domain is under thermal
+pressure.
+
+The new cpu_in_capacity_inversion() should help users to know when
+information about capacity_orig are not reliable and can opt in to use
+the inverted capacity as the 'actual' capacity_orig.
+
+Signed-off-by: Qais Yousef
+Signed-off-by: Peter Zijlstra (Intel)
+Link: https://lore.kernel.org/r/20220804143609.515789-9-qais.yousef@arm.com
+(cherry picked from commit 44c7b80bffc3a657a36857098d5d9c49d94e652b)
+Signed-off-by: Qais Yousef (Google)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched/fair.c  | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---
+ kernel/sched/sched.h | 19 +++++++++++++++
+ 2 files changed, 79 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8866,16 +8866,73 @@ static unsigned long scale_rt_capacity(i
+ 
+ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
+ {
++	unsigned long capacity_orig = arch_scale_cpu_capacity(cpu);
+ 	unsigned long capacity = scale_rt_capacity(cpu);
+ 	struct sched_group *sdg = sd->groups;
++	struct rq *rq = cpu_rq(cpu);
+ 
+-	cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
++	rq->cpu_capacity_orig = capacity_orig;
+ 
+ 	if (!capacity)
+ 		capacity = 1;
+ 
+-	cpu_rq(cpu)->cpu_capacity = capacity;
+-	trace_sched_cpu_capacity_tp(cpu_rq(cpu));
++	rq->cpu_capacity = capacity;
++
++	/*
++	 * Detect if the performance domain is in capacity inversion state.
++	 *
++	 * Capacity inversion happens when another perf domain with equal or
++	 * lower capacity_orig_of() ends up having higher capacity than this
++	 * domain after subtracting thermal pressure.
++	 *
++	 * We only take into account thermal pressure in this detection as it's
++	 * the only metric that actually results in *real* reduction of
++	 * capacity due to performance points (OPPs) being dropped/become
++	 * unreachable due to thermal throttling.
++	 *
++	 * We assume:
++	 *   * That all cpus in a perf domain have the same capacity_orig
++	 *     (same uArch).
++	 *   * Thermal pressure will impact all cpus in this perf domain
++	 *     equally.
++	 */
++	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++		unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
++		struct perf_domain *pd = rcu_dereference(rq->rd->pd);
++
++		rq->cpu_capacity_inverted = 0;
++
++		for (; pd; pd = pd->next) {
++			struct cpumask *pd_span = perf_domain_span(pd);
++			unsigned long pd_cap_orig, pd_cap;
++
++			cpu = cpumask_any(pd_span);
++			pd_cap_orig = arch_scale_cpu_capacity(cpu);
++
++			if (capacity_orig < pd_cap_orig)
++				continue;
++
++			/*
++			 * handle the case of multiple perf domains have the
++			 * same capacity_orig but one of them is under higher
++			 * thermal pressure. We record it as capacity
++			 * inversion.
++			 */
++			if (capacity_orig == pd_cap_orig) {
++				pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu));
++
++				if (pd_cap > inv_cap) {
++					rq->cpu_capacity_inverted = inv_cap;
++					break;
++				}
++			} else if (pd_cap_orig > inv_cap) {
++				rq->cpu_capacity_inverted = inv_cap;
++				break;
++			}
++		}
++	}
++
++	trace_sched_cpu_capacity_tp(rq);
+ 
+ 	sdg->sgc->capacity = capacity;
+ 	sdg->sgc->min_capacity = capacity;
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1041,6 +1041,7 @@ struct rq {
+ 
+ 	unsigned long		cpu_capacity;
+ 	unsigned long		cpu_capacity_orig;
++	unsigned long		cpu_capacity_inverted;
+ 
+ 	struct balance_callback *balance_callback;
+ 
+@@ -2878,6 +2879,24 @@ static inline unsigned long capacity_ori
+ 	return cpu_rq(cpu)->cpu_capacity_orig;
+ }
+ 
++/*
++ * Returns inverted capacity if the CPU is in capacity inversion state.
++ * 0 otherwise.
++ *
++ * Capacity inversion detection only considers thermal impact where actual
++ * performance points (OPPs) gets dropped.
++ *
++ * Capacity inversion state happens when another performance domain that has
++ * equal or lower capacity_orig_of() becomes effectively larger than the perf
++ * domain this CPU belongs to due to thermal pressure throttling it hard.
++ *
++ * See comment in update_cpu_capacity().
++ */
++static inline unsigned long cpu_in_capacity_inversion(int cpu)
++{
++	return cpu_rq(cpu)->cpu_capacity_inverted;
++}
++
+ /**
+  * enum cpu_util_type - CPU utilization type
+  * @FREQUENCY_UTIL:	Utilization used to select frequency
diff --git a/queue-6.1/sched-fair-fixes-for-capacity-inversion-detection.patch b/queue-6.1/sched-fair-fixes-for-capacity-inversion-detection.patch
new file mode 100644
index 00000000000..9195718a5ec
--- /dev/null
+++ b/queue-6.1/sched-fair-fixes-for-capacity-inversion-detection.patch
@@ -0,0 +1,68 @@
+From stable-owner@vger.kernel.org Tue Apr 18 16:05:23 2023
+From: Qais Yousef
+Date: Tue, 18 Apr 2023 15:04:54 +0100
+Subject: sched/fair: Fixes for capacity inversion detection
+To: stable@vger.kernel.org, Greg Kroah-Hartman
+Cc: Peter Zijlstra , Vincent Guittot , Dietmar Eggemann , Qais Yousef
+Message-ID: <20230418140454.87367-4-qyousef@layalina.io>
+
+From: Qais Yousef
+
+commit: da07d2f9c153e457e845d4dcfdd13568d71d18a4 upstream.
+
+Traversing the Perf Domains requires rcu_read_lock() to be held and is
+conditional on sched_energy_enabled(). Ensure right protections applied.
+
+Also skip capacity inversion detection for our own pd; which was an
+error.
+
+Fixes: 44c7b80bffc3 ("sched/fair: Detect capacity inversion")
+Reported-by: Dietmar Eggemann
+Signed-off-by: Qais Yousef (Google)
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Vincent Guittot
+Link: https://lore.kernel.org/r/20230112122708.330667-3-qyousef@layalina.io
+(cherry picked from commit da07d2f9c153e457e845d4dcfdd13568d71d18a4)
+Signed-off-by: Qais Yousef (Google)
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched/fair.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8900,16 +8900,23 @@ static void update_cpu_capacity(struct s
+ 	 *   * Thermal pressure will impact all cpus in this perf domain
+ 	 *     equally.
+ 	 */
+-	if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++	if (sched_energy_enabled()) {
+ 		unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
+-		struct perf_domain *pd = rcu_dereference(rq->rd->pd);
++		struct perf_domain *pd;
+ 
++		rcu_read_lock();
++
++		pd = rcu_dereference(rq->rd->pd);
+ 		rq->cpu_capacity_inverted = 0;
+ 
+ 		for (; pd; pd = pd->next) {
+ 			struct cpumask *pd_span = perf_domain_span(pd);
+ 			unsigned long pd_cap_orig, pd_cap;
+ 
++			/* We can't be inverted against our own pd */
++			if (cpumask_test_cpu(cpu_of(rq), pd_span))
++				continue;
++
+ 			cpu = cpumask_any(pd_span);
+ 			pd_cap_orig = arch_scale_cpu_capacity(cpu);
+ 
+@@ -8934,6 +8941,8 @@ static void update_cpu_capacity(struct s
+ 				break;
+ 			}
+ 		}
++
++		rcu_read_unlock();
+ 	}
+ 
+ 	trace_sched_cpu_capacity_tp(rq);
diff --git a/queue-6.1/series b/queue-6.1/series
index 006e445890c..d1121b96a64 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -76,3 +76,6 @@ mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch
 mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch
 mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch
 mm-mmap-regression-fix-for-unmapped_area-_topdown.patch
+sched-fair-detect-capacity-inversion.patch
+sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
+sched-fair-fixes-for-capacity-inversion-detection.patch
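
To make the end state of these three patches easier to follow, below is a rough, self-contained model (plain C, userspace, with invented toy capacities) of the detection loop in update_cpu_capacity() after the fixes above: the CPU's own performance domain is skipped, nominally larger domains are ignored, and the inverted capacity is recorded when an equal or smaller domain ends up with more effective capacity once thermal pressure is subtracted. The struct perf_domain and detect_inversion() below are simplified stand-ins, not the kernel's definitions.

/* Illustrative only -- models the capacity inversion detection outside the kernel. */
#include <stdio.h>
#include <stddef.h>

struct perf_domain {
	unsigned long cap_orig;	/* stand-in for arch_scale_cpu_capacity() of its CPUs */
	unsigned long thermal;	/* stand-in for thermal_load_avg() of its CPUs */
};

/* Returns the inverted capacity of domain `self`, or 0 if it is not inverted. */
static unsigned long detect_inversion(const struct perf_domain *pds,
				      size_t nr, size_t self)
{
	unsigned long cap_orig = pds[self].cap_orig;
	unsigned long inv_cap = cap_orig - pds[self].thermal;
	size_t i;

	for (i = 0; i < nr; i++) {
		unsigned long pd_cap_orig = pds[i].cap_orig;

		if (i == self)			/* we can't be inverted against our own pd */
			continue;
		if (cap_orig < pd_cap_orig)	/* larger domains can't invert us */
			continue;

		if (cap_orig == pd_cap_orig) {
			/* equal cap_orig: inverted only if the other pd is less throttled */
			if (pd_cap_orig - pds[i].thermal > inv_cap)
				return inv_cap;
		} else if (pd_cap_orig > inv_cap) {
			/* a nominally smaller pd now beats our throttled capacity */
			return inv_cap;
		}
	}

	return 0;
}

int main(void)
{
	/* big CPUs heavily throttled, mid and little CPUs not: only big is inverted */
	struct perf_domain pds[] = {
		{ .cap_orig = 1024, .thermal = 500 },	/* big    -> effective 524 */
		{ .cap_orig = 768,  .thermal = 0 },	/* mid    -> effective 768 */
		{ .cap_orig = 384,  .thermal = 0 },	/* little -> effective 384 */
	};
	size_t nr = sizeof(pds) / sizeof(pds[0]);

	printf("big inverted capacity: %lu\n", detect_inversion(pds, nr, 0));	/* 524 */
	printf("mid inverted capacity: %lu\n", detect_inversion(pds, nr, 1));	/* 0 */
	return 0;
}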