6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 Apr 2023 17:17:40 +0000 (19:17 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 Apr 2023 17:17:40 +0000 (19:17 +0200)
added patches:
sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
sched-fair-detect-capacity-inversion.patch
sched-fair-fixes-for-capacity-inversion-detection.patch

queue-6.1/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch [new file with mode: 0644]
queue-6.1/sched-fair-detect-capacity-inversion.patch [new file with mode: 0644]
queue-6.1/sched-fair-fixes-for-capacity-inversion-detection.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch b/queue-6.1/sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
new file mode 100644 (file)
index 0000000..1901ebc
--- /dev/null
@@ -0,0 +1,60 @@
+From stable-owner@vger.kernel.org Tue Apr 18 16:05:21 2023
+From: Qais Yousef <qyousef@layalina.io>
+Date: Tue, 18 Apr 2023 15:04:53 +0100
+Subject: sched/fair: Consider capacity inversion in util_fits_cpu()
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>, Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>, Qais Yousef <qais.yousef@arm.com>, Qais Yousef <qyousef@layalina.io>
+Message-ID: <20230418140454.87367-3-qyousef@layalina.io>
+
+From: Qais Yousef <qais.yousef@arm.com>
+
+commit: aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8 upstream.
+
+We only consider thermal pressure in util_fits_cpu() for uclamp_min,
+with the exception of the biggest cores, which by definition are the max
+performance point of the system and on which all tasks should fit.
+
+Even under thermal pressure, the capacity of the biggest CPU is the
+highest in the system and should still fit every task. Except when it
+reaches capacity inversion point, then this is no longer true.
+
+We can handle this by using the inverted capacity as capacity_orig in
+util_fits_cpu(), which not only addresses the problem above but also
+ensures uclamp_max now considers the inverted capacity. Force-fitting
+a task when a CPU is in this adverse state will contribute to making the
+thermal throttling last longer.
+
+Signed-off-by: Qais Yousef <qais.yousef@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220804143609.515789-10-qais.yousef@arm.com
+(cherry picked from commit aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8)
+Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -4465,12 +4465,16 @@ static inline int util_fits_cpu(unsigned
+        * For uclamp_max, we can tolerate a drop in performance level as the
+        * goal is to cap the task. So it's okay if it's getting less.
+        *
+-       * In case of capacity inversion, which is not handled yet, we should
+-       * honour the inverted capacity for both uclamp_min and uclamp_max all
+-       * the time.
++       * In case of capacity inversion we should honour the inverted capacity
++       * for both uclamp_min and uclamp_max all the time.
+        */
+-      capacity_orig = capacity_orig_of(cpu);
+-      capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu);
++      capacity_orig = cpu_in_capacity_inversion(cpu);
++      if (capacity_orig) {
++              capacity_orig_thermal = capacity_orig;
++      } else {
++              capacity_orig = capacity_orig_of(cpu);
++              capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu);
++      }
+       /*
+        * We want to force a task to fit a cpu as implied by uclamp_max.
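For readers following the queue, the net effect of this hunk is a two-way
choice of reference capacities in util_fits_cpu(). Below is a minimal
sketch of that selection, using the helpers referenced above
(cpu_in_capacity_inversion(), capacity_orig_of(),
arch_scale_thermal_pressure()); the wrapper name pick_reference_capacity()
is illustrative only and not part of the patch:

/*
 * Illustrative sketch only: condensed form of the capacity selection added
 * by the hunk above. In the kernel this logic lives inline in
 * util_fits_cpu(); the wrapper function is hypothetical.
 */
static inline void pick_reference_capacity(int cpu,
					   unsigned long *capacity_orig,
					   unsigned long *capacity_orig_thermal)
{
	unsigned long inverted = cpu_in_capacity_inversion(cpu);

	if (inverted) {
		/* In capacity inversion: honour the inverted capacity for
		 * both the uclamp_min and uclamp_max checks. */
		*capacity_orig = inverted;
		*capacity_orig_thermal = inverted;
	} else {
		/* Otherwise uclamp_min is checked against the thermally
		 * reduced capacity and uclamp_max against the original one. */
		*capacity_orig = capacity_orig_of(cpu);
		*capacity_orig_thermal = *capacity_orig -
					 arch_scale_thermal_pressure(cpu);
	}
}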
diff --git a/queue-6.1/sched-fair-detect-capacity-inversion.patch b/queue-6.1/sched-fair-detect-capacity-inversion.patch
new file mode 100644 (file)
index 0000000..11b527c
--- /dev/null
@@ -0,0 +1,154 @@
+From stable-owner@vger.kernel.org Tue Apr 18 16:05:20 2023
+From: Qais Yousef <qyousef@layalina.io>
+Date: Tue, 18 Apr 2023 15:04:52 +0100
+Subject: sched/fair: Detect capacity inversion
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>, Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>, Qais Yousef <qais.yousef@arm.com>, Qais Yousef <qyousef@layalina.io>
+Message-ID: <20230418140454.87367-2-qyousef@layalina.io>
+
+From: Qais Yousef <qais.yousef@arm.com>
+
+commit: 44c7b80bffc3a657a36857098d5d9c49d94e652b upstream.
+
+Check each performance domain to see if thermal pressure is causing its
+capacity to be lower than that of another performance domain.
+
+We assume that each performance domain has CPUs with the same
+capacities, which is similar to an assumption made in energy_model.c
+
+We also assume that thermal pressure impacts all CPUs in a performance
+domain equally.
+
+If there are multiple performance domains with the same capacity_orig,
+we will trigger a capacity inversion if the domain is under thermal
+pressure.
+
+The new cpu_in_capacity_inversion() should help users know when
+information about capacity_orig is not reliable, so they can opt in to
+use the inverted capacity as the 'actual' capacity_orig.
+
+Signed-off-by: Qais Yousef <qais.yousef@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220804143609.515789-9-qais.yousef@arm.com
+(cherry picked from commit 44c7b80bffc3a657a36857098d5d9c49d94e652b)
+Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c  |   63 ++++++++++++++++++++++++++++++++++++++++++++++++---
+ kernel/sched/sched.h |   19 +++++++++++++++
+ 2 files changed, 79 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8866,16 +8866,73 @@ static unsigned long scale_rt_capacity(i
+ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
+ {
++      unsigned long capacity_orig = arch_scale_cpu_capacity(cpu);
+       unsigned long capacity = scale_rt_capacity(cpu);
+       struct sched_group *sdg = sd->groups;
++      struct rq *rq = cpu_rq(cpu);
+-      cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
++      rq->cpu_capacity_orig = capacity_orig;
+       if (!capacity)
+               capacity = 1;
+-      cpu_rq(cpu)->cpu_capacity = capacity;
+-      trace_sched_cpu_capacity_tp(cpu_rq(cpu));
++      rq->cpu_capacity = capacity;
++
++      /*
++       * Detect if the performance domain is in capacity inversion state.
++       *
++       * Capacity inversion happens when another perf domain with equal or
++       * lower capacity_orig_of() ends up having higher capacity than this
++       * domain after subtracting thermal pressure.
++       *
++       * We only take into account thermal pressure in this detection as it's
++       * the only metric that actually results in *real* reduction of
++       * capacity due to performance points (OPPs) being dropped/become
++       * unreachable due to thermal throttling.
++       *
++       * We assume:
++       *   * That all cpus in a perf domain have the same capacity_orig
++       *     (same uArch).
++       *   * Thermal pressure will impact all cpus in this perf domain
++       *     equally.
++       */
++      if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++              unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
++              struct perf_domain *pd = rcu_dereference(rq->rd->pd);
++
++              rq->cpu_capacity_inverted = 0;
++
++              for (; pd; pd = pd->next) {
++                      struct cpumask *pd_span = perf_domain_span(pd);
++                      unsigned long pd_cap_orig, pd_cap;
++
++                      cpu = cpumask_any(pd_span);
++                      pd_cap_orig = arch_scale_cpu_capacity(cpu);
++
++                      if (capacity_orig < pd_cap_orig)
++                              continue;
++
++                      /*
++                       * handle the case of multiple perf domains have the
++                       * same capacity_orig but one of them is under higher
++                       * thermal pressure. We record it as capacity
++                       * inversion.
++                       */
++                      if (capacity_orig == pd_cap_orig) {
++                              pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu));
++
++                              if (pd_cap > inv_cap) {
++                                      rq->cpu_capacity_inverted = inv_cap;
++                                      break;
++                              }
++                      } else if (pd_cap_orig > inv_cap) {
++                              rq->cpu_capacity_inverted = inv_cap;
++                              break;
++                      }
++              }
++      }
++
++      trace_sched_cpu_capacity_tp(rq);
+       sdg->sgc->capacity = capacity;
+       sdg->sgc->min_capacity = capacity;
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1041,6 +1041,7 @@ struct rq {
+       unsigned long           cpu_capacity;
+       unsigned long           cpu_capacity_orig;
++      unsigned long           cpu_capacity_inverted;
+       struct balance_callback *balance_callback;
+@@ -2878,6 +2879,24 @@ static inline unsigned long capacity_ori
+       return cpu_rq(cpu)->cpu_capacity_orig;
+ }
++/*
++ * Returns inverted capacity if the CPU is in capacity inversion state.
++ * 0 otherwise.
++ *
++ * Capacity inversion detection only considers thermal impact where actual
++ * performance points (OPPs) gets dropped.
++ *
++ * Capacity inversion state happens when another performance domain that has
++ * equal or lower capacity_orig_of() becomes effectively larger than the perf
++ * domain this CPU belongs to due to thermal pressure throttling it hard.
++ *
++ * See comment in update_cpu_capacity().
++ */
++static inline unsigned long cpu_in_capacity_inversion(int cpu)
++{
++      return cpu_rq(cpu)->cpu_capacity_inverted;
++}
++
+ /**
+  * enum cpu_util_type - CPU utilization type
+  * @FREQUENCY_UTIL:   Utilization used to select frequency
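In prose, the loop above marks this CPU as inverted when some other
performance domain with an equal or lower capacity_orig still ends up with
a higher thermally adjusted capacity. Below is a condensed sketch of the
per-domain test, assuming the values computed in the hunk above;
pd_inverts_this_cpu() is a hypothetical helper, not part of the patch:

/*
 * Illustrative sketch only: the comparison made for each perf domain pd in
 * the detection loop above. inv_cap is this CPU's capacity_orig minus its
 * thermal_load_avg(); pd_cap is pd's capacity_orig minus pd's own
 * thermal_load_avg().
 */
static inline bool pd_inverts_this_cpu(unsigned long capacity_orig,
				       unsigned long inv_cap,
				       unsigned long pd_cap_orig,
				       unsigned long pd_cap)
{
	/* Domains with a higher capacity_orig are skipped by the loop. */
	if (capacity_orig < pd_cap_orig)
		return false;

	/* Equal peers: inverted if the other domain is less throttled. */
	if (capacity_orig == pd_cap_orig)
		return pd_cap > inv_cap;

	/* A nominally smaller domain that is now effectively bigger. */
	return pd_cap_orig > inv_cap;
}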
diff --git a/queue-6.1/sched-fair-fixes-for-capacity-inversion-detection.patch b/queue-6.1/sched-fair-fixes-for-capacity-inversion-detection.patch
new file mode 100644 (file)
index 0000000..9195718
--- /dev/null
@@ -0,0 +1,68 @@
+From stable-owner@vger.kernel.org Tue Apr 18 16:05:23 2023
+From: Qais Yousef <qyousef@layalina.io>
+Date: Tue, 18 Apr 2023 15:04:54 +0100
+Subject: sched/fair: Fixes for capacity inversion detection
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>, Vincent Guittot <vincent.guittot@linaro.org>, Dietmar Eggemann <dietmar.eggemann@arm.com>, Qais Yousef <qyousef@layalina.io>
+Message-ID: <20230418140454.87367-4-qyousef@layalina.io>
+
+From: Qais Yousef <qyousef@layalina.io>
+
+commit: da07d2f9c153e457e845d4dcfdd13568d71d18a4 upstream.
+
+Traversing the perf domains requires rcu_read_lock() to be held and is
+conditional on sched_energy_enabled(). Ensure the right protections are
+applied.
+
+Also skip capacity inversion detection for our own pd, which was an
+error.
+
+Fixes: 44c7b80bffc3 ("sched/fair: Detect capacity inversion")
+Reported-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20230112122708.330667-3-qyousef@layalina.io
+(cherry picked from commit da07d2f9c153e457e845d4dcfdd13568d71d18a4)
+Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -8900,16 +8900,23 @@ static void update_cpu_capacity(struct s
+        *   * Thermal pressure will impact all cpus in this perf domain
+        *     equally.
+        */
+-      if (static_branch_unlikely(&sched_asym_cpucapacity)) {
++      if (sched_energy_enabled()) {
+               unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
+-              struct perf_domain *pd = rcu_dereference(rq->rd->pd);
++              struct perf_domain *pd;
++              rcu_read_lock();
++
++              pd = rcu_dereference(rq->rd->pd);
+               rq->cpu_capacity_inverted = 0;
+               for (; pd; pd = pd->next) {
+                       struct cpumask *pd_span = perf_domain_span(pd);
+                       unsigned long pd_cap_orig, pd_cap;
++                      /* We can't be inverted against our own pd */
++                      if (cpumask_test_cpu(cpu_of(rq), pd_span))
++                              continue;
++
+                       cpu = cpumask_any(pd_span);
+                       pd_cap_orig = arch_scale_cpu_capacity(cpu);
+@@ -8934,6 +8941,8 @@ static void update_cpu_capacity(struct s
+                               break;
+                       }
+               }
++
++              rcu_read_unlock();
+       }
+       trace_sched_cpu_capacity_tp(rq);
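After this fix the traversal takes the usual shape for walking the root
domain's perf-domain list: gate on sched_energy_enabled(), hold
rcu_read_lock() around the rcu_dereference() and the walk, and skip the
CPU's own domain. A control-flow sketch only, with the per-domain
comparison elided:

	if (sched_energy_enabled()) {
		struct perf_domain *pd;

		rcu_read_lock();
		rq->cpu_capacity_inverted = 0;

		for (pd = rcu_dereference(rq->rd->pd); pd; pd = pd->next) {
			/* A CPU cannot be inverted against its own domain. */
			if (cpumask_test_cpu(cpu_of(rq), perf_domain_span(pd)))
				continue;

			/* ... compare pd's thermally adjusted capacity ... */
		}

		rcu_read_unlock();
	}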
diff --git a/queue-6.1/series b/queue-6.1/series
index 006e445890c92c2a813483df93c20ffcb88824e3..d1121b96a6478fa1814390a3ef81d018bdcbfe1c 100644 (file)
@@ -76,3 +76,6 @@ mm-kmsan-handle-alloc-failures-in-kmsan_ioremap_page_range.patch
 mm-kmsan-handle-alloc-failures-in-kmsan_vmap_pages_range_noflush.patch
 mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch
 mm-mmap-regression-fix-for-unmapped_area-_topdown.patch
+sched-fair-detect-capacity-inversion.patch
+sched-fair-consider-capacity-inversion-in-util_fits_cpu.patch
+sched-fair-fixes-for-capacity-inversion-detection.patch