+++ /dev/null
-From f4e13af71a0b8d8031bf04897306e52bf021dd19 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Thu, 16 Dec 2021 22:53:19 +0000
-Subject: sched/sugov: Ignore 'busy' filter when rq is capped by uclamp_max
-
-From: Qais Yousef <qais.yousef@arm.com>
-
-[ Upstream commit 7a17e1db1265471f7718af100cfc5e41280d53a7 ]
-
-sugov_update_single_{freq, perf}() contains a 'busy' filter that ensures
-we don't bring the frequency down if there's no idle time (CPU is busy).
-
-The problem is that with uclamp_max we will have scenarios where a busy
-task is capped to run at a lower frequency, and this filter prevents
-the cap from being applied when such a task starts running.
-
-We handle this by skipping the filter when uclamp is enabled and the rq
-is being capped by uclamp_max.
-
-We introduce a new function uclamp_rq_is_capped() to help detect when
-this capping is taking effect. Some code shuffling was required to allow
-using cpu_util_{cfs, rt}() in this new function.
-
-On a 2-core SMT2 Intel laptop I see:
-
-Without this patch:
-
- uclampset -M 0 sysbench --test=cpu --threads=4 run
-
-produces a score of ~3200 consistently, which is the highest possible.
-
-Compiling the kernel also results in the frequency running at the
-maximum of 3.1GHz all the time; running uclampset -M 400 to cap it has
-no effect without this patch.
-
-With this patch:
-
- uclampset -M 0 sysbench --test=cpu --threads=4 run
-
-produces a score of ~1100 with some outliers around ~1700. Uclamp max
-aggregates the performance requirements, so occasional high values are
-expected if another task that requires a higher frequency happens to
-start running at the same time.
-
-When compiling the kernel with uclampset -M 400 I can see the
-frequencies mostly in the ~2GHz region, which helps conserve power and
-prevent overheating when not plugged in.
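-
-As an aside (not part of the actual change), the cap applied above with
-"uclampset -M 400" can also be requested programmatically. A minimal
-userspace sketch, assuming a kernel built with CONFIG_UCLAMP_TASK and
-the uapi headers installed (the helper name is illustrative):
-
-  #define _GNU_SOURCE
-  #include <linux/sched.h>        /* SCHED_FLAG_UTIL_CLAMP_MAX, SCHED_FLAG_KEEP_ALL */
-  #include <linux/sched/types.h>  /* struct sched_attr */
-  #include <sys/syscall.h>
-  #include <unistd.h>
-
-  static int cap_task_util(pid_t pid, unsigned int util_max)
-  {
-          struct sched_attr attr = {
-                  .size           = sizeof(attr),
-                  /* Only touch the max clamp; keep policy and params. */
-                  .sched_flags    = SCHED_FLAG_KEEP_ALL |
-                                    SCHED_FLAG_UTIL_CLAMP_MAX,
-                  .sched_util_max = util_max,     /* 0..1024, e.g. 400 */
-          };
-
-          /* There is no glibc wrapper, so invoke the syscall directly. */
-          return syscall(SYS_sched_setattr, pid, &attr, 0);
-  }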
-
-Fixes: 982d9cdc22c9 ("sched/cpufreq, sched/uclamp: Add clamps for FAIR and RT tasks")
-Signed-off-by: Qais Yousef <qais.yousef@arm.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lkml.kernel.org/r/20211216225320.2957053-2-qais.yousef@arm.com
-Stable-dep-of: b48e16a69792 ("sched/uclamp: Make task_fits_capacity() use util_fits_cpu()")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- kernel/sched/cpufreq_schedutil.c | 10 ++-
- kernel/sched/sched.h | 139 +++++++++++++++++--------------
- 2 files changed, 86 insertions(+), 63 deletions(-)
-
-diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
-index 7f6bb37d3a2f..93dcea233c65 100644
---- a/kernel/sched/cpufreq_schedutil.c
-+++ b/kernel/sched/cpufreq_schedutil.c
-@@ -349,8 +349,11 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
- /*
- * Do not reduce the frequency if the CPU has not been idle
- * recently, as the reduction is likely to be premature then.
-+ *
-+ * Except when the rq is capped by uclamp_max.
- */
-- if (sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
-+ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
-+ sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq) {
- next_f = sg_policy->next_freq;
-
- /* Restore cached freq as next_freq has changed */
-@@ -396,8 +399,11 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
- /*
- * Do not reduce the target performance level if the CPU has not been
- * idle recently, as the reduction is likely to be premature then.
-+ *
-+ * Except when the rq is capped by uclamp_max.
- */
-- if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
-+ if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
-+ sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
- sg_cpu->util = prev_util;
-
- cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index 7a3fcd70aa86..435bd7e43359 100644
---- a/kernel/sched/sched.h
-+++ b/kernel/sched/sched.h
-@@ -2847,6 +2847,67 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
- static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
- #endif /* CONFIG_CPU_FREQ */
-
-+#ifdef arch_scale_freq_capacity
-+# ifndef arch_scale_freq_invariant
-+# define arch_scale_freq_invariant() true
-+# endif
-+#else
-+# define arch_scale_freq_invariant() false
-+#endif
-+
-+#ifdef CONFIG_SMP
-+static inline unsigned long capacity_orig_of(int cpu)
-+{
-+ return cpu_rq(cpu)->cpu_capacity_orig;
-+}
-+
-+/**
-+ * enum cpu_util_type - CPU utilization type
-+ * @FREQUENCY_UTIL: Utilization used to select frequency
-+ * @ENERGY_UTIL: Utilization used during energy calculation
-+ *
-+ * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
-+ * need to be aggregated differently depending on the usage made of them. This
-+ * enum is used within effective_cpu_util() to differentiate the types of
-+ * utilization expected by the callers, and adjust the aggregation accordingly.
-+ */
-+enum cpu_util_type {
-+ FREQUENCY_UTIL,
-+ ENERGY_UTIL,
-+};
-+
-+unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
-+ unsigned long max, enum cpu_util_type type,
-+ struct task_struct *p);
-+
-+static inline unsigned long cpu_bw_dl(struct rq *rq)
-+{
-+ return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
-+}
-+
-+static inline unsigned long cpu_util_dl(struct rq *rq)
-+{
-+ return READ_ONCE(rq->avg_dl.util_avg);
-+}
-+
-+static inline unsigned long cpu_util_cfs(struct rq *rq)
-+{
-+ unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
-+
-+ if (sched_feat(UTIL_EST)) {
-+ util = max_t(unsigned long, util,
-+ READ_ONCE(rq->cfs.avg.util_est.enqueued));
-+ }
-+
-+ return util;
-+}
-+
-+static inline unsigned long cpu_util_rt(struct rq *rq)
-+{
-+ return READ_ONCE(rq->avg_rt.util_avg);
-+}
-+#endif
-+
- #ifdef CONFIG_UCLAMP_TASK
- unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
-
-@@ -2903,6 +2964,21 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
- return clamp(util, min_util, max_util);
- }
-
-+/* Is the rq being capped/throttled by uclamp_max? */
-+static inline bool uclamp_rq_is_capped(struct rq *rq)
-+{
-+ unsigned long rq_util;
-+ unsigned long max_util;
-+
-+ if (!static_branch_likely(&sched_uclamp_used))
-+ return false;
-+
-+ rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);
-+ max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
-+
-+ return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util;
-+}
-+
- /*
- * When uclamp is compiled in, the aggregation at rq level is 'turned off'
- * by default in the fast path and only gets turned on once userspace performs
-@@ -2923,73 +2999,14 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
- return util;
- }
-
-+static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; }
-+
- static inline bool uclamp_is_used(void)
- {
- return false;
- }
- #endif /* CONFIG_UCLAMP_TASK */
-
--#ifdef arch_scale_freq_capacity
--# ifndef arch_scale_freq_invariant
--# define arch_scale_freq_invariant() true
--# endif
--#else
--# define arch_scale_freq_invariant() false
--#endif
--
--#ifdef CONFIG_SMP
--static inline unsigned long capacity_orig_of(int cpu)
--{
-- return cpu_rq(cpu)->cpu_capacity_orig;
--}
--
--/**
-- * enum cpu_util_type - CPU utilization type
-- * @FREQUENCY_UTIL: Utilization used to select frequency
-- * @ENERGY_UTIL: Utilization used during energy calculation
-- *
-- * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
-- * need to be aggregated differently depending on the usage made of them. This
-- * enum is used within effective_cpu_util() to differentiate the types of
-- * utilization expected by the callers, and adjust the aggregation accordingly.
-- */
--enum cpu_util_type {
-- FREQUENCY_UTIL,
-- ENERGY_UTIL,
--};
--
--unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
-- unsigned long max, enum cpu_util_type type,
-- struct task_struct *p);
--
--static inline unsigned long cpu_bw_dl(struct rq *rq)
--{
-- return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
--}
--
--static inline unsigned long cpu_util_dl(struct rq *rq)
--{
-- return READ_ONCE(rq->avg_dl.util_avg);
--}
--
--static inline unsigned long cpu_util_cfs(struct rq *rq)
--{
-- unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
--
-- if (sched_feat(UTIL_EST)) {
-- util = max_t(unsigned long, util,
-- READ_ONCE(rq->cfs.avg.util_est.enqueued));
-- }
--
-- return util;
--}
--
--static inline unsigned long cpu_util_rt(struct rq *rq)
--{
-- return READ_ONCE(rq->avg_rt.util_avg);
--}
--#endif
--
- #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
- static inline unsigned long cpu_util_irq(struct rq *rq)
- {
---
-2.35.1
-
Link: https://lore.kernel.org/r/20220804143609.515789-3-qais.yousef@arm.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
- kernel/sched/fair.c | 26 ++++++++++++++++----------
- kernel/sched/sched.h | 9 +++++++++
+ kernel/sched/fair.c | 26 ++++++++++++++++----------
+ kernel/sched/sched.h | 9 +++++++++
2 files changed, 25 insertions(+), 10 deletions(-)
-diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
-index 951138dbf564..a4c71dfae95e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
-@@ -4243,10 +4243,12 @@ static inline int util_fits_cpu(unsigned long util,
+@@ -4243,10 +4243,12 @@ static inline int util_fits_cpu(unsigned
return fits;
}
}
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
-@@ -4259,7 +4261,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+@@ -4259,7 +4261,7 @@ static inline void update_misfit_status(
return;
}
rq->misfit_task_load = 0;
return;
}
-@@ -8157,7 +8159,7 @@ static int detach_tasks(struct lb_env *env)
+@@ -8157,7 +8159,7 @@ static int detach_tasks(struct lb_env *e
case migrate_misfit:
/* This is not a misfit task */
goto next;
env->imbalance = 0;
-@@ -9042,6 +9044,10 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
+@@ -9042,6 +9044,10 @@ static inline void update_sg_wakeup_stat
memset(sgs, 0, sizeof(*sgs));
for_each_cpu(i, sched_group_span(group)) {
struct rq *rq = cpu_rq(i);
unsigned int local;
-@@ -9061,12 +9067,12 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
+@@ -9061,12 +9067,12 @@ static inline void update_sg_wakeup_stat
if (!nr_running && idle_cpu_without(i, p))
sgs->idle_cpus++;
}
sgs->group_capacity = group->sgc->capacity;
-diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
-index 435bd7e43359..4a98bb9fd881 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
-@@ -2992,6 +2992,15 @@ static inline bool uclamp_is_used(void)
+@@ -2916,6 +2916,15 @@ static inline bool uclamp_is_used(void)
return static_branch_likely(&sched_uclamp_used);
}
#else /* CONFIG_UCLAMP_TASK */
static inline
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
struct task_struct *p)
---
-2.35.1
-