From: Tejun Heo <tj@kernel.org>
Date: Wed, 11 Sep 2024 18:43:26 +0000 (-1000)
Subject: Merge branch 'tip/sched/core' into sched_ext/for-6.12
X-Git-Tag: v6.12-rc1~111^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b1777f0fa045c561fd26c8fda61f5eb7a930ed3;p=thirdparty%2Flinux.git

Merge branch 'tip/sched/core' into sched_ext/for-6.12

Pull in tip/sched/core to resolve two merge conflicts:

- 96fd6c65efc6 ("sched: Factor out update_other_load_avgs() from __update_blocked_others()")
  5d871a63997f ("sched/fair: Move effective_cpu_util() and effective_cpu_util() in fair.c")

  A simple context conflict. The former added update_other_load_avgs() in
  the same #ifdef CONFIG_SMP block that effective_cpu_util() and
  sched_cpu_util() are in and the latter moved those functions to fair.c.
  This makes update_other_load_avgs() more out of place. Will follow up
  with a patch to relocate.

- 96fd6c65efc6 ("sched: Factor out update_other_load_avgs() from __update_blocked_others()")
  84d265281d6c ("sched/pelt: Use rq_clock_task() for hw_pressure")

  The former factored out the body of __update_blocked_others() into
  update_other_load_avgs(). The latter changed how update_hw_load_avg() is
  called in the body. Resolved by applying the change to
  update_other_load_avgs() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
---

0b1777f0fa045c561fd26c8fda61f5eb7a930ed3
diff --cc kernel/sched/syscalls.c
index 7ecade89eada6,c62acf509b748..b621e0050e426
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@@ -258,126 -258,6 +258,28 @@@ int sched_core_idle_cpu(int cpu
  
  #endif
  
 +#ifdef CONFIG_SMP
 +/*
 + * Load avg and utilization metrics need to be updated periodically and before
 + * consumption. This function updates the metrics for all subsystems except for
 + * the fair class. @rq must be locked and have its clock updated.
 + */
 +bool update_other_load_avgs(struct rq *rq)
 +{
 +	u64 now = rq_clock_pelt(rq);
 +	const struct sched_class *curr_class = rq->curr->sched_class;
 +	unsigned long hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
 +
 +	lockdep_assert_rq_held(rq);
 +
++	/* hw_pressure doesn't care about invariance */
 +	return update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
 +		update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
- 		update_hw_load_avg(now, rq, hw_pressure) |
++		update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure) |
 +		update_irq_load_avg(rq, 0);
 +}
- 
- /*
-  * This function computes an effective utilization for the given CPU, to be
-  * used for frequency selection given the linear relation: f = u * f_max.
-  *
-  * The scheduler tracks the following metrics:
-  *
-  *   cpu_util_{cfs,rt,dl,irq}()
-  *   cpu_bw_dl()
-  *
-  * Where the cfs,rt and dl util numbers are tracked with the same metric and
-  * synchronized windows and are thus directly comparable.
-  *
-  * The cfs,rt,dl utilization are the running times measured with rq->clock_task
-  * which excludes things like IRQ and steal-time. These latter are then accrued
-  * in the IRQ utilization.
-  *
-  * The DL bandwidth number OTOH is not a measured metric but a value computed
-  * based on the task model parameters and gives the minimal utilization
-  * required to meet deadlines.
-  */
- unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
- 				 unsigned long *min,
- 				 unsigned long *max)
- {
- 	unsigned long util, irq, scale;
- 	struct rq *rq = cpu_rq(cpu);
- 
- 	scale = arch_scale_cpu_capacity(cpu);
- 
- 	/*
- 	 * Early check to see if IRQ/steal time saturates the CPU, can be
- 	 * because of inaccuracies in how we track these -- see
- 	 * update_irq_load_avg().
- 	 */
- 	irq = cpu_util_irq(rq);
- 	if (unlikely(irq >= scale)) {
- 		if (min)
- 			*min = scale;
- 		if (max)
- 			*max = scale;
- 		return scale;
- 	}
- 
- 	if (min) {
- 		/*
- 		 * The minimum utilization returns the highest level between:
- 		 * - the computed DL bandwidth needed with the IRQ pressure which
- 		 *   steals time to the deadline task.
- 		 * - The minimum performance requirement for CFS and/or RT.
- 		 */
- 		*min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
- 
- 		/*
- 		 * When an RT task is runnable and uclamp is not used, we must
- 		 * ensure that the task will run at maximum compute capacity.
- 		 */
- 		if (!uclamp_is_used() && rt_rq_is_runnable(&rq->rt))
- 			*min = max(*min, scale);
- 	}
- 
- 	/*
- 	 * Because the time spend on RT/DL tasks is visible as 'lost' time to
- 	 * CFS tasks and we use the same metric to track the effective
- 	 * utilization (PELT windows are synchronized) we can directly add them
- 	 * to obtain the CPU's actual utilization.
- 	 */
- 	util = util_cfs + cpu_util_rt(rq);
- 	util += cpu_util_dl(rq);
- 
- 	/*
- 	 * The maximum hint is a soft bandwidth requirement, which can be lower
- 	 * than the actual utilization because of uclamp_max requirements.
- 	 */
- 	if (max)
- 		*max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
- 
- 	if (util >= scale)
- 		return scale;
- 
- 	/*
- 	 * There is still idle time; further improve the number by using the
- 	 * IRQ metric. Because IRQ/steal time is hidden from the task clock we
- 	 * need to scale the task numbers:
- 	 *
- 	 *              max - irq
- 	 *   U' = irq + --------- * U
- 	 *                 max
- 	 */
- 	util = scale_irq_capacity(util, irq, scale);
- 	util += irq;
- 
- 	return min(scale, util);
- }
- 
- unsigned long sched_cpu_util(int cpu)
- {
- 	return effective_cpu_util(cpu, cpu_util_cfs(cpu), NULL, NULL);
- }
 +#endif /* CONFIG_SMP */
 +
  /**
   * find_process_by_pid - find a process with a matching PID value.
   * @pid: the pid in question.