From 8835f7776f431dd84233a7eb2db09b50fd02769b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Jan 2025 10:30:32 +0100 Subject: [PATCH] drop sched patch that is causing problems for people in 6.12 queue --- ...evdf-entity-placement-bug-causing-sc.patch | 259 ------------------ ...ix-update_cfs_group-vs-delay_dequeue.patch | 9 +- queue-6.12/series | 1 - 3 files changed, 2 insertions(+), 267 deletions(-) delete mode 100644 queue-6.12/sched-fair-fix-eevdf-entity-placement-bug-causing-sc.patch diff --git a/queue-6.12/sched-fair-fix-eevdf-entity-placement-bug-causing-sc.patch b/queue-6.12/sched-fair-fix-eevdf-entity-placement-bug-causing-sc.patch deleted file mode 100644 index e9b33cef4a..0000000000 --- a/queue-6.12/sched-fair-fix-eevdf-entity-placement-bug-causing-sc.patch +++ /dev/null @@ -1,259 +0,0 @@ -From 5f1e9bf4a001dc935b5c913507a27be28e80dccc Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Thu, 9 Jan 2025 11:59:59 +0100 -Subject: sched/fair: Fix EEVDF entity placement bug causing scheduling lag - -From: Peter Zijlstra - -[ Upstream commit 6d71a9c6160479899ee744d2c6d6602a191deb1f ] - -I noticed this in my traces today: - - turbostat-1222 [006] d..2. 311.935649: reweight_entity: (ffff888108f13e00-ffff88885ef38440-6) - { weight: 1048576 avg_vruntime: 3184159639071 vruntime: 3184159640194 (-1123) deadline: 3184162621107 } -> - { weight: 2 avg_vruntime: 3184177463330 vruntime: 3184748414495 (-570951165) deadline: 4747605329439 } - turbostat-1222 [006] d..2. 311.935651: reweight_entity: (ffff888108f13e00-ffff88885ef38440-6) - { weight: 2 avg_vruntime: 3184177463330 vruntime: 3184748414495 (-570951165) deadline: 4747605329439 } -> - { weight: 1048576 avg_vruntime: 3184176414812 vruntime: 3184177464419 (-1049607) deadline: 3184180445332 } - -Which is a weight transition: 1048576 -> 2 -> 1048576. - -One would expect the lag to shoot out *AND* come back, notably: - - -1123*1048576/2 = -588775424 - -588775424*2/1048576 = -1123 - -Except the trace shows it is all off. Worse, subsequent cycles shoot it -out further and further. - -This made me have a very hard look at reweight_entity(), and -specifically the ->on_rq case, which is more prominent with -DELAY_DEQUEUE. - -And indeed, it is all sorts of broken. While the computation of the new -lag is correct, the computation for the new vruntime, using the new lag -is broken for it does not consider the logic set out in place_entity(). - -With the below patch, I now see things like: - - migration/12-55 [012] d..3. 309.006650: reweight_entity: (ffff8881e0e6f600-ffff88885f235f40-12) - { weight: 977582 avg_vruntime: 4860513347366 vruntime: 4860513347908 (-542) deadline: 4860516552475 } -> - { weight: 2 avg_vruntime: 4860528915984 vruntime: 4860793840706 (-264924722) deadline: 6427157349203 } - migration/14-62 [014] d..3. 309.006698: reweight_entity: (ffff8881e0e6cc00-ffff88885f3b5f40-15) - { weight: 2 avg_vruntime: 4874472992283 vruntime: 4939833828823 (-65360836540) deadline: 6316614641111 } -> - { weight: 967149 avg_vruntime: 4874217684324 vruntime: 4874217688559 (-4235) deadline: 4874220535650 } - -Which isn't perfect yet, but much closer. - -Reported-by: Doug Smythies -Reported-by: Ingo Molnar -Tested-by: Ingo Molnar -Signed-off-by: Peter Zijlstra (Intel) -Signed-off-by: Ingo Molnar -Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") -Link: https://lore.kernel.org/r/20250109105959.GA2981@noisy.programming.kicks-ass.net -Signed-off-by: Sasha Levin ---- - kernel/sched/fair.c | 145 ++++++-------------------------------------- - 1 file changed, 18 insertions(+), 127 deletions(-) - -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 1ca96c99872f0..bf8153af96879 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -689,21 +689,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) - * - * XXX could add max_slice to the augmented data to track this. - */ --static s64 entity_lag(u64 avruntime, struct sched_entity *se) -+static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) - { - s64 vlag, limit; - -- vlag = avruntime - se->vruntime; -- limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -- -- return clamp(vlag, -limit, limit); --} -- --static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) --{ - SCHED_WARN_ON(!se->on_rq); - -- se->vlag = entity_lag(avg_vruntime(cfs_rq), se); -+ vlag = avg_vruntime(cfs_rq) - se->vruntime; -+ limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); -+ -+ se->vlag = clamp(vlag, -limit, limit); - } - - /* -@@ -3774,137 +3769,32 @@ static inline void - dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } - #endif - --static void reweight_eevdf(struct sched_entity *se, u64 avruntime, -- unsigned long weight) --{ -- unsigned long old_weight = se->load.weight; -- s64 vlag, vslice; -- -- /* -- * VRUNTIME -- * -------- -- * -- * COROLLARY #1: The virtual runtime of the entity needs to be -- * adjusted if re-weight at !0-lag point. -- * -- * Proof: For contradiction assume this is not true, so we can -- * re-weight without changing vruntime at !0-lag point. -- * -- * Weight VRuntime Avg-VRuntime -- * before w v V -- * after w' v' V' -- * -- * Since lag needs to be preserved through re-weight: -- * -- * lag = (V - v)*w = (V'- v')*w', where v = v' -- * ==> V' = (V - v)*w/w' + v (1) -- * -- * Let W be the total weight of the entities before reweight, -- * since V' is the new weighted average of entities: -- * -- * V' = (WV + w'v - wv) / (W + w' - w) (2) -- * -- * by using (1) & (2) we obtain: -- * -- * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v -- * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v -- * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v -- * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3) -- * -- * Since we are doing at !0-lag point which means V != v, we -- * can simplify (3): -- * -- * ==> W / (W + w' - w) = w / w' -- * ==> Ww' = Ww + ww' - ww -- * ==> W * (w' - w) = w * (w' - w) -- * ==> W = w (re-weight indicates w' != w) -- * -- * So the cfs_rq contains only one entity, hence vruntime of -- * the entity @v should always equal to the cfs_rq's weighted -- * average vruntime @V, which means we will always re-weight -- * at 0-lag point, thus breach assumption. Proof completed. -- * -- * -- * COROLLARY #2: Re-weight does NOT affect weighted average -- * vruntime of all the entities. -- * -- * Proof: According to corollary #1, Eq. (1) should be: -- * -- * (V - v)*w = (V' - v')*w' -- * ==> v' = V' - (V - v)*w/w' (4) -- * -- * According to the weighted average formula, we have: -- * -- * V' = (WV - wv + w'v') / (W - w + w') -- * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w') -- * = (WV - wv + w'V' - Vw + wv) / (W - w + w') -- * = (WV + w'V' - Vw) / (W - w + w') -- * -- * ==> V'*(W - w + w') = WV + w'V' - Vw -- * ==> V' * (W - w) = (W - w) * V (5) -- * -- * If the entity is the only one in the cfs_rq, then reweight -- * always occurs at 0-lag point, so V won't change. Or else -- * there are other entities, hence W != w, then Eq. (5) turns -- * into V' = V. So V won't change in either case, proof done. -- * -- * -- * So according to corollary #1 & #2, the effect of re-weight -- * on vruntime should be: -- * -- * v' = V' - (V - v) * w / w' (4) -- * = V - (V - v) * w / w' -- * = V - vl * w / w' -- * = V - vl' -- */ -- if (avruntime != se->vruntime) { -- vlag = entity_lag(avruntime, se); -- vlag = div_s64(vlag * old_weight, weight); -- se->vruntime = avruntime - vlag; -- } -- -- /* -- * DEADLINE -- * -------- -- * -- * When the weight changes, the virtual time slope changes and -- * we should adjust the relative virtual deadline accordingly. -- * -- * d' = v' + (d - v)*w/w' -- * = V' - (V - v)*w/w' + (d - v)*w/w' -- * = V - (V - v)*w/w' + (d - v)*w/w' -- * = V + (d - V)*w/w' -- */ -- vslice = (s64)(se->deadline - avruntime); -- vslice = div_s64(vslice * old_weight, weight); -- se->deadline = avruntime + vslice; --} -+static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags); - - static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - unsigned long weight) - { - bool curr = cfs_rq->curr == se; -- u64 avruntime; - - if (se->on_rq) { - /* commit outstanding execution time */ - update_curr(cfs_rq); -- avruntime = avg_vruntime(cfs_rq); -+ update_entity_lag(cfs_rq, se); -+ se->deadline -= se->vruntime; -+ se->rel_deadline = 1; - if (!curr) - __dequeue_entity(cfs_rq, se); - update_load_sub(&cfs_rq->load, se->load.weight); - } - dequeue_load_avg(cfs_rq, se); - -- if (se->on_rq) { -- reweight_eevdf(se, avruntime, weight); -- } else { -- /* -- * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), -- * we need to scale se->vlag when w_i changes. -- */ -- se->vlag = div_s64(se->vlag * se->load.weight, weight); -- } -+ /* -+ * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), -+ * we need to scale se->vlag when w_i changes. -+ */ -+ se->vlag = div_s64(se->vlag * se->load.weight, weight); -+ if (se->rel_deadline) -+ se->deadline = div_s64(se->deadline * se->load.weight, weight); - - update_load_set(&se->load, weight); - -@@ -3919,6 +3809,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, - enqueue_load_avg(cfs_rq, se); - if (se->on_rq) { - update_load_add(&cfs_rq->load, se->load.weight); -+ place_entity(cfs_rq, se, 0); - if (!curr) - __enqueue_entity(cfs_rq, se); - -@@ -5359,7 +5250,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) - - se->vruntime = vruntime - lag; - -- if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) { -+ if (se->rel_deadline) { - se->deadline += se->vruntime; - se->rel_deadline = 0; - return; --- -2.39.5 - diff --git a/queue-6.12/sched-fair-fix-update_cfs_group-vs-delay_dequeue.patch b/queue-6.12/sched-fair-fix-update_cfs_group-vs-delay_dequeue.patch index bce460966e..434a6149ed 100644 --- a/queue-6.12/sched-fair-fix-update_cfs_group-vs-delay_dequeue.patch +++ b/queue-6.12/sched-fair-fix-update_cfs_group-vs-delay_dequeue.patch @@ -25,14 +25,12 @@ Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250110115720.GA17405@noisy.programming.kicks-ass.net Signed-off-by: Sasha Levin --- - kernel/sched/fair.c | 6 +++++- + kernel/sched/fair.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index bf8153af96879..7f8677ce83f9f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -3956,7 +3956,11 @@ static void update_cfs_group(struct sched_entity *se) +@@ -4065,7 +4065,11 @@ static void update_cfs_group(struct sche struct cfs_rq *gcfs_rq = group_cfs_rq(se); long shares; @@ -45,6 +43,3 @@ index bf8153af96879..7f8677ce83f9f 100644 return; if (throttled_hierarchy(gcfs_rq)) --- -2.39.5 - diff --git a/queue-6.12/series b/queue-6.12/series index 7215bc5b61..859327cdf9 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -69,7 +69,6 @@ scx-fix-maximal-bpf-selftest-prog.patch rdma-bnxt_re-fix-to-export-port-num-to-ib_query_qp.patch sched_ext-fix-dsq_local_on-selftest.patch nvmet-propagate-npwg-topology.patch -sched-fair-fix-eevdf-entity-placement-bug-causing-sc.patch sched-fair-fix-update_cfs_group-vs-delay_dequeue.patch x86-asm-make-serialize-always_inline.patch alsa-hda-realtek-add-support-for-ayaneo-system-using-cs35l41-hda.patch -- 2.47.3