From: Greg Kroah-Hartman Date: Tue, 18 Feb 2025 15:03:37 +0000 (+0100) Subject: 6.13-stable patches X-Git-Tag: v6.1.129~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8cc0d4610d23235b5c3fbdb4eb72443f3a033a0c;p=thirdparty%2Fkernel%2Fstable-queue.git 6.13-stable patches added patches: sched-deadline-check-bandwidth-overflow-earlier-for-hotplug.patch sched-deadline-correctly-account-for-allocated-bandwidth-during-hotplug.patch sched-deadline-restore-dl_server-bandwidth-on-non-destructive-root-domain-changes.patch --- diff --git a/queue-6.13/sched-deadline-check-bandwidth-overflow-earlier-for-hotplug.patch b/queue-6.13/sched-deadline-check-bandwidth-overflow-earlier-for-hotplug.patch new file mode 100644 index 0000000000..8cd686a8b6 --- /dev/null +++ b/queue-6.13/sched-deadline-check-bandwidth-overflow-earlier-for-hotplug.patch @@ -0,0 +1,119 @@ +From 53916d5fd3c0b658de3463439dd2b7ce765072cb Mon Sep 17 00:00:00 2001 +From: Juri Lelli +Date: Fri, 15 Nov 2024 11:48:29 +0000 +Subject: sched/deadline: Check bandwidth overflow earlier for hotplug + +From: Juri Lelli + +commit 53916d5fd3c0b658de3463439dd2b7ce765072cb upstream. + +Currently we check for bandwidth overflow potentially due to hotplug +operations at the end of sched_cpu_deactivate(), after the cpu going +offline has already been removed from scheduling, active_mask, etc. +This can create issues for DEADLINE tasks, as there is a substantial +race window between the start of sched_cpu_deactivate() and the moment +we possibly decide to roll-back the operation if dl_bw_deactivate() +returns failure in cpuset_cpu_inactive(). An example is a throttled +task that sees its replenishment timer firing while the cpu it was +previously running on is considered offline, but before +dl_bw_deactivate() had a chance to say no and roll-back happened. + +Fix this by directly calling dl_bw_deactivate() first thing in +sched_cpu_deactivate() and do the required calculation in the former +function considering the cpu passed as an argument as offline already. + +By doing so we also simplify sched_cpu_deactivate(), as there is no need +anymore for any kind of roll-back if we fail early. 
+ +Signed-off-by: Juri Lelli +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Phil Auld +Tested-by: Waiman Long +Link: https://lore.kernel.org/r/Zzc1DfPhbvqDDIJR@jlelli-thinkpadt14gen4.remote.csb +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/core.c | 22 +++++++--------------- + kernel/sched/deadline.c | 12 ++++++++++-- + 2 files changed, 17 insertions(+), 17 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8182,19 +8182,14 @@ static void cpuset_cpu_active(void) + cpuset_update_active_cpus(); + } + +-static int cpuset_cpu_inactive(unsigned int cpu) ++static void cpuset_cpu_inactive(unsigned int cpu) + { + if (!cpuhp_tasks_frozen) { +- int ret = dl_bw_deactivate(cpu); +- +- if (ret) +- return ret; + cpuset_update_active_cpus(); + } else { + num_cpus_frozen++; + partition_sched_domains(1, NULL, NULL); + } +- return 0; + } + + static inline void sched_smt_present_inc(int cpu) +@@ -8256,6 +8251,11 @@ int sched_cpu_deactivate(unsigned int cp + struct rq *rq = cpu_rq(cpu); + int ret; + ++ ret = dl_bw_deactivate(cpu); ++ ++ if (ret) ++ return ret; ++ + /* + * Remove CPU from nohz.idle_cpus_mask to prevent participating in + * load balancing when not active +@@ -8301,15 +8301,7 @@ int sched_cpu_deactivate(unsigned int cp + return 0; + + sched_update_numa(cpu, false); +- ret = cpuset_cpu_inactive(cpu); +- if (ret) { +- sched_smt_present_inc(cpu); +- sched_set_rq_online(rq, cpu); +- balance_push_set(cpu, false); +- set_cpu_active(cpu, true); +- sched_update_numa(cpu, true); +- return ret; +- } ++ cpuset_cpu_inactive(cpu); + sched_domains_numa_masks_clear(cpu); + return 0; + } +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -3500,6 +3500,13 @@ static int dl_bw_manage(enum dl_bw_reque + break; + case dl_bw_req_deactivate: + /* ++ * cpu is not off yet, but we need to do the math by ++ * considering it off already (i.e., what would happen if we ++ * turn cpu off?). ++ */ ++ cap -= arch_scale_cpu_capacity(cpu); ++ ++ /* + * cpu is going offline and NORMAL tasks will be moved away + * from it. We can thus discount dl_server bandwidth + * contribution as it won't need to be servicing tasks after +@@ -3516,9 +3523,10 @@ static int dl_bw_manage(enum dl_bw_reque + if (dl_b->total_bw - fair_server_bw > 0) { + /* + * Leaving at least one CPU for DEADLINE tasks seems a +- * wise thing to do. ++ * wise thing to do. As said above, cpu is not offline ++ * yet, so account for that. + */ +- if (dl_bw_cpus(cpu)) ++ if (dl_bw_cpus(cpu) - 1) + overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0); + else + overflow = 1; diff --git a/queue-6.13/sched-deadline-correctly-account-for-allocated-bandwidth-during-hotplug.patch b/queue-6.13/sched-deadline-correctly-account-for-allocated-bandwidth-during-hotplug.patch new file mode 100644 index 0000000000..ed48d49436 --- /dev/null +++ b/queue-6.13/sched-deadline-correctly-account-for-allocated-bandwidth-during-hotplug.patch @@ -0,0 +1,141 @@ +From d4742f6ed7ea6df56e381f82ba4532245fa1e561 Mon Sep 17 00:00:00 2001 +From: Juri Lelli +Date: Thu, 14 Nov 2024 14:28:10 +0000 +Subject: sched/deadline: Correctly account for allocated bandwidth during hotplug + +From: Juri Lelli + +commit d4742f6ed7ea6df56e381f82ba4532245fa1e561 upstream. + +For hotplug operations, DEADLINE needs to check that there is still enough +bandwidth left after removing the CPU that is going offline. We however +fail to do so currently. 
+ +Restore the correct behavior by restructuring dl_bw_manage() a bit, so +that overflow conditions (not enough bandwidth left) are properly +checked. Also account for dl_server bandwidth, i.e. discount such +bandwidth in the calculation since NORMAL tasks will be anyway moved +away from the CPU as a result of the hotplug operation. + +Signed-off-by: Juri Lelli +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Phil Auld +Tested-by: Waiman Long +Link: https://lore.kernel.org/r/20241114142810.794657-3-juri.lelli@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/core.c | 2 +- + kernel/sched/deadline.c | 48 +++++++++++++++++++++++++++++++++++++++--------- + kernel/sched/sched.h | 2 +- + 3 files changed, 41 insertions(+), 11 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8185,7 +8185,7 @@ static void cpuset_cpu_active(void) + static int cpuset_cpu_inactive(unsigned int cpu) + { + if (!cpuhp_tasks_frozen) { +- int ret = dl_bw_check_overflow(cpu); ++ int ret = dl_bw_deactivate(cpu); + + if (ret) + return ret; +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -3464,29 +3464,31 @@ int dl_cpuset_cpumask_can_shrink(const s + } + + enum dl_bw_request { +- dl_bw_req_check_overflow = 0, ++ dl_bw_req_deactivate = 0, + dl_bw_req_alloc, + dl_bw_req_free + }; + + static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) + { +- unsigned long flags; ++ unsigned long flags, cap; + struct dl_bw *dl_b; + bool overflow = 0; ++ u64 fair_server_bw = 0; + + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); + raw_spin_lock_irqsave(&dl_b->lock, flags); + +- if (req == dl_bw_req_free) { ++ cap = dl_bw_capacity(cpu); ++ switch (req) { ++ case dl_bw_req_free: + __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu)); +- } else { +- unsigned long cap = dl_bw_capacity(cpu); +- ++ break; ++ case dl_bw_req_alloc: + overflow = __dl_overflow(dl_b, cap, 0, dl_bw); + +- if (req == dl_bw_req_alloc && !overflow) { ++ if (!overflow) { + /* + * We reserve space in the destination + * root_domain, as we can't fail after this point. +@@ -3495,6 +3497,34 @@ static int dl_bw_manage(enum dl_bw_reque + */ + __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu)); + } ++ break; ++ case dl_bw_req_deactivate: ++ /* ++ * cpu is going offline and NORMAL tasks will be moved away ++ * from it. We can thus discount dl_server bandwidth ++ * contribution as it won't need to be servicing tasks after ++ * the cpu is off. ++ */ ++ if (cpu_rq(cpu)->fair_server.dl_server) ++ fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw; ++ ++ /* ++ * Not much to check if no DEADLINE bandwidth is present. ++ * dl_servers we can discount, as tasks will be moved out the ++ * offlined CPUs anyway. ++ */ ++ if (dl_b->total_bw - fair_server_bw > 0) { ++ /* ++ * Leaving at least one CPU for DEADLINE tasks seems a ++ * wise thing to do. ++ */ ++ if (dl_bw_cpus(cpu)) ++ overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0); ++ else ++ overflow = 1; ++ } ++ ++ break; + } + + raw_spin_unlock_irqrestore(&dl_b->lock, flags); +@@ -3503,9 +3533,9 @@ static int dl_bw_manage(enum dl_bw_reque + return overflow ? 
-EBUSY : 0; + } + +-int dl_bw_check_overflow(int cpu) ++int dl_bw_deactivate(int cpu) + { +- return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0); ++ return dl_bw_manage(dl_bw_req_deactivate, cpu, 0); + } + + int dl_bw_alloc(int cpu, u64 dl_bw) +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -362,7 +362,7 @@ extern void __getparam_dl(struct task_st + extern bool __checkparam_dl(const struct sched_attr *attr); + extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); + extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +-extern int dl_bw_check_overflow(int cpu); ++extern int dl_bw_deactivate(int cpu); + extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec); + /* + * SCHED_DEADLINE supports servers (nested scheduling) with the following diff --git a/queue-6.13/sched-deadline-restore-dl_server-bandwidth-on-non-destructive-root-domain-changes.patch b/queue-6.13/sched-deadline-restore-dl_server-bandwidth-on-non-destructive-root-domain-changes.patch new file mode 100644 index 0000000000..3e113e5cc6 --- /dev/null +++ b/queue-6.13/sched-deadline-restore-dl_server-bandwidth-on-non-destructive-root-domain-changes.patch @@ -0,0 +1,78 @@ +From 41d4200b7103152468552ee50998cda914102049 Mon Sep 17 00:00:00 2001 +From: Juri Lelli +Date: Thu, 14 Nov 2024 14:28:09 +0000 +Subject: sched/deadline: Restore dl_server bandwidth on non-destructive root domain changes + +From: Juri Lelli + +commit 41d4200b7103152468552ee50998cda914102049 upstream. + +When root domain non-destructive changes (e.g., only modifying one of +the existing root domains while the rest is not touched) happen we still +need to clear DEADLINE bandwidth accounting so that it's then properly +restored, taking into account DEADLINE tasks associated to each cpuset +(associated to each root domain). After the introduction of dl_servers, +we fail to restore such servers contribution after non-destructive +changes (as they are only considered on destructive changes when +runqueues are attached to the new domains). + +Fix this by making sure we iterate over the dl_servers attached to +domains that have not been destroyed and add their bandwidth +contribution back correctly. + +Signed-off-by: Juri Lelli +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Phil Auld +Tested-by: Waiman Long +Link: https://lore.kernel.org/r/20241114142810.794657-2-juri.lelli@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/deadline.c | 17 ++++++++++++++--- + kernel/sched/topology.c | 8 +++++--- + 2 files changed, 19 insertions(+), 6 deletions(-) + +--- a/kernel/sched/deadline.c ++++ b/kernel/sched/deadline.c +@@ -2964,11 +2964,22 @@ void dl_add_task_root_domain(struct task + + void dl_clear_root_domain(struct root_domain *rd) + { +- unsigned long flags; ++ int i; + +- raw_spin_lock_irqsave(&rd->dl_bw.lock, flags); ++ guard(raw_spinlock_irqsave)(&rd->dl_bw.lock); + rd->dl_bw.total_bw = 0; +- raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags); ++ ++ /* ++ * dl_server bandwidth is only restored when CPUs are attached to root ++ * domains (after domains are created or CPUs moved back to the ++ * default root doamin). 
++ */ ++ for_each_cpu(i, rd->span) { ++ struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server; ++ ++ if (dl_server(dl_se) && cpu_active(i)) ++ rd->dl_bw.total_bw += dl_se->dl_bw; ++ } + } + + #endif /* CONFIG_SMP */ +--- a/kernel/sched/topology.c ++++ b/kernel/sched/topology.c +@@ -2721,9 +2721,11 @@ void partition_sched_domains_locked(int + + /* + * This domain won't be destroyed and as such +- * its dl_bw->total_bw needs to be cleared. It +- * will be recomputed in function +- * update_tasks_root_domain(). ++ * its dl_bw->total_bw needs to be cleared. ++ * Tasks contribution will be then recomputed ++ * in function dl_update_tasks_root_domain(), ++ * dl_servers contribution in function ++ * dl_restore_server_root_domain(). + */ + rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; + dl_clear_root_domain(rd); diff --git a/queue-6.13/series b/queue-6.13/series index ffd43d8d50..f1c44b7a05 100644 --- a/queue-6.13/series +++ b/queue-6.13/series @@ -260,3 +260,6 @@ drm-v3d-stop-active-perfmon-if-it-is-being-destroyed.patch drm-zynqmp_dp-fix-integer-overflow-in-zynqmp_dp_rate_get.patch drm-xe-tracing-fix-a-potential-tp_printk-uaf.patch drm-renesas-rz-du-increase-supported-resolutions.patch +sched-deadline-restore-dl_server-bandwidth-on-non-destructive-root-domain-changes.patch +sched-deadline-correctly-account-for-allocated-bandwidth-during-hotplug.patch +sched-deadline-check-bandwidth-overflow-earlier-for-hotplug.patch
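
All three patches revolve around one admission test: when a CPU is taken offline, the capacity left in the root domain must still cover the DEADLINE bandwidth that stays behind, after discounting the dl_server (fair_server) bandwidth of the leaving CPU and keeping at least one CPU available for DEADLINE tasks. The standalone C sketch below illustrates that arithmetic under simplified assumptions. It is not kernel code: the function name deactivate_would_overflow, the parameter names (rd_capacity, cpus_in_domain, ...) and the worked numbers in main() are invented for illustration; only BW_SHIFT and SCHED_CAPACITY_SHIFT mirror the kernel's fixed-point conventions for to_ratio()/cap_scale().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT		20	/* DEADLINE bandwidth fixed point, as in the kernel */
#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* runtime/period expressed in <<BW_SHIFT fixed point (like the kernel's to_ratio()) */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << BW_SHIFT) / period;
}

/* scale a per-CPU bandwidth limit by an aggregate capacity (like cap_scale()) */
static uint64_t cap_scale(uint64_t bw, unsigned long cap)
{
	return (bw * cap) >> SCHED_CAPACITY_SHIFT;
}

/*
 * Would taking one CPU offline overflow the root domain's DEADLINE bandwidth?
 * Illustrative only: the leaving CPU's capacity is discounted up front ("do the
 * math as if the CPU were already off"), and so is its dl_server bandwidth,
 * since NORMAL tasks migrate away together with the CPU.
 */
static bool deactivate_would_overflow(uint64_t max_bw,		/* per-CPU limit, e.g. 95% */
				      uint64_t total_bw,	/* DL bw allocated in the domain */
				      uint64_t fair_server_bw,	/* dl_server bw of the leaving CPU */
				      unsigned long rd_capacity,/* domain capacity incl. leaving CPU */
				      unsigned long cpu_capacity,
				      int cpus_in_domain)	/* CPU count incl. leaving CPU */
{
	unsigned long cap = rd_capacity - cpu_capacity;

	if (total_bw <= fair_server_bw)
		return false;			/* no DEADLINE bandwidth stays behind */
	if (cpus_in_domain - 1 == 0)
		return true;			/* keep at least one CPU for DEADLINE tasks */

	return cap_scale(max_bw, cap) < total_bw - fair_server_bw;
}

int main(void)
{
	uint64_t max_bw = to_ratio(1000000, 950000);	/* default 95% RT/DL limit per CPU */
	uint64_t task_bw = to_ratio(1000000, 500000);	/* one 50% DEADLINE task */
	uint64_t fair_server_bw = to_ratio(1000000, 50000); /* 5% dl_server on the leaving CPU */

	/* 4 CPUs of capacity 1024, three 50% tasks admitted, one CPU goes offline */
	bool overflow = deactivate_would_overflow(max_bw, 3 * task_bw, fair_server_bw,
						  4 * SCHED_CAPACITY_SCALE,
						  SCHED_CAPACITY_SCALE, 4);

	printf("offlining one CPU would %s\n",
	       overflow ? "overflow (hotplug refused with -EBUSY)" : "still fit");
	return 0;
}

With the numbers above, the three remaining CPUs allow 0.95 * 3 = 2.85 CPUs worth of bandwidth, while the bandwidth that stays behind is 3 * 0.5 - 0.05 = 1.45, so the hotplug operation is admitted; shrink the domain to two CPUs or add more DEADLINE tasks and the check flips to the -EBUSY case that sched_cpu_deactivate() now reports before tearing anything down.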