From: Greg Kroah-Hartman
Date: Thu, 14 Aug 2025 15:18:02 +0000 (+0200)
Subject: add a sched patch to the "next" queue
X-Git-Tag: v6.1.148~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2e410b5c2e99ce4c42972661ea8f1451e35148b7;p=thirdparty%2Fkernel%2Fstable-queue.git

add a sched patch to the "next" queue
---

diff --git a/next/6.15/sched-deadline-less-agressive-dl_server-handling.patch b/next/6.15/sched-deadline-less-agressive-dl_server-handling.patch
new file mode 100644
index 0000000000..0441721fab
--- /dev/null
+++ b/next/6.15/sched-deadline-less-agressive-dl_server-handling.patch
@@ -0,0 +1,163 @@
+From c0a825f92d92fdccff156fd030efa068df138577 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 20 May 2025 11:19:30 +0200
+Subject: sched/deadline: Less agressive dl_server handling
+
+From: Peter Zijlstra
+
+[ Upstream commit cccb45d7c4295bbfeba616582d0249f2d21e6df5 ]
+
+Chris reported that commit 5f6bd380c7bd ("sched/rt: Remove default
+bandwidth control") caused a significant dip in his favourite
+benchmark of the day. Simply disabling dl_server cured things.
+
+His workload hammers the 0->1, 1->0 transitions, and the
+dl_server_{start,stop}() overhead kills it -- fairly obviously a bad
+idea in hind sight and all that.
+
+Change things around to only disable the dl_server when there has not
+been a fair task around for a whole period. Since the default period
+is 1 second, this ensures the benchmark never trips this, overhead
+gone.
+
+Fixes: 557a6bfc662c ("sched/fair: Add trivial fair server")
+Reported-by: Chris Mason
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Juri Lelli
+Acked-by: Juri Lelli
+Link: https://lkml.kernel.org/r/20250702121158.465086194@infradead.org
+Signed-off-by: Sasha Levin
+---
+ include/linux/sched.h | 1 +
+ kernel/sched/deadline.c | 25 ++++++++++++++++++++++---
+ kernel/sched/fair.c | 9 ---------
+ 3 files changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index f96ac1982893..1f92572b20c0 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -702,6 +702,7 @@ struct sched_dl_entity {
+ unsigned int dl_defer : 1;
+ unsigned int dl_defer_armed : 1;
+ unsigned int dl_defer_running : 1;
++ unsigned int dl_server_idle : 1;
+
+ /*
+ * Bandwidth enforcement timer. Each -deadline task has its
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 89019a140826..094134c9b135 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1215,6 +1215,8 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
+ /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
+ static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
+
++static bool dl_server_stopped(struct sched_dl_entity *dl_se);
++
+ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
+ {
+ struct rq *rq = rq_of_dl_se(dl_se);
+@@ -1234,6 +1236,7 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
+
+ if (!dl_se->server_has_tasks(dl_se)) {
+ replenish_dl_entity(dl_se);
++ dl_server_stopped(dl_se);
+ return HRTIMER_NORESTART;
+ }
+
+@@ -1639,8 +1642,10 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
+ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
+ {
+ /* 0 runtime = fair server disabled */
+- if (dl_se->dl_runtime)
++ if (dl_se->dl_runtime) {
++ dl_se->dl_server_idle = 0;
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
++ }
+ }
+
+ void dl_server_start(struct sched_dl_entity *dl_se)
+@@ -1663,7 +1668,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
+ setup_new_dl_entity(dl_se);
+ }
+
+- if (!dl_se->dl_runtime)
++ if (!dl_se->dl_runtime || dl_se->dl_server_active)
+ return;
+
+ dl_se->dl_server_active = 1;
+@@ -1684,6 +1689,20 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+ dl_se->dl_server_active = 0;
+ }
+
++static bool dl_server_stopped(struct sched_dl_entity *dl_se)
++{
++ if (!dl_se->dl_server_active)
++ return false;
++
++ if (dl_se->dl_server_idle) {
++ dl_server_stop(dl_se);
++ return true;
++ }
++
++ dl_se->dl_server_idle = 1;
++ return false;
++}
++
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+ dl_server_has_tasks_f has_tasks,
+ dl_server_pick_f pick_task)
+@@ -2435,7 +2454,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
+ if (dl_server(dl_se)) {
+ p = dl_se->server_pick_task(dl_se);
+ if (!p) {
+- if (dl_server_active(dl_se)) {
++ if (!dl_server_stopped(dl_se)) {
+ dl_se->dl_yielded = 1;
+ update_curr_dl_se(rq, dl_se, 0);
+ }
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 138d9f4658d5..9746eff2eff7 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5886,7 +5886,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ struct sched_entity *se;
+ long queued_delta, runnable_delta, idle_delta, dequeue = 1;
+- long rq_h_nr_queued = rq->cfs.h_nr_queued;
+
+ raw_spin_lock(&cfs_b->lock);
+ /* This will start the period timer if necessary */
+@@ -5970,10 +5969,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ /* At this point se is NULL and we are at root level*/
+ sub_nr_running(rq, queued_delta);
+-
+- /* Stop the fair server if throttling resulted in no runnable tasks */
+- if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+- dl_server_stop(&rq->fair_server);
+ done:
+ /*
+ * Note: distribution will already see us throttled via the
+@@ -7067,7 +7062,6 @@ static void set_next_buddy(struct sched_entity *se);
+ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ {
+ bool was_sched_idle = sched_idle_rq(rq);
+- int rq_h_nr_queued = rq->cfs.h_nr_queued;
+ bool task_sleep = flags & DEQUEUE_SLEEP;
+ bool task_delayed = flags & DEQUEUE_DELAYED;
+ struct task_struct *p = NULL;
+@@ -7151,9 +7145,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+
+ sub_nr_running(rq, h_nr_queued);
+
+- if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+- dl_server_stop(&rq->fair_server);
+-
+ /* balance early to pull high priority tasks */
+ if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+ rq->next_balance = jiffies;
+-- 
+2.39.5
+
diff --git a/next/6.16/sched-deadline-less-agressive-dl_server-handling.patch b/next/6.16/sched-deadline-less-agressive-dl_server-handling.patch
new file mode 100644
index 0000000000..4d6e3871b8
--- /dev/null
+++ b/next/6.16/sched-deadline-less-agressive-dl_server-handling.patch
@@ -0,0 +1,163 @@
+From d92f16b1d33db49aad293ba7bb82c53571cc1a2e Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 20 May 2025 11:19:30 +0200
+Subject: sched/deadline: Less agressive dl_server handling
+
+From: Peter Zijlstra
+
+[ Upstream commit cccb45d7c4295bbfeba616582d0249f2d21e6df5 ]
+
+Chris reported that commit 5f6bd380c7bd ("sched/rt: Remove default
+bandwidth control") caused a significant dip in his favourite
+benchmark of the day. Simply disabling dl_server cured things.
+
+His workload hammers the 0->1, 1->0 transitions, and the
+dl_server_{start,stop}() overhead kills it -- fairly obviously a bad
+idea in hind sight and all that.
+
+Change things around to only disable the dl_server when there has not
+been a fair task around for a whole period. Since the default period
+is 1 second, this ensures the benchmark never trips this, overhead
+gone.
+
+Fixes: 557a6bfc662c ("sched/fair: Add trivial fair server")
+Reported-by: Chris Mason
+Signed-off-by: Peter Zijlstra (Intel)
+Reviewed-by: Juri Lelli
+Acked-by: Juri Lelli
+Link: https://lkml.kernel.org/r/20250702121158.465086194@infradead.org
+Signed-off-by: Sasha Levin
+---
+ include/linux/sched.h | 1 +
+ kernel/sched/deadline.c | 25 ++++++++++++++++++++++---
+ kernel/sched/fair.c | 9 ---------
+ 3 files changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index aa9c5be7a632..ae75562cca59 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -701,6 +701,7 @@ struct sched_dl_entity {
+ unsigned int dl_defer : 1;
+ unsigned int dl_defer_armed : 1;
+ unsigned int dl_defer_running : 1;
++ unsigned int dl_server_idle : 1;
+
+ /*
+ * Bandwidth enforcement timer. Each -deadline task has its
+diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
+index 89019a140826..094134c9b135 100644
+--- a/kernel/sched/deadline.c
++++ b/kernel/sched/deadline.c
+@@ -1215,6 +1215,8 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
+ /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
+ static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
+
++static bool dl_server_stopped(struct sched_dl_entity *dl_se);
++
+ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
+ {
+ struct rq *rq = rq_of_dl_se(dl_se);
+@@ -1234,6 +1236,7 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
+
+ if (!dl_se->server_has_tasks(dl_se)) {
+ replenish_dl_entity(dl_se);
++ dl_server_stopped(dl_se);
+ return HRTIMER_NORESTART;
+ }
+
+@@ -1639,8 +1642,10 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
+ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
+ {
+ /* 0 runtime = fair server disabled */
+- if (dl_se->dl_runtime)
++ if (dl_se->dl_runtime) {
++ dl_se->dl_server_idle = 0;
+ update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
++ }
+ }
+
+ void dl_server_start(struct sched_dl_entity *dl_se)
+@@ -1663,7 +1668,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
+ setup_new_dl_entity(dl_se);
+ }
+
+- if (!dl_se->dl_runtime)
++ if (!dl_se->dl_runtime || dl_se->dl_server_active)
+ return;
+
+ dl_se->dl_server_active = 1;
+@@ -1684,6 +1689,20 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
+ dl_se->dl_server_active = 0;
+ }
+
++static bool dl_server_stopped(struct sched_dl_entity *dl_se)
++{
++ if (!dl_se->dl_server_active)
++ return false;
++
++ if (dl_se->dl_server_idle) {
++ dl_server_stop(dl_se);
++ return true;
++ }
++
++ dl_se->dl_server_idle = 1;
++ return false;
++}
++
+ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
+ dl_server_has_tasks_f has_tasks,
+ dl_server_pick_f pick_task)
+@@ -2435,7 +2454,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
+ if (dl_server(dl_se)) {
+ p = dl_se->server_pick_task(dl_se);
+ if (!p) {
+- if (dl_server_active(dl_se)) {
++ if (!dl_server_stopped(dl_se)) {
+ dl_se->dl_yielded = 1;
+ update_curr_dl_se(rq, dl_se, 0);
+ }
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 7a14da5396fb..3ab8d4765edd 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5889,7 +5889,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+ struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+ struct sched_entity *se;
+ long queued_delta, runnable_delta, idle_delta, dequeue = 1;
+- long rq_h_nr_queued = rq->cfs.h_nr_queued;
+
+ raw_spin_lock(&cfs_b->lock);
+ /* This will start the period timer if necessary */
+@@ -5973,10 +5972,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
+
+ /* At this point se is NULL and we are at root level*/
+ sub_nr_running(rq, queued_delta);
+-
+- /* Stop the fair server if throttling resulted in no runnable tasks */
+- if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+- dl_server_stop(&rq->fair_server);
+ done:
+ /*
+ * Note: distribution will already see us throttled via the
+@@ -7070,7 +7065,6 @@ static void set_next_buddy(struct sched_entity *se);
+ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+ {
+ bool was_sched_idle = sched_idle_rq(rq);
+- int rq_h_nr_queued = rq->cfs.h_nr_queued;
+ bool task_sleep = flags & DEQUEUE_SLEEP;
+ bool task_delayed = flags & DEQUEUE_DELAYED;
+ struct task_struct *p = NULL;
+@@ -7154,9 +7148,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
+
+ sub_nr_running(rq, h_nr_queued);
+
+- if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+- dl_server_stop(&rq->fair_server);
+-
+ /* balance early to pull high priority tasks */
+ if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+ rq->next_balance = jiffies;
+-- 
+2.39.5
+
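The substance of both backports is the same small state machine: rather than calling dl_server_stop() on every transition to zero queued fair tasks, the fair server now carries a dl_server_idle bit that is cleared whenever fair runtime is consumed and set on an empty pick, so the server is torn down only after two consecutive empty picks with no fair activity in between (roughly a whole period at the defaults). Below is a minimal, self-contained sketch of that pattern in plain C; the type and function names (server_t, server_stopped(), server_saw_work()) are invented for illustration and are not the kernel API.

/*
 * Standalone sketch (not kernel code) of the idle-tracking scheme the patch
 * introduces: stop the server only after two consecutive empty picks with no
 * fair-task activity in between, instead of on every 1->0 transition.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
	bool active;	/* plays the role of dl_server_active */
	bool idle;	/* plays the role of the new dl_server_idle bit */
} server_t;

static void server_start(server_t *s)
{
	if (s->active)		/* the patch also makes start idempotent */
		return;
	s->active = true;
	s->idle = false;
}

static void server_stop(server_t *s)
{
	s->active = false;
}

/* Called whenever a fair task actually consumes runtime (cf. dl_server_update()). */
static void server_saw_work(server_t *s)
{
	s->idle = false;
}

/*
 * Called when the server finds nothing to run (cf. dl_server_stopped()):
 * the first miss only marks the server idle; a second miss with no work
 * seen in between actually stops it.
 */
static bool server_stopped(server_t *s)
{
	if (!s->active)
		return false;

	if (s->idle) {
		server_stop(s);
		return true;
	}

	s->idle = true;
	return false;
}

int main(void)
{
	server_t s = { 0 };

	server_start(&s);
	printf("empty pick 1 -> stopped: %d\n", server_stopped(&s));	/* 0: marked idle only */
	server_saw_work(&s);						/* fair task ran, idle bit cleared */
	printf("empty pick 2 -> stopped: %d\n", server_stopped(&s));	/* 0: idle again, still running */
	printf("empty pick 3 -> stopped: %d\n", server_stopped(&s));	/* 1: two misses in a row, stop */
	return 0;
}

Under this scheme the 0->1/1->0 churn in the reported benchmark never reaches the stop path, because a fair task always shows up again before a second consecutive empty pick.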