sched/deadline: Less aggressive dl_server handling
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 20 May 2025 09:19:30 +0000 (11:19 +0200)
Commit:     Peter Zijlstra <peterz@infradead.org>
CommitDate: Wed, 9 Jul 2025 11:40:21 +0000 (13:40 +0200)
Chris reported that commit 5f6bd380c7bd ("sched/rt: Remove default
bandwidth control") caused a significant dip in his favourite
benchmark of the day. Simply disabling dl_server cured things.

His workload hammers the 0->1, 1->0 transitions, and the
dl_server_{start,stop}() overhead kills it -- fairly obviously a bad
idea in hindsight and all that.

Change things around to only disable the dl_server when there has not
been a fair task around for a whole period. Since the default period
is 1 second, the benchmark never trips this and the overhead is gone.

Fixes: 557a6bfc662c ("sched/fair: Add trivial fair server")
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Acked-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://lkml.kernel.org/r/20250702121158.465086194@infradead.org
include/linux/sched.h
kernel/sched/deadline.c
kernel/sched/fair.c
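
The core of the change is the two-step idle detection added to deadline.c
below: any fair-task activity clears dl_server_idle, and the server is only
stopped once a check finds the flag already set, i.e. after a full period
with no fair task. A minimal standalone C sketch of that pattern
(illustrative names only, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the fair server's idle tracking; not kernel code. */
struct dl_server {
	bool active;	/* server currently armed */
	bool idle;	/* no fair-task activity since the last check */
};

/* Any fair-task runtime clears the idle flag (cf. dl_server_update()). */
static void server_saw_activity(struct dl_server *s)
{
	s->idle = false;
}

/*
 * Periodic check (cf. dl_server_stopped()): the first idle period only
 * marks the server idle; a second consecutive idle period stops it.
 * Returns true if the server was stopped.
 */
static bool server_check_idle(struct dl_server *s)
{
	if (!s->active)
		return false;

	if (s->idle) {
		s->active = false;	/* idle for a whole period: stop */
		return true;
	}

	s->idle = true;			/* first strike: keep running */
	return false;
}

int main(void)
{
	struct dl_server s = { .active = true, .idle = false };

	server_check_idle(&s);		/* no activity yet: marked idle */
	server_saw_activity(&s);	/* fair task ran: flag cleared  */
	printf("after activity: active=%d\n", s.active);

	server_check_idle(&s);		/* idle again: marked           */
	server_check_idle(&s);		/* still idle: server stopped   */
	printf("after two idle checks: active=%d\n", s.active);
	return 0;
}

The point of the two-step scheme is that the 0->1/1->0 transitions the
workload hammers no longer go through dl_server_{start,stop}(); the stop
only happens after a whole period of genuine idleness.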

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eec6b225e9d140ba2607ef14961fbd9c60156852..4802fcf738cded1a10eb3693f4d252772ef44d99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -698,6 +698,7 @@ struct sched_dl_entity {
        unsigned int                    dl_defer          : 1;
        unsigned int                    dl_defer_armed    : 1;
        unsigned int                    dl_defer_running  : 1;
+       unsigned int                    dl_server_idle    : 1;
 
        /*
         * Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 0f30697ad7956d5a8976dd5e660a0ca74bf6581c..23668fc60bd34bf8d0f1fedb425cbc12eab6ed63 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1150,6 +1150,8 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
 /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
 static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
 
+static bool dl_server_stopped(struct sched_dl_entity *dl_se);
+
 static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
 {
        struct rq *rq = rq_of_dl_se(dl_se);
@@ -1169,6 +1171,7 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
 
                if (!dl_se->server_has_tasks(dl_se)) {
                        replenish_dl_entity(dl_se);
+                       dl_server_stopped(dl_se);
                        return HRTIMER_NORESTART;
                }
 
@@ -1572,8 +1575,10 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 {
        /* 0 runtime = fair server disabled */
-       if (dl_se->dl_runtime)
+       if (dl_se->dl_runtime) {
+               dl_se->dl_server_idle = 0;
                update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
+       }
 }
 
 void dl_server_start(struct sched_dl_entity *dl_se)
@@ -1596,7 +1601,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
                setup_new_dl_entity(dl_se);
        }
 
-       if (!dl_se->dl_runtime)
+       if (!dl_se->dl_runtime || dl_se->dl_server_active)
                return;
 
        dl_se->dl_server_active = 1;
@@ -1617,6 +1622,20 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
        dl_se->dl_server_active = 0;
 }
 
+static bool dl_server_stopped(struct sched_dl_entity *dl_se)
+{
+       if (!dl_se->dl_server_active)
+               return false;
+
+       if (dl_se->dl_server_idle) {
+               dl_server_stop(dl_se);
+               return true;
+       }
+
+       dl_se->dl_server_idle = 1;
+       return false;
+}
+
 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
                    dl_server_has_tasks_f has_tasks,
                    dl_server_pick_f pick_task)
@@ -2354,7 +2373,7 @@ again:
        if (dl_server(dl_se)) {
                p = dl_se->server_pick_task(dl_se);
                if (!p) {
-                       if (dl_server_active(dl_se)) {
+                       if (!dl_server_stopped(dl_se)) {
                                dl_se->dl_yielded = 1;
                                update_curr_dl_se(rq, dl_se, 0);
                        }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ab0822cc51c28891dc7f9147d88f160788d3a6d0..a1350c513a87db785aeee271d2f38bba528dfc8e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5802,7 +5802,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
        struct sched_entity *se;
        long queued_delta, runnable_delta, idle_delta, dequeue = 1;
-       long rq_h_nr_queued = rq->cfs.h_nr_queued;
 
        raw_spin_lock(&cfs_b->lock);
        /* This will start the period timer if necessary */
@@ -5886,10 +5885,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
        /* At this point se is NULL and we are at root level*/
        sub_nr_running(rq, queued_delta);
-
-       /* Stop the fair server if throttling resulted in no runnable tasks */
-       if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
-               dl_server_stop(&rq->fair_server);
 done:
        /*
         * Note: distribution will already see us throttled via the
@@ -6966,7 +6961,6 @@ static void set_next_buddy(struct sched_entity *se);
 static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 {
        bool was_sched_idle = sched_idle_rq(rq);
-       int rq_h_nr_queued = rq->cfs.h_nr_queued;
        bool task_sleep = flags & DEQUEUE_SLEEP;
        bool task_delayed = flags & DEQUEUE_DELAYED;
        struct task_struct *p = NULL;
@@ -7050,9 +7044,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 
        sub_nr_running(rq, h_nr_queued);
 
-       if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
-               dl_server_stop(&rq->fair_server);
-
        /* balance early to pull high priority tasks */
        if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
                rq->next_balance = jiffies;