sched: Rework dl_server
author    Peter Zijlstra <peterz@infradead.org>    Tue, 13 Aug 2024 22:25:55 +0000 (00:25 +0200)
committer Peter Zijlstra <peterz@infradead.org>    Tue, 3 Sep 2024 13:26:32 +0000 (15:26 +0200)
When a task is selected through a dl_server, it will have p->dl_server
set, such that it can account runtime to the dl_server, see
update_curr_task().
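
For reference, the accounting hook this refers to looks roughly like
so at this point in the series (paraphrased from the fair server
patches, not part of this patch):

    static void update_curr_task(struct task_struct *p, s64 delta_exec)
    {
        trace_sched_stat_runtime(p, delta_exec);
        account_group_exec_runtime(p, delta_exec);
        cgroup_account_cputime(p, delta_exec);
        if (p->dl_server)
            dl_server_update(p->dl_server, delta_exec);
    }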

Currently p->dl_server is set in pick_*task() whenever the pick goes
through the dl_server; clearing it again, however, is a bit of a mess.
The trivial solution is clearing it on the final put (now that we have
this location).

However, this creates a problem when:

    p = pick_task(rq);
    if (p)
        put_prev_set_next_task(rq, prev, next);

picks the same task but through a different path, notably when it goes
from picking through the dl_server to a direct pick or vice versa. In
that case we cannot readily determine whether we should clear or
preserve p->dl_server.
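
Concretely (a hypothetical two-pass trace, simplified):

    pick 1: p picked via the dl_server -> p->dl_server = fair server
    pick 2: the same p picked directly by CFS -> p->dl_server must
            become NULL, but with prev == next the put/set pair is
            short-circuited and the stale link survives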

An additional complication is pick_*task() setting p->dl_server for a
remote pick: the remote CPU might still need to update runtime before
it schedules the core_pick.
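
Under core scheduling, for example, the selection loop picks for all
siblings long before they actually switch (hypothetical annotation of
the selection step, cf. the core.c hunks below):

    /* core-wide selection, CPU i remote to the picking CPU: */
    rq_i->core_pick = pick_task(rq_i);
    /* old scheme: the pick has already set core_pick->dl_server,
     * yet rq_i->curr keeps running (and accounting runtime via its
     * own p->dl_server) until rq_i switches to core_pick. */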

Close all these holes and remove all the random clearing of
p->dl_server by:

 - having pick_*task() manage rq->dl_server

 - having the final put_prev_task() clear p->dl_server

 - having the first set_next_task() set p->dl_server = rq->dl_server

 - having the core_sched code save/restore rq->dl_server where
   appropriate (see the sketch after this list).
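
The resulting handoff can be modeled in isolation. A minimal userspace
sketch (hypothetical, with simplified stand-in types; only the helper
body mirrors the new __put_prev_set_next_dl_server() from the sched.h
hunk below) showing that the prev == next case is now handled
uniformly:

    #include <assert.h>
    #include <stddef.h>

    struct sched_dl_entity { int unused; };
    struct task_struct     { struct sched_dl_entity *dl_server; };
    struct rq              { struct sched_dl_entity *dl_server; };

    /* The final put clears prev's link; the first set_next consumes
     * the link the pick stored in rq->dl_server. */
    static void put_prev_set_next_dl_server(struct rq *rq,
                                            struct task_struct *prev,
                                            struct task_struct *next)
    {
        prev->dl_server = NULL;
        next->dl_server = rq->dl_server;
        rq->dl_server = NULL;
    }

    int main(void)
    {
        struct sched_dl_entity fair_server;
        struct task_struct p = { NULL };
        struct rq rq = { NULL };

        /* Pick 1: p goes through the dl_server; the pick stores the
         * link in rq->dl_server instead of in p directly. */
        rq.dl_server = &fair_server;
        put_prev_set_next_dl_server(&rq, &p, &p);
        assert(p.dl_server == &fair_server && rq.dl_server == NULL);

        /* Pick 2: the same task, picked directly; even with
         * prev == next the stale link is reliably replaced. */
        put_prev_set_next_dl_server(&rq, &p, &p);
        assert(p.dl_server == NULL);
        return 0;
    }

Because the helper runs before the next == prev early-return in
put_prev_set_next_task(), the pick paths no longer need to know
whether the previous pick went through a server.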

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240813224016.259853414@infradead.org
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/sched.h

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8a1cf93da20327e8a9120c2c4543369b605f06de..ffcd637dc8e42a875d4db7098a177c8ba76c8254 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3668,8 +3668,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
                rq->idle_stamp = 0;
        }
 #endif
-
-       p->dl_server = NULL;
 }
 
 /*
@@ -5859,14 +5857,6 @@ static void prev_balance(struct rq *rq, struct task_struct *prev,
                        break;
        }
 #endif
-
-       /*
-        * We've updated @prev and no longer need the server link, clear it.
-        * Must be done before ->pick_next_task() because that can (re)set
-        * ->dl_server.
-        */
-       if (prev->dl_server)
-               prev->dl_server = NULL;
 }
 
 /*
@@ -5878,6 +5868,8 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
        const struct sched_class *class;
        struct task_struct *p;
 
+       rq->dl_server = NULL;
+
        /*
         * Optimization: we know that if all tasks are in the fair class we can
         * call that function directly, but only if the @prev task wasn't of a
@@ -5897,20 +5889,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                        put_prev_set_next_task(rq, prev, p);
                }
 
-               /*
-                * This is a normal CFS pick, but the previous could be a DL pick.
-                * Clear it as previous is no longer picked.
-                */
-               if (prev->dl_server)
-                       prev->dl_server = NULL;
-
-               /*
-                * This is the fast path; it cannot be a DL server pick;
-                * therefore even if @p == @prev, ->dl_server must be NULL.
-                */
-               if (p->dl_server)
-                       p->dl_server = NULL;
-
                return p;
        }
 
@@ -5958,6 +5936,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
        const struct sched_class *class;
        struct task_struct *p;
 
+       rq->dl_server = NULL;
+
        for_each_class(class) {
                p = class->pick_task(rq);
                if (p)
@@ -5996,6 +5976,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                 * another cpu during offline.
                 */
                rq->core_pick = NULL;
+               rq->core_dl_server = NULL;
                return __pick_next_task(rq, prev, rf);
        }
 
@@ -6014,7 +5995,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);
 
                next = rq->core_pick;
+               rq->dl_server = rq->core_dl_server;
                rq->core_pick = NULL;
+               rq->core_dl_server = NULL;
                goto out_set_next;
        }
 
@@ -6059,6 +6042,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                next = pick_task(rq);
                if (!next->core_cookie) {
                        rq->core_pick = NULL;
+                       rq->core_dl_server = NULL;
                        /*
                         * For robustness, update the min_vruntime_fi for
                         * unconstrained picks as well.
@@ -6086,7 +6070,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                if (i != cpu && (rq_i != rq->core || !core_clock_updated))
                        update_rq_clock(rq_i);
 
-               p = rq_i->core_pick = pick_task(rq_i);
+               rq_i->core_pick = p = pick_task(rq_i);
+               rq_i->core_dl_server = rq_i->dl_server;
+
                if (!max || prio_less(max, p, fi_before))
                        max = p;
        }
@@ -6110,6 +6096,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                }
 
                rq_i->core_pick = p;
+               rq_i->core_dl_server = NULL;
 
                if (p == rq_i->idle) {
                        if (rq_i->nr_running) {
@@ -6170,6 +6157,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
                if (i == cpu) {
                        rq_i->core_pick = NULL;
+                       rq_i->core_dl_server = NULL;
                        continue;
                }
 
@@ -6178,6 +6166,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
                if (rq_i->curr == rq_i->core_pick) {
                        rq_i->core_pick = NULL;
+                       rq_i->core_dl_server = NULL;
                        continue;
                }
 
@@ -8401,6 +8390,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SCHED_CORE
                rq->core = rq;
                rq->core_pick = NULL;
+               rq->core_dl_server = NULL;
                rq->core_enabled = 0;
                rq->core_tree = RB_ROOT;
                rq->core_forceidle_count = 0;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a1547e1cd96e8bb85ea29d7acb3f1d8197477299..e83b684306276e6bec14290c21c9515a00c4c07b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2423,7 +2423,7 @@ again:
                        update_curr_dl_se(rq, dl_se, 0);
                        goto again;
                }
-               p->dl_server = dl_se;
+               rq->dl_server = dl_se;
        } else {
                p = dl_task_of(dl_se);
        }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c5b7873dcc301235f86e7ca2c986f87d1977300e..f673112170638bb61e470d4e9c52c2a39d294ae2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8749,14 +8749,6 @@ again:
                cfs_rq = group_cfs_rq(se);
        } while (cfs_rq);
 
-       /*
-        * This can be called from directly from CFS's ->pick_task() or indirectly
-        * from DL's ->pick_task when fair server is enabled. In the indirect case,
-        * DL will set ->dl_server just after this function is called, so its Ok to
-        * clear. In the direct case, we are picking directly so we must clear it.
-        */
-       task_of(se)->dl_server = NULL;
-
        return task_of(se);
 }
 
@@ -8780,6 +8772,8 @@ again:
        if (prev->sched_class != &fair_sched_class)
                goto simple;
 
+       __put_prev_set_next_dl_server(rq, prev, p);
+
        /*
         * Because of the set_next_buddy() in dequeue_task_fair() it is rather
         * likely that a next task is from the same cgroup as the current.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index aae35818cca44fc7b463fbd18e866ad95bc4f082..2a216c9153e92475fe502ce69af676a1167abe11 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1066,6 +1066,7 @@ struct rq {
        unsigned int            nr_uninterruptible;
 
        struct task_struct __rcu        *curr;
+       struct sched_dl_entity  *dl_server;
        struct task_struct      *idle;
        struct task_struct      *stop;
        unsigned long           next_balance;
@@ -1193,6 +1194,7 @@ struct rq {
        /* per rq */
        struct rq               *core;
        struct task_struct      *core_pick;
+       struct sched_dl_entity  *core_dl_server;
        unsigned int            core_enabled;
        unsigned int            core_sched_seq;
        struct rb_root          core_tree;
@@ -2370,12 +2372,24 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
        next->sched_class->set_next_task(rq, next, false);
 }
 
+static inline void
+__put_prev_set_next_dl_server(struct rq *rq,
+                             struct task_struct *prev,
+                             struct task_struct *next)
+{
+       prev->dl_server = NULL;
+       next->dl_server = rq->dl_server;
+       rq->dl_server = NULL;
+}
+
 static inline void put_prev_set_next_task(struct rq *rq,
                                          struct task_struct *prev,
                                          struct task_struct *next)
 {
        WARN_ON_ONCE(rq->curr != prev);
 
+       __put_prev_set_next_dl_server(rq, prev, next);
+
        if (next == prev)
                return;