git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched/fair: Call update_curr() before unthrottling the hierarchy
author: K Prateek Nayak <kprateek.nayak@amd.com>
Tue, 2 Jun 2026 05:25:29 +0000 (05:25 +0000)
committer: Peter Zijlstra <peterz@infradead.org>
Tue, 2 Jun 2026 10:26:12 +0000 (12:26 +0200)
Subsequent commits will allow update_curr() to throttle the hierarchy
when the runtime accounting exceeds allocated quota. Call update_curr()
before the unthrottle event, and in tg_unthrottle_up() to catch up on
any remaining runtime and stabilize the "runtime_remaining" and
"throttle_count" for that cfs_rq.

Calling update_curr() early ensures the cfs_rq is not throttled right
back again while the unthrottle is still in progress.

Since all callers of unthrottle_cfs_rq(), except two, already update the
rq_clock and call rq_clock_start_loop_update(), move the
update_rq_clock() from unthrottle_cfs_rq() to the callers that don't
update the rq_clock.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Benjamin Segall <bsegall@google.com>
Tested-by: Aaron Lu <ziqianlu@bytedance.com>
Link: https://patch.msgid.link/20260602052531.11450-1-kprateek.nayak@amd.com
kernel/sched/core.c
kernel/sched/fair.c

index dd031410ab1ae34ebf25d250777760613dd7ca08..e745c58671ed500e7d130b5d80cbb8fb8b404851 100644 (file)
@@ -9859,11 +9859,14 @@ static int tg_set_cfs_bandwidth(struct task_group *tg,
                struct rq *rq = cfs_rq->rq;
 
                guard(rq_lock_irq)(rq);
+
                cfs_rq->runtime_enabled = runtime_enabled;
                cfs_rq->runtime_remaining = 1;
 
-               if (cfs_rq->throttled)
+               if (cfs_rq->throttled) {
+                       update_rq_clock(rq);
                        unthrottle_cfs_rq(cfs_rq);
+               }
        }
 
        if (runtime_was_enabled && !runtime_enabled)
index 26a8bbb9e1e23d93b77d1feb67476987c0441aa2..f91d85cd121b47970c53cd454d40e921a8c90418 100644 (file)
@@ -6740,6 +6740,15 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
        struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
        struct task_struct *p, *tmp;
 
+       /*
+        * If cfs_rq->curr is set, the cfs_rq might not have caught up
+        * since the last clock update. Do it now, before we begin
+        * queueing tasks onto it, to avoid needlessly unthrottling the
+        * hierarchy only for this cfs_rq to be throttled right back
+        * again.
+        */
+       update_curr(cfs_rq);
+
        if (--cfs_rq->throttle_count)
                return 0;
 
@@ -6882,14 +6891,16 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
         * We can't unthrottle this cfs_rq without any runtime remaining because
         * any enqueue in tg_unthrottle_up() will immediately trigger a throttle,
         * which is not supposed to happen on unthrottle path.
+        *
+        * Catch up on the remaining runtime since last clock update before
+        * checking runtime remaining.
         */
+       update_curr(cfs_rq);
        if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0)
                return;
 
        cfs_rq->throttled = 0;
 
-       update_rq_clock(rq);
-
        scoped_guard(raw_spinlock, &cfs_b->lock) {
                list_del_rcu(&cfs_rq->throttled_list);
 
@@ -6964,6 +6975,7 @@ static inline void __unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
        bool first;
 
        if (rq == this_rq()) {
+               update_rq_clock(rq);
                unthrottle_cfs_rq(cfs_rq);
                return;
        }
@@ -7017,6 +7029,11 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
                if (!list_empty(&cfs_rq->throttled_csd_list))
                        continue;
 
+               if (cfs_rq->curr) {
+                       update_rq_clock(rq);
+                       update_curr(cfs_rq);
+               }
+
                /* By the above checks, this should never be true */
                WARN_ON_ONCE(cfs_rq->runtime_remaining > 0);