From: K Prateek Nayak Date: Tue, 2 Jun 2026 05:25:29 +0000 (+0000) Subject: sched/fair: Call update_curr() before unthrottling the hierarchy X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=28ad5427682bccf06074366f347a6083d6730c1e;p=thirdparty%2Flinux.git sched/fair: Call update_curr() before unthrottling the hierarchy Subsequent commits will allow update_curr() to throttle the hierarchy when the runtime accounting exceeds allocated quota. Call update_curr() before the unthrottle event, and in tg_unthrottle_up() to catch up on any remaining runtime and stabilize the "runtime_remaining" and "throttle_count" for that cfs_rq. Doing an update_curr() early ensures the cfs_rq is not throttled right back up again when the unthrottle is in progress. Since all callers of unthrottle_cfs_rq(), except two, already update the rq_clock and call rq_clock_start_loop_update(), move the update_rq_clock() from unthrottle_cfs_rq() to the callers that don't update the rq_clock. Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Benjamin Segall Tested-by: Aaron Lu Link: https://patch.msgid.link/20260602052531.11450-1-kprateek.nayak@amd.com --- diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dd031410ab1ae..e745c58671ed5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9859,11 +9859,14 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, struct rq *rq = cfs_rq->rq; guard(rq_lock_irq)(rq); + cfs_rq->runtime_enabled = runtime_enabled; cfs_rq->runtime_remaining = 1; - if (cfs_rq->throttled) + if (cfs_rq->throttled) { + update_rq_clock(rq); unthrottle_cfs_rq(cfs_rq); + } } if (runtime_was_enabled && !runtime_enabled) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 26a8bbb9e1e23..f91d85cd121b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6740,6 +6740,15 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq)); struct 
task_struct *p, *tmp; + /* + * If cfs_rq->curr is set, the cfs_rq might not have caught up + * since the last clock update. Do it now, before we begin + * queueing tasks onto it, to avoid needlessly unthrottling + * the hierarchy only for this cfs_rq to be throttled right + * back again. + */ + update_curr(cfs_rq); + if (--cfs_rq->throttle_count) return 0; @@ -6882,14 +6891,16 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) * We can't unthrottle this cfs_rq without any runtime remaining because * any enqueue in tg_unthrottle_up() will immediately trigger a throttle, * which is not supposed to happen on unthrottle path. + * + * Catch up on the remaining runtime since last clock update before + * checking runtime remaining. */ + update_curr(cfs_rq); if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0) return; cfs_rq->throttled = 0; - update_rq_clock(rq); - scoped_guard(raw_spinlock, &cfs_b->lock) { list_del_rcu(&cfs_rq->throttled_list); @@ -6964,6 +6975,7 @@ static inline void __unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq) bool first; if (rq == this_rq()) { + update_rq_clock(rq); unthrottle_cfs_rq(cfs_rq); return; } @@ -7017,6 +7029,11 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) if (!list_empty(&cfs_rq->throttled_csd_list)) continue; + if (cfs_rq->curr) { + update_rq_clock(rq); + update_curr(cfs_rq); + } + /* By the above checks, this should never be true */ WARN_ON_ONCE(cfs_rq->runtime_remaining > 0);