From: K Prateek Nayak Date: Tue, 2 Jun 2026 05:25:30 +0000 (+0000) Subject: sched/fair: Move the throttled tasks to a local list in tg_unthrottle_up() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=102a28344a60e637934ffca62d50ff8319b11165;p=thirdparty%2Flinux.git sched/fair: Move the throttled tasks to a local list in tg_unthrottle_up() An update_curr() during the enqueue of throttled task will start throttling the hierarchy from subsequent commit. This can lead to tg_throttle_down() seeing non-empty throttled_limbo_list for the cfs_rq attaching the task from throttled_limbo_list one by one. For example: R | A / \ *B C | rq->curr *B is throttled with tasks on hte limbo list. When the tasks are unthrottled via tg_unthrottle_up() and entity of group B is placed onto A, update_curr() is called to catch up the vruntime and it may throttle group A causing the subsequent tg_throttle_down() to see the pending task's on B's limbo list. tg_unthrottle_up() /* --cfs_rq->throttle_count == 0 */ list_for_each_entry_safe(p, cfs_rq->throttled_limbo_list) enqueue_task_fair() enqueue_entity(se /* B->se */) update_curr(cfs_rq /* A->gcfs_rq */) account_cfs_rq_runtime(cfs_rq) throttle_cfs_rq(cfs_rq /* A->gcfs_rq */ ) tg_throttle_down() /* Reaches B->cfs_rq with throttle_count == 0 */ !!! !list_empty(&cfs_rq->throttled_limbo_list)) !!! Move the tasks from throttled_limbo_list onto a local list before starting the unthrottle to prevent the splat described above. If the hierarchy is throttled again in middle of an unthrottle, put the pending tasks back onto the limbo list to prevent running them unnecessarily. Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Benjamin Segall Tested-by: Aaron Lu Link: https://patch.msgid.link/20260602052531.11450-2-kprateek.nayak@amd.com --- diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f91d85cd121b4..3f3f09a021db8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6739,6 +6739,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) struct rq *rq = data; struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq)); struct task_struct *p, *tmp; + LIST_HEAD(throttled_tasks); /* * If cfs_rq->curr is set, the cfs_rq might not have caught up @@ -6769,13 +6770,31 @@ static int tg_unthrottle_up(struct task_group *tg, void *data) cfs_rq->throttled_clock_self_time += delta; } + /* + * Move the tasks to a local list since an update_curr() during + * enqueue_task_fair() can throttle a higher cfs_rq, and it can + * see the "throttled_limbo_list" being non-empty in + * tg_throttle_down() if throttle_count turned 0 above. + */ + list_splice_init(&cfs_rq->throttled_limbo_list, &throttled_tasks); + /* Re-enqueue the tasks that have been throttled at this level. */ - list_for_each_entry_safe(p, tmp, &cfs_rq->throttled_limbo_list, throttle_node) { + list_for_each_entry_safe(p, tmp, &throttled_tasks, throttle_node) { + /* + * Back to being throttled! Break out and put the remaining + * tasks back onto the limbo_list to prevent running them + * unnecessarily. + */ + if (cfs_rq->throttle_count) + break; + list_del_init(&p->throttle_node); p->throttled = false; - enqueue_task_fair(rq_of(cfs_rq), p, ENQUEUE_WAKEUP); + enqueue_task_fair(rq, p, ENQUEUE_WAKEUP); } + list_splice(&throttled_tasks, &cfs_rq->throttled_limbo_list); + /* Add cfs_rq with load or one or more already running entities to the list */ if (!cfs_rq_is_decayed(cfs_rq)) list_add_leaf_cfs_rq(cfs_rq);