git.ipfire.org Git - thirdparty/linux.git/commitdiff
sched/fair: Move the throttled tasks to a local list in tg_unthrottle_up()
author: K Prateek Nayak <kprateek.nayak@amd.com>
Tue, 2 Jun 2026 05:25:30 +0000 (05:25 +0000)
committer: Peter Zijlstra <peterz@infradead.org>
Tue, 2 Jun 2026 10:26:12 +0000 (12:26 +0200)
Starting with a subsequent commit, an update_curr() during the enqueue
of a throttled task can throttle the hierarchy. This can lead to
tg_throttle_down() seeing a non-empty throttled_limbo_list for the cfs_rq
that is attaching the tasks from its throttled_limbo_list one by one.
For example:

     R
     |
     A
    / \
  *B   C
       |
       rq->curr

*B is throttled with tasks on the limbo list. When the tasks are
unthrottled via tg_unthrottle_up() and the entity of group B is placed
onto A, update_curr() is called to catch up the vruntime and it may
throttle group A, causing the subsequent tg_throttle_down() to see the
pending tasks on B's limbo list.

  tg_unthrottle_up()
    /* --cfs_rq->throttle_count == 0 */
    list_for_each_entry_safe(p, cfs_rq->throttled_limbo_list)
      enqueue_task_fair()
        enqueue_entity(se /* B->se */)
          update_curr(cfs_rq /* A->gcfs_rq */)
            account_cfs_rq_runtime(cfs_rq)
              throttle_cfs_rq(cfs_rq /* A->gcfs_rq */ )
                tg_throttle_down()
                  /* Reaches B->cfs_rq with throttle_count == 0 */

                  !!! !list_empty(&cfs_rq->throttled_limbo_list) !!!

Move the tasks from throttled_limbo_list onto a local list before
starting the unthrottle to prevent the splat described above. If the
hierarchy is throttled again in the middle of an unthrottle, put the
pending tasks back onto the limbo list to prevent running them
unnecessarily.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Benjamin Segall <bsegall@google.com>
Tested-by: Aaron Lu <ziqianlu@bytedance.com>
Link: https://patch.msgid.link/20260602052531.11450-2-kprateek.nayak@amd.com
kernel/sched/fair.c

index f91d85cd121b47970c53cd454d40e921a8c90418..3f3f09a021db831d2316e1daf8a52dcce14cb1fc 100644 (file)
@@ -6739,6 +6739,7 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
        struct rq *rq = data;
        struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
        struct task_struct *p, *tmp;
+       LIST_HEAD(throttled_tasks);
 
        /*
         * If cfs_rq->curr is set, the cfs_rq might not have caught up
@@ -6769,13 +6770,31 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
                cfs_rq->throttled_clock_self_time += delta;
        }
 
+       /*
+        * Move the tasks to a local list since an update_curr() during
+        * enqueue_task_fair() can throttle a higher cfs_rq, and it can
+        * see the "throttled_limbo_list" being non-empty in
+        * tg_throttle_down() if throttle_count turned 0 above.
+        */
+       list_splice_init(&cfs_rq->throttled_limbo_list, &throttled_tasks);
+
        /* Re-enqueue the tasks that have been throttled at this level. */
-       list_for_each_entry_safe(p, tmp, &cfs_rq->throttled_limbo_list, throttle_node) {
+       list_for_each_entry_safe(p, tmp, &throttled_tasks, throttle_node) {
+               /*
+                * Back to being throttled! Break out and put the remaining
+                * tasks back onto the limbo_list to prevent running them
+                * unnecessarily.
+                */
+               if (cfs_rq->throttle_count)
+                       break;
+
                list_del_init(&p->throttle_node);
                p->throttled = false;
-               enqueue_task_fair(rq_of(cfs_rq), p, ENQUEUE_WAKEUP);
+               enqueue_task_fair(rq, p, ENQUEUE_WAKEUP);
        }
 
+       list_splice(&throttled_tasks, &cfs_rq->throttled_limbo_list);
+
        /* Add cfs_rq with load or one or more already running entities to the list */
        if (!cfs_rq_is_decayed(cfs_rq))
                list_add_leaf_cfs_rq(cfs_rq);