sched/fair: Block delayed tasks on throttled hierarchy during dequeue
Author:     K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate: Thu, 23 Oct 2025 04:03:59 +0000 (04:03 +0000)
Commit:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CommitDate: Wed, 29 Oct 2025 13:10:15 +0000 (14:10 +0100)
Dequeuing a fair task on a throttled hierarchy returns early upon
encountering a throttled cfs_rq, since the throttle path has already
dequeued the hierarchy above and adjusted the h_nr_* accounting up to
the root cfs_rq.

dequeue_entities() crucially misses calling __block_task() for delayed
tasks being dequeued on throttled hierarchies. This was mostly harmless
until commit b7ca5743a260 ("sched/core: Tweak wait_task_inactive() to
force dequeue sched_delayed tasks"), since all existing cases would
re-enqueue the task if task_on_rq_queued() returned true, and the task
would eventually be blocked at pick once the hierarchy was unthrottled.

wait_task_inactive() is special: it expects the delayed task on the
throttled hierarchy to reach the blocked state on dequeue, but since
__block_task() is never called, task_on_rq_queued() continues to return
true. Furthermore, since the task is now off the hierarchy, pick never
reaches it to fully block it even after unthrottle, leaving
wait_task_inactive() looping endlessly.
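
To illustrate the failure mode, here is a minimal user-space sketch
(assumptions: task_on_rq_queued() and the retry loop are simplified
stand-ins for their counterparts in kernel/sched/core.c, and
dequeue_on_throttled() is a hypothetical stub modelling the pre-fix
early return that skips __block_task()):

    #include <stdbool.h>
    #include <stdio.h>

    struct task { int on_rq; bool sched_delayed; };

    /* stand-in for the kernel's check: the task looks queued while on_rq == 1 */
    static bool task_on_rq_queued(struct task *p) { return p->on_rq == 1; }

    /* models the pre-fix dequeue on a throttled hierarchy: the early return
     * skips __block_task(), so p->on_rq is never cleared */
    static void dequeue_on_throttled(struct task *p) { (void)p; }

    int main(void)
    {
        struct task p = { .on_rq = 1, .sched_delayed = true };
        int checks = 0;

        dequeue_on_throttled(&p);

        /* wait_task_inactive()-style retry: the task never stops looking
         * queued, so only the artificial cap ends the loop */
        while (task_on_rq_queued(&p) && ++checks < 4)
            ;

        printf("still queued after %d checks: would spin forever\n", checks);
        return 0;
    }

The real wait loop has no such cap: it keeps retrying until the task is
seen as dequeued, which never happens in this scenario.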

Remedy this by calling __block_task() if a delayed task is being
dequeued on a throttled hierarchy.

This fix is only required for stable kernels implementing delay dequeue
(>= v6.12) before v6.18, since upstream commit e1fad12dcb66 ("sched/fair:
Switch to task based throttle model") indirectly fixes this by removing
the early return conditions in dequeue_entities() as part of the per-task
throttle feature.

Cc: stable@vger.kernel.org
Reported-by: Matt Fleming <matt@readmodwrite.com>
Closes: https://lore.kernel.org/all/20250925133310.1843863-1-matt@readmodwrite.com/
Fixes: b7ca5743a260 ("sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks")
Tested-by: Matt Fleming <mfleming@cloudflare.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8f0b1acace0ad09f1545e90d77a4e00cbc5bbcb7..4770d25ae24062c2857ce6fd2d3dd8c0c6463a2e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6969,6 +6969,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
        int h_nr_runnable = 0;
        struct cfs_rq *cfs_rq;
        u64 slice = 0;
+       int ret = 0;
 
        if (entity_is_task(se)) {
                p = task_of(se);
@@ -6998,7 +6999,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 
                /* end evaluation on encountering a throttled cfs_rq */
                if (cfs_rq_throttled(cfs_rq))
-                       return 0;
+                       goto out;
 
                /* Don't dequeue parent if it has other entities besides us */
                if (cfs_rq->load.weight) {
@@ -7039,7 +7040,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 
                /* end evaluation on encountering a throttled cfs_rq */
                if (cfs_rq_throttled(cfs_rq))
-                       return 0;
+                       goto out;
        }
 
        sub_nr_running(rq, h_nr_queued);
@@ -7048,6 +7049,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
        if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
                rq->next_balance = jiffies;
 
+       ret = 1;
+out:
        if (p && task_delayed) {
                WARN_ON_ONCE(!task_sleep);
                WARN_ON_ONCE(p->on_rq != 1);
@@ -7063,7 +7066,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
                __block_task(rq, p);
        }
 
-       return 1;
+       return ret;
 }
 
 /*