sched/fair: Implement throttle task work and related helpers
author     Valentin Schneider <vschneid@redhat.com>
           Fri, 29 Aug 2025 08:11:17 +0000 (16:11 +0800)
committer  Peter Zijlstra <peterz@infradead.org>
           Wed, 3 Sep 2025 08:03:13 +0000 (10:03 +0200)
Implement the throttle_cfs_rq_work() task work, which is executed on the
task's return-to-userspace (ret2user) path, where the task is dequeued and
marked as throttled.
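For background, a minimal user-space sketch of the deferred-work pattern this
relies on: a per-task callback node is queued at most once and drained at a
safe "return to user" point, with a self-pointing ->next sentinel meaning
"not queued" (the same convention sched_throttle_work uses below). All names
in the sketch (fake_task, queue_work_once, drain_pending_work) are made up
for illustration; they are not kernel APIs.

  /* Illustrative analogue only; not kernel code. */
  #include <stdbool.h>
  #include <stdio.h>

  struct callback_node {
  	struct callback_node *next;
  	void (*func)(struct callback_node *);
  };

  struct fake_task {
  	struct callback_node throttle_work;	/* analogue of sched_throttle_work */
  	struct callback_node *pending;		/* analogue of the task_work list  */
  };

  /* "Not queued" is encoded as the node pointing at itself. */
  static void init_work(struct callback_node *w, void (*fn)(struct callback_node *))
  {
  	w->func = fn;
  	w->next = w;
  }

  static bool work_is_queued(struct callback_node *w)
  {
  	return w->next != w;
  }

  /* Analogue of task_throttle_setup_work(): queue at most once. */
  static void queue_work_once(struct fake_task *t, struct callback_node *w)
  {
  	if (work_is_queued(w))
  		return;
  	w->next = t->pending;
  	t->pending = w;
  }

  /* Analogue of the ret2user hook: run each pending callback, resetting
   * its sentinel first, just as throttle_cfs_rq_work() does. */
  static void drain_pending_work(struct fake_task *t)
  {
  	while (t->pending) {
  		struct callback_node *w = t->pending;

  		t->pending = w->next;
  		w->next = w;		/* mark "not queued" before running */
  		w->func(w);
  	}
  }

  static void throttle_cb(struct callback_node *w)
  {
  	printf("throttle work runs at the safe point\n");
  }

  int main(void)
  {
  	struct fake_task t = { .pending = NULL };

  	init_work(&t.throttle_work, throttle_cb);
  	queue_work_once(&t, &t.throttle_work);
  	queue_work_once(&t, &t.throttle_work);	/* second add is a no-op */
  	drain_pending_work(&t);			/* "return to userspace"   */
  	return 0;
  }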

Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Aaron Lu <ziqianlu@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Matteo Martelli <matteo.martelli@codethink.co.uk>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://lore.kernel.org/r/20250829081120.806-3-ziqianlu@bytedance.com
kernel/sched/fair.c

index 8fff40fcbc425b5f2b4ca1ab57ddf3231c8249c5..dab4ed86d0c826de575287bbe3a247f873df0d92 100644
@@ -5748,8 +5748,51 @@ static inline int throttled_lb_pair(struct task_group *tg,
               throttled_hierarchy(dest_cfs_rq);
 }
 
+static inline bool task_is_throttled(struct task_struct *p)
+{
+       return cfs_bandwidth_used() && p->throttled;
+}
+
+static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags);
 static void throttle_cfs_rq_work(struct callback_head *work)
 {
+       struct task_struct *p = container_of(work, struct task_struct, sched_throttle_work);
+       struct sched_entity *se;
+       struct cfs_rq *cfs_rq;
+       struct rq *rq;
+
+       WARN_ON_ONCE(p != current);
+       p->sched_throttle_work.next = &p->sched_throttle_work;
+
+       /*
+        * If task is exiting, then there won't be a return to userspace, so we
+        * don't have to bother with any of this.
+        */
+       if ((p->flags & PF_EXITING))
+               return;
+
+       scoped_guard(task_rq_lock, p) {
+               se = &p->se;
+               cfs_rq = cfs_rq_of(se);
+
+               /* Raced, forget */
+               if (p->sched_class != &fair_sched_class)
+                       return;
+
+               /*
+                * If not in limbo, then either replenish has happened or this
+                * task got migrated out of the throttled cfs_rq, move along.
+                */
+               if (!cfs_rq->throttle_count)
+                       return;
+               rq = scope.rq;
+               update_rq_clock(rq);
+               WARN_ON_ONCE(p->throttled || !list_empty(&p->throttle_node));
+               dequeue_task_fair(rq, p, DEQUEUE_SLEEP | DEQUEUE_SPECIAL);
+               list_add(&p->throttle_node, &cfs_rq->throttled_limbo_list);
+               p->throttled = true;
+               resched_curr(rq);
+       }
 }
 
 void init_cfs_throttle_work(struct task_struct *p)
@@ -5789,6 +5832,26 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
        return 0;
 }
 
+static inline bool task_has_throttle_work(struct task_struct *p)
+{
+       return p->sched_throttle_work.next != &p->sched_throttle_work;
+}
+
+static inline void task_throttle_setup_work(struct task_struct *p)
+{
+       if (task_has_throttle_work(p))
+               return;
+
+       /*
+        * Kthreads and exiting tasks don't return to userspace, so adding the
+        * work is pointless
+        */
+       if ((p->flags & (PF_EXITING | PF_KTHREAD)))
+               return;
+
+       task_work_add(p, &p->sched_throttle_work, TWA_RESUME);
+}
+
 static int tg_throttle_down(struct task_group *tg, void *data)
 {
        struct rq *rq = data;
@@ -6652,6 +6715,8 @@ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static inline void sync_throttle(struct task_group *tg, int cpu) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static void task_throttle_setup_work(struct task_struct *p) {}
+static bool task_is_throttled(struct task_struct *p) { return false; }
 
 static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {