]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
sched/fair: Convert cfs bandwidth throttling to use guards
authorK Prateek Nayak <kprateek.nayak@amd.com>
Tue, 2 Jun 2026 05:00:01 +0000 (05:00 +0000)
committerPeter Zijlstra <peterz@infradead.org>
Tue, 2 Jun 2026 10:26:11 +0000 (12:26 +0200)
Routine conversion of rcu_read_lock(), spin_lock*, and rq_lock usage
within the cfs bandwidth controller to use class guards.

Only notable changes are:

 - Checking for "cfs_rq->runtime_remaining <= 0" instead of the inverse
   to spot a throttle and break early. This also saves the need
   for extra indentation in the unthrottle case.

 - Reordering of list_del_rcu() against throttled_clock indicator update
   in unthrottle_cfs_rq(). Both are done with "cfs_b->lock" held after
   the "cfs_rq->throttled" is cleared which make the reordering safe
   against concurrent list modifications.

No functional changes intended.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Tested-by: Aaron Lu <ziqianlu@bytedance.com>
Link: https://patch.msgid.link/20260602050005.11160-2-kprateek.nayak@amd.com
kernel/sched/fair.c

index 1d4ed883e630726f90151a215b279277488e1e4b..261e5cedc717c8081c3a6bc1643696e5354cf166 100644 (file)
@@ -5035,13 +5035,13 @@ static void __maybe_unused clear_tg_offline_cfs_rqs(struct rq *rq)
         */
        rq_clock_start_loop_update(rq);
 
-       rcu_read_lock();
+       guard(rcu)();
+
        list_for_each_entry_rcu(tg, &task_groups, list) {
                struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
 
                clear_tg_load_avg(cfs_rq);
        }
-       rcu_read_unlock();
 
        rq_clock_stop_loop_update(rq);
 }
@@ -6540,13 +6540,10 @@ static int __assign_cfs_rq_runtime(struct cfs_bandwidth *cfs_b,
 static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-       int ret;
 
-       raw_spin_lock(&cfs_b->lock);
-       ret = __assign_cfs_rq_runtime(cfs_b, cfs_rq, sched_cfs_bandwidth_slice());
-       raw_spin_unlock(&cfs_b->lock);
+       guard(raw_spinlock)(&cfs_b->lock);
 
-       return ret;
+       return __assign_cfs_rq_runtime(cfs_b, cfs_rq, sched_cfs_bandwidth_slice());
 }
 
 static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
@@ -6835,33 +6832,32 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
        struct rq *rq = rq_of(cfs_rq);
        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-       int dequeue = 1;
 
-       raw_spin_lock(&cfs_b->lock);
-       /* This will start the period timer if necessary */
-       if (__assign_cfs_rq_runtime(cfs_b, cfs_rq, 1)) {
+       scoped_guard(raw_spinlock, &cfs_b->lock) {
                /*
-                * We have raced with bandwidth becoming available, and if we
-                * actually throttled the timer might not unthrottle us for an
-                * entire period. We additionally needed to make sure that any
-                * subsequent check_cfs_rq_runtime calls agree not to throttle
-                * us, as we may commit to do cfs put_prev+pick_next, so we ask
-                * for 1ns of runtime rather than just check cfs_b.
+                * Check if We have raced with bandwidth becoming available. If
+                * we actually throttled the timer might not unthrottle us for
+                * an entire period. We additionally needed to make sure that
+                * any subsequent check_cfs_rq_runtime calls agree not to
+                * throttle us, as we may commit to do cfs put_prev+pick_next,
+                * so we ask for 1ns of runtime rather than just check cfs_b.
+                *
+                * This will start the period timer if necessary.
+                */
+               if (__assign_cfs_rq_runtime(cfs_b, cfs_rq, 1))
+                       return false;
+
+               /*
+                * No bandwidth available; Add ourselves on the list to be
+                * unthrottled later.
                 */
-               dequeue = 0;
-       } else {
                list_add_tail_rcu(&cfs_rq->throttled_list,
                                  &cfs_b->throttled_cfs_rq);
        }
-       raw_spin_unlock(&cfs_b->lock);
-
-       if (!dequeue)
-               return false;  /* Throttle no longer required. */
 
        /* freeze hierarchy runnable averages while throttled */
-       rcu_read_lock();
-       walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
-       rcu_read_unlock();
+       scoped_guard(rcu)
+               walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
 
        /*
         * Note: distribution will already see us throttled via the
@@ -6894,13 +6890,15 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
        update_rq_clock(rq);
 
-       raw_spin_lock(&cfs_b->lock);
-       if (cfs_rq->throttled_clock) {
+       scoped_guard(raw_spinlock, &cfs_b->lock) {
+               list_del_rcu(&cfs_rq->throttled_list);
+
+               if (!cfs_rq->throttled_clock)
+                       break;
+
                cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
                cfs_rq->throttled_clock = 0;
        }
-       list_del_rcu(&cfs_rq->throttled_list);
-       raw_spin_unlock(&cfs_b->lock);
 
        /* update hierarchical throttle state */
        walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
@@ -6929,9 +6927,8 @@ static void __cfsb_csd_unthrottle(void *arg)
 {
        struct cfs_rq *cursor, *tmp;
        struct rq *rq = arg;
-       struct rq_flags rf;
 
-       rq_lock(rq, &rf);
+       guard(rq_lock)(rq);
 
        /*
         * Iterating over the list can trigger several call to
@@ -6948,7 +6945,7 @@ static void __cfsb_csd_unthrottle(void *arg)
         * race with group being freed in the window between removing it
         * from the list and advancing to the next entry in the list.
         */
-       rcu_read_lock();
+       guard(rcu)();
 
        list_for_each_entry_safe(cursor, tmp, &rq->cfsb_csd_list,
                                 throttled_csd_list) {
@@ -6958,10 +6955,7 @@ static void __cfsb_csd_unthrottle(void *arg)
                        unthrottle_cfs_rq(cursor);
        }
 
-       rcu_read_unlock();
-
        rq_clock_stop_loop_update(rq);
-       rq_unlock(rq, &rf);
 }
 
 static inline void __unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
@@ -7001,11 +6995,11 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
        u64 runtime, remaining = 1;
        bool throttled = false;
        struct cfs_rq *cfs_rq, *tmp;
-       struct rq_flags rf;
        struct rq *rq;
        LIST_HEAD(local_unthrottle);
 
-       rcu_read_lock();
+       guard(rcu)();
+
        list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
                                throttled_list) {
                rq = rq_of(cfs_rq);
@@ -7015,65 +7009,63 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
                        break;
                }
 
-               rq_lock_irqsave(rq, &rf);
+               guard(rq_lock_irqsave)(rq);
+
                if (!cfs_rq_throttled(cfs_rq))
-                       goto next;
+                       continue;
 
                /* Already queued for async unthrottle */
                if (!list_empty(&cfs_rq->throttled_csd_list))
-                       goto next;
+                       continue;
 
                /* By the above checks, this should never be true */
                WARN_ON_ONCE(cfs_rq->runtime_remaining > 0);
 
-               raw_spin_lock(&cfs_b->lock);
-               runtime = -cfs_rq->runtime_remaining + 1;
-               if (runtime > cfs_b->runtime)
-                       runtime = cfs_b->runtime;
-               cfs_b->runtime -= runtime;
-               remaining = cfs_b->runtime;
-               raw_spin_unlock(&cfs_b->lock);
+               scoped_guard(raw_spinlock, &cfs_b->lock) {
+                       runtime = -cfs_rq->runtime_remaining + 1;
+                       if (runtime > cfs_b->runtime)
+                               runtime = cfs_b->runtime;
+                       cfs_b->runtime -= runtime;
+                       remaining = cfs_b->runtime;
+               }
 
                cfs_rq->runtime_remaining += runtime;
 
-               /* we check whether we're throttled above */
-               if (cfs_rq->runtime_remaining > 0) {
-                       if (cpu_of(rq) != this_cpu) {
-                               unthrottle_cfs_rq_async(cfs_rq);
-                       } else {
-                               /*
-                                * We currently only expect to be unthrottling
-                                * a single cfs_rq locally.
-                                */
-                               WARN_ON_ONCE(!list_empty(&local_unthrottle));
-                               list_add_tail(&cfs_rq->throttled_csd_list,
-                                             &local_unthrottle);
-                       }
-               } else {
+               /*
+                * Ran out of bandwidth during distribution!
+                * Indicate throttled entities and break early.
+                */
+               if (cfs_rq->runtime_remaining <= 0) {
                        throttled = true;
+                       break;
                }
 
-next:
-               rq_unlock_irqrestore(rq, &rf);
+               /* we check whether we're throttled above */
+               if (cpu_of(rq) != this_cpu) {
+                       unthrottle_cfs_rq_async(cfs_rq);
+                       continue;
+               }
+
+               /*
+                * We currently only expect to be unthrottling
+                * a single cfs_rq locally.
+                */
+               WARN_ON_ONCE(!list_empty(&local_unthrottle));
+               list_add_tail(&cfs_rq->throttled_csd_list, &local_unthrottle);
        }
 
        list_for_each_entry_safe(cfs_rq, tmp, &local_unthrottle,
                                 throttled_csd_list) {
                struct rq *rq = rq_of(cfs_rq);
 
-               rq_lock_irqsave(rq, &rf);
+               guard(rq_lock_irqsave)(rq);
 
                list_del_init(&cfs_rq->throttled_csd_list);
-
                if (cfs_rq_throttled(cfs_rq))
                        unthrottle_cfs_rq(cfs_rq);
-
-               rq_unlock_irqrestore(rq, &rf);
        }
        WARN_ON_ONCE(!list_empty(&local_unthrottle));
 
-       rcu_read_unlock();
-
        return throttled;
 }
 
@@ -7196,7 +7188,8 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
        if (slack_runtime <= 0)
                return;
 
-       raw_spin_lock(&cfs_b->lock);
+       guard(raw_spinlock)(&cfs_b->lock);
+
        if (cfs_b->quota != RUNTIME_INF) {
                cfs_b->runtime += slack_runtime;
 
@@ -7205,7 +7198,6 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
                    !list_empty(&cfs_b->throttled_cfs_rq))
                        start_cfs_slack_bandwidth(cfs_b);
        }
-       raw_spin_unlock(&cfs_b->lock);
 
        /* even if it's not valid for return we don't want to try again */
        cfs_rq->runtime_remaining -= slack_runtime;
@@ -7228,25 +7220,21 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  */
 static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 {
-       u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
-       unsigned long flags;
-
        /* confirm we're still not at a refresh boundary */
-       raw_spin_lock_irqsave(&cfs_b->lock, flags);
-       cfs_b->slack_started = false;
+       scoped_guard(raw_spinlock_irqsave, &cfs_b->lock) {
+               u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
 
-       if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
-               raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
-               return;
-       }
+               cfs_b->slack_started = false;
 
-       if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
-               runtime = cfs_b->runtime;
+               if (runtime_refresh_within(cfs_b, min_bandwidth_expiration))
+                       return;
 
-       raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
+               if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
+                       runtime = cfs_b->runtime;
 
-       if (!runtime)
-               return;
+               if (!runtime)
+                       return;
+       }
 
        distribute_cfs_runtime(cfs_b);
 }
@@ -7335,18 +7323,18 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 {
        struct cfs_bandwidth *cfs_b =
                container_of(timer, struct cfs_bandwidth, period_timer);
-       unsigned long flags;
        int overrun;
        int idle = 0;
        int count = 0;
 
-       raw_spin_lock_irqsave(&cfs_b->lock, flags);
+       CLASS(raw_spinlock_irqsave, cfsb_guard)(&cfs_b->lock);
+
        for (;;) {
                overrun = hrtimer_forward_now(timer, cfs_b->period);
                if (!overrun)
                        break;
 
-               idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
+               idle = do_sched_cfs_period_timer(cfs_b, overrun, cfsb_guard.flags);
 
                if (++count > 3) {
                        u64 new, old = ktime_to_ns(cfs_b->period);
@@ -7379,11 +7367,13 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
                        count = 0;
                }
        }
-       if (idle)
+
+       if (idle) {
                cfs_b->period_active = 0;
-       raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
+               return HRTIMER_NORESTART;
+       }
 
-       return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+       return HRTIMER_RESTART;
 }
 
 void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
@@ -7450,14 +7440,12 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
         */
        for_each_possible_cpu(i) {
                struct rq *rq = cpu_rq(i);
-               unsigned long flags;
 
                if (list_empty(&rq->cfsb_csd_list))
                        continue;
 
-               local_irq_save(flags);
-               __cfsb_csd_unthrottle(rq);
-               local_irq_restore(flags);
+               scoped_guard(irqsave)
+                       __cfsb_csd_unthrottle(rq);
        }
 }
 
@@ -7475,16 +7463,15 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
 
        lockdep_assert_rq_held(rq);
 
-       rcu_read_lock();
+       guard(rcu)();
+
        list_for_each_entry_rcu(tg, &task_groups, list) {
                struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
                struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
 
-               raw_spin_lock(&cfs_b->lock);
-               cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
-               raw_spin_unlock(&cfs_b->lock);
+               scoped_guard(raw_spinlock, &cfs_b->lock)
+                       cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
        }
-       rcu_read_unlock();
 }
 
 /* cpu offline callback */
@@ -7505,7 +7492,8 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
         */
        rq_clock_start_loop_update(rq);
 
-       rcu_read_lock();
+       guard(rcu)();
+
        list_for_each_entry_rcu(tg, &task_groups, list) {
                struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
 
@@ -7528,7 +7516,6 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
                cfs_rq->runtime_remaining = 1;
                unthrottle_cfs_rq(cfs_rq);
        }
-       rcu_read_unlock();
 
        rq_clock_stop_loop_update(rq);
 }