sched/fair: Convert cfs bandwidth throttling to use guards

author K Prateek Nayak <kprateek.nayak@amd.com>

Tue, 2 Jun 2026 05:00:01 +0000 (05:00 +0000)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 2 Jun 2026 10:26:11 +0000 (12:26 +0200)
author K Prateek Nayak <kprateek.nayak@amd.com>
Tue, 2 Jun 2026 05:00:01 +0000 (05:00 +0000)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 2 Jun 2026 10:26:11 +0000 (12:26 +0200)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 1d4ed883e630726f90151a215b279277488e1e4b..261e5cedc717c8081c3a6bc1643696e5354cf166 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5035,13 +5035,13 @@ static void __maybe_unused clear_tg_offline_cfs_rqs(struct rq *rq)
          */
         rq_clock_start_loop_update(rq);
  
-       rcu_read_lock();
+       guard(rcu)();
+
         list_for_each_entry_rcu(tg, &task_groups, list) {
                 struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
  
                 clear_tg_load_avg(cfs_rq);
         }
-       rcu_read_unlock();
  
         rq_clock_stop_loop_update(rq);
  }
@@ -6540,13 +6540,10 @@ static int __assign_cfs_rq_runtime(struct cfs_bandwidth *cfs_b,
  static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
  {
         struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-       int ret;
  
-       raw_spin_lock(&cfs_b->lock);
-       ret = __assign_cfs_rq_runtime(cfs_b, cfs_rq, sched_cfs_bandwidth_slice());
-       raw_spin_unlock(&cfs_b->lock);
+       guard(raw_spinlock)(&cfs_b->lock);
  
-       return ret;
+       return __assign_cfs_rq_runtime(cfs_b, cfs_rq, sched_cfs_bandwidth_slice());
  }
  
  static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
@@ -6835,33 +6832,32 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
  {
         struct rq *rq = rq_of(cfs_rq);
         struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-       int dequeue = 1;
  
-       raw_spin_lock(&cfs_b->lock);
-       /* This will start the period timer if necessary */
-       if (__assign_cfs_rq_runtime(cfs_b, cfs_rq, 1)) {
+       scoped_guard(raw_spinlock, &cfs_b->lock) {
                 /*
-                * We have raced with bandwidth becoming available, and if we
-                * actually throttled the timer might not unthrottle us for an
-                * entire period. We additionally needed to make sure that any
-                * subsequent check_cfs_rq_runtime calls agree not to throttle
-                * us, as we may commit to do cfs put_prev+pick_next, so we ask
-                * for 1ns of runtime rather than just check cfs_b.
+                * Check if We have raced with bandwidth becoming available. If
+                * we actually throttled the timer might not unthrottle us for
+                * an entire period. We additionally needed to make sure that
+                * any subsequent check_cfs_rq_runtime calls agree not to
+                * throttle us, as we may commit to do cfs put_prev+pick_next,
+                * so we ask for 1ns of runtime rather than just check cfs_b.
+                *
+                * This will start the period timer if necessary.
+                */
+               if (__assign_cfs_rq_runtime(cfs_b, cfs_rq, 1))
+                       return false;
+
+               /*
+                * No bandwidth available; Add ourselves on the list to be
+                * unthrottled later.
                  */
-               dequeue = 0;
-       } else {
                 list_add_tail_rcu(&cfs_rq->throttled_list,
                                   &cfs_b->throttled_cfs_rq);
         }
-       raw_spin_unlock(&cfs_b->lock);
-
-       if (!dequeue)
-               return false;  /* Throttle no longer required. */
  
         /* freeze hierarchy runnable averages while throttled */
-       rcu_read_lock();
-       walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
-       rcu_read_unlock();
+       scoped_guard(rcu)
+               walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
  
         /*
          * Note: distribution will already see us throttled via the
@@ -6894,13 +6890,15 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
  
         update_rq_clock(rq);
  
-       raw_spin_lock(&cfs_b->lock);
-       if (cfs_rq->throttled_clock) {
+       scoped_guard(raw_spinlock, &cfs_b->lock) {
+               list_del_rcu(&cfs_rq->throttled_list);
+
+               if (!cfs_rq->throttled_clock)
+                       break;
+
                 cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
                 cfs_rq->throttled_clock = 0;
         }
-       list_del_rcu(&cfs_rq->throttled_list);
-       raw_spin_unlock(&cfs_b->lock);
  
         /* update hierarchical throttle state */
         walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
@@ -6929,9 +6927,8 @@ static void __cfsb_csd_unthrottle(void *arg)
  {
         struct cfs_rq *cursor, *tmp;
         struct rq *rq = arg;
-       struct rq_flags rf;
  
-       rq_lock(rq, &rf);
+       guard(rq_lock)(rq);
  
         /*
          * Iterating over the list can trigger several call to
@@ -6948,7 +6945,7 @@ static void __cfsb_csd_unthrottle(void *arg)
          * race with group being freed in the window between removing it
          * from the list and advancing to the next entry in the list.
          */
-       rcu_read_lock();
+       guard(rcu)();
  
         list_for_each_entry_safe(cursor, tmp, &rq->cfsb_csd_list,
                                  throttled_csd_list) {
@@ -6958,10 +6955,7 @@ static void __cfsb_csd_unthrottle(void *arg)
                         unthrottle_cfs_rq(cursor);
         }
  
-       rcu_read_unlock();
-
         rq_clock_stop_loop_update(rq);
-       rq_unlock(rq, &rf);
  }
  
  static inline void __unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq)
@@ -7001,11 +6995,11 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
         u64 runtime, remaining = 1;
         bool throttled = false;
         struct cfs_rq *cfs_rq, *tmp;
-       struct rq_flags rf;
         struct rq *rq;
         LIST_HEAD(local_unthrottle);
  
-       rcu_read_lock();
+       guard(rcu)();
+
         list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
                                 throttled_list) {
                 rq = rq_of(cfs_rq);
@@ -7015,65 +7009,63 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
                         break;
                 }
  
-               rq_lock_irqsave(rq, &rf);
+               guard(rq_lock_irqsave)(rq);
+
                 if (!cfs_rq_throttled(cfs_rq))
-                       goto next;
+                       continue;
  
                 /* Already queued for async unthrottle */
                 if (!list_empty(&cfs_rq->throttled_csd_list))
-                       goto next;
+                       continue;
  
                 /* By the above checks, this should never be true */
                 WARN_ON_ONCE(cfs_rq->runtime_remaining > 0);
  
-               raw_spin_lock(&cfs_b->lock);
-               runtime = -cfs_rq->runtime_remaining + 1;
-               if (runtime > cfs_b->runtime)
-                       runtime = cfs_b->runtime;
-               cfs_b->runtime -= runtime;
-               remaining = cfs_b->runtime;
-               raw_spin_unlock(&cfs_b->lock);
+               scoped_guard(raw_spinlock, &cfs_b->lock) {
+                       runtime = -cfs_rq->runtime_remaining + 1;
+                       if (runtime > cfs_b->runtime)
+                               runtime = cfs_b->runtime;
+                       cfs_b->runtime -= runtime;
+                       remaining = cfs_b->runtime;
+               }
  
                 cfs_rq->runtime_remaining += runtime;
  
-               /* we check whether we're throttled above */
-               if (cfs_rq->runtime_remaining > 0) {
-                       if (cpu_of(rq) != this_cpu) {
-                               unthrottle_cfs_rq_async(cfs_rq);
-                       } else {
-                               /*
-                                * We currently only expect to be unthrottling
-                                * a single cfs_rq locally.
-                                */
-                               WARN_ON_ONCE(!list_empty(&local_unthrottle));
-                               list_add_tail(&cfs_rq->throttled_csd_list,
-                                             &local_unthrottle);
-                       }
-               } else {
+               /*
+                * Ran out of bandwidth during distribution!
+                * Indicate throttled entities and break early.
+                */
+               if (cfs_rq->runtime_remaining <= 0) {
                         throttled = true;
+                       break;
                 }
  
-next:
-               rq_unlock_irqrestore(rq, &rf);
+               /* we check whether we're throttled above */
+               if (cpu_of(rq) != this_cpu) {
+                       unthrottle_cfs_rq_async(cfs_rq);
+                       continue;
+               }
+
+               /*
+                * We currently only expect to be unthrottling
+                * a single cfs_rq locally.
+                */
+               WARN_ON_ONCE(!list_empty(&local_unthrottle));
+               list_add_tail(&cfs_rq->throttled_csd_list, &local_unthrottle);
         }
  
         list_for_each_entry_safe(cfs_rq, tmp, &local_unthrottle,
                                  throttled_csd_list) {
                 struct rq *rq = rq_of(cfs_rq);
  
-               rq_lock_irqsave(rq, &rf);
+               guard(rq_lock_irqsave)(rq);
  
                 list_del_init(&cfs_rq->throttled_csd_list);
-
                 if (cfs_rq_throttled(cfs_rq))
                         unthrottle_cfs_rq(cfs_rq);
-
-               rq_unlock_irqrestore(rq, &rf);
         }
         WARN_ON_ONCE(!list_empty(&local_unthrottle));
  
-       rcu_read_unlock();
-
         return throttled;
  }
  
@@ -7196,7 +7188,8 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
         if (slack_runtime <= 0)
                 return;
  
-       raw_spin_lock(&cfs_b->lock);
+       guard(raw_spinlock)(&cfs_b->lock);
+
         if (cfs_b->quota != RUNTIME_INF) {
                 cfs_b->runtime += slack_runtime;
  
@@ -7205,7 +7198,6 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
                     !list_empty(&cfs_b->throttled_cfs_rq))
                         start_cfs_slack_bandwidth(cfs_b);
         }
-       raw_spin_unlock(&cfs_b->lock);
  
         /* even if it's not valid for return we don't want to try again */
         cfs_rq->runtime_remaining -= slack_runtime;
@@ -7228,25 +7220,21 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
   */
  static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
  {
-       u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
-       unsigned long flags;
-
         /* confirm we're still not at a refresh boundary */
-       raw_spin_lock_irqsave(&cfs_b->lock, flags);
-       cfs_b->slack_started = false;
+       scoped_guard(raw_spinlock_irqsave, &cfs_b->lock) {
+               u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
  
-       if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
-               raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
-               return;
-       }
+               cfs_b->slack_started = false;
  
-       if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
-               runtime = cfs_b->runtime;
+               if (runtime_refresh_within(cfs_b, min_bandwidth_expiration))
+                       return;
  
-       raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
+               if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
+                       runtime = cfs_b->runtime;
  
-       if (!runtime)
-               return;
+               if (!runtime)
+                       return;
+       }
  
         distribute_cfs_runtime(cfs_b);
  }
@@ -7335,18 +7323,18 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
  {
         struct cfs_bandwidth *cfs_b =
                 container_of(timer, struct cfs_bandwidth, period_timer);
-       unsigned long flags;
         int overrun;
         int idle = 0;
         int count = 0;
  
-       raw_spin_lock_irqsave(&cfs_b->lock, flags);
+       CLASS(raw_spinlock_irqsave, cfsb_guard)(&cfs_b->lock);
+
         for (;;) {
                 overrun = hrtimer_forward_now(timer, cfs_b->period);
                 if (!overrun)
                         break;
  
-               idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
+               idle = do_sched_cfs_period_timer(cfs_b, overrun, cfsb_guard.flags);
  
                 if (++count > 3) {
                         u64 new, old = ktime_to_ns(cfs_b->period);
@@ -7379,11 +7367,13 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
                         count = 0;
                 }
         }
-       if (idle)
+
+       if (idle) {
                 cfs_b->period_active = 0;
-       raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
+               return HRTIMER_NORESTART;
+       }
  
-       return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+       return HRTIMER_RESTART;
  }
  
  void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
@@ -7450,14 +7440,12 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
          */
         for_each_possible_cpu(i) {
                 struct rq *rq = cpu_rq(i);
-               unsigned long flags;
  
                 if (list_empty(&rq->cfsb_csd_list))
                         continue;
  
-               local_irq_save(flags);
-               __cfsb_csd_unthrottle(rq);
-               local_irq_restore(flags);
+               scoped_guard(irqsave)
+                       __cfsb_csd_unthrottle(rq);
         }
  }
  
@@ -7475,16 +7463,15 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
  
         lockdep_assert_rq_held(rq);
  
-       rcu_read_lock();
+       guard(rcu)();
+
         list_for_each_entry_rcu(tg, &task_groups, list) {
                 struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
                 struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
  
-               raw_spin_lock(&cfs_b->lock);
-               cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
-               raw_spin_unlock(&cfs_b->lock);
+               scoped_guard(raw_spinlock, &cfs_b->lock)
+                       cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
         }
-       rcu_read_unlock();
  }
  
  /* cpu offline callback */
@@ -7505,7 +7492,8 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
          */
         rq_clock_start_loop_update(rq);
  
-       rcu_read_lock();
+       guard(rcu)();
+
         list_for_each_entry_rcu(tg, &task_groups, list) {
                 struct cfs_rq *cfs_rq = tg_cfs_rq(tg, cpu_of(rq));
  
@@ -7528,7 +7516,6 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
                 cfs_rq->runtime_remaining = 1;
                 unthrottle_cfs_rq(cfs_rq);
         }
-       rcu_read_unlock();
  
         rq_clock_stop_loop_update(rq);
  }
author	K Prateek Nayak <kprateek.nayak@amd.com>
	Tue, 2 Jun 2026 05:00:01 +0000 (05:00 +0000)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 2 Jun 2026 10:26:11 +0000 (12:26 +0200)