block/blk-iocost: Split ioc_rqos_throttle()

author Bart Van Assche <bvanassche@acm.org>

Fri, 5 Jun 2026 18:01:02 +0000 (11:01 -0700)

committer Jens Axboe <axboe@kernel.dk>

Fri, 5 Jun 2026 19:41:11 +0000 (13:41 -0600)
author Bart Van Assche <bvanassche@acm.org>
Fri, 5 Jun 2026 18:01:02 +0000 (11:01 -0700)
committer Jens Axboe <axboe@kernel.dk>
Fri, 5 Jun 2026 19:41:11 +0000 (13:41 -0600)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c

index 302388e995883f612ff83b4eafa145b544e5bb17..8f1468444e97dab96df8a648adb0a492dab89608 100644 (file)
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2614,6 +2614,88 @@ static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc)
         return cost;
  }
  
+enum over_budget_action {
+       action_retry,
+       action_commit,
+       action_wait,
+       action_return,
+};
+
+static enum over_budget_action
+iocg_handle_over_budget(struct rq_qos *rqos, struct ioc_gq *iocg,
+                       struct bio *bio, struct ioc_now *now,
+                       struct iocg_wait *wait, bool use_debt, bool ioc_locked,
+                       u64 abs_cost, u64 cost)
+{
+       lockdep_assert_held(&iocg->waitq.lock);
+
+       /*
+        * @iocg must stay activated for debt and waitq handling. Deactivation
+        * is synchronized against both ioc->lock and waitq.lock and we won't
+        * get deactivated as long as we're waiting or have debt, so we're good
+        * if we're activated here. In the unlikely cases that we aren't, just
+        * issue the IO.
+        */
+       if (unlikely(list_empty(&iocg->active_list)))
+               return action_commit;
+
+       /*
+        * We're over budget. If @bio has to be issued regardless, remember
+        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+        * off the debt before waking more IOs.
+        *
+        * This way, the debt is continuously paid off each period with the
+        * actual budget available to the cgroup. If we just wound vtime, we
+        * would incorrectly use the current hw_inuse for the entire amount
+        * which, for example, can lead to the cgroup staying blocked for a
+        * long time even with substantially raised hw_inuse.
+        *
+        * An iocg with vdebt should stay online so that the timer can keep
+        * deducting its vdebt and [de]activate use_delay mechanism
+        * accordingly. We don't want to race against the timer trying to
+        * clear them and leave @iocg inactive w/ dangling use_delay heavily
+        * penalizing the cgroup and its descendants.
+        */
+       if (use_debt) {
+               iocg_incur_debt(iocg, abs_cost, now);
+               if (iocg_kick_delay(iocg, now))
+                       blkcg_schedule_throttle(rqos->disk,
+                                               (bio->bi_opf & REQ_SWAP) ==
+                                                       REQ_SWAP);
+               return action_return;
+       }
+
+       /* guarantee that iocgs w/ waiters have maximum inuse */
+       if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
+               if (!ioc_locked)
+                       return action_retry;
+               lockdep_assert_held(&iocg->ioc->lock);
+               propagate_weights(iocg, iocg->active, iocg->active, true, now);
+       }
+
+       /*
+        * Append self to the waitq and schedule the wakeup timer if we're
+        * the first waiter.  The timer duration is calculated based on the
+        * current vrate.  vtime and hweight changes can make it too short
+        * or too long.  Each wait entry records the absolute cost it's
+        * waiting for to allow re-evaluation using a custom wait entry.
+        *
+        * If too short, the timer simply reschedules itself.  If too long,
+        * the period timer will notice and trigger wakeups.
+        *
+        * All waiters are on iocg->waitq and the wait states are
+        * synchronized using waitq.lock.
+        */
+       init_wait_func(&wait->wait, iocg_wake_fn);
+       wait->bio = bio;
+       wait->abs_cost = abs_cost;
+       wait->committed = false; /* will be set true by waker */
+
+       __add_wait_queue_entry_tail(&iocg->waitq, &wait->wait);
+       iocg_kick_waitq(iocg, ioc_locked, now);
+       return action_wait;
+}
+
  static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
  {
         struct blkcg_gq *blkg = bio->bi_blkg;
@@ -2623,6 +2705,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
         struct iocg_wait wait;
         u64 abs_cost, cost, vtime;
         bool use_debt, ioc_locked;
+       enum over_budget_action action;
         unsigned long flags;
  
         /* bypass IOs if disabled, still initializing, or for root cgroup */
@@ -2663,80 +2746,22 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
         ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt);
  retry_lock:
         iocg_lock(iocg, ioc_locked, &flags);
-
-       /*
-        * @iocg must stay activated for debt and waitq handling. Deactivation
-        * is synchronized against both ioc->lock and waitq.lock and we won't
-        * get deactivated as long as we're waiting or has debt, so we're good
-        * if we're activated here. In the unlikely cases that we aren't, just
-        * issue the IO.
-        */
-       if (unlikely(list_empty(&iocg->active_list))) {
-               iocg_unlock(iocg, ioc_locked, &flags);
+       action = iocg_handle_over_budget(rqos, iocg, bio, &now, &wait, use_debt,
+                                        ioc_locked, abs_cost, cost);
+       iocg_unlock(iocg, ioc_locked, &flags);
+       switch (action) {
+       case action_retry:
+               ioc_locked = true;
+               goto retry_lock;
+       case action_commit:
                 iocg_commit_bio(iocg, bio, abs_cost, cost);
                 return;
-       }
-
-       /*
-        * We're over budget. If @bio has to be issued regardless, remember
-        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
-        * off the debt before waking more IOs.
-        *
-        * This way, the debt is continuously paid off each period with the
-        * actual budget available to the cgroup. If we just wound vtime, we
-        * would incorrectly use the current hw_inuse for the entire amount
-        * which, for example, can lead to the cgroup staying blocked for a
-        * long time even with substantially raised hw_inuse.
-        *
-        * An iocg with vdebt should stay online so that the timer can keep
-        * deducting its vdebt and [de]activate use_delay mechanism
-        * accordingly. We don't want to race against the timer trying to
-        * clear them and leave @iocg inactive w/ dangling use_delay heavily
-        * penalizing the cgroup and its descendants.
-        */
-       if (use_debt) {
-               iocg_incur_debt(iocg, abs_cost, &now);
-               if (iocg_kick_delay(iocg, &now))
-                       blkcg_schedule_throttle(rqos->disk,
-                                       (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
-               iocg_unlock(iocg, ioc_locked, &flags);
+       case action_return:
                 return;
+       case action_wait:
+               break;
         }
  
-       /* guarantee that iocgs w/ waiters have maximum inuse */
-       if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
-               if (!ioc_locked) {
-                       iocg_unlock(iocg, false, &flags);
-                       ioc_locked = true;
-                       goto retry_lock;
-               }
-               propagate_weights(iocg, iocg->active, iocg->active, true,
-                                 &now);
-       }
-
-       /*
-        * Append self to the waitq and schedule the wakeup timer if we're
-        * the first waiter.  The timer duration is calculated based on the
-        * current vrate.  vtime and hweight changes can make it too short
-        * or too long.  Each wait entry records the absolute cost it's
-        * waiting for to allow re-evaluation using a custom wait entry.
-        *
-        * If too short, the timer simply reschedules itself.  If too long,
-        * the period timer will notice and trigger wakeups.
-        *
-        * All waiters are on iocg->waitq and the wait states are
-        * synchronized using waitq.lock.
-        */
-       init_wait_func(&wait.wait, iocg_wake_fn);
-       wait.bio = bio;
-       wait.abs_cost = abs_cost;
-       wait.committed = false; /* will be set true by waker */
-
-       __add_wait_queue_entry_tail(&iocg->waitq, &wait.wait);
-       iocg_kick_waitq(iocg, ioc_locked, &now);
-
-       iocg_unlock(iocg, ioc_locked, &flags);
-
         while (true) {
                 set_current_state(TASK_UNINTERRUPTIBLE);
                 if (wait.committed)
author	Bart Van Assche <bvanassche@acm.org>
	Fri, 5 Jun 2026 18:01:02 +0000 (11:01 -0700)
committer	Jens Axboe <axboe@kernel.dk>
	Fri, 5 Jun 2026 19:41:11 +0000 (13:41 -0600)