git.ipfire.org Git - thirdparty/linux.git/commitdiff
block/blk-iocost: Split ioc_rqos_throttle()
author: Bart Van Assche <bvanassche@acm.org>
Fri, 5 Jun 2026 18:01:02 +0000 (11:01 -0700)
committer: Jens Axboe <axboe@kernel.dk>
Fri, 5 Jun 2026 19:41:11 +0000 (13:41 -0600)
Prepare for inlining iocg_lock() and iocg_unlock() by moving the code
between these two calls into a new function. No functionality has been
changed.

Reviewed-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://patch.msgid.link/a6d3ed953cef6669d23a80923bf46600733cbdae.1780682325.git.bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-iocost.c

index 302388e995883f612ff83b4eafa145b544e5bb17..8f1468444e97dab96df8a648adb0a492dab89608 100644 (file)
@@ -2614,6 +2614,88 @@ static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc)
        return cost;
 }
 
+enum over_budget_action {
+       action_retry,
+       action_commit,
+       action_wait,
+       action_return,
+};
+
+static enum over_budget_action
+iocg_handle_over_budget(struct rq_qos *rqos, struct ioc_gq *iocg,
+                       struct bio *bio, struct ioc_now *now,
+                       struct iocg_wait *wait, bool use_debt, bool ioc_locked,
+                       u64 abs_cost, u64 cost)
+{
+       lockdep_assert_held(&iocg->waitq.lock);
+
+       /*
+        * @iocg must stay activated for debt and waitq handling. Deactivation
+        * is synchronized against both ioc->lock and waitq.lock and we won't
+        * get deactivated as long as we're waiting or have debt, so we're good
+        * if we're activated here. In the unlikely cases that we aren't, just
+        * issue the IO.
+        */
+       if (unlikely(list_empty(&iocg->active_list)))
+               return action_commit;
+
+       /*
+        * We're over budget. If @bio has to be issued regardless, remember
+        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+        * off the debt before waking more IOs.
+        *
+        * This way, the debt is continuously paid off each period with the
+        * actual budget available to the cgroup. If we just wound vtime, we
+        * would incorrectly use the current hw_inuse for the entire amount
+        * which, for example, can lead to the cgroup staying blocked for a
+        * long time even with substantially raised hw_inuse.
+        *
+        * An iocg with vdebt should stay online so that the timer can keep
+        * deducting its vdebt and [de]activate use_delay mechanism
+        * accordingly. We don't want to race against the timer trying to
+        * clear them and leave @iocg inactive w/ dangling use_delay heavily
+        * penalizing the cgroup and its descendants.
+        */
+       if (use_debt) {
+               iocg_incur_debt(iocg, abs_cost, now);
+               if (iocg_kick_delay(iocg, now))
+                       blkcg_schedule_throttle(rqos->disk,
+                                               (bio->bi_opf & REQ_SWAP) ==
+                                                       REQ_SWAP);
+               return action_return;
+       }
+
+       /* guarantee that iocgs w/ waiters have maximum inuse */
+       if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
+               if (!ioc_locked)
+                       return action_retry;
+               lockdep_assert_held(&iocg->ioc->lock);
+               propagate_weights(iocg, iocg->active, iocg->active, true, now);
+       }
+
+       /*
+        * Append self to the waitq and schedule the wakeup timer if we're
+        * the first waiter.  The timer duration is calculated based on the
+        * current vrate.  vtime and hweight changes can make it too short
+        * or too long.  Each wait entry records the absolute cost it's
+        * waiting for to allow re-evaluation using a custom wait entry.
+        *
+        * If too short, the timer simply reschedules itself.  If too long,
+        * the period timer will notice and trigger wakeups.
+        *
+        * All waiters are on iocg->waitq and the wait states are
+        * synchronized using waitq.lock.
+        */
+       init_wait_func(&wait->wait, iocg_wake_fn);
+       wait->bio = bio;
+       wait->abs_cost = abs_cost;
+       wait->committed = false; /* will be set true by waker */
+
+       __add_wait_queue_entry_tail(&iocg->waitq, &wait->wait);
+       iocg_kick_waitq(iocg, ioc_locked, now);
+       return action_wait;
+}
+
 static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 {
        struct blkcg_gq *blkg = bio->bi_blkg;
@@ -2623,6 +2705,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
        struct iocg_wait wait;
        u64 abs_cost, cost, vtime;
        bool use_debt, ioc_locked;
+       enum over_budget_action action;
        unsigned long flags;
 
        /* bypass IOs if disabled, still initializing, or for root cgroup */
@@ -2663,80 +2746,22 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
        ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt);
 retry_lock:
        iocg_lock(iocg, ioc_locked, &flags);
-
-       /*
-        * @iocg must stay activated for debt and waitq handling. Deactivation
-        * is synchronized against both ioc->lock and waitq.lock and we won't
-        * get deactivated as long as we're waiting or has debt, so we're good
-        * if we're activated here. In the unlikely cases that we aren't, just
-        * issue the IO.
-        */
-       if (unlikely(list_empty(&iocg->active_list))) {
-               iocg_unlock(iocg, ioc_locked, &flags);
+       action = iocg_handle_over_budget(rqos, iocg, bio, &now, &wait, use_debt,
+                                        ioc_locked, abs_cost, cost);
+       iocg_unlock(iocg, ioc_locked, &flags);
+       switch (action) {
+       case action_retry:
+               ioc_locked = true;
+               goto retry_lock;
+       case action_commit:
                iocg_commit_bio(iocg, bio, abs_cost, cost);
                return;
-       }
-
-       /*
-        * We're over budget. If @bio has to be issued regardless, remember
-        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
-        * off the debt before waking more IOs.
-        *
-        * This way, the debt is continuously paid off each period with the
-        * actual budget available to the cgroup. If we just wound vtime, we
-        * would incorrectly use the current hw_inuse for the entire amount
-        * which, for example, can lead to the cgroup staying blocked for a
-        * long time even with substantially raised hw_inuse.
-        *
-        * An iocg with vdebt should stay online so that the timer can keep
-        * deducting its vdebt and [de]activate use_delay mechanism
-        * accordingly. We don't want to race against the timer trying to
-        * clear them and leave @iocg inactive w/ dangling use_delay heavily
-        * penalizing the cgroup and its descendants.
-        */
-       if (use_debt) {
-               iocg_incur_debt(iocg, abs_cost, &now);
-               if (iocg_kick_delay(iocg, &now))
-                       blkcg_schedule_throttle(rqos->disk,
-                                       (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
-               iocg_unlock(iocg, ioc_locked, &flags);
+       case action_return:
                return;
+       case action_wait:
+               break;
        }
 
-       /* guarantee that iocgs w/ waiters have maximum inuse */
-       if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
-               if (!ioc_locked) {
-                       iocg_unlock(iocg, false, &flags);
-                       ioc_locked = true;
-                       goto retry_lock;
-               }
-               propagate_weights(iocg, iocg->active, iocg->active, true,
-                                 &now);
-       }
-
-       /*
-        * Append self to the waitq and schedule the wakeup timer if we're
-        * the first waiter.  The timer duration is calculated based on the
-        * current vrate.  vtime and hweight changes can make it too short
-        * or too long.  Each wait entry records the absolute cost it's
-        * waiting for to allow re-evaluation using a custom wait entry.
-        *
-        * If too short, the timer simply reschedules itself.  If too long,
-        * the period timer will notice and trigger wakeups.
-        *
-        * All waiters are on iocg->waitq and the wait states are
-        * synchronized using waitq.lock.
-        */
-       init_wait_func(&wait.wait, iocg_wake_fn);
-       wait.bio = bio;
-       wait.abs_cost = abs_cost;
-       wait.committed = false; /* will be set true by waker */
-
-       __add_wait_queue_entry_tail(&iocg->waitq, &wait.wait);
-       iocg_kick_waitq(iocg, ioc_locked, &now);
-
-       iocg_unlock(iocg, ioc_locked, &flags);
-
        while (true) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (wait.committed)