return cost;
}
+enum over_budget_action {
+ action_retry,
+ action_commit,
+ action_wait,
+ action_return,
+};
+
+static enum over_budget_action
+iocg_handle_over_budget(struct rq_qos *rqos, struct ioc_gq *iocg,
+ struct bio *bio, struct ioc_now *now,
+ struct iocg_wait *wait, bool use_debt, bool ioc_locked,
+ u64 abs_cost, u64 cost)
+{
+ lockdep_assert_held(&iocg->waitq.lock);
+
+ /*
+ * @iocg must stay activated for debt and waitq handling. Deactivation
+ * is synchronized against both ioc->lock and waitq.lock and we won't
+ * get deactivated as long as we're waiting or have debt, so we're good
+ * if we're activated here. In the unlikely cases that we aren't, just
+ * issue the IO.
+ */
+ if (unlikely(list_empty(&iocg->active_list)))
+ return action_commit;
+
+ /*
+ * We're over budget. If @bio has to be issued regardless, remember
+ * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+ * off the debt before waking more IOs.
+ *
+ * This way, the debt is continuously paid off each period with the
+ * actual budget available to the cgroup. If we just wound vtime, we
+ * would incorrectly use the current hw_inuse for the entire amount
+ * which, for example, can lead to the cgroup staying blocked for a
+ * long time even with substantially raised hw_inuse.
+ *
+ * An iocg with vdebt should stay online so that the timer can keep
+ * deducting its vdebt and [de]activate use_delay mechanism
+ * accordingly. We don't want to race against the timer trying to
+ * clear them and leave @iocg inactive w/ dangling use_delay heavily
+ * penalizing the cgroup and its descendants.
+ */
+ if (use_debt) {
+ iocg_incur_debt(iocg, abs_cost, now);
+ if (iocg_kick_delay(iocg, now))
+ blkcg_schedule_throttle(rqos->disk,
+ (bio->bi_opf & REQ_SWAP) ==
+ REQ_SWAP);
+ return action_return;
+ }
+
+ /* guarantee that iocgs w/ waiters have maximum inuse */
+ if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
+ if (!ioc_locked)
+ return action_retry;
+ lockdep_assert_held(&iocg->ioc->lock);
+ propagate_weights(iocg, iocg->active, iocg->active, true, now);
+ }
+
+ /*
+ * Append self to the waitq and schedule the wakeup timer if we're
+ * the first waiter. The timer duration is calculated based on the
+ * current vrate. vtime and hweight changes can make it too short
+ * or too long. Each wait entry records the absolute cost it's
+ * waiting for to allow re-evaluation using a custom wait entry.
+ *
+ * If too short, the timer simply reschedules itself. If too long,
+ * the period timer will notice and trigger wakeups.
+ *
+ * All waiters are on iocg->waitq and the wait states are
+ * synchronized using waitq.lock.
+ */
+ init_wait_func(&wait->wait, iocg_wake_fn);
+ wait->bio = bio;
+ wait->abs_cost = abs_cost;
+ wait->committed = false; /* will be set true by waker */
+
+ __add_wait_queue_entry_tail(&iocg->waitq, &wait->wait);
+ iocg_kick_waitq(iocg, ioc_locked, now);
+ return action_wait;
+}
+
static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
struct iocg_wait wait;
u64 abs_cost, cost, vtime;
bool use_debt, ioc_locked;
+ enum over_budget_action action;
unsigned long flags;
/* bypass IOs if disabled, still initializing, or for root cgroup */
ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt);
retry_lock:
iocg_lock(iocg, ioc_locked, &flags);
-
- /*
- * @iocg must stay activated for debt and waitq handling. Deactivation
- * is synchronized against both ioc->lock and waitq.lock and we won't
- * get deactivated as long as we're waiting or has debt, so we're good
- * if we're activated here. In the unlikely cases that we aren't, just
- * issue the IO.
- */
- if (unlikely(list_empty(&iocg->active_list))) {
- iocg_unlock(iocg, ioc_locked, &flags);
+ action = iocg_handle_over_budget(rqos, iocg, bio, &now, &wait, use_debt,
+ ioc_locked, abs_cost, cost);
+ iocg_unlock(iocg, ioc_locked, &flags);
+ switch (action) {
+ case action_retry:
+ ioc_locked = true;
+ goto retry_lock;
+ case action_commit:
iocg_commit_bio(iocg, bio, abs_cost, cost);
return;
- }
-
- /*
- * We're over budget. If @bio has to be issued regardless, remember
- * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
- * off the debt before waking more IOs.
- *
- * This way, the debt is continuously paid off each period with the
- * actual budget available to the cgroup. If we just wound vtime, we
- * would incorrectly use the current hw_inuse for the entire amount
- * which, for example, can lead to the cgroup staying blocked for a
- * long time even with substantially raised hw_inuse.
- *
- * An iocg with vdebt should stay online so that the timer can keep
- * deducting its vdebt and [de]activate use_delay mechanism
- * accordingly. We don't want to race against the timer trying to
- * clear them and leave @iocg inactive w/ dangling use_delay heavily
- * penalizing the cgroup and its descendants.
- */
- if (use_debt) {
- iocg_incur_debt(iocg, abs_cost, &now);
- if (iocg_kick_delay(iocg, &now))
- blkcg_schedule_throttle(rqos->disk,
- (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
- iocg_unlock(iocg, ioc_locked, &flags);
+ case action_return:
return;
+ case action_wait:
+ break;
}
- /* guarantee that iocgs w/ waiters have maximum inuse */
- if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
- if (!ioc_locked) {
- iocg_unlock(iocg, false, &flags);
- ioc_locked = true;
- goto retry_lock;
- }
- propagate_weights(iocg, iocg->active, iocg->active, true,
- &now);
- }
-
- /*
- * Append self to the waitq and schedule the wakeup timer if we're
- * the first waiter. The timer duration is calculated based on the
- * current vrate. vtime and hweight changes can make it too short
- * or too long. Each wait entry records the absolute cost it's
- * waiting for to allow re-evaluation using a custom wait entry.
- *
- * If too short, the timer simply reschedules itself. If too long,
- * the period timer will notice and trigger wakeups.
- *
- * All waiters are on iocg->waitq and the wait states are
- * synchronized using waitq.lock.
- */
- init_wait_func(&wait.wait, iocg_wake_fn);
- wait.bio = bio;
- wait.abs_cost = abs_cost;
- wait.committed = false; /* will be set true by waker */
-
- __add_wait_queue_entry_tail(&iocg->waitq, &wait.wait);
- iocg_kick_waitq(iocg, ioc_locked, &now);
-
- iocg_unlock(iocg, ioc_locked, &flags);
-
while (true) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (wait.committed)