+++ /dev/null
-From cb8acabbe33b110157955a7425ee876fb81e6bbc Mon Sep 17 00:00:00 2001
-From: Damien Le Moal <damien.lemoal@wdc.com>
-Date: Wed, 28 Aug 2019 13:40:20 +0900
-Subject: block: mq-deadline: Fix queue restart handling
-
-From: Damien Le Moal <damien.lemoal@wdc.com>
-
-commit cb8acabbe33b110157955a7425ee876fb81e6bbc upstream.
-
-Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
-handling") added a call to blk_mq_sched_mark_restart_hctx() in
-dd_dispatch_request() to make sure that write request dispatching does
-not stall when all target zones are locked. This fix left a subtle race
-when a write completion happens during a dispatch execution on another
-CPU:
-
-CPU 0: Dispatch                         CPU1: write completion
-
-dd_dispatch_request()
-    lock(&dd->lock);
-    ...
-    lock(&dd->zone_lock);               dd_finish_request()
-    rq = find request                       lock(&dd->zone_lock);
-    unlock(&dd->zone_lock);
-                                            zone write unlock
-                                            unlock(&dd->zone_lock);
-                                            ...
-                                            __blk_mq_free_request
-                                            check restart flag (not set)
-                                            -> queue not run
-    ...
-    if (!rq && have writes)
-        blk_mq_sched_mark_restart_hctx()
-    unlock(&dd->lock)
-
-Since the dispatch context finishes after the write request completion
-handling, marking the queue as needing a restart is not seen from
-__blk_mq_free_request() and blk_mq_sched_restart() is not executed,
-leading to a dispatch stall under 100% write workloads.
-
-Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
-dd_dispatch_request() into dd_finish_request() under the zone lock to
-ensure full mutual exclusion between write request dispatch selection
-and zone unlock on write request completion.
-
-Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
-Cc: stable@vger.kernel.org
-Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
-Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
-Reviewed-by: Christoph Hellwig <hch@lst.de>
-Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
-Signed-off-by: Jens Axboe <axboe@kernel.dk>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- block/mq-deadline.c | 19 +++++++++----------
- 1 file changed, 9 insertions(+), 10 deletions(-)
-
---- a/block/mq-deadline.c
-+++ b/block/mq-deadline.c
-@@ -376,13 +376,6 @@ done:
- * hardware queue, but we may return a request that is for a
- * different hardware queue. This is because mq-deadline has shared
- * state for all hardware queues, in terms of sorting, FIFOs, etc.
-- *
-- * For a zoned block device, __dd_dispatch_request() may return NULL
-- * if all the queued write requests are directed at zones that are already
-- * locked due to on-going write requests. In this case, make sure to mark
-- * the queue as needing a restart to ensure that the queue is run again
-- * and the pending writes dispatched once the target zones for the ongoing
-- * write requests are unlocked in dd_finish_request().
- */
- static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
- {
-@@ -391,9 +384,6 @@ static struct request *dd_dispatch_reque
-
- spin_lock(&dd->lock);
- rq = __dd_dispatch_request(dd);
-- if (!rq && blk_queue_is_zoned(hctx->queue) &&
-- !list_empty(&dd->fifo_list[WRITE]))
-- blk_mq_sched_mark_restart_hctx(hctx);
- spin_unlock(&dd->lock);
-
- return rq;
-@@ -559,6 +549,13 @@ static void dd_prepare_request(struct re
- * spinlock so that the zone is never unlocked while deadline_fifo_request()
- * or deadline_next_request() are executing. This function is called for
- * all requests, whether or not these requests complete successfully.
-+ *
-+ * For a zoned block device, __dd_dispatch_request() may have stopped
-+ * dispatching requests if all the queued requests are write requests directed
-+ * at zones that are already locked due to on-going write requests. To ensure
-+ * write request dispatch progress in this case, mark the queue as needing a
-+ * restart to ensure that the queue is run again after completion of the
-+ * request and zones being unlocked.
- */
- static void dd_finish_request(struct request *rq)
- {
-@@ -570,6 +567,8 @@ static void dd_finish_request(struct req
-
- spin_lock_irqsave(&dd->zone_lock, flags);
- blk_req_zone_write_unlock(rq);
-+ if (!list_empty(&dd->fifo_list[WRITE]))
-+ blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
- spin_unlock_irqrestore(&dd->zone_lock, flags);
- }
- }