mq-deadline: convert to use request_queue->async_depth
author    Yu Kuai <yukuai@fnnas.com>
          Tue, 3 Feb 2026 08:19:47 +0000 (16:19 +0800)
committer Jens Axboe <axboe@kernel.dk>
          Tue, 3 Feb 2026 14:45:36 +0000 (07:45 -0700)
In a downstream kernel we tested mq-deadline with many fio workloads,
and found a performance regression after commit 39823b47bbd4
("block/mq-deadline: Fix the tag reservation code") with the following
test:

[global]
rw=randread
direct=1
ramp_time=1
ioengine=libaio
iodepth=1024
numjobs=24
bs=1024k
group_reporting=1
runtime=60

[job1]
filename=/dev/sda

The root cause is that mq-deadline now supports configuring async_depth.
Although the default value is nr_requests, the minimal value is 1, hence
min_shallow_depth is set to 1, causing wake_batch to be 1. As a
consequence, sbitmap_queue waiters are woken up after each IO instead of
after every 8 IOs.
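
That dependency comes from lib/sbitmap.c, where min_shallow_depth caps
the depth used to size the wake batch. Below is a condensed model of
that calculation (the real sbq_calc_wake_batch() additionally accounts
for per-word granularity; this sketch keeps only the capping behaviour,
with the kernel's SBQ_WAIT_QUEUES/SBQ_WAKE_BATCH values):

#include <stdio.h>

#define SBQ_WAIT_QUEUES 8
#define SBQ_WAKE_BATCH  8

static unsigned int calc_wake_batch(unsigned int depth,
				    unsigned int min_shallow_depth)
{
	unsigned int batch;

	/* Shallow allocations may only use min_shallow_depth tags, so
	 * the wake batch must be sized against the capped depth. */
	if (depth > min_shallow_depth)
		depth = min_shallow_depth;

	batch = depth / SBQ_WAIT_QUEUES;
	if (batch < 1)
		batch = 1;
	if (batch > SBQ_WAKE_BATCH)
		batch = SBQ_WAKE_BATCH;
	return batch;
}

int main(void)
{
	printf("%u\n", calc_wake_batch(256, 1));   /* regressed case: 1 */
	printf("%u\n", calc_wake_batch(256, 256)); /* default case:   8 */
	return 0;
}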

In this test case, sda is an HDD and max_sectors is 128k, hence each
submitted 1M IO is split into 8 sequential 128k requests. However,
because there are 24 jobs and all tags are exhausted, the 8 requests are
unlikely to be dispatched sequentially, and changing wake_batch to 1
makes this much worse: measured at the blktrace D (dispatch) stage, the
percentage of sequential IO drops from 8% to 0.8%.

Fix this problem by converting to request_queue->async_depth, where
min_shallow_depth is set each time async_depth is updated.

Note that the elevator attribute async_depth is now removed; the queue
attribute with the same name is used instead.
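
With the elevator attribute gone, tuning goes through the queue
attribute instead. A minimal sketch, assuming the queue-level sysfs node
introduced alongside request_queue->async_depth is exposed at
/sys/block/<dev>/queue/async_depth (path and device name below are
illustrative assumptions, not confirmed by this patch):

#include <stdio.h>

int main(void)
{
	/* Assumed sysfs path; replace "sda" with the target device. */
	FILE *f = fopen("/sys/block/sda/queue/async_depth", "w");

	if (!f) {
		perror("async_depth");
		return 1;
	}
	/* Allow at most 64 in-flight tags for async requests and writes. */
	fprintf(f, "%d\n", 64);
	return fclose(f) ? 1 : 0;
}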

Fixes: 39823b47bbd4 ("block/mq-deadline: Fix the tag reservation code")
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/mq-deadline.c

index 29d00221fbea6a3433994475352d4739b1a247d7..95917a88976f4084a4385562f6eba50ad66dd3f5 100644
@@ -98,7 +98,6 @@ struct deadline_data {
        int fifo_batch;
        int writes_starved;
        int front_merges;
-       u32 async_depth;
        int prio_aging_expire;
 
        spinlock_t lock;
@@ -486,32 +485,16 @@ unlock:
        return rq;
 }
 
-/*
- * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
- * function is used by __blk_mq_get_tag().
- */
 static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
 {
-       struct deadline_data *dd = data->q->elevator->elevator_data;
-
-       /* Do not throttle synchronous reads. */
-       if (blk_mq_is_sync_read(opf))
-               return;
-
-       /*
-        * Throttle asynchronous requests and writes such that these requests
-        * do not block the allocation of synchronous requests.
-        */
-       data->shallow_depth = dd->async_depth;
+       if (!blk_mq_is_sync_read(opf))
+               data->shallow_depth = data->q->async_depth;
 }
 
-/* Called by blk_mq_update_nr_requests(). */
+/* Called by blk_mq_init_sched() and blk_mq_update_nr_requests(). */
 static void dd_depth_updated(struct request_queue *q)
 {
-       struct deadline_data *dd = q->elevator->elevator_data;
-
-       dd->async_depth = q->nr_requests;
-       blk_mq_set_min_shallow_depth(q, 1);
+       blk_mq_set_min_shallow_depth(q, q->async_depth);
 }
 
 static void dd_exit_sched(struct elevator_queue *e)
@@ -576,6 +559,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
        blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
 
        q->elevator = eq;
+       q->async_depth = q->nr_requests;
        dd_depth_updated(q);
        return 0;
 }
@@ -763,7 +747,6 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
 SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
 SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
 SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->async_depth);
 SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
 #undef SHOW_INT
 #undef SHOW_JIFFIES
@@ -793,7 +776,6 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
 STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
 STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
 STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
 STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
 #undef STORE_FUNCTION
 #undef STORE_INT
@@ -807,7 +789,6 @@ static const struct elv_fs_entry deadline_attrs[] = {
        DD_ATTR(write_expire),
        DD_ATTR(writes_starved),
        DD_ATTR(front_merges),
-       DD_ATTR(async_depth),
        DD_ATTR(fifo_batch),
        DD_ATTR(prio_aging_expire),
        __ATTR_NULL
@@ -894,15 +875,6 @@ static int deadline_starved_show(void *data, struct seq_file *m)
        return 0;
 }
 
-static int dd_async_depth_show(void *data, struct seq_file *m)
-{
-       struct request_queue *q = data;
-       struct deadline_data *dd = q->elevator->elevator_data;
-
-       seq_printf(m, "%u\n", dd->async_depth);
-       return 0;
-}
-
 static int dd_queued_show(void *data, struct seq_file *m)
 {
        struct request_queue *q = data;
@@ -1002,7 +974,6 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
        DEADLINE_NEXT_RQ_ATTR(write2),
        {"batching", 0400, deadline_batching_show},
        {"starved", 0400, deadline_starved_show},
-       {"async_depth", 0400, dd_async_depth_show},
        {"dispatch", 0400, .seq_ops = &deadline_dispatch_seq_ops},
        {"owned_by_driver", 0400, dd_owned_by_driver_show},
        {"queued", 0400, dd_queued_show},