Merge tag 'for-6.9/block-20240310' of git://git.kernel.dk/linux

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9c475a89e3afec4e134c80b4694c761940ff7d8d..555ada922cf06021124eb3170983fc308e8d2a38 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -21,7 +21,6 @@
 #include <linux/llist.h>
 #include <linux/cpu.h>
 #include <linux/cache.h>
-#include <linux/sched/sysctl.h>
 #include <linux/sched/topology.h>
 #include <linux/sched/signal.h>
 #include <linux/delay.h>
@@ -322,7 +321,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        RB_CLEAR_NODE(&rq->rb_node);
        rq->tag = BLK_MQ_NO_TAG;
        rq->internal_tag = BLK_MQ_NO_TAG;
-       rq->start_time_ns = ktime_get_ns();
+       rq->start_time_ns = blk_time_get_ns();
        rq->part = NULL;
        blk_crypto_rq_set_defaults(rq);
 }
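
Every ktime_get_ns() call site in the hot paths below switches to blk_time_get_ns(). The idea in this series is to cache the clock read in the task's blk_plug so a whole batch of plugged requests pays for a single ktime_get_ns() call. A minimal sketch of such a helper, assuming the plug-level cur_ktime field and the PF_BLOCK_TS task flag introduced alongside it (the real helper lives in block/blk.h):

static inline u64 blk_time_get_ns(void)
{
	struct blk_plug *plug = current->plug;

	/* No plug, or not in task context: fall back to a raw clock read. */
	if (!plug || !in_task())
		return ktime_get_ns();

	/*
	 * 0 doubles as "not sampled yet"; if the clock genuinely reads 0 we
	 * merely pay for one extra ktime_get_ns() call.
	 */
	if (!plug->cur_ktime) {
		plug->cur_ktime = ktime_get_ns();
		current->flags |= PF_BLOCK_TS;
	}
	return plug->cur_ktime;
}
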
@@ -332,7 +331,7 @@ EXPORT_SYMBOL(blk_rq_init);
 static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
 {
        if (blk_mq_need_time_stamp(rq))
-               rq->start_time_ns = ktime_get_ns();
+               rq->start_time_ns = blk_time_get_ns();
        else
                rq->start_time_ns = 0;
 
@@ -443,7 +442,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 
        /* alloc_time includes depth and tag waits */
        if (blk_queue_rq_alloc_time(q))
-               alloc_time_ns = ktime_get_ns();
+               alloc_time_ns = blk_time_get_ns();
 
        if (data->cmd_flags & REQ_NOWAIT)
                data->flags |= BLK_MQ_REQ_NOWAIT;
@@ -628,7 +627,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 
        /* alloc_time includes depth and tag waits */
        if (blk_queue_rq_alloc_time(q))
-               alloc_time_ns = ktime_get_ns();
+               alloc_time_ns = blk_time_get_ns();
 
        /*
         * If the tag allocator sleeps we could get an allocation for a
@@ -1041,7 +1040,7 @@ static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
        if (blk_mq_need_time_stamp(rq))
-               __blk_mq_end_request_acct(rq, ktime_get_ns());
+               __blk_mq_end_request_acct(rq, blk_time_get_ns());
 
        blk_mq_finish_request(rq);
 
@@ -1084,7 +1083,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
        u64 now = 0;
 
        if (iob->need_ts)
-               now = ktime_get_ns();
+               now = blk_time_get_ns();
 
        while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
                prefetch(rq->bio);
@@ -1167,10 +1166,11 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
        if (force_irqthreads())
                return false;
 
-       /* same CPU or cache domain?  Complete locally */
+       /* same CPU or cache domain and capacity?  Complete locally */
        if (cpu == rq->mq_ctx->cpu ||
            (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
-            cpus_share_cache(cpu, rq->mq_ctx->cpu)))
+            cpus_share_cache(cpu, rq->mq_ctx->cpu) &&
+            cpus_equal_capacity(cpu, rq->mq_ctx->cpu)))
                return false;
 
        /* don't try to IPI to an offline CPU */
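
Beyond sharing cache, the local-completion fast path now also requires the submitting and completing CPUs to have equal compute capacity, so that I/O issued from a big core is not quietly completed on a slower little core (or vice versa) on asymmetric systems. A sketch of what the scheduler-side helper can look like, assuming arch_scale_cpu_capacity() as the capacity source:

bool cpus_equal_capacity(int this_cpu, int that_cpu)
{
	/* Symmetric systems never differ in capacity. */
	if (!sched_asym_cpucap_active())
		return true;

	if (this_cpu == that_cpu)
		return true;

	return arch_scale_cpu_capacity(this_cpu) ==
	       arch_scale_cpu_capacity(that_cpu);
}
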
@@ -1254,7 +1254,7 @@ void blk_mq_start_request(struct request *rq)
 
        if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags) &&
            !blk_rq_is_passthrough(rq)) {
-               rq->io_start_time_ns = ktime_get_ns();
+               rq->io_start_time_ns = blk_time_get_ns();
                rq->stats_sectors = blk_rq_sectors(rq);
                rq->rq_flags |= RQF_STATS;
                rq_qos_issue(q, rq);
@@ -1409,22 +1409,10 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
        blk_mq_insert_request(rq, at_head ? BLK_MQ_INSERT_AT_HEAD : 0);
        blk_mq_run_hw_queue(hctx, false);
 
-       if (blk_rq_is_poll(rq)) {
+       if (blk_rq_is_poll(rq))
                blk_rq_poll_completion(rq, &wait.done);
-       } else {
-               /*
-                * Prevent hang_check timer from firing at us during very long
-                * I/O
-                */
-               unsigned long hang_check = sysctl_hung_task_timeout_secs;
-
-               if (hang_check)
-                       while (!wait_for_completion_io_timeout(&wait.done,
-                                       hang_check * (HZ/2)))
-                               ;
-               else
-                       wait_for_completion_io(&wait.done);
-       }
+       else
+               blk_wait_io(&wait.done);
 
        return wait.ret;
 }
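
The open-coded hung-task dance is folded into a blk_wait_io() helper shared with other block-layer waiters, which is also why the <linux/sched/sysctl.h> include could be dropped at the top of the file. A sketch that keeps the old semantics, waking at half the hung-task timeout so the hang-check timer never sees an uninterrupted sleep:

static inline void blk_wait_io(struct completion *done)
{
	/* Prevent the hang_check timer from firing during very long I/O. */
	unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;

	if (timeout)
		while (!wait_for_completion_io_timeout(done, timeout))
			;
	else
		wait_for_completion_io(done);
}
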
@@ -2892,9 +2880,6 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
        };
        struct request *rq;
 
-       if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-               return NULL;
-
        rq_qos_throttle(q, bio);
 
        if (plug) {
@@ -2913,22 +2898,31 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 }
 
 /*
- * Check if we can use the passed on request for submitting the passed in bio,
- * and remove it from the request list if it can be used.
+ * Check if there is a suitable cached request and return it.
  */
-static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
-               struct bio *bio)
+static struct request *blk_mq_peek_cached_request(struct blk_plug *plug,
+               struct request_queue *q, blk_opf_t opf)
 {
-       enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
-       enum hctx_type hctx_type = rq->mq_hctx->type;
+       enum hctx_type type = blk_mq_get_hctx_type(opf);
+       struct request *rq;
 
-       WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
+       if (!plug)
+               return NULL;
+       rq = rq_list_peek(&plug->cached_rq);
+       if (!rq || rq->q != q)
+               return NULL;
+       if (type != rq->mq_hctx->type &&
+           (type != HCTX_TYPE_READ || rq->mq_hctx->type != HCTX_TYPE_DEFAULT))
+               return NULL;
+       if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
+               return NULL;
+       return rq;
+}
 
-       if (type != hctx_type &&
-           !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-               return false;
-       if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
-               return false;
+static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
+               struct bio *bio)
+{
+       WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
        /*
         * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2941,7 +2935,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
        blk_mq_rq_time_init(rq, 0);
        rq->cmd_flags = bio->bi_opf;
        INIT_LIST_HEAD(&rq->queuelist);
-       return true;
 }
 
 /**
@@ -2963,50 +2956,43 @@ void blk_mq_submit_bio(struct bio *bio)
        struct blk_plug *plug = blk_mq_plug(bio);
        const int is_sync = op_is_sync(bio->bi_opf);
        struct blk_mq_hw_ctx *hctx;
-       struct request *rq = NULL;
        unsigned int nr_segs = 1;
+       struct request *rq;
        blk_status_t ret;
 
        bio = blk_queue_bounce(bio, q);
 
-       if (plug) {
-               rq = rq_list_peek(&plug->cached_rq);
-               if (rq && rq->q != q)
-                       rq = NULL;
-       }
-       if (rq) {
-               if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
-                       bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
-                       if (!bio)
-                               return;
-               }
-               if (!bio_integrity_prep(bio))
-                       return;
-               if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
-                       return;
-               if (blk_mq_use_cached_rq(rq, plug, bio))
-                       goto done;
-               percpu_ref_get(&q->q_usage_counter);
-       } else {
+       /*
+        * If the plug has a cached request for this queue, try to use it.
+        *
+        * The cached request already holds a q_usage_counter reference and we
+        * don't have to acquire a new one if we use it.
+        */
+       rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
+       if (!rq) {
                if (unlikely(bio_queue_enter(bio)))
                        return;
-               if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
-                       bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
-                       if (!bio)
-                               goto fail;
-               }
-               if (!bio_integrity_prep(bio))
-                       goto fail;
        }
 
-       rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-       if (unlikely(!rq)) {
-fail:
-               blk_queue_exit(q);
-               return;
+       if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
+               bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+               if (!bio)
+                       goto queue_exit;
+       }
+       if (!bio_integrity_prep(bio))
+               goto queue_exit;
+
+       if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
+               goto queue_exit;
+
+       if (!rq) {
+               rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+               if (unlikely(!rq))
+                       goto queue_exit;
+       } else {
+               blk_mq_use_cached_rq(rq, plug, bio);
        }
 
-done:
        trace_block_getrq(bio);
 
        rq_qos_track(q, rq, bio);
@@ -3037,6 +3023,15 @@ done:
        } else {
                blk_mq_run_dispatch_ops(q, blk_mq_try_issue_directly(hctx, rq));
        }
+       return;
+
+queue_exit:
+       /*
+        * Don't drop the queue reference if we were trying to use a cached
+        * request and thus didn't acquire one.
+        */
+       if (!rq)
+               blk_queue_exit(q);
 }
 
 #ifdef CONFIG_BLK_MQ_STACKING
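
The submit path is restructured around a peek/consume split: blk_mq_peek_cached_request() only checks that the plugged request is suitable (same queue, compatible hctx type, matching flush-ness) and leaves it on plug->cached_rq, while blk_mq_use_cached_rq() consumes it only after splitting, integrity preparation, and the merge attempt have all succeeded. A merged or failed bio therefore never strips a request from the cache, and the single queue_exit label can tell the two reference-counting cases apart: a cached request already holds a q_usage_counter reference, so only the bio_queue_enter() path must drop one on error.
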
@@ -3098,7 +3093,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
        blk_mq_run_dispatch_ops(q,
                        ret = blk_mq_request_issue_directly(rq, true));
        if (ret)
-               blk_account_io_done(rq, ktime_get_ns());
+               blk_account_io_done(rq, blk_time_get_ns());
        return ret;
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
@@ -4078,15 +4073,16 @@ void blk_mq_release(struct request_queue *q)
        blk_mq_sysfs_deinit(q);
 }
 
-static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
-               void *queuedata)
+struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
+               struct queue_limits *lim, void *queuedata)
 {
+       struct queue_limits default_lim = { };
        struct request_queue *q;
        int ret;
 
-       q = blk_alloc_queue(set->numa_node);
-       if (!q)
-               return ERR_PTR(-ENOMEM);
+       q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node);
+       if (IS_ERR(q))
+               return q;
        q->queuedata = queuedata;
        ret = blk_mq_init_allocated_queue(set, q);
        if (ret) {
@@ -4095,20 +4091,15 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
        }
        return q;
 }
-
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
-{
-       return blk_mq_init_queue_data(set, NULL);
-}
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(blk_mq_alloc_queue);
 
 /**
  * blk_mq_destroy_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * This shuts down a request queue allocated by blk_mq_init_queue(). All future
+ * This shuts down a request queue allocated by blk_mq_alloc_queue(). All future
  * requests will be failed with -ENODEV. The caller is responsible for dropping
- * the reference from blk_mq_init_queue() by calling blk_put_queue().
+ * the reference from blk_mq_alloc_queue() by calling blk_put_queue().
  *
  * Context: can sleep
  */
@@ -4129,13 +4120,14 @@ void blk_mq_destroy_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_destroy_queue);
 
-struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
+struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set,
+               struct queue_limits *lim, void *queuedata,
                struct lock_class_key *lkclass)
 {
        struct request_queue *q;
        struct gendisk *disk;
 
-       q = blk_mq_init_queue_data(set, queuedata);
+       q = blk_mq_alloc_queue(set, lim, queuedata);
        if (IS_ERR(q))
                return ERR_CAST(q);
 
@@ -4389,7 +4381,7 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
        if (set->nr_maps == 1)
                set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
 
-       if (set->ops->map_queues && !is_kdump_kernel()) {
+       if (set->ops->map_queues) {
                int i;
 
                /*
@@ -4488,14 +4480,12 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 
        /*
         * If a crashdump is active, then we are potentially in a very
-        * memory constrained environment. Limit us to 1 queue and
-        * 64 tags to prevent using too much memory.
+        * memory constrained environment. Limit us to 64 tags to prevent
+        * using too much memory.
         */
-       if (is_kdump_kernel()) {
-               set->nr_hw_queues = 1;
-               set->nr_maps = 1;
+       if (is_kdump_kernel())
                set->queue_depth = min(64U, set->queue_depth);
-       }
+
        /*
         * There is no use for more h/w queues than cpus if we just have
         * a single map
@@ -4525,7 +4515,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
                                                  GFP_KERNEL, set->numa_node);
                if (!set->map[i].mq_map)
                        goto out_free_mq_map;
-               set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
+               set->map[i].nr_queues = set->nr_hw_queues;
        }
 
        blk_mq_update_queue_map(set);
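
The kdump special case shrinks accordingly: since a crashdump kernel normally boots with a single usable CPU, the "no more hardware queues than CPUs" clamp that follows the capped queue depth already bounds nr_hw_queues, so forcing nr_hw_queues = 1 and nr_maps = 1 (and the matching is_kdump_kernel() checks in the queue-map paths) becomes redundant; only the 64-tag depth cap is still needed to bound memory use.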