git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
block: break pcpu_alloc_mutex dependency on freeze_lock
authorNilay Shroff <nilay@linux.ibm.com>
Sun, 1 Mar 2026 12:59:43 +0000 (18:29 +0530)
committerJens Axboe <axboe@kernel.dk>
Mon, 2 Mar 2026 16:23:04 +0000 (09:23 -0700)
While nr_hw_queue update allocates tagset tags it acquires
->pcpu_alloc_mutex after ->freeze_lock is acquired or the queue is
frozen. This potentially creates a circular dependency involving
->fs_reclaim if reclaim is triggered simultaneously in a code path
which first acquires ->pcpu_alloc_mutex. As the queue is already
frozen while nr_hw_queue update allocates tagsets, the reclaim can't
make forward progress and thus it could cause a potential deadlock as
reported in the lockdep splat[1].

Fix this by pre-allocating tagset tags before we freeze the queue during
nr_hw_queue update. Later, the allocated tagset tags can be safely
installed and used after the queue is frozen.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1]
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
[axboe: fix brace style issue]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq.c

index d5602ff62c7cecdb6f0d199ff61ddb74200eb6bf..0b182013016ab73c518359e1e0614f126d6e6e54 100644 (file)
@@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
        }
 }
 
-static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
-                                      int new_nr_hw_queues)
+static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags(
+                               struct blk_mq_tag_set *set,
+                               int new_nr_hw_queues)
 {
        struct blk_mq_tags **new_tags;
        int i;
 
        if (set->nr_hw_queues >= new_nr_hw_queues)
-               goto done;
+               return NULL;
 
        new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
                                GFP_KERNEL, set->numa_node);
        if (!new_tags)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        if (set->tags)
                memcpy(new_tags, set->tags, set->nr_hw_queues *
                       sizeof(*set->tags));
-       kfree(set->tags);
-       set->tags = new_tags;
 
        for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
-               if (!__blk_mq_alloc_map_and_rqs(set, i)) {
-                       while (--i >= set->nr_hw_queues)
-                               __blk_mq_free_map_and_rqs(set, i);
-                       return -ENOMEM;
+               if (blk_mq_is_shared_tags(set->flags)) {
+                       new_tags[i] = set->shared_tags;
+               } else {
+                       new_tags[i] = blk_mq_alloc_map_and_rqs(set, i,
+                                       set->queue_depth);
+                       if (!new_tags[i])
+                               goto out_unwind;
                }
                cond_resched();
        }
 
-done:
-       set->nr_hw_queues = new_nr_hw_queues;
-       return 0;
+       return new_tags;
+out_unwind:
+       while (--i >= set->nr_hw_queues) {
+               if (!blk_mq_is_shared_tags(set->flags))
+                       blk_mq_free_map_and_rqs(set, new_tags[i], i);
+       }
+       kfree(new_tags);
+       return ERR_PTR(-ENOMEM);
 }
 
 /*
@@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
        unsigned int memflags;
        int i;
        struct xarray elv_tbl;
+       struct blk_mq_tags **new_tags;
        bool queues_frozen = false;
 
        lockdep_assert_held(&set->tag_list_lock);
@@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
                if (blk_mq_elv_switch_none(q, &elv_tbl))
                        goto switch_back;
 
+       new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues);
+       if (IS_ERR(new_tags))
+               goto switch_back;
+
        list_for_each_entry(q, &set->tag_list, tag_set_list)
                blk_mq_freeze_queue_nomemsave(q);
        queues_frozen = true;
-       if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
-               goto switch_back;
+       if (new_tags) {
+               kfree(set->tags);
+               set->tags = new_tags;
+       }
+       set->nr_hw_queues = nr_hw_queues;
 
 fallback:
        blk_mq_update_queue_map(set);