block: unify elevator tags and type xarrays into struct elv_change_ctx
author Nilay Shroff <nilay@linux.ibm.com>
Thu, 13 Nov 2025 08:58:18 +0000 (14:28 +0530)
committer Jens Axboe <axboe@kernel.dk>
Thu, 13 Nov 2025 16:27:49 +0000 (09:27 -0700)
Currently, the nr_hw_queues update path manages two disjoint xarrays —
one for elevator tags and another for elevator type — both used during
elevator switching. Maintaining these two parallel structures for the
same purpose adds unnecessary complexity and potential for mismatched
state.

This patch unifies both xarrays into a single structure, struct
elv_change_ctx, which holds all per-queue elevator change context. A
single xarray, named elv_tbl, now maps each queue (q->id) in a tagset
to its corresponding elv_change_ctx entry, encapsulating the elevator
tags, type and name references.

This unification simplifies the code, improves maintainability, and
clarifies ownership of per-queue elevator state.
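
For illustration, the unified layout collapses what were previously two
separate per-queue lookups into a single xarray access. The snippet below is
a condensed sketch drawn from the hunks in this patch (locking and error
handling elided), not the verbatim code:

	struct elv_change_ctx *ctx = xa_load(&elv_tbl, q->id);

	if (ctx) {
		/* tags, type and name now travel together in one entry */
		blk_mq_free_sched_tags(ctx->et, set);	/* previously looked up via et_tbl  */
		elevator_put(ctx->type);		/* previously looked up via elv_tbl */
	}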

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-mq-sched.c
block/blk-mq-sched.h
block/blk-mq.c
block/blk.h
block/elevator.c
block/elevator.h

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 97b69fbe26f6c27fbe849809acaf15302841968a..7ba285a7759b3bcdd0ce2028dabad2b12525af5e 100644 (file)
@@ -427,11 +427,11 @@ void blk_mq_free_sched_tags(struct elevator_tags *et,
        kfree(et);
 }
 
-void blk_mq_free_sched_tags_batch(struct xarray *et_table,
+void blk_mq_free_sched_tags_batch(struct xarray *elv_tbl,
                struct blk_mq_tag_set *set)
 {
        struct request_queue *q;
-       struct elevator_tags *et;
+       struct elv_change_ctx *ctx;
 
        lockdep_assert_held_write(&set->update_nr_hwq_lock);
 
@@ -444,13 +444,47 @@ void blk_mq_free_sched_tags_batch(struct xarray *et_table,
                 * concurrently.
                 */
                if (q->elevator) {
-                       et = xa_load(et_table, q->id);
-                       if (unlikely(!et))
+                       ctx = xa_load(elv_tbl, q->id);
+                       if (!ctx || !ctx->et) {
                                WARN_ON_ONCE(1);
-                       else
-                               blk_mq_free_sched_tags(et, set);
+                               continue;
+                       }
+                       blk_mq_free_sched_tags(ctx->et, set);
+                       ctx->et = NULL;
+               }
+       }
+}
+
+void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl)
+{
+       unsigned long i;
+       struct elv_change_ctx *ctx;
+
+       xa_for_each(elv_tbl, i, ctx) {
+               xa_erase(elv_tbl, i);
+               kfree(ctx);
+       }
+}
+
+int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
+               struct blk_mq_tag_set *set)
+{
+       struct request_queue *q;
+       struct elv_change_ctx *ctx;
+
+       lockdep_assert_held_write(&set->update_nr_hwq_lock);
+
+       list_for_each_entry(q, &set->tag_list, tag_set_list) {
+               ctx = kzalloc(sizeof(struct elv_change_ctx), GFP_KERNEL);
+               if (!ctx)
+                       return -ENOMEM;
+
+               if (xa_insert(elv_tbl, q->id, ctx, GFP_KERNEL)) {
+                       kfree(ctx);
+                       return -ENOMEM;
                }
        }
+       return 0;
 }
 
 struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
@@ -497,12 +531,13 @@ out:
        return NULL;
 }
 
-int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
+int blk_mq_alloc_sched_tags_batch(struct xarray *elv_tbl,
                struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
 {
+       struct elv_change_ctx *ctx;
        struct request_queue *q;
        struct elevator_tags *et;
-       gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+       int ret = -ENOMEM;
 
        lockdep_assert_held_write(&set->update_nr_hwq_lock);
 
@@ -515,26 +550,31 @@ int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
                 * concurrently.
                 */
                if (q->elevator) {
-                       et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
+                       ctx = xa_load(elv_tbl, q->id);
+                       if (WARN_ON_ONCE(!ctx)) {
+                               ret = -ENOENT;
+                               goto out_unwind;
+                       }
+
+                       ctx->et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
                                        blk_mq_default_nr_requests(set));
-                       if (!et)
+                       if (!ctx->et)
                                goto out_unwind;
-                       if (xa_insert(et_table, q->id, et, gfp))
-                               goto out_free_tags;
+
                }
        }
        return 0;
-out_free_tags:
-       blk_mq_free_sched_tags(et, set);
 out_unwind:
        list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
                if (q->elevator) {
-                       et = xa_load(et_table, q->id);
-                       if (et)
-                               blk_mq_free_sched_tags(et, set);
+                       ctx = xa_load(elv_tbl, q->id);
+                       if (ctx && ctx->et) {
+                               blk_mq_free_sched_tags(ctx->et, set);
+                               ctx->et = NULL;
+                       }
                }
        }
-       return -ENOMEM;
+       return ret;
 }
 
 /* caller must have a reference to @e, will grab another one if successful */
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 8e21a6b1415d9da102b52939baaecf235eb447b0..2fddbc91a23595b0d6b9a764f8ff27a1b2c2c24c 100644 (file)
@@ -27,6 +27,9 @@ struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
                unsigned int nr_hw_queues, unsigned int nr_requests);
 int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
                struct blk_mq_tag_set *set, unsigned int nr_hw_queues);
+int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
+               struct blk_mq_tag_set *set);
+void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl);
 void blk_mq_free_sched_tags(struct elevator_tags *et,
                struct blk_mq_tag_set *set);
 void blk_mq_free_sched_tags_batch(struct xarray *et_table,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2fdeaac0efb5beaeaa84ec12f976a4c608b8479..04c6356f0eb2428316e24439cfb1bfa1e3146bd8 100644 (file)
@@ -4989,27 +4989,28 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q,
  * Switch back to the elevator type stored in the xarray.
  */
 static void blk_mq_elv_switch_back(struct request_queue *q,
-               struct xarray *elv_tbl, struct xarray *et_tbl)
+               struct xarray *elv_tbl)
 {
-       struct elevator_type *e = xa_load(elv_tbl, q->id);
-       struct elevator_tags *t = xa_load(et_tbl, q->id);
+       struct elv_change_ctx *ctx = xa_load(elv_tbl, q->id);
+
+       if (WARN_ON_ONCE(!ctx))
+               return;
 
        /* The elv_update_nr_hw_queues unfreezes the queue. */
-       elv_update_nr_hw_queues(q, e, t);
+       elv_update_nr_hw_queues(q, ctx);
 
        /* Drop the reference acquired in blk_mq_elv_switch_none. */
-       if (e)
-               elevator_put(e);
+       if (ctx->type)
+               elevator_put(ctx->type);
 }
 
 /*
- * Stores elevator type in xarray and set current elevator to none. It uses
- * q->id as an index to store the elevator type into the xarray.
+ * Stores elevator name and type in ctx and set current elevator to none.
  */
 static int blk_mq_elv_switch_none(struct request_queue *q,
                struct xarray *elv_tbl)
 {
-       int ret = 0;
+       struct elv_change_ctx *ctx;
 
        lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock);
 
@@ -5021,10 +5022,11 @@ static int blk_mq_elv_switch_none(struct request_queue *q,
         * can't run concurrently.
         */
        if (q->elevator) {
+               ctx = xa_load(elv_tbl, q->id);
+               if (WARN_ON_ONCE(!ctx))
+                       return -ENOENT;
 
-               ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL);
-               if (WARN_ON_ONCE(ret))
-                       return ret;
+               ctx->name = q->elevator->type->elevator_name;
 
                /*
                 * Before we switch elevator to 'none', take a reference to
@@ -5035,9 +5037,14 @@ static int blk_mq_elv_switch_none(struct request_queue *q,
                 */
                __elevator_get(q->elevator->type);
 
+               /*
+                * Store elevator type so that we can release the reference
+                * taken above later.
+                */
+               ctx->type = q->elevator->type;
                elevator_set_none(q);
        }
-       return ret;
+       return 0;
 }
 
 static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
@@ -5047,7 +5054,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
        int prev_nr_hw_queues = set->nr_hw_queues;
        unsigned int memflags;
        int i;
-       struct xarray elv_tbl, et_tbl;
+       struct xarray elv_tbl;
        bool queues_frozen = false;
 
        lockdep_assert_held(&set->tag_list_lock);
@@ -5061,11 +5068,12 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 
        memflags = memalloc_noio_save();
 
-       xa_init(&et_tbl);
-       if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0)
-               goto out_memalloc_restore;
-
        xa_init(&elv_tbl);
+       if (blk_mq_alloc_sched_ctx_batch(&elv_tbl, set) < 0)
+               goto out_free_ctx;
+
+       if (blk_mq_alloc_sched_tags_batch(&elv_tbl, set, nr_hw_queues) < 0)
+               goto out_free_ctx;
 
        list_for_each_entry(q, &set->tag_list, tag_set_list) {
                blk_mq_debugfs_unregister_hctxs(q);
@@ -5111,7 +5119,7 @@ switch_back:
                /* switch_back expects queue to be frozen */
                if (!queues_frozen)
                        blk_mq_freeze_queue_nomemsave(q);
-               blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+               blk_mq_elv_switch_back(q, &elv_tbl);
        }
 
        list_for_each_entry(q, &set->tag_list, tag_set_list) {
@@ -5122,9 +5130,9 @@ switch_back:
                blk_mq_add_hw_queues_cpuhp(q);
        }
 
+out_free_ctx:
+       blk_mq_free_sched_ctx_batch(&elv_tbl);
        xa_destroy(&elv_tbl);
-       xa_destroy(&et_tbl);
-out_memalloc_restore:
        memalloc_noio_restore(memflags);
 
        /* Free the excess tags when nr_hw_queues shrink. */
diff --git a/block/blk.h b/block/blk.h
index 4d809588b771d5c66d4b9d2be26453943f308c77..e4c433f62dfc7acb94d6a1b3e6234ce6e1f24837 100644 (file)
@@ -11,8 +11,7 @@
 #include <xen/xen.h>
 #include "blk-crypto-internal.h"
 
-struct elevator_type;
-struct elevator_tags;
+struct elv_change_ctx;
 
 /*
  * Default upper limit for the software max_sectors limit used for regular I/Os.
@@ -333,8 +332,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 
 bool blk_insert_flush(struct request *rq);
 
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
-               struct elevator_tags *t);
+void elv_update_nr_hw_queues(struct request_queue *q,
+               struct elv_change_ctx *ctx);
 void elevator_set_default(struct request_queue *q);
 void elevator_set_none(struct request_queue *q);
 
diff --git a/block/elevator.c b/block/elevator.c
index e2ebfbf107b3af9f180143a0f0d3eb68bde2e5c1..cd7bdff205c8c21c9aa48f017ab1532648de4de4 100644 (file)
 #include "blk-wbt.h"
 #include "blk-cgroup.h"
 
-/* Holding context data for changing elevator */
-struct elv_change_ctx {
-       const char *name;
-       bool no_uevent;
-
-       /* for unregistering old elevator */
-       struct elevator_queue *old;
-       /* for registering new elevator */
-       struct elevator_queue *new;
-       /* holds sched tags data */
-       struct elevator_tags *et;
-};
-
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
@@ -706,32 +693,28 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
  * The I/O scheduler depends on the number of hardware queues, this forces a
  * reattachment when nr_hw_queues changes.
  */
-void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
-               struct elevator_tags *t)
+void elv_update_nr_hw_queues(struct request_queue *q,
+               struct elv_change_ctx *ctx)
 {
        struct blk_mq_tag_set *set = q->tag_set;
-       struct elv_change_ctx ctx = {};
        int ret = -ENODEV;
 
        WARN_ON_ONCE(q->mq_freeze_depth == 0);
 
-       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
-               ctx.name = e->elevator_name;
-               ctx.et = t;
-
+       if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) {
                mutex_lock(&q->elevator_lock);
                /* force to reattach elevator after nr_hw_queue is updated */
-               ret = elevator_switch(q, &ctx);
+               ret = elevator_switch(q, ctx);
                mutex_unlock(&q->elevator_lock);
        }
        blk_mq_unfreeze_queue_nomemrestore(q);
        if (!ret)
-               WARN_ON_ONCE(elevator_change_done(q, &ctx));
+               WARN_ON_ONCE(elevator_change_done(q, ctx));
        /*
         * Free sched tags if it's allocated but we couldn't switch elevator.
         */
-       if (t && !ctx.new)
-               blk_mq_free_sched_tags(t, set);
+       if (ctx->et && !ctx->new)
+               blk_mq_free_sched_tags(ctx->et, set);
 }
 
 /*
diff --git a/block/elevator.h b/block/elevator.h
index c4d20155065e80415e7be53a411d821f1038ba26..bad43182361e5c6a86bf057a2da2aab6dc9ab33a 100644 (file)
@@ -32,6 +32,21 @@ struct elevator_tags {
        struct blk_mq_tags *tags[];
 };
 
+/* Holding context data for changing elevator */
+struct elv_change_ctx {
+       const char *name;
+       bool no_uevent;
+
+       /* for unregistering old elevator */
+       struct elevator_queue *old;
+       /* for registering new elevator */
+       struct elevator_queue *new;
+       /* store elevator type */
+       struct elevator_type *type;
+       /* holds sched tags data */
+       struct elevator_tags *et;
+};
+
 struct elevator_mq_ops {
        int (*init_sched)(struct request_queue *, struct elevator_queue *);
        void (*exit_sched)(struct elevator_queue *);