blk-mq: add tracepoint block_rq_tag_wait

author Aaron Tomlin <atomlin@atomlin.com>

Mon, 25 May 2026 00:51:23 +0000 (20:51 -0400)

committer Jens Axboe <axboe@kernel.dk>

Tue, 26 May 2026 17:01:54 +0000 (11:01 -0600)
author Aaron Tomlin <atomlin@atomlin.com>
Mon, 25 May 2026 00:51:23 +0000 (20:51 -0400)
committer Jens Axboe <axboe@kernel.dk>
Tue, 26 May 2026 17:01:54 +0000 (11:01 -0600)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c

index 33946cdb571648a0ebaf0d8796af32c7759084f1..35deee5bbc739f0454ab7d579b9203f4b5705419 100644 (file)
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -13,6 +13,7 @@
  #include <linux/kmemleak.h>
  
  #include <linux/delay.h>
+#include <trace/events/block.h>
  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-sched.h"
@@ -181,6 +182,11 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                 if (tag != BLK_MQ_NO_TAG)
                         break;
  
+               /* Log the starvation event before altering task state */
+               trace_block_rq_tag_wait(data->q, data->hctx,
+                                       data->rq_flags & RQF_SCHED_TAGS,
+                                       data->flags);
+
                 sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
  
                 tag = __blk_mq_get_tag(data, bt);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h

index 6aa79e2d799c078ae931b523da12f45135efbeb9..9c97a16850b9221694dbee14b205f055003527ba 100644 (file)
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -226,6 +226,65 @@ DECLARE_EVENT_CLASS(block_rq,
                   IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm)
  );
  
+/**
+ * block_rq_tag_wait - triggered when a request is starved of a tag
+ * @q: request queue of the target device; dev is logged as 0,0 if no disk
+ * @hctx: hardware context whose tag pool is exhausted
+ * @is_sched_tag: true if the starved pool is hctx->sched_tags, else hctx->tags
+ * @alloc_flags: allocation flags; only BLK_MQ_REQ_RESERVED is examined here
+ *
+ * Called immediately before the submitting context is placed into an
+ * uninterruptible state via sbitmap_prepare_to_wait() because no tag was
+ * available. If a tag is not acquired in the final lockless retry, the
+ * context will yield the CPU via io_schedule() until an active request
+ * completes and relinquishes its assigned tag. The logged depth covers the
+ * starved pool only: the reserved tag count for reserved allocations,
+ * otherwise the pool's total tag count minus its reserved tags.
+ */
+TRACE_EVENT(block_rq_tag_wait,
+
+       TP_PROTO(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+                bool is_sched_tag, unsigned int alloc_flags),
+
+       TP_ARGS(q, hctx, is_sched_tag, alloc_flags),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( u32,           hctx_id                 )
+               __field( u32,           nr_tags                 )
+               __field( bool,          is_sched_tag            )
+               __field( bool,          is_reserved             )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = q->disk ? disk_devt(q->disk) : 0;
+               __entry->hctx_id        = hctx->queue_num;
+               __entry->is_sched_tag   = is_sched_tag;
+               __entry->is_reserved    = alloc_flags & BLK_MQ_REQ_RESERVED;
+
+               if (__entry->is_reserved) {
+                       __entry->nr_tags = is_sched_tag ?
+                                          hctx->sched_tags->nr_reserved_tags :
+                                          hctx->tags->nr_reserved_tags;
+               } else {
+                       if (is_sched_tag)
+                               __entry->nr_tags = hctx->sched_tags->nr_tags -
+                                                  hctx->sched_tags->nr_reserved_tags;
+                       else
+                               __entry->nr_tags = hctx->tags->nr_tags -
+                                                  hctx->tags->nr_reserved_tags;
+               }
+
+       ),
+
+       TP_printk("%d,%d hctx=%u starved on %s%s tags (depth=%u)",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->hctx_id,
+                 __entry->is_sched_tag ? "scheduler" : "hardware",
+                 __entry->is_reserved ? " reserved" : "",
+                 __entry->nr_tags)
+);
+
  /**
   * block_rq_insert - insert block operation request into queue
   * @rq: block IO operation request
author	Aaron Tomlin <atomlin@atomlin.com>
	Mon, 25 May 2026 00:51:23 +0000 (20:51 -0400)
committer	Jens Axboe <axboe@kernel.dk>
	Tue, 26 May 2026 17:01:54 +0000 (11:01 -0600)
block/blk-mq-tag.c		patch \| blob \| blame \| history
include/trace/events/block.h		patch \| blob \| blame \| history