nvme/io_uring: optimize IOPOLL completions for local ring context

author Ming Lei <ming.lei@redhat.com>

Fri, 16 Jan 2026 07:46:38 +0000 (15:46 +0800)

committer Jens Axboe <axboe@kernel.dk>

Tue, 20 Jan 2026 17:18:01 +0000 (10:18 -0700)
author Ming Lei <ming.lei@redhat.com>
Fri, 16 Jan 2026 07:46:38 +0000 (15:46 +0800)
committer Jens Axboe <axboe@kernel.dk>
Tue, 20 Jan 2026 17:18:01 +0000 (10:18 -0700)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c

index e45ac0ca174e05c6155f10b54fde407f16b6ce2a..fb62633ccbb0d0720f170993d51a471b3bfe315a 100644 (file)
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -426,14 +426,20 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
         pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
  
         /*
-        * IOPOLL could potentially complete this request directly, but
-        * if multiple rings are polling on the same queue, then it's possible
-        * for one ring to find completions for another ring. Punting the
-        * completion via task_work will always direct it to the right
-        * location, rather than potentially complete requests for ringA
-        * under iopoll invocations from ringB.
+        * For IOPOLL, check if this completion is happening in the context
+        * of the same io_ring that owns the request (local context). If so,
+        * we can complete inline without task_work overhead. Otherwise, we
+        * must punt to task_work to ensure completion happens in the correct
+        * ring's context.
          */
-       io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+       if (blk_rq_is_poll(req) && iob &&
+           iob->poll_ctx == io_uring_cmd_ctx_handle(ioucmd)) {
+               if (pdu->bio)
+                       blk_rq_unmap_user(pdu->bio);
+               io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, 0);
+       } else {
+               io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+       }
         return RQ_END_IO_FREE;
  }
  
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 438c4946b6e5431f615f016ccc734bf1d4056bcd..251e0f538c4c36655a65382c4a80cfc82d89f266 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1822,6 +1822,7 @@ struct io_comp_batch {
         struct rq_list req_list;
         bool need_ts;
         void (*complete)(struct io_comp_batch *);
+       void *poll_ctx;
  };
  
  static inline bool blk_atomic_write_start_sect_aligned(sector_t sector,
diff --git a/io_uring/rw.c b/io_uring/rw.c

index 70ca88cc1f547144e916c839849112cfde8137d5..ff3192f603f35675654a332643136c8981f78b67 100644 (file)
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1320,6 +1320,12 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
         DEFINE_IO_COMP_BATCH(iob);
         int nr_events = 0;
  
+       /*
+        * Store the polling io_ring_ctx so drivers can detect if they're
+        * completing a request in the same ring context that's polling.
+        */
+       iob.poll_ctx = ctx;
+
         /*
          * Only spin for completions if we don't have multiple devices hanging
          * off our complete list.
author	Ming Lei <ming.lei@redhat.com>
	Fri, 16 Jan 2026 07:46:38 +0000 (15:46 +0800)
committer	Jens Axboe <axboe@kernel.dk>
	Tue, 20 Jan 2026 17:18:01 +0000 (10:18 -0700)
drivers/nvme/host/ioctl.c		patch | blob | blame | history
include/linux/blkdev.h		patch | blob | blame | history
io_uring/rw.c		patch | blob | blame | history