git.ipfire.org Git - thirdparty/linux.git/commitdiff
nvmet: support completion queue sharing
author: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Thu, 24 Apr 2025 05:13:52 +0000 (15:13 +1000)
committer: Christoph Hellwig <hch@lst.de>
Tue, 20 May 2025 03:34:26 +0000 (05:34 +0200)
The NVMe PCI transport specification allows for completion queues to be
shared by different submission queues.

This patch allows a submission queue to keep track of the completion queue
it is using with reference counting. As such, it can be ensured that a
completion queue is not deleted while a submission queue is actively
using it.

This patch enables completion queue sharing in the pci-epf target driver.
For fabrics drivers, completion queue sharing is not enabled because the
fabrics specification does not permit it. However, this patch modifies
the fabrics drivers to correctly integrate the new API that supports
completion queue sharing.

Signed-off-by: Wilfred Mallawa <wilfred.mallawa@wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/fc.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/pci-epf.c
drivers/nvme/target/rdma.c
drivers/nvme/target/tcp.c

index 5e3699973d56e63439f61fce0a1738215b1beb1b..c7317299078d3a173aed19b28b37570b5575c6ac 100644 (file)
@@ -63,14 +63,9 @@ static void nvmet_execute_create_sq(struct nvmet_req *req)
        if (status != NVME_SC_SUCCESS)
                goto complete;
 
-       /*
-        * Note: The NVMe specification allows multiple SQs to use the same CQ.
-        * However, the target code does not really support that. So for now,
-        * prevent this and fail the command if sqid and cqid are different.
-        */
-       if (!cqid || cqid != sqid) {
-               pr_err("SQ %u: Unsupported CQID %u\n", sqid, cqid);
-               status = NVME_SC_CQ_INVALID | NVME_STATUS_DNR;
+       status = nvmet_check_io_cqid(ctrl, cqid, false);
+       if (status != NVME_SC_SUCCESS) {
+               pr_err("SQ %u: Invalid CQID %u\n", sqid, cqid);
                goto complete;
        }
 
@@ -79,7 +74,7 @@ static void nvmet_execute_create_sq(struct nvmet_req *req)
                goto complete;
        }
 
-       status = ctrl->ops->create_sq(ctrl, sqid, sq_flags, qsize, prp1);
+       status = ctrl->ops->create_sq(ctrl, sqid, cqid, sq_flags, qsize, prp1);
 
 complete:
        nvmet_req_complete(req, status);
@@ -100,6 +95,12 @@ static void nvmet_execute_delete_cq(struct nvmet_req *req)
        if (status != NVME_SC_SUCCESS)
                goto complete;
 
+       if (!ctrl->cqs[cqid] || nvmet_cq_in_use(ctrl->cqs[cqid])) {
+               /* Some SQs are still using this CQ */
+               status = NVME_SC_QID_INVALID | NVME_STATUS_DNR;
+               goto complete;
+       }
+
        status = ctrl->ops->delete_cq(ctrl, cqid);
 
 complete:
index bfaba79f971d2356b46da1c74e2917661182bfc5..2b02b2f939a57e2072629502eab5f1d061298605 100644 (file)
@@ -905,6 +905,11 @@ u16 nvmet_cq_create(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
        if (status != NVME_SC_SUCCESS)
                return status;
 
+       if (!kref_get_unless_zero(&ctrl->ref))
+               return NVME_SC_INTERNAL | NVME_STATUS_DNR;
+       cq->ctrl = ctrl;
+
+       nvmet_cq_init(cq);
        nvmet_cq_setup(ctrl, cq, qid, size);
 
        return NVME_SC_SUCCESS;
@@ -928,7 +933,7 @@ u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid,
 }
 
 u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
-                   u16 sqid, u16 size)
+                   struct nvmet_cq *cq, u16 sqid, u16 size)
 {
        u16 status;
        int ret;
@@ -940,7 +945,7 @@ u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
        if (status != NVME_SC_SUCCESS)
                return status;
 
-       ret = nvmet_sq_init(sq);
+       ret = nvmet_sq_init(sq, cq);
        if (ret) {
                status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
                goto ctrl_put;
@@ -972,6 +977,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
        wait_for_completion(&sq->free_done);
        percpu_ref_exit(&sq->ref);
        nvmet_auth_sq_free(sq);
+       nvmet_cq_put(sq->cq);
 
        /*
         * we must reference the ctrl again after waiting for inflight IO
@@ -1004,18 +1010,23 @@ static void nvmet_sq_free(struct percpu_ref *ref)
        complete(&sq->free_done);
 }
 
-int nvmet_sq_init(struct nvmet_sq *sq)
+int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq)
 {
        int ret;
 
+       if (!nvmet_cq_get(cq))
+               return -EINVAL;
+
        ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
        if (ret) {
                pr_err("percpu_ref init failed!\n");
+               nvmet_cq_put(cq);
                return ret;
        }
        init_completion(&sq->free_done);
        init_completion(&sq->confirm_done);
        nvmet_auth_sq_init(sq);
+       sq->cq = cq;
 
        return 0;
 }
index 7c2a4e2eb315158d5395674613af9c9daeafc589..2e813e51549c8d7c306e88b0bd875d53f485a218 100644 (file)
@@ -817,7 +817,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
        nvmet_fc_prep_fcp_iodlist(assoc->tgtport, queue);
 
        nvmet_cq_init(&queue->nvme_cq);
-       ret = nvmet_sq_init(&queue->nvme_sq);
+       ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
        if (ret)
                goto out_fail_iodlist;
 
index bbb3699c8686fb3d6f7db047544f93531624a125..c9ec13eff879df8a2c99eefd111dc393e1f6e4eb 100644 (file)
@@ -332,7 +332,8 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
        for (i = 1; i <= nr_io_queues; i++) {
                ctrl->queues[i].ctrl = ctrl;
                nvmet_cq_init(&ctrl->queues[i].nvme_cq);
-               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
+               ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq,
+                               &ctrl->queues[i].nvme_cq);
                if (ret) {
                        nvmet_cq_put(&ctrl->queues[i].nvme_cq);
                        goto out_destroy_queues;
@@ -368,7 +369,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
        ctrl->queues[0].ctrl = ctrl;
        nvmet_cq_init(&ctrl->queues[0].nvme_cq);
-       error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
+       error = nvmet_sq_init(&ctrl->queues[0].nvme_sq,
+                       &ctrl->queues[0].nvme_cq);
        if (error) {
                nvmet_cq_put(&ctrl->queues[0].nvme_cq);
                return error;
index c87f6fb458e80447316ab02cfbd94c0f8055614c..d3795b09fcc43c3803988159e0dc29ceb5e2dfa3 100644 (file)
@@ -150,6 +150,7 @@ struct nvmet_cq {
 struct nvmet_sq {
        struct nvmet_ctrl       *ctrl;
        struct percpu_ref       ref;
+       struct nvmet_cq         *cq;
        u16                     qid;
        u16                     size;
        u32                     sqhd;
@@ -427,7 +428,7 @@ struct nvmet_fabrics_ops {
        u16 (*get_max_queue_size)(const struct nvmet_ctrl *ctrl);
 
        /* Operations mandatory for PCI target controllers */
-       u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 flags,
+       u16 (*create_sq)(struct nvmet_ctrl *ctrl, u16 sqid, u16 cqid, u16 flags,
                         u16 qsize, u64 prp1);
        u16 (*delete_sq)(struct nvmet_ctrl *ctrl, u16 sqid);
        u16 (*create_cq)(struct nvmet_ctrl *ctrl, u16 cqid, u16 flags,
@@ -588,10 +589,10 @@ bool nvmet_cq_in_use(struct nvmet_cq *cq);
 u16 nvmet_check_sqid(struct nvmet_ctrl *ctrl, u16 sqid, bool create);
 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
                u16 size);
-u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
-               u16 size);
+u16 nvmet_sq_create(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
+       struct nvmet_cq *cq, u16 qid, u16 size);
 void nvmet_sq_destroy(struct nvmet_sq *sq);
-int nvmet_sq_init(struct nvmet_sq *sq);
+int nvmet_sq_init(struct nvmet_sq *sq, struct nvmet_cq *cq);
 
 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
 
index 7dda4156d86c42b3edaaefe3ec3eee628d3dfcd6..ebe2fc355fbd7b31bd6a59cadf5cfd8bdfdd63cf 100644 (file)
@@ -1346,16 +1346,17 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
        nvmet_pci_epf_drain_queue(cq);
        nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
        nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
-       tctrl->cqs[cqid] = NULL;
+       nvmet_cq_put(&cq->nvme_cq);
 
        return NVME_SC_SUCCESS;
 }
 
 static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
-               u16 sqid, u16 flags, u16 qsize, u64 pci_addr)
+               u16 sqid, u16 cqid, u16 flags, u16 qsize, u64 pci_addr)
 {
        struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
        struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid];
+       struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
        u16 status;
 
        if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
@@ -1378,7 +1379,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
                sq->qes = ctrl->io_sqes;
        sq->pci_size = sq->qes * sq->depth;
 
-       status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth);
+       status = nvmet_sq_create(tctrl, &sq->nvme_sq, &cq->nvme_cq, sqid,
+                       sq->depth);
        if (status != NVME_SC_SUCCESS)
                return status;
 
@@ -1873,8 +1875,8 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
 
        qsize = aqa & 0x00000fff;
        pci_addr = asq & GENMASK_ULL(63, 12);
-       status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, NVME_QUEUE_PHYS_CONTIG,
-                                        qsize, pci_addr);
+       status = nvmet_pci_epf_create_sq(ctrl->tctrl, 0, 0,
+                       NVME_QUEUE_PHYS_CONTIG, qsize, pci_addr);
        if (status != NVME_SC_SUCCESS) {
                dev_err(ctrl->dev, "Failed to create admin submission queue\n");
                nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
index 3ad9b4d1fad215c9d8a3057f7b90ea00cd1e6844..2e5c32298818463c3d03a858071a550c1c2b171f 100644 (file)
@@ -1438,7 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
        }
 
        nvmet_cq_init(&queue->nvme_cq);
-       ret = nvmet_sq_init(&queue->nvme_sq);
+       ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
        if (ret) {
                ret = NVME_RDMA_CM_NO_RSC;
                goto out_free_queue;
index 4dacb6b40fd120e1657737af135aea3a661000ce..5811246dbe426ac50e2d1d5e847b98de4b33afd2 100644 (file)
@@ -1912,7 +1912,7 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
                goto out_ida_remove;
 
        nvmet_cq_init(&queue->nvme_cq);
-       ret = nvmet_sq_init(&queue->nvme_sq);
+       ret = nvmet_sq_init(&queue->nvme_sq, &queue->nvme_cq);
        if (ret)
                goto out_free_connect;