nvme: remove virtual boundary for sgl capable devices
author    Keith Busch <kbusch@kernel.org>
          Tue, 14 Oct 2025 15:04:56 +0000 (08:04 -0700)
committer Jens Axboe <axboe@kernel.dk>
          Fri, 7 Nov 2025 01:11:58 +0000 (18:11 -0700)
The nvme virtual boundary is only required for the PRP format. Devices
that can use SGL for DMA don't need it for IO queues, so stop reporting
it for such devices. The rdma fabrics controllers will continue to use
the limit because they currently don't report any boundary requirements
of their own, while tcp and fc never needed it in the first place, so
they now report no virtual boundary.
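
For context, this is roughly the merge rule that a non-zero
virt_boundary_mask imposes in the block layer: two adjacent segments
may only share a request if neither side leaves an unaligned gap. A
minimal standalone sketch, not kernel code, with illustrative names:

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * True if merging would leave a gap within the boundary mask: the
     * previous segment must end on the boundary and the next one must
     * start on it. A mask of 0 never reports a gap.
     */
    static bool gap_within_mask(unsigned long prev_end,
                                unsigned long next_start,
                                unsigned long mask)
    {
            return ((prev_end | next_start) & mask) != 0;
    }

    int main(void)
    {
            unsigned long prp_mask = 4096 - 1; /* PRP-style 4 KiB boundary */

            /* Ends and starts mid-page: a PRP-only device must split. */
            printf("%d\n", gap_within_mask(0x1800, 0x3200, prp_mask));
            /* An SGL-capable device now reports mask == 0: no split. */
            printf("%d\n", gap_within_mask(0x1800, 0x3200, 0UL));
            return 0;
    }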

Applications may continue to align their buffers to the same virtual
boundaries for optimization purposes if they wish, and the driver will
continue to decide whether to use the PRP format the same way as
before when the IO allows it.
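
For example, a hypothetical userspace snippet that keeps its buffers
aligned to the old 4 KiB boundary so the driver can still choose the
cheaper PRP format; the size constant and helper name are illustrative
and not part of this patch:

    #define _POSIX_C_SOURCE 200112L
    #include <stdlib.h>
    #include <string.h>

    #define NVME_PAGE_SIZE 4096 /* assumed controller page size */

    /* Allocate a zeroed IO buffer aligned to the controller page size. */
    static void *alloc_io_buffer(size_t len)
    {
            void *buf;

            if (posix_memalign(&buf, NVME_PAGE_SIZE, len))
                    return NULL;
            memset(buf, 0, len);
            return buf;
    }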

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/nvme/host/apple.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/loop.c

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index f35d3f71d14f32cf7ea8de00c30991fc153ff383..15b3d07f8ccdd023cd3be75eedd349b747c1ecad 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1283,6 +1283,7 @@ static const struct nvme_ctrl_ops nvme_ctrl_ops = {
        .reg_read64 = apple_nvme_reg_read64,
        .free_ctrl = apple_nvme_free_ctrl,
        .get_address = apple_nvme_get_address,
+       .get_virt_boundary = nvme_get_virt_boundary,
 };
 
 static void apple_nvme_async_probe(void *data, async_cookie_t cookie)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c0fe50fb7b08c6680f98f5ee77b69c971a88b7f5..4da937f96ae2edb7927ec68472fbc14d08337d8c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2069,13 +2069,13 @@ static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
 }
 
 static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
-               struct queue_limits *lim)
+               struct queue_limits *lim, bool is_admin)
 {
        lim->max_hw_sectors = ctrl->max_hw_sectors;
        lim->max_segments = min_t(u32, USHRT_MAX,
                min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments));
        lim->max_integrity_segments = ctrl->max_integrity_segments;
-       lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1;
+       lim->virt_boundary_mask = ctrl->ops->get_virt_boundary(ctrl, is_admin);
        lim->max_segment_size = UINT_MAX;
        lim->dma_alignment = 3;
 }
@@ -2177,7 +2177,7 @@ static int nvme_update_ns_info_generic(struct nvme_ns *ns,
        int ret;
 
        lim = queue_limits_start_update(ns->disk->queue);
-       nvme_set_ctrl_limits(ns->ctrl, &lim);
+       nvme_set_ctrl_limits(ns->ctrl, &lim, false);
 
        memflags = blk_mq_freeze_queue(ns->disk->queue);
        ret = queue_limits_commit_update(ns->disk->queue, &lim);
@@ -2381,7 +2381,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
        ns->head->lba_shift = id->lbaf[lbaf].ds;
        ns->head->nuse = le64_to_cpu(id->nuse);
        capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
-       nvme_set_ctrl_limits(ns->ctrl, &lim);
+       nvme_set_ctrl_limits(ns->ctrl, &lim, false);
        nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info);
        nvme_set_chunk_sectors(ns, id, &lim);
        if (!nvme_update_disk_info(ns, id, &lim))
@@ -3588,7 +3588,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
                min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
 
        lim = queue_limits_start_update(ctrl->admin_q);
-       nvme_set_ctrl_limits(ctrl, &lim);
+       nvme_set_ctrl_limits(ctrl, &lim, true);
        ret = queue_limits_commit_update(ctrl->admin_q, &lim);
        if (ret)
                goto out_free;
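
The boundary is no longer a flat constant: each transport's
get_virt_boundary callback answers per queue type. What the
implementations in this patch return, by transport:

    transport          admin queue               IO queues
    pci (SGL capable)  NVME_CTRL_PAGE_SIZE - 1   0
    pci (PRP only)     NVME_CTRL_PAGE_SIZE - 1   NVME_CTRL_PAGE_SIZE - 1
    apple, rdma, loop  NVME_CTRL_PAGE_SIZE - 1   NVME_CTRL_PAGE_SIZE - 1
    tcp, fc            0                         0
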
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 1b58ee7d0dcee420a1cf7d1a4b8e7e558b69ecae..caf5503d0833296aa03c028c0199cf92cc409edc 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -217,6 +217,12 @@ static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
                min(opts->nr_poll_queues, num_online_cpus());
 }
 
+static inline unsigned long nvmf_get_virt_boundary(struct nvme_ctrl *ctrl,
+                                                  bool is_admin)
+{
+       return 0;
+}
+
 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 03987f497a5b55533ee169c9a7cb9b479d0f2d92..70c066c2e2d42d779747aac3f3863804862f2954 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3360,6 +3360,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
        .submit_async_event     = nvme_fc_submit_async_event,
        .delete_ctrl            = nvme_fc_delete_ctrl,
        .get_address            = nvmf_get_address,
+       .get_virt_boundary      = nvmf_get_virt_boundary,
 };
 
 static void
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 928c748ccbd19629a7defc143cc9e7fb9f8f858c..9a5f28c5103c5c42777bd9309a983ef0196c1b95 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -558,6 +558,12 @@ static inline bool nvme_ns_has_pi(struct nvme_ns_head *head)
        return head->pi_type && head->ms == head->pi_size;
 }
 
+static inline unsigned long nvme_get_virt_boundary(struct nvme_ctrl *ctrl,
+                                                  bool is_admin)
+{
+       return NVME_CTRL_PAGE_SIZE - 1;
+}
+
 struct nvme_ctrl_ops {
        const char *name;
        struct module *module;
@@ -578,6 +584,7 @@ struct nvme_ctrl_ops {
        int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
        void (*print_device_info)(struct nvme_ctrl *ctrl);
        bool (*supports_pci_p2pdma)(struct nvme_ctrl *ctrl);
+       unsigned long (*get_virt_boundary)(struct nvme_ctrl *ctrl, bool is_admin);
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c916176bd9f058b49e6e6768675711df52b15765..3c1727df1e36f5020f001a3025bde6cca64b0ce8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -613,9 +613,22 @@ static inline enum nvme_use_sgl nvme_pci_use_sgls(struct nvme_dev *dev,
        struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 
        if (nvmeq->qid && nvme_ctrl_sgl_supported(&dev->ctrl)) {
-               if (nvme_req(req)->flags & NVME_REQ_USERCMD)
-                       return SGL_FORCED;
-               if (req->nr_integrity_segments > 1)
+               /*
+                * When the controller is capable of using SGL, there are
+                * several conditions under which we force its use:
+                *
+                * 1. A request containing page gaps within the controller's
+                *    mask cannot use the PRP format.
+                *
+                * 2. User commands use SGL because that lets the device
+                *    validate the requested transfer lengths.
+                *
+                * 3. Multiple integrity segments must use SGL as that's the
+                *    only way to describe such a command in NVMe.
+                */
+               if (req_phys_gap_mask(req) & (NVME_CTRL_PAGE_SIZE - 1) ||
+                   nvme_req(req)->flags & NVME_REQ_USERCMD ||
+                   req->nr_integrity_segments > 1)
                        return SGL_FORCED;
                return SGL_SUPPORTED;
        }
@@ -3243,6 +3256,14 @@ static bool nvme_pci_supports_pci_p2pdma(struct nvme_ctrl *ctrl)
        return dma_pci_p2pdma_supported(dev->dev);
 }
 
+static unsigned long nvme_pci_get_virt_boundary(struct nvme_ctrl *ctrl,
+                                               bool is_admin)
+{
+       if (!nvme_ctrl_sgl_supported(ctrl) || is_admin)
+               return NVME_CTRL_PAGE_SIZE - 1;
+       return 0;
+}
+
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
        .name                   = "pcie",
        .module                 = THIS_MODULE,
@@ -3257,6 +3278,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
        .get_address            = nvme_pci_get_address,
        .print_device_info      = nvme_pci_print_device_info,
        .supports_pci_p2pdma    = nvme_pci_supports_pci_p2pdma,
+       .get_virt_boundary      = nvme_pci_get_virt_boundary,
 };
 
 static int nvme_dev_map(struct nvme_dev *dev)
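
Taken together, the pci transport's SGL-versus-PRP choice after this
patch can be sketched as one standalone function; the types and helpers
below are stand-ins for the kernel's, simplified to make the flow
explicit:

    #include <stdbool.h>

    enum nvme_use_sgl { SGL_UNSUPPORTED, SGL_SUPPORTED, SGL_FORCED };

    /* Simplified shape of nvme_pci_use_sgls() above. */
    static enum nvme_use_sgl use_sgls(bool sgl_capable, bool io_queue,
                                      unsigned long phys_gap_mask,
                                      bool user_cmd, int integrity_segs)
    {
            if (!io_queue || !sgl_capable)
                    return SGL_UNSUPPORTED;     /* PRP is the only option */
            if ((phys_gap_mask & (4096 - 1)) || /* gaps break the PRP format */
                user_cmd ||                     /* device validates lengths */
                integrity_segs > 1)             /* PRP cannot describe this */
                    return SGL_FORCED;
            return SGL_SUPPORTED;
    }
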
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 190a4cfa8a5ee2e6b97a5a1b304c1338dcd748fa..35c0822edb2d756c98c7637a5f7c36b4ddb364ef 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2202,6 +2202,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
        .delete_ctrl            = nvme_rdma_delete_ctrl,
        .get_address            = nvmf_get_address,
        .stop_ctrl              = nvme_rdma_stop_ctrl,
+       .get_virt_boundary      = nvme_get_virt_boundary,
 };
 
 /*
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 9a96df1a511c021b4b322bf54dc489cc400f7961..29ad4735fac6bab97de2644404eea9f4a1987840 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2865,6 +2865,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
        .delete_ctrl            = nvme_tcp_delete_ctrl,
        .get_address            = nvme_tcp_get_address,
        .stop_ctrl              = nvme_tcp_stop_ctrl,
+       .get_virt_boundary      = nvmf_get_virt_boundary,
 };
 
 static bool
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index f85a8441bcc6ecdcfb17e9cf1e883d2817cce9fc..fc8e7c9ad8588803852d65c5f65b5e048a2d71e7 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -511,6 +511,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
        .submit_async_event     = nvme_loop_submit_async_event,
        .delete_ctrl            = nvme_loop_delete_ctrl_host,
        .get_address            = nvmf_get_address,
+       .get_virt_boundary      = nvme_get_virt_boundary,
 };
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)