#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/nodemask.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/suspend.h>
static void nvme_delete_io_queues(struct nvme_dev *dev);
static void nvme_update_attrs(struct nvme_dev *dev);
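+/*
+ * PRP list DMA pools, created per NUMA node: "large" serves full
+ * NVME_CTRL_PAGE_SIZE PRP list pages, "small" serves the 256-byte lists
+ * used for shorter transfers.
+ */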
+struct nvme_prp_dma_pools {
+ struct dma_pool *large;
+ struct dma_pool *small;
+};
+
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
struct device *dev;
- struct dma_pool *prp_page_pool;
- struct dma_pool *prp_small_pool;
unsigned online_queues;
unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES];
unsigned int nr_allocated_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
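+ /* one set of PRP pools per NUMA node, nr_node_ids entries */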
+ struct nvme_prp_dma_pools prp_pools[];
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
*/
struct nvme_queue {
struct nvme_dev *dev;
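+ /* copy of the pool pointers for this queue's NUMA node */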
+ struct nvme_prp_dma_pools prp_pools;
spinlock_t sq_lock;
void *sq_cmds;
/* only used for poll queues: */
return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
}
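+/*
+ * Lazily create the PRP list pools for a NUMA node the first time one of
+ * its hardware queues is initialized; later calls for the same node return
+ * the pools created earlier.
+ */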
+static struct nvme_prp_dma_pools *
+nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
+{
+ struct nvme_prp_dma_pools *prp_pools = &dev->prp_pools[numa_node];
+ size_t small_align = 256;
+
+ if (prp_pools->small)
+ return prp_pools; /* already initialized */
+
+ prp_pools->large = dma_pool_create_node("prp list page", dev->dev,
+ NVME_CTRL_PAGE_SIZE,
+ NVME_CTRL_PAGE_SIZE, 0,
+ numa_node);
+ if (!prp_pools->large)
+ return ERR_PTR(-ENOMEM);
+
+ if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512)
+ small_align = 512;
+
+ /* Optimisation for I/Os between 4k and 128k */
+ prp_pools->small = dma_pool_create_node("prp list 256", dev->dev,
+ 256, small_align, 0, numa_node);
+ if (!prp_pools->small) {
+ dma_pool_destroy(prp_pools->large);
+ prp_pools->large = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return prp_pools;
+}
+
+static void nvme_release_prp_pools(struct nvme_dev *dev)
+{
+ unsigned i;
+
+ for (i = 0; i < nr_node_ids; i++) {
+ struct nvme_prp_dma_pools *prp_pools = &dev->prp_pools[i];
+
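+ /* dma_pool_destroy() ignores NULL, so never-initialized nodes are fine */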
+ dma_pool_destroy(prp_pools->large);
+ dma_pool_destroy(prp_pools->small);
+ }
+}
+
static int nvme_init_hctx_common(struct blk_mq_hw_ctx *hctx, void *data,
unsigned qid)
{
struct nvme_dev *dev = to_nvme_dev(data);
struct nvme_queue *nvmeq = &dev->queues[qid];
+ struct nvme_prp_dma_pools *prp_pools;
struct blk_mq_tags *tags;
tags = qid ? dev->tagset.tags[qid - 1] : dev->admin_tagset.tags[0];
WARN_ON(tags != hctx->tags);
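+ /*
+ * Set up (or reuse) the PRP pools for this hctx's NUMA node and keep a
+ * copy of the pool pointers in the queue.
+ */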
+ prp_pools = nvme_setup_prp_pools(dev, hctx->numa_node);
+ if (IS_ERR(prp_pools))
+ return PTR_ERR(prp_pools);
+
+ nvmeq->prp_pools = *prp_pools;
hctx->driver_data = nvmeq;
return 0;
}
return true;
}
-static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
+static void nvme_free_prps(struct nvme_queue *nvmeq, struct request *req)
{
const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
__le64 *prp_list = iod->list[i].prp_list;
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
- dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
+ dma_pool_free(nvmeq->prp_pools.large, prp_list, dma_addr);
dma_addr = next_dma_addr;
}
}
-static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_queue *nvmeq,
+ struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);
if (iod->nr_allocations == 0)
- dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list,
+ dma_pool_free(nvmeq->prp_pools.small, iod->list[0].sg_list,
iod->first_dma);
else if (iod->nr_allocations == 1)
- dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list,
+ dma_pool_free(nvmeq->prp_pools.large, iod->list[0].sg_list,
iod->first_dma);
else
- nvme_free_prps(dev, req);
+ nvme_free_prps(nvmeq, req);
mempool_free(iod->sgt.sgl, dev->iod_mempool);
}
}
}
-static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
+static blk_status_t nvme_pci_setup_prps(struct nvme_queue *nvmeq,
struct request *req, struct nvme_rw_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
if (nprps <= (256 / 8)) {
- pool = dev->prp_small_pool;
+ pool = nvmeq->prp_pools.small;
iod->nr_allocations = 0;
} else {
- pool = dev->prp_page_pool;
+ pool = nvmeq->prp_pools.large;
iod->nr_allocations = 1;
}
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
return BLK_STS_OK;
free_prps:
- nvme_free_prps(dev, req);
+ nvme_free_prps(nvmeq, req);
return BLK_STS_RESOURCE;
bad_sgl:
WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents),
sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
}
-static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
+static blk_status_t nvme_pci_setup_sgls(struct nvme_queue *nvmeq,
struct request *req, struct nvme_rw_command *cmd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
}
if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
- pool = dev->prp_small_pool;
+ pool = nvmeq->prp_pools.small;
iod->nr_allocations = 0;
} else {
- pool = dev->prp_page_pool;
+ pool = nvmeq->prp_pools.large;
iod->nr_allocations = 1;
}
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
blk_status_t ret = BLK_STS_RESOURCE;
int rc;
if (blk_rq_nr_phys_segments(req) == 1) {
- struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct bio_vec bv = req_bvec(req);
if (!is_pci_p2pdma_page(bv.bv_page)) {
}
if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
- ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
+ ret = nvme_pci_setup_sgls(nvmeq, req, &cmnd->rw);
else
- ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
+ ret = nvme_pci_setup_prps(nvmeq, req, &cmnd->rw);
if (ret != BLK_STS_OK)
goto out_unmap_sg;
return BLK_STS_OK;
static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev,
struct request *req)
{
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_rw_command *cmnd = &iod->cmd.rw;
struct nvme_sgl_desc *sg_list;
if (rc)
goto out_free_sg;
- sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma);
+ sg_list = dma_pool_alloc(nvmeq->prp_pools.small, GFP_ATOMIC, &sgl_dma);
if (!sg_list)
goto out_unmap_sg;
return BLK_STS_OK;
out_unmap_data:
if (blk_rq_nr_phys_segments(req))
- nvme_unmap_data(dev, req);
+ nvme_unmap_data(dev, req->mq_hctx->driver_data, req);
out_free_cmd:
nvme_cleanup_cmd(req);
return ret;
}
static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev,
+ struct nvme_queue *nvmeq,
struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
return;
}
- dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list,
+ dma_pool_free(nvmeq->prp_pools.small, iod->meta_list.sg_list,
iod->meta_dma);
dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
struct nvme_dev *dev = nvmeq->dev;
if (blk_integrity_rq(req))
- nvme_unmap_metadata(dev, req);
+ nvme_unmap_metadata(dev, nvmeq, req);
if (blk_rq_nr_phys_segments(req))
- nvme_unmap_data(dev, req);
+ nvme_unmap_data(dev, nvmeq, req);
}
static void nvme_pci_complete_rq(struct request *req)
return 0;
}
-static int nvme_setup_prp_pools(struct nvme_dev *dev)
-{
- size_t small_align = 256;
-
- dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
- NVME_CTRL_PAGE_SIZE,
- NVME_CTRL_PAGE_SIZE, 0);
- if (!dev->prp_page_pool)
- return -ENOMEM;
-
- if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512)
- small_align = 512;
-
- /* Optimisation for I/Os between 4k and 128k */
- dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
- 256, small_align, 0);
- if (!dev->prp_small_pool) {
- dma_pool_destroy(dev->prp_page_pool);
- return -ENOMEM;
- }
- return 0;
-}
-
-static void nvme_release_prp_pools(struct nvme_dev *dev)
-{
- dma_pool_destroy(dev->prp_page_pool);
- dma_pool_destroy(dev->prp_small_pool);
-}
-
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
struct nvme_dev *dev;
int ret = -ENOMEM;
- dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
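+ /* allocate the trailing prp_pools[] array, one set per possible node */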
+ dev = kzalloc_node(struct_size(dev, prp_pools, nr_node_ids),
+ GFP_KERNEL, node);
if (!dev)
return ERR_PTR(-ENOMEM);
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
if (result)
goto out_uninit_ctrl;
- result = nvme_setup_prp_pools(dev);
- if (result)
- goto out_dev_unmap;
-
result = nvme_pci_alloc_iod_mempool(dev);
if (result)
- goto out_release_prp_pools;
+ goto out_dev_unmap;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
out_release_iod_mempool:
mempool_destroy(dev->iod_mempool);
mempool_destroy(dev->iod_meta_mempool);
-out_release_prp_pools:
- nvme_release_prp_pools(dev);
out_dev_unmap:
nvme_dev_unmap(dev);
out_uninit_ctrl: