--- /dev/null
+From fb915a24c50eaf0e85716810cb4b560d5df1ae80 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 17:52:49 +0200
+Subject: nvme-pci: do not set the NUMA node of device if it has none
+
+From: Pratyush Yadav <ptyadav@amazon.de>
+
+[ Upstream commit dad651b2a44eb6b201738f810254279dca29d30d ]
+
+If a device has no NUMA node information associated with it, the driver
+puts the device in node first_memory_node (say node 0). Not having a
+NUMA node and being associated with node 0 are completely different
+things and it makes little sense to mix the two.
+
+Signed-off-by: Pratyush Yadav <ptyadav@amazon.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/pci.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 42e85b1bf6591..f28f50ea273a9 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -3115,9 +3115,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
+ struct nvme_dev *dev;
+ int ret = -ENOMEM;
+
+- if (node == NUMA_NO_NODE)
+- set_dev_node(&pdev->dev, first_memory_node);
+-
+ dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
+ if (!dev)
+ return NULL;
+--
+2.40.1
+
--- /dev/null
+From d95b0d4d08c9a42d8eceb7eef012804d1c3ab235 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Nov 2022 09:44:00 +0100
+Subject: nvme-pci: factor out a nvme_pci_alloc_dev helper
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 2e87570be9d2746e7c4e7ab1cc18fd3ca7de2768 ]
+
+Add a helper that allocates the nvme_dev structure up to the point where
+we can call nvme_init_ctrl. This pairs with the free_ctrl method and can
+thus be used to clean up the teardown path and make it more symmetric.
+
+Note that this now calls nvme_init_ctrl a lot earlier during probing,
+which also means the per-controller character device shows up earlier.
+Due to the controller state no commands can be sent on it, but it might
+make sense to delay the cdev registration until nvme_init_ctrl_finish.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Keith Busch <kbusch@kernel.org>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Tested-by: Gerd Bayer <gbayer@linux.ibm.com>
+Stable-dep-of: dad651b2a44e ("nvme-pci: do not set the NUMA node of device if it has none")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/pci.c | 81 +++++++++++++++++++++++------------------
+ 1 file changed, 46 insertions(+), 35 deletions(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 6ab532ca77223..42e85b1bf6591 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -2790,6 +2790,7 @@ static void nvme_free_tagset(struct nvme_dev *dev)
+ dev->ctrl.tagset = NULL;
+ }
+
++/* pairs with nvme_pci_alloc_dev */
+ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
+ {
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+@@ -3106,19 +3107,23 @@ static void nvme_async_probe(void *data, async_cookie_t cookie)
+ nvme_put_ctrl(&dev->ctrl);
+ }
+
+-static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
++static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
++ const struct pci_device_id *id)
+ {
+- int node, result = -ENOMEM;
+- struct nvme_dev *dev;
+ unsigned long quirks = id->driver_data;
++ int node = dev_to_node(&pdev->dev);
++ struct nvme_dev *dev;
++ int ret = -ENOMEM;
+
+- node = dev_to_node(&pdev->dev);
+ if (node == NUMA_NO_NODE)
+ set_dev_node(&pdev->dev, first_memory_node);
+
+ dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
+ if (!dev)
+- return -ENOMEM;
++ return NULL;
++ INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
++ INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
++ mutex_init(&dev->shutdown_lock);
+
+ dev->nr_write_queues = write_queues;
+ dev->nr_poll_queues = poll_queues;
+@@ -3126,25 +3131,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ dev->queues = kcalloc_node(dev->nr_allocated_queues,
+ sizeof(struct nvme_queue), GFP_KERNEL, node);
+ if (!dev->queues)
+- goto free;
++ goto out_free_dev;
+
+ dev->dev = get_device(&pdev->dev);
+- pci_set_drvdata(pdev, dev);
+-
+- result = nvme_dev_map(dev);
+- if (result)
+- goto put_pci;
+-
+- INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
+- INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+- mutex_init(&dev->shutdown_lock);
+-
+- result = nvme_setup_prp_pools(dev);
+- if (result)
+- goto unmap;
+
+ quirks |= check_vendor_combination_bug(pdev);
+-
+ if (!noacpi && acpi_storage_d3(&pdev->dev)) {
+ /*
+ * Some systems use a bios work around to ask for D3 on
+@@ -3154,34 +3145,54 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ "platform quirk: setting simple suspend\n");
+ quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
+ }
++ ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
++ quirks);
++ if (ret)
++ goto out_put_device;
++ return dev;
+
+- result = nvme_pci_alloc_iod_mempool(dev);
++out_put_device:
++ put_device(dev->dev);
++ kfree(dev->queues);
++out_free_dev:
++ kfree(dev);
++ return ERR_PTR(ret);
++}
++
++static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
++{
++ struct nvme_dev *dev;
++ int result = -ENOMEM;
++
++ dev = nvme_pci_alloc_dev(pdev, id);
++ if (!dev)
++ return -ENOMEM;
++
++ result = nvme_dev_map(dev);
+ if (result)
+- goto release_pools;
++ goto out_uninit_ctrl;
+
+- result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+- quirks);
++ result = nvme_setup_prp_pools(dev);
++ if (result)
++ goto out_dev_unmap;
++
++ result = nvme_pci_alloc_iod_mempool(dev);
+ if (result)
+- goto release_mempool;
++ goto out_release_prp_pools;
+
+ dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
++ pci_set_drvdata(pdev, dev);
+
+ nvme_reset_ctrl(&dev->ctrl);
+ async_schedule(nvme_async_probe, dev);
+-
+ return 0;
+
+- release_mempool:
+- mempool_destroy(dev->iod_mempool);
+- release_pools:
++out_release_prp_pools:
+ nvme_release_prp_pools(dev);
+- unmap:
++out_dev_unmap:
+ nvme_dev_unmap(dev);
+- put_pci:
+- put_device(dev->dev);
+- free:
+- kfree(dev->queues);
+- kfree(dev);
++out_uninit_ctrl:
++ nvme_uninit_ctrl(&dev->ctrl);
+ return result;
+ }
+
+--
+2.40.1
+
--- /dev/null
+From ead4eb60935f74e93bc2188852d29af0664ef6ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Nov 2022 09:41:41 +0100
+Subject: nvme-pci: factor the iod mempool creation into a helper
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 081a7d958ce4b65f9aab6e70e65b0b2e0b92297c ]
+
+Add a helper to create the iod mempool.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Keith Busch <kbusch@kernel.org>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Tested-by: Gerd Bayer <gbayer@linux.ibm.com>
+Stable-dep-of: dad651b2a44e ("nvme-pci: do not set the NUMA node of device if it has none")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/pci.c | 41 ++++++++++++++++++-----------------------
+ 1 file changed, 18 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index b30269f5e68fb..6ab532ca77223 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -392,14 +392,6 @@ static int nvme_pci_npages_sgl(void)
+ NVME_CTRL_PAGE_SIZE);
+ }
+
+-static size_t nvme_pci_iod_alloc_size(void)
+-{
+- size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
+-
+- return sizeof(__le64 *) * npages +
+- sizeof(struct scatterlist) * NVME_MAX_SEGS;
+-}
+-
+ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int hctx_idx)
+ {
+@@ -2775,6 +2767,22 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
+ dma_pool_destroy(dev->prp_small_pool);
+ }
+
++static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
++{
++ size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
++ size_t alloc_size = sizeof(__le64 *) * npages +
++ sizeof(struct scatterlist) * NVME_MAX_SEGS;
++
++ WARN_ON_ONCE(alloc_size > PAGE_SIZE);
++ dev->iod_mempool = mempool_create_node(1,
++ mempool_kmalloc, mempool_kfree,
++ (void *)alloc_size, GFP_KERNEL,
++ dev_to_node(dev->dev));
++ if (!dev->iod_mempool)
++ return -ENOMEM;
++ return 0;
++}
++
+ static void nvme_free_tagset(struct nvme_dev *dev)
+ {
+ if (dev->tagset.tags)
+@@ -3103,7 +3111,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ int node, result = -ENOMEM;
+ struct nvme_dev *dev;
+ unsigned long quirks = id->driver_data;
+- size_t alloc_size;
+
+ node = dev_to_node(&pdev->dev);
+ if (node == NUMA_NO_NODE)
+@@ -3148,21 +3155,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
+ }
+
+- /*
+- * Double check that our mempool alloc size will cover the biggest
+- * command we support.
+- */
+- alloc_size = nvme_pci_iod_alloc_size();
+- WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+-
+- dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+- mempool_kfree,
+- (void *) alloc_size,
+- GFP_KERNEL, node);
+- if (!dev->iod_mempool) {
+- result = -ENOMEM;
++ result = nvme_pci_alloc_iod_mempool(dev);
++ if (result)
+ goto release_pools;
+- }
+
+ result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+ quirks);
+--
+2.40.1
+