git.ipfire.org Git - thirdparty/linux.git/commitdiff
nvme-pci: try function level reset on init failure
author Keith Busch <kbusch@kernel.org>
Tue, 15 Jul 2025 19:16:27 +0000 (12:16 -0700)
committer Christoph Hellwig <hch@lst.de>
Thu, 17 Jul 2025 15:46:33 +0000 (17:46 +0200)
NVMe devices from multiple vendors appear to get stuck in a reset state
that we can't get out of with an NVMe level Controller Reset. The kernel
would report these with messages that look like:

  Device not ready; aborting reset, CSTS=0x1

These have historically required a power cycle to make them usable
again, but in many cases, a PCIe FLR is sufficient to restart operation
without a power cycle. Try it if the initial controller reset fails
during any nvme reset attempt.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/pci.c

index 16bf4cb2d063fdb011ebc7808bf7bd73fa0679c9..73d5a5298822a51c0e5e6215ad702797377c6596 100644 (file)
@@ -2064,8 +2064,28 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
         * might be pointing at!
         */
        result = nvme_disable_ctrl(&dev->ctrl, false);
-       if (result < 0)
-               return result;
+       if (result < 0) {
+               struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+               /*
+                * The NVMe Controller Reset method did not get an expected
+                * CSTS.RDY transition, so something with the device appears to
+                * be stuck. Use the lower level and bigger hammer PCIe
+                * Function Level Reset to attempt restoring the device to its
+                * initial state, and try again.
+                */
+               result = pcie_reset_flr(pdev, false);
+               if (result < 0)
+                       return result;
+
+               pci_restore_state(pdev);
+               result = nvme_disable_ctrl(&dev->ctrl, false);
+               if (result < 0)
+                       return result;
+
+               dev_info(dev->ctrl.device,
+                       "controller reset completed after pcie flr\n");
+       }
 
        result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
        if (result)