]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
nvme-pci: do not directly handle subsys reset fallout
authorKeith Busch <kbusch@kernel.org>
Wed, 21 Jan 2026 02:56:58 +0000 (21:56 -0500)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 6 Feb 2026 15:44:20 +0000 (16:44 +0100)
[ Upstream commit 210b1f6576e8b367907e7ff51ef425062e1468e4 ]

Scheduling reset_work after an nvme subsystem reset is expected to fail
on pcie, but it also prevents any handling that the platform's pcie
services may provide, which might successfully recover the link without
re-enumeration. Examples of such services include AER, DPC, and power's EEH.

Provide a pci specific operation that safely initiates a subsystem
reset, and instead of scheduling reset work, read back the status
register to trigger a pcie read error.

Since this only affects pci, the other fabrics drivers subscribe to a
generic nvmf subsystem reset that is exactly the same as before. The
loop fabric doesn't use it because nvmet doesn't support setting that
property anyway.

And since we're using the magic NSSR value in two places now, provide a
symbolic define for it.

Reported-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Stable-dep-of: 0edb475ac0a7 ("nvme: fix PCIe subsystem reset controller state transition")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/nvme/host/fabrics.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
include/linux/nvme.h

index ce27276f552dada471ea5306bc43450a04484909..fe621028a082e1e1b7022208d897acf60b5d2fed 100644 (file)
@@ -253,6 +253,21 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 }
 EXPORT_SYMBOL_GPL(nvmf_reg_write32);
 
+/**
+ * nvmf_subsystem_reset() - Generic NVM subsystem reset for fabrics transports.
+ * @ctrl:      controller whose subsystem is to be reset
+ *
+ * Writes NVME_SUBSYS_RESET to the NSSR property through the transport's
+ * reg_write32 op and, when that write succeeds, hands off to
+ * nvme_try_sched_reset().
+ *
+ * Return: -EBUSY if nvme_wait_reset() fails, the reg_write32 error if the
+ * property write fails, otherwise the result of nvme_try_sched_reset().
+ */
+int nvmf_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+       int err;
+
+       if (!nvme_wait_reset(ctrl))
+               return -EBUSY;
+
+       /* Only schedule the follow-up reset once the NSSR write went through. */
+       err = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, NVME_SUBSYS_RESET);
+       return err ? err : nvme_try_sched_reset(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvmf_subsystem_reset);
+
 /**
  * nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
  *                             connect() errors.
index 60c238caf7a97e763c283d61d65c53b87cc0f8b7..be2388ecc91bead4658e8d352c20ef669cb66895 100644 (file)
@@ -199,6 +199,7 @@ static inline void nvmf_complete_timed_out_request(struct request *rq)
 int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
 int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
+int nvmf_subsystem_reset(struct nvme_ctrl *ctrl);
 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
 int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
 int nvmf_register_transport(struct nvmf_transport_ops *ops);
index 71e22617c28743b8f54ec7dc4f2adf38859d605c..dc84cade703db8946901efb23df4a76f87fcd928 100644 (file)
@@ -3362,6 +3362,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
        .reg_read32             = nvmf_reg_read32,
        .reg_read64             = nvmf_reg_read64,
        .reg_write32            = nvmf_reg_write32,
+       .subsystem_reset        = nvmf_subsystem_reset,
        .free_ctrl              = nvme_fc_free_ctrl,
        .submit_async_event     = nvme_fc_submit_async_event,
        .delete_ctrl            = nvme_fc_delete_ctrl,
index 0f49b779dec65a6cac5ece61f08ac619f20c464a..8c97777ba629c31d12213c095f0a04a2cf448365 100644 (file)
@@ -538,6 +538,7 @@ struct nvme_ctrl_ops {
        int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
        void (*free_ctrl)(struct nvme_ctrl *ctrl);
        void (*submit_async_event)(struct nvme_ctrl *ctrl);
+       int (*subsystem_reset)(struct nvme_ctrl *ctrl);
        void (*delete_ctrl)(struct nvme_ctrl *ctrl);
        void (*stop_ctrl)(struct nvme_ctrl *ctrl);
        int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
@@ -636,18 +637,9 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
 
 static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
 {
-       int ret;
-
-       if (!ctrl->subsystem)
+       if (!ctrl->subsystem || !ctrl->ops->subsystem_reset)
                return -ENOTTY;
-       if (!nvme_wait_reset(ctrl))
-               return -EBUSY;
-
-       ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
-       if (ret)
-               return ret;
-
-       return nvme_try_sched_reset(ctrl);
+       return ctrl->ops->subsystem_reset(ctrl);
 }
 
 /*
index e0efe882cd38e668ba9b6f4aa418981b572ebe18..0967aff8ba7c3d8900261ec5efefe85defb43b62 100644 (file)
@@ -1190,6 +1190,41 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
        spin_unlock(&nvmeq->sq_lock);
 }
 
+/*
+ * PCI-specific NVM subsystem reset.
+ *
+ * Writes NVME_SUBSYS_RESET to NSSR directly through the BAR and, instead of
+ * scheduling reset_work, reads CSTS back to flush the write and trigger a
+ * pcie read error.
+ *
+ * Returns 0 once the reset has been written, -ENODEV if the BAR is not
+ * mapped, or -EBUSY if the controller cannot enter the RESETTING state.
+ */
+static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl)
+{
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
+       int ret = 0;
+
+       /*
+        * Taking the shutdown_lock ensures the BAR mapping is not being
+        * altered by reset_work. Holding this lock before the RESETTING state
+        * change, if successful, also ensures nvme_remove won't be able to
+        * proceed to iounmap until we're done.
+        */
+       mutex_lock(&dev->shutdown_lock);
+       if (!dev->bar_mapped_size) {
+               ret = -ENODEV;
+               goto unlock;
+       }
+
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
+               ret = -EBUSY;
+               goto unlock;
+       }
+
+       writel(NVME_SUBSYS_RESET, dev->bar + NVME_REG_NSSR);
+
+       /*
+        * Return to LIVE by way of CONNECTING rather than directly from
+        * RESETTING, so the hand-off does not depend on a direct
+        * RESETTING -> LIVE transition being accepted. If either step is
+        * refused, leave the state alone and skip the read back.
+        */
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) ||
+           !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
+               goto unlock;
+
+       /*
+        * Read controller status to flush the previous write and trigger a
+        * pcie read error.
+        */
+       readl(dev->bar + NVME_REG_CSTS);
+unlock:
+       mutex_unlock(&dev->shutdown_lock);
+       return ret;
+}
+
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
        struct nvme_command c = { };
@@ -3033,6 +3068,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
        .reg_read64             = nvme_pci_reg_read64,
        .free_ctrl              = nvme_pci_free_ctrl,
        .submit_async_event     = nvme_pci_submit_async_event,
+       .subsystem_reset        = nvme_pci_subsystem_reset,
        .get_address            = nvme_pci_get_address,
        .print_device_info      = nvme_pci_print_device_info,
        .supports_pci_p2pdma    = nvme_pci_supports_pci_p2pdma,
index aa1734e2fd44e724881c9b80f002454e94c65931..47fbf561c01e62087b7f0ec916d74cc9d5784ce7 100644 (file)
@@ -2256,6 +2256,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
        .reg_read32             = nvmf_reg_read32,
        .reg_read64             = nvmf_reg_read64,
        .reg_write32            = nvmf_reg_write32,
+       .subsystem_reset        = nvmf_subsystem_reset,
        .free_ctrl              = nvme_rdma_free_ctrl,
        .submit_async_event     = nvme_rdma_submit_async_event,
        .delete_ctrl            = nvme_rdma_delete_ctrl,
index 4e1b91c0416b90ced2a5c9c2ce34bc64919bd9a5..441cacad4498babc7a4119040c0fcab088ab5ad6 100644 (file)
@@ -2612,6 +2612,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
        .reg_read32             = nvmf_reg_read32,
        .reg_read64             = nvmf_reg_read64,
        .reg_write32            = nvmf_reg_write32,
+       .subsystem_reset        = nvmf_subsystem_reset,
        .free_ctrl              = nvme_tcp_free_ctrl,
        .submit_async_event     = nvme_tcp_submit_async_event,
        .delete_ctrl            = nvme_tcp_delete_ctrl,
index 15086715632e0ecf663fa9deebcc8d3ad79c9beb..0d6153fc9b01a53cc8b3fb7d80a1f79c5fbecc6d 100644 (file)
@@ -28,6 +28,9 @@
 
 #define NVME_NSID_ALL          0xffffffff
 
+/* Special NSSR value, 'NVMe' */
+#define NVME_SUBSYS_RESET      0x4E564D65
+
 enum nvme_subsys_type {
        /* Referral to another discovery type target subsystem */
        NVME_NQN_DISC   = 1,