git.ipfire.org Git - thirdparty/linux.git/commitdiff
nvme: export controller reset event count via sysfs
author Nilay Shroff <nilay@linux.ibm.com>
Sat, 16 May 2026 18:36:54 +0000 (00:06 +0530)
committer Keith Busch <kbusch@kernel.org>
Thu, 4 Jun 2026 08:57:36 +0000 (01:57 -0700)
The NVMe controller transitions into the RESETTING state during error
recovery, link instability, firmware activation, or when a reset is
explicitly triggered by the user.

Expose a per-controller sysfs attribute, reset_count, under the diag
attribute group to provide visibility into these RESETTING state
transitions. Observing the frequency of reset events can help users
identify issues such as PCIe errors or unstable fabric links. The counter
is also writable, allowing the user to reset its value if needed.

This counter can also be consumed by monitoring tools such as nvme-top
to improve controller-level observability.

Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/nvme/host/sysfs.c

index 5f885e0ab93088519fc028c25f1f02fcc042f668..efaddab8296e0974294fbd28eb8e2ec6d71dc5c8 100644 (file)
@@ -596,6 +596,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
                case NVME_CTRL_NEW:
                case NVME_CTRL_LIVE:
                        changed = true;
+                       atomic_long_inc(&ctrl->nr_reset);
                        fallthrough;
                default:
                        break;
index 249f1f8dde4043e5404145c15ce31c4efe2ccc33..81f297e995e4a5b0fcd62bcdb9e53cfe80b6fbed 100644 (file)
@@ -416,6 +416,7 @@ struct nvme_ctrl {
        struct work_struct fw_act_work;
        unsigned long events;
        atomic_long_t errors;
+       atomic_long_t nr_reset;
 
 #ifdef CONFIG_NVME_MULTIPATH
        /* asymmetric namespace access: */
index d2c7d943b23fccf749b332e5a7eed93797dda557..ff603a9d7b8caa42830757359c71e440738a6671 100644 (file)
@@ -1151,8 +1151,35 @@ struct device_attribute dev_attr_adm_errors =
        __ATTR(command_error_count, 0644,
                nvme_adm_errors_show, nvme_adm_errors_store);
 
+/*
+ * sysfs read handler for reset_count: emits the controller's nr_reset
+ * counter as an unsigned decimal number followed by a newline.
+ */
+static ssize_t reset_count_show(struct device *dev,
+                  struct device_attribute *attr, char *buf)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       unsigned long nr_reset = atomic_long_read(&ctrl->nr_reset);
+
+       return sysfs_emit(buf, "%lu\n", nr_reset);
+}
+
+/*
+ * sysfs write handler for reset_count: parses an unsigned long from @buf
+ * (base 0 is passed to kstrtoul(), so the base is auto-detected) and
+ * overwrites the controller's nr_reset counter with that value, e.g.
+ * writing 0 clears the count.
+ *
+ * Returns @count on success, or the negative errno reported by kstrtoul()
+ * when the input cannot be converted.
+ */
+static ssize_t reset_count_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+       unsigned long reset_cnt;
+       int err;
+
+       err = kstrtoul(buf, 0, &reset_cnt);
+       if (err)
+               return err;     /* propagate as-is so -ERANGE is not masked as -EINVAL */
+
+       atomic_long_set(&ctrl->nr_reset, reset_cnt);
+
+       return count;
+}
+
+static DEVICE_ATTR_RW(reset_count);    /* provides dev_attr_reset_count, backed by reset_count_show/_store */
+
 static struct attribute *nvme_dev_diag_attrs[] = {     /* NULL-terminated list of diag attributes */
        &dev_attr_adm_errors.attr,
+       &dev_attr_reset_count.attr,     /* controller reset event counter */
        NULL,
 };