From: Nilay Shroff Date: Sat, 16 May 2026 18:36:55 +0000 (+0530) Subject: nvme: export controller reconnect event count via sysfs X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=3c8c284dfcdfce81a02fe3c911196d9876468ae4;p=thirdparty%2Fkernel%2Flinux.git nvme: export controller reconnect event count via sysfs When an NVMe-oF link goes down, the driver attempts to recover the connection by repeatedly reconnecting to the remote controller at configured intervals. A maximum number of reconnect attempts is also configured, after which recovery stops and the controller is removed if the connection cannot be re-established. The driver maintains a counter, nr_reconnects, which is incremented on each reconnect attempt. However, when a reconnect succeeds, this counter is reset to zero. Moreover, this counter is currently only reported via kernel log messages and is not exposed to userspace. Since dmesg is a circular buffer, this information may be lost over time. So introduce a new accumulator which accumulates nr_reconnects each time it is reset, and expose this accumulator for each fabrics controller via a new sysfs attribute, reconnect_count, under the diag attribute group, to provide persistent visibility into the number of reconnect attempts made by the host. This information can help users diagnose unstable links or connectivity issues. Furthermore, this sysfs attribute is writable so that a user may reset it to zero, if needed. The reconnect_count can also be consumed by monitoring tools such as nvme-top to improve controller-level observability. 
Tested-by: Venkat Rao Bagalkote Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e4f4528fe2a2d..f04eb13dd5e9a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3148,6 +3148,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) goto out_term_aen_ops; } + /* accumulate reconnect attempts before resetting it to zero */ + atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects); ctrl->ctrl.nr_reconnects = 0; nvme_start_ctrl(&ctrl->ctrl); @@ -3470,6 +3472,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; ctrl->ctrl.nr_reconnects = 0; + atomic_long_set(&ctrl->ctrl.acc_reconnects, 0); INIT_LIST_HEAD(&ctrl->ctrl_list); ctrl->lport = lport; ctrl->rport = rport; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 81f297e995e4a..b367c67dcb37f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -458,6 +458,8 @@ struct nvme_ctrl { u16 icdoff; u16 maxcmd; int nr_reconnects; + /* accumulate reconenct attempts, as nr_reconnects can reset to zero */ + atomic_long_t acc_reconnects; unsigned long flags; struct nvmf_ctrl_options *opts; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bf73135c1439a..61a91cfb40626 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1110,6 +1110,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", ctrl->ctrl.nr_reconnects); + /* accumulate reconnect attempts before resetting it to zero */ + atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects); ctrl->ctrl.nr_reconnects = 0; return; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index ff603a9d7b8ca..933a5adfb7af5 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -1175,17 +1175,52 @@ static ssize_t 
reset_count_store(struct device *dev, return count; } +static ssize_t reconnect_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", + atomic_long_read(&ctrl->acc_reconnects) + + ctrl->nr_reconnects); +} + +static ssize_t reconnect_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + int err; + unsigned long reconnect_cnt; + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + err = kstrtoul(buf, 0, &reconnect_cnt); + if (err) + return -EINVAL; + + atomic_long_set(&ctrl->acc_reconnects, reconnect_cnt); + + return count; +} + +static DEVICE_ATTR_RW(reconnect_count); + static DEVICE_ATTR_RW(reset_count); static struct attribute *nvme_dev_diag_attrs[] = { &dev_attr_adm_errors.attr, &dev_attr_reset_count.attr, + &dev_attr_reconnect_count.attr, NULL, }; static umode_t nvme_dev_diag_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { + struct device *dev = container_of(kobj, struct device, kobj); + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (a == &dev_attr_reconnect_count.attr && !ctrl->opts) + return 0; + return a->mode; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9d17c88a62005..9b76b77ffdbbf 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2489,6 +2489,8 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n", ctrl->nr_reconnects, ctrl->opts->max_reconnects); + /* accumulate reconnect attempts before resetting it to zero */ + atomic_long_add(ctrl->nr_reconnects, &ctrl->acc_reconnects); ctrl->nr_reconnects = 0; return;