From: Nilay Shroff Date: Sat, 16 May 2026 18:36:50 +0000 (+0530) Subject: nvme: export multipath failover count via sysfs X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=66ee95b3d490d78283b6e92cb4230d4a04c99817;p=thirdparty%2Flinux.git nvme: export multipath failover count via sysfs When an NVMe command completes with a path-specific error, the NVMe driver may retry the command on an alternate controller or path if one is available. These failover events indicate that I/O was redirected away from the original path. Currently, the number of times requests are failed over to another available path is not visible to userspace. Exposing this information can be useful for diagnosing path health and stability. Export a per-path sysfs attribute "multipath_failover_count" under the diag attribute group. This attribute is both readable and writable, thus allowing the user to reset the counter. This counter can be consumed by monitoring tools such as nvme-top to help identify paths that consistently trigger failovers under load. 
Tested-by: Venkat Rao Bagalkote Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index bd9e8d5a27132..51c8d928fc80d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -152,6 +152,7 @@ void nvme_failover_req(struct request *req) struct bio *bio; nvme_mpath_clear_current_path(ns); + atomic_long_inc(&ns->failover); /* * If we got back an ANA error, we know the controller is alive but not @@ -1165,6 +1166,32 @@ static ssize_t delayed_removal_secs_store(struct device *dev, DEVICE_ATTR_RW(delayed_removal_secs); +static ssize_t multipath_failover_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + return sysfs_emit(buf, "%lu\n", atomic_long_read(&ns->failover)); +} + +static ssize_t multipath_failover_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long failover; + int ret; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + ret = kstrtoul(buf, 0, &failover); + if (ret) + return -EINVAL; + + atomic_long_set(&ns->failover, failover); + + return count; +} + +DEVICE_ATTR_RW(multipath_failover_count); + static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 3cf95149aa882..73505152fcb1d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -591,6 +591,7 @@ struct nvme_ns { #ifdef CONFIG_NVME_MULTIPATH enum nvme_ana_state ana_state; u32 ana_grpid; + atomic_long_t failover; #endif atomic_long_t retries; struct list_head siblings; @@ -1065,6 +1066,7 @@ extern struct device_attribute dev_attr_ana_state; extern struct device_attribute dev_attr_queue_depth; extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute dev_attr_delayed_removal_secs; +extern struct 
device_attribute dev_attr_multipath_failover_count; extern struct device_attribute subsys_attr_iopolicy; static inline bool nvme_disk_is_ns_head(struct gendisk *disk) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 9472430934a32..0e5033db48a36 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -371,6 +371,9 @@ static DEVICE_ATTR_RW(command_retries_count); static struct attribute *nvme_ns_diag_attrs[] = { &dev_attr_command_retries_count.attr, +#ifdef CONFIG_NVME_MULTIPATH + &dev_attr_multipath_failover_count.attr, +#endif NULL, }; @@ -383,7 +386,12 @@ static umode_t nvme_ns_diag_attrs_are_visible(struct kobject *kobj, if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; } - +#ifdef CONFIG_NVME_MULTIPATH + if (a == &dev_attr_multipath_failover_count.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } +#endif return a->mode; }