From: Nilay Shroff Date: Sat, 16 May 2026 18:36:49 +0000 (+0530) Subject: nvme: export command retry count via sysfs X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=ab5af2903baa472930c94a421efdd22a49036213;p=thirdparty%2Fkernel%2Flinux.git nvme: export command retry count via sysfs When Advanced Command Retry Enable (ACRE) is configured, a controller may interrupt command execution and return a completion status indicating command interrupted with the DNR bit cleared. In this case, the driver retries the command based on the Command Retry Delay (CRD) value provided in the completion status. Currently, these command retries are handled entirely within the NVMe driver and are not visible to userspace. As a result, there is no observability into retry behavior, which can be a useful diagnostic signal. Expose a per-namespace sysfs attribute command_retries_count, under diag attribute group to provide visibility into retry activity. This information can help identify controller-side congestion under load and enables comparison across paths in multipath setups (for example, detecting cases where one path experiences significantly more retries than another under identical workloads). This exported metric is intended for diagnostics and monitoring tools such as nvme-top, and does not change command retry behavior. A new sysfs attribute named "command_retries_count" is added for this purpose. This attribute is both readable as well as writable. So user could reset this counter if needed. Tested-by: Venkat Rao Bagalkote Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 23dfce27ace2d..cbc2932556c5b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -323,6 +323,7 @@ static void nvme_retry_req(struct request *req) { unsigned long delay = 0; u16 crd; + struct nvme_ns *ns = req->q->queuedata; /* The mask and shift result must be <= 3 */ crd = (nvme_req(req)->status & NVME_STATUS_CRD) >> 11; @@ -330,6 +331,9 @@ static void nvme_retry_req(struct request *req) delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100; nvme_req(req)->retries++; + if (ns) + atomic_long_inc(&ns->retries); + blk_mq_requeue_request(req, false); blk_mq_delay_kick_requeue_list(req->q, delay); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 46cfce4dbbf6e..3cf95149aa882 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -592,6 +592,7 @@ struct nvme_ns { enum nvme_ana_state ana_state; u32 ana_grpid; #endif + atomic_long_t retries; struct list_head siblings; struct kref kref; struct nvme_ns_head *head; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 1d507a835783b..9472430934a32 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -344,13 +344,46 @@ const struct attribute_group nvme_ns_mpath_attr_group = { }; #endif +static ssize_t command_retries_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + return sysfs_emit(buf, "%lu\n", atomic_long_read(&ns->retries)); +} + +static ssize_t command_retries_count_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long retries; + int err; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + err = kstrtoul(buf, 0, &retries); + if (err) + return -EINVAL; + + atomic_long_set(&ns->retries, retries); + + return count; +} +static DEVICE_ATTR_RW(command_retries_count); + static struct attribute *nvme_ns_diag_attrs[] = { + &dev_attr_command_retries_count.attr, NULL, }; static umode_t nvme_ns_diag_attrs_are_visible(struct kobject *kobj, struct attribute *a, int n) { + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_command_retries_count.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } + return a->mode; }