git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
scsi: ufs: core: Critical health condition
author Avri Altman <avri.altman@wdc.com>
Tue, 11 Feb 2025 06:58:13 +0000 (08:58 +0200)
committer Martin K. Petersen <martin.petersen@oracle.com>
Thu, 13 Feb 2025 03:17:18 +0000 (22:17 -0500)
Martin hi,

The UFS4.1 standard, released on January 8, 2025, added a new exception
event: HEALTH_CRITICAL, which notifies the host of a device's critical
health condition. This notification implies that the device is approaching
the end of its lifetime based on the amount of performed program/erase
cycles.

Once an EOL (End-of-Life) exception event is received, we increment a
designated member, which is exposed via a sysfs entry. This new entry will
report the number of times a critical health event has been reported by a
UFS device.

To handle this new sysfs entry, userspace applications can use select(),
poll(), or epoll() to monitor changes in the critical_health attribute. The
kernel will call sysfs_notify() to signal changes, allowing the userspace
application to detect and respond to these changes efficiently.

The host can gain further insight into the specific issue by reading one of
the following attributes: bPreEOLInfo, bDeviceLifeTimeEstA,
bDeviceLifeTimeEstB, bWriteBoosterBufferLifeTimeEst, and
bRPMBLifeTimeEst. All those are available for reading via the driver's
sysfs entries or through an applicable utility. It is up to userspace to
read these attributes if needed.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20250211065813.58091-1-avri.altman@wdc.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Documentation/ABI/testing/sysfs-driver-ufs
drivers/ufs/core/ufs-sysfs.c
drivers/ufs/core/ufshcd.c
include/ufs/ufs.h
include/ufs/ufshcd.h

index 5fa6655aee84090d302f64bd6a31b277018e230b..ab2adea56715d6af22b9e35da19c18a31929374c 100644 (file)
@@ -1559,3 +1559,15 @@ Description:
                Symbol - HCMID. This file shows the UFSHCD manufacturer id.
                The Manufacturer ID is defined by JEDEC in JEDEC-JEP106.
                The file is read only.
+
+What:          /sys/bus/platform/drivers/ufshcd/*/critical_health
+What:          /sys/bus/platform/devices/*.ufs/critical_health
+Date:          February 2025
+Contact:       Avri Altman <avri.altman@wdc.com>
+Description:   Report the number of times a critical health event has been
+               reported by a UFS device. Further insight into the specific
+               issue can be gained by reading one of: bPreEOLInfo,
+               bDeviceLifeTimeEstA, bDeviceLifeTimeEstB,
+               bWriteBoosterBufferLifeTimeEst, and bRPMBLifeTimeEst.
+
+               The file is read only.
index 3438269a54405b568f7ad0fdd254733931a1500f..90b5ab60f5ae4a5cea34a56e13e8a5307c39a633 100644 (file)
@@ -458,6 +458,14 @@ static ssize_t pm_qos_enable_store(struct device *dev,
        return count;
 }
 
+static ssize_t critical_health_show(struct device *dev,
+                                   struct device_attribute *attr, char *buf)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf, "%d\n", hba->critical_health_count);
+}
+
 static DEVICE_ATTR_RW(rpm_lvl);
 static DEVICE_ATTR_RO(rpm_target_dev_state);
 static DEVICE_ATTR_RO(rpm_target_link_state);
@@ -470,6 +478,7 @@ static DEVICE_ATTR_RW(enable_wb_buf_flush);
 static DEVICE_ATTR_RW(wb_flush_threshold);
 static DEVICE_ATTR_RW(rtc_update_ms);
 static DEVICE_ATTR_RW(pm_qos_enable);
+static DEVICE_ATTR_RO(critical_health);
 
 static struct attribute *ufs_sysfs_ufshcd_attrs[] = {
        &dev_attr_rpm_lvl.attr,
@@ -484,6 +493,7 @@ static struct attribute *ufs_sysfs_ufshcd_attrs[] = {
        &dev_attr_wb_flush_threshold.attr,
        &dev_attr_rtc_update_ms.attr,
        &dev_attr_pm_qos_enable.attr,
+       &dev_attr_critical_health.attr,
        NULL
 };
 
index cd404ade48dcf907f13d1d7a6511723542a9e4d5..ef56a5eb52dcc36d26502f4fa6d9548fdd0d315a 100644 (file)
@@ -6216,6 +6216,11 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
        if (status & hba->ee_drv_mask & MASK_EE_URGENT_TEMP)
                ufshcd_temp_exception_event_handler(hba, status);
 
+       if (status & hba->ee_drv_mask & MASK_EE_HEALTH_CRITICAL) {
+               hba->critical_health_count++;
+               sysfs_notify(&hba->dev->kobj, NULL, "critical_health");
+       }
+
        ufs_debugfs_exception_event(hba, status);
 }
 
@@ -8308,6 +8313,11 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
 
        ufshcd_temp_notif_probe(hba, desc_buf);
 
+       if (dev_info->wspecversion >= 0x410) {
+               hba->critical_health_count = 0;
+               ufshcd_enable_ee(hba, MASK_EE_HEALTH_CRITICAL);
+       }
+
        ufs_init_rtc(hba, desc_buf);
 
        /*
index 89672ad8c3bb090696d82a80b15f39cfb2cc363a..d335bff1a310cc5d1eb2742843b425d781f03f84 100644 (file)
@@ -419,6 +419,7 @@ enum {
        MASK_EE_TOO_LOW_TEMP            = BIT(4),
        MASK_EE_WRITEBOOSTER_EVENT      = BIT(5),
        MASK_EE_PERFORMANCE_THROTTLING  = BIT(6),
+       MASK_EE_HEALTH_CRITICAL         = BIT(9),
 };
 #define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP)
 
index 650ff238cd74e8d48e5a36348eb06a0c31d365d6..5efa570de4c155fdee69aa9bf87b8c8ab3fcd2cf 100644 (file)
@@ -962,6 +962,7 @@ enum ufshcd_mcq_opr {
  * @ufs_rtc_update_work: A work for UFS RTC periodic update
  * @pm_qos_req: PM QoS request handle
  * @pm_qos_enabled: flag to check if pm qos is enabled
+ * @critical_health_count: count of critical health exceptions
  */
 struct ufs_hba {
        void __iomem *mmio_base;
@@ -1130,6 +1131,8 @@ struct ufs_hba {
        struct delayed_work ufs_rtc_update_work;
        struct pm_qos_request pm_qos_req;
        bool pm_qos_enabled;
+
+       int critical_health_count;
 };
 
 /**