git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amd/ras: add check safety watermark func for pmfw eeprom
author: Gangliang Xie <ganglxie@amd.com>
Mon, 15 Dec 2025 05:01:04 +0000 (13:01 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 4 Mar 2026 16:41:43 +0000 (11:41 -0500)
Add a safety watermark check function for the PMFW EEPROM.

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
drivers/gpu/drm/amd/ras/rascore/ras_core.c
drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h

index 9190c9cd799320a75ad1a74dd4f46a3b535a51eb..4b86a58e81491dfae2ab523a8b6da4dffca00c33 100644 (file)
@@ -571,6 +571,9 @@ bool amdgpu_ras_mgr_check_eeprom_safety_watermark(struct amdgpu_device *adev)
        if (!amdgpu_ras_mgr_is_ready(adev))
                return false;
 
+       if (ras_fw_eeprom_supported(ras_mgr->ras_core))
+               return ras_fw_eeprom_check_safety_watermark(ras_mgr->ras_core);
+
        return ras_eeprom_check_safety_watermark(ras_mgr->ras_core);
 }
 
index 9a5ffcf64b4084d342926d34dc1f4af7e9abe131..01ad6ebab9022a20b34565646c2463563aabb4fc 100644 (file)
@@ -563,6 +563,9 @@ bool ras_core_is_ready(struct ras_core_context *ras_core)
 
 bool ras_core_check_safety_watermark(struct ras_core_context *ras_core)
 {
+       if (ras_fw_eeprom_supported(ras_core)) /* fw EEPROM supported: use its check */
+               return ras_fw_eeprom_check_safety_watermark(ras_core);
+
        return ras_eeprom_check_safety_watermark(ras_core);
 }
 
index ae63e7394829a0c79adc421f45d1bcd7838dadb3..34a4161251b3d5614c0d7dc00f41cb141fd4242c 100644 (file)
@@ -190,3 +190,33 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core)
 
        return res;
 }
+
+/**
+ * ras_fw_eeprom_check_safety_watermark - check the bad page count against
+ * the record threshold of the firmware EEPROM control
+ * @ras_core: RAS core context; its ras_fw_eeprom control block supplies
+ *            record_threshold_config and record_threshold_count
+ *
+ * Returns false without logging when record_threshold_config is zero or when
+ * the count reported by ras_umc_get_badpage_count() does not exceed
+ * record_threshold_count.
+ *
+ * Once the count exceeds the threshold a warning is logged and then:
+ *  - for WARN_NONSTOP_OVER_THRESHOLD or NONSTOP_OVER_THRESHOLD only the
+ *    service guidance is logged and false is returned;
+ *  - for any other config value ras_core->is_rma is set and true is returned.
+ *
+ * Return: true if the threshold is exceeded and the core was flagged as RMA,
+ * false otherwise.
+ */
+bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core)
+{
+       struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
+       int bad_page_count;
+
+       if (!control->record_threshold_config)
+               return false;
+
+       bad_page_count = ras_umc_get_badpage_count(ras_core);
+
+       /*
+        * Everything below is over-threshold handling. Bail out here so that
+        * is_rma is never set (and true never returned) while the bad page
+        * count is still within the configured threshold.
+        */
+       if (bad_page_count <= control->record_threshold_count)
+               return false;
+
+       RAS_DEV_WARN(ras_core->dev, "RAS records:%d exceed threshold:%d",
+               bad_page_count, control->record_threshold_count);
+
+       if ((control->record_threshold_config == WARN_NONSTOP_OVER_THRESHOLD) ||
+               (control->record_threshold_config == NONSTOP_OVER_THRESHOLD)) {
+               RAS_DEV_WARN(ras_core->dev,
+                       "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
+               return false;
+       }
+
+       ras_core->is_rma = true;
+       RAS_DEV_WARN(ras_core->dev,
+               "Please consider adjusting the customized threshold.\n");
+
+       return true;
+}
index a1003db3c33ba43a87140694a8512fc30208d1e7..b0d3eade43770839e87a852d6ff31470094a947b 100644 (file)
@@ -67,5 +67,6 @@ int ras_fw_get_badpage_ipid(struct ras_core_context *ras_core,
 int ras_fw_erase_ras_table(struct ras_core_context *ras_core,
                                   uint32_t *result);
 int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core);
+bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core);
 
 #endif