From: Gangliang Xie Date: Mon, 15 Dec 2025 05:01:04 +0000 (+0800) Subject: drm/amd/ras: add check safety watermark func for pmfw eeprom X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=50428065575872563c8004fcacb8db3048ada77f;p=thirdparty%2Fkernel%2Flinux.git drm/amd/ras: add check safety watermark func for pmfw eeprom add check safety watermark func for pmfw eeprom Signed-off-by: Gangliang Xie Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index 9190c9cd79932..4b86a58e81491 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -571,6 +571,9 @@ bool amdgpu_ras_mgr_check_eeprom_safety_watermark(struct amdgpu_device *adev) if (!amdgpu_ras_mgr_is_ready(adev)) return false; + if (ras_fw_eeprom_supported(ras_mgr->ras_core)) + return ras_fw_eeprom_check_safety_watermark(ras_mgr->ras_core); + return ras_eeprom_check_safety_watermark(ras_mgr->ras_core); } diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c index 9a5ffcf64b408..01ad6ebab9022 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c @@ -563,6 +563,9 @@ bool ras_core_is_ready(struct ras_core_context *ras_core) bool ras_core_check_safety_watermark(struct ras_core_context *ras_core) { + if (ras_fw_eeprom_supported(ras_core)) + return ras_fw_eeprom_check_safety_watermark(ras_core); + return ras_eeprom_check_safety_watermark(ras_core); } diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c index ae63e7394829a..34a4161251b3d 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c @@ -190,3 +190,33 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core) return res; } + +bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core) +{ + struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom; + bool ret = false; + int bad_page_count; + + if (!control->record_threshold_config) + return false; + + bad_page_count = ras_umc_get_badpage_count(ras_core); + + if (bad_page_count > control->record_threshold_count) + RAS_DEV_WARN(ras_core->dev, "RAS records:%d exceed threshold:%d", + bad_page_count, control->record_threshold_count); + + if ((control->record_threshold_config == WARN_NONSTOP_OVER_THRESHOLD) || + (control->record_threshold_config == NONSTOP_OVER_THRESHOLD)) { + RAS_DEV_WARN(ras_core->dev, + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n"); + ret = false; + } else { + ras_core->is_rma = true; + RAS_DEV_WARN(ras_core->dev, + "Please consider adjusting the customized threshold.\n"); + ret = true; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h index a1003db3c33ba..b0d3eade43770 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h @@ -67,5 +67,6 @@ int ras_fw_get_badpage_ipid(struct ras_core_context *ras_core, int ras_fw_erase_ras_table(struct ras_core_context *ras_core, uint32_t *result); int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core); +bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core); #endif