From e0d11bdb294cd1325eeccfec05e07d8c2534b43e Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Thu, 22 Jan 2026 10:19:28 -0500 Subject: [PATCH] drm/amdgpu: Send RMA CPER at bad page loading Some older builds weren't sending RMA CPERs when the bad page threshold was exceeded. Newer builds have resolved this, but there could be systems out there whose bad page count is already at or above the threshold and that have never sent out an RMA CPER. To be thorough and safe, send an RMA CPER when we load the table, if the threshold is met or exceeded, instead of waiting for the next UE to trigger the CPER. Signed-off-by: Kent Russell Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 64dd7a81bff5f..469d04a39d7d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1712,6 +1712,10 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); + if (amdgpu_bad_page_threshold != 0 && + control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) + amdgpu_dpm_send_rma_reason(adev); + } else if (hdr->header == RAS_TABLE_HDR_BAD && amdgpu_bad_page_threshold != 0) { if (hdr->version >= RAS_TABLE_VER_V2_1) { -- 2.47.3