]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: Send RMA CPER at bad page loading
author Kent Russell <kent.russell@amd.com>
Thu, 22 Jan 2026 15:19:28 +0000 (10:19 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 27 Jan 2026 23:13:24 +0000 (18:13 -0500)
Some older builds weren't sending RMA CPERs when the bad page threshold
was exceeded. Newer builds have resolved this, but there could be
systems in the field whose bad page count already meets or exceeds the
threshold and that have never sent out an RMA CPER. To be thorough and
safe, send an RMA CPER when we load the table, if the threshold is met
or exceeded, instead of waiting for the next UE to trigger the CPER.

Signed-off-by: Kent Russell <kent.russell@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

index 64dd7a81bff5faa0cbe4515dd707a1d7a6a54ef2..469d04a39d7d0e77a6ba2a5d685da74135c30f1d 100644 (file)
@@ -1712,6 +1712,10 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
                        dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
                                        control->ras_num_bad_pages,
                                        ras->bad_page_cnt_threshold);
+               if (amdgpu_bad_page_threshold != 0 &&
+                       control->ras_num_bad_pages >= ras->bad_page_cnt_threshold)
+                       amdgpu_dpm_send_rma_reason(adev);
+
        } else if (hdr->header == RAS_TABLE_HDR_BAD &&
                   amdgpu_bad_page_threshold != 0) {
                if (hdr->version >= RAS_TABLE_VER_V2_1) {