]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: support to load RAS bad pages from PMFW
author: Tao Zhou <tao.zhou1@amd.com>
Thu, 24 Jul 2025 07:01:03 +0000 (15:01 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 6 Nov 2025 15:01:14 +0000 (10:01 -0500)
PMFW manages EEPROM bad page records; update bad page loading
accordingly.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 62d2f988d88f72f527cb789f27765eceb1a3cf93..055a9bbabbdbb01afcdb6af40f5a9eca2732eeae 100644 (file)
@@ -3158,8 +3158,12 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
        int i = 0;
        enum amdgpu_memory_partition save_nps;
 
-       save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
-       bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+       if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+               save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+               bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+       } else {
+               save_nps = nps;
+       }
 
        if (save_nps == nps) {
                if (amdgpu_umc_pages_in_a_row(adev, err_data,
@@ -3225,7 +3229,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 
        if (from_rom) {
                /* there is no pa recs in V3, so skip pa recs processing */
-               if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+               if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+                   !amdgpu_ras_smu_eeprom_supported(adev)) {
                        for (i = 0; i < pages; i++) {
                                if (control->ras_num_recs - i >= adev->umc.retire_unit) {
                                        if ((bps[i].address == bps[i + 1].address) &&
@@ -3356,7 +3361,8 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
                        /*In V3, there is no pa recs, and some cases(when address==0) may be parsed
                        as pa recs, so add verion check to avoid it.
                        */
-                       if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+                       if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+                           !amdgpu_ras_smu_eeprom_supported(adev)) {
                                for (i = 0; i < control->ras_num_recs; i++) {
                                        if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
                                                if ((bps[i].address == bps[i + 1].address) &&