]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: get RAS bad page address from MCA address
author Tao Zhou <tao.zhou1@amd.com>
Wed, 27 Aug 2025 07:48:06 +0000 (15:48 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 12 Nov 2025 02:54:14 +0000 (21:54 -0500)
Derive the RAS bad page address from the MCA address instead of from the physical address.

v2: add comment to make the code more readable

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

index 36a5393d6b74eb73d1f0f8c12e1704eff04cb5d7..9e2e098af86c27e7da65c32b0ac94b6e4bc0cc35 100644 (file)
@@ -3014,8 +3014,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
        addr_in.ma.err_addr = bps->address;
        addr_in.ma.socket_id = socket;
        addr_in.ma.ch_inst = bps->mem_channel;
-       /* tell RAS TA the node instance is not used */
-       addr_in.ma.node_inst = TA_RAS_INV_NODE;
+       if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+               /* tell RAS TA the node instance is not used */
+               addr_in.ma.node_inst = TA_RAS_INV_NODE;
+       } else {
+               addr_in.ma.umc_inst = bps->mcumc_id;
+               addr_in.ma.node_inst = bps->cu;
+       }
 
        if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
                ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
@@ -3162,7 +3167,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
                save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
                bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
        } else {
-               save_nps = nps;
+               /* if pmfw manages eeprom, save_nps is not stored on eeprom,
+                * we should always convert mca address into physical address,
+                * make save_nps different from nps
+                */
+               save_nps = nps + 1;
        }
 
        if (save_nps == nps) {
index 670c0dedf4e92ee7892fdd60f80ab35b8f9e8c7c..ec248ca6ef930d402702bec722a4ce5d435ac46d 100644 (file)
@@ -1022,9 +1022,9 @@ int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
                record[i - rec_idx].retired_page = 0x1ULL;
                record[i - rec_idx].ts = ts;
                record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-               record[i - rec_idx].cu = 0;
 
-               adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
+               adev->umc.ras->mca_ipid_parse(adev, ipid,
+                       (uint32_t *)&(record[i - rec_idx].cu),
                        (uint32_t *)&(record[i - rec_idx].mem_channel),
                        (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
        }