From: Tao Zhou Date: Wed, 27 Aug 2025 07:48:06 +0000 (+0800) Subject: drm/amdgpu: get RAS bad page address from MCA address X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e84835940e60a7d5263767ee92acc08f9877cb26;p=thirdparty%2Fkernel%2Flinux.git drm/amdgpu: get RAS bad page address from MCA address Instead of from physical address. v2: add comment to make the code more readable Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 36a5393d6b74e..9e2e098af86c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3014,8 +3014,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev, addr_in.ma.err_addr = bps->address; addr_in.ma.socket_id = socket; addr_in.ma.ch_inst = bps->mem_channel; - /* tell RAS TA the node instance is not used */ - addr_in.ma.node_inst = TA_RAS_INV_NODE; + if (!amdgpu_ras_smu_eeprom_supported(adev)) { + /* tell RAS TA the node instance is not used */ + addr_in.ma.node_inst = TA_RAS_INV_NODE; + } else { + addr_in.ma.umc_inst = bps->mcumc_id; + addr_in.ma.node_inst = bps->cu; + } if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) ret = adev->umc.ras->convert_ras_err_addr(adev, err_data, @@ -3162,7 +3167,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); } else { - save_nps = nps; + /* if pmfw manages eeprom, save_nps is not stored on eeprom, + * we should always convert mca address into physical address, + * make save_nps different from nps + */ + save_nps = nps + 1; } if (save_nps == nps) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 670c0dedf4e92..ec248ca6ef930 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1022,9 +1022,9 @@ int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control, record[i - rec_idx].retired_page = 0x1ULL; record[i - rec_idx].ts = ts; record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - record[i - rec_idx].cu = 0; - adev->umc.ras->mca_ipid_parse(adev, ipid, NULL, + adev->umc.ras->mca_ipid_parse(adev, ipid, + (uint32_t *)&(record[i - rec_idx].cu), (uint32_t *)&(record[i - rec_idx].mem_channel), (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL); }