From: Tao Zhou
Date: Wed, 27 Aug 2025 11:33:02 +0000 (+0800)
Subject: drm/amdgpu: try for more times if RAS bad page number is not updated
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=334b27bf712b5ddd19908aba318175e4b9bcf839;p=thirdparty%2Fkernel%2Flinux.git

drm/amdgpu: try for more times if RAS bad page number is not updated

Updating RAS info in PMFW takes time, so wait for it to complete.

Signed-off-by: Tao Zhou
Reviewed-by: Hawking Zhang
Signed-off-by: Alex Deucher
---

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index ec248ca6ef930..01b38a6e198e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -874,13 +874,33 @@ Out:
 int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control)
 {
 	struct amdgpu_device *adev = to_amdgpu_device(control);
+	int ret, timeout = 1000;
 
 	if (!amdgpu_ras_smu_eeprom_supported(adev))
 		return 0;
 
 	control->ras_num_recs_old = control->ras_num_recs;
-	return amdgpu_ras_smu_get_badpage_count(adev,
+
+	do {
+		ret = amdgpu_ras_smu_get_badpage_count(adev,
 			&(control->ras_num_recs), 12);
+		if (!ret &&
+		    (control->ras_num_recs_old == control->ras_num_recs)) {
+			/* record number update in PMFW needs some time */
+			msleep(50);
+			timeout -= 50;
+		} else {
+			break;
+		}
+	} while (timeout);
+
+	/* no update of record number is not a real failure,
+	 * don't print warning here
+	 */
+	if (!ret && (control->ras_num_recs_old == control->ras_num_recs))
+		ret = -EINVAL;
+
+	return ret;
 }
 
 /**