git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: try for more times if RAS bad page number is not updated
author Tao Zhou <tao.zhou1@amd.com>
Wed, 27 Aug 2025 11:33:02 +0000 (19:33 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 12 Nov 2025 02:54:14 +0000 (21:54 -0500)
Updating the RAS info in PMFW takes time, so wait for it to complete.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

index ec248ca6ef930d402702bec722a4ce5d435ac46d..01b38a6e198e4061e46ad9e371bf98c364264f10 100644 (file)
@@ -874,13 +874,33 @@ Out:
 int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control)
 {
        struct amdgpu_device *adev = to_amdgpu_device(control);
+       int ret, timeout = 1000;
 
        if (!amdgpu_ras_smu_eeprom_supported(adev))
                return 0;
 
        control->ras_num_recs_old = control->ras_num_recs;
-       return amdgpu_ras_smu_get_badpage_count(adev,
+
+       do {
+               ret = amdgpu_ras_smu_get_badpage_count(adev,
                        &(control->ras_num_recs), 12);
+               if (!ret &&
+                   (control->ras_num_recs_old == control->ras_num_recs)) {
+                       /* record number update in PMFW needs some time */
+                       msleep(50);
+                       timeout -= 50;
+               } else {
+                       break;
+               }
+       } while (timeout);
+
+       /* no update of record number is not a real failure,
+        * don't print warning here
+        */
+       if (!ret && (control->ras_num_recs_old == control->ras_num_recs))
+               ret = -EINVAL;
+
+       return ret;
 }
 
 /**