git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: add check function for pmfw eeprom
author Gangliang Xie <ganglxie@amd.com>
Thu, 4 Sep 2025 10:09:40 +0000 (18:09 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 6 Nov 2025 14:56:15 +0000 (09:56 -0500)
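Add amdgpu_ras_smu_eeprom_check(), a check function for the PMFW EEPROM. amdgpu_ras_eeprom_check() now calls it when amdgpu_ras_smu_eeprom_supported() reports support.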

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

index 6b51574530a4a8d12cef398e04a01c3daf42c5af..3c646d9dad778223b1df1d51bedd13cec763a3f4 100644 (file)
@@ -1499,6 +1499,47 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
        return 0;
 }
 
+/**
+ * amdgpu_ras_smu_eeprom_check - compare the bad page count with the threshold
+ * @control: RAS EEPROM control structure whose ras_num_bad_pages is refreshed
+ *
+ * Copies ras->bad_page_num into @control->ras_num_bad_pages and compares it
+ * with ras->bad_page_cnt_threshold. If the count exceeds the threshold and the
+ * amdgpu_bad_page_threshold module parameter is non-zero, a warning is logged;
+ * for parameter values other than -1 and -2, ras->is_rma is also set. Below
+ * the threshold, a warning is logged once the count reaches 90% of it.
+ *
+ * Does nothing when __is_ras_eeprom_supported() reports no support.
+ *
+ * Return: always 0.
+ */
+static int amdgpu_ras_smu_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+       struct amdgpu_device *adev = to_amdgpu_device(control);
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+       if (!__is_ras_eeprom_supported(adev))
+               return 0;
+
+       /* Take the count from the RAS context rather than from a table read. */
+       control->ras_num_bad_pages = ras->bad_page_num;
+
+       if ((ras->bad_page_cnt_threshold < control->ras_num_bad_pages) &&
+           amdgpu_bad_page_threshold != 0) {
+               dev_warn(adev->dev,
+                       "RAS records:%u exceed threshold:%d\n",
+                       control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+               if ((amdgpu_bad_page_threshold == -1) ||
+                       (amdgpu_bad_page_threshold == -2)) {
+                       dev_warn(adev->dev,
+                                "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+               } else {
+                       /* Any other non-zero setting: flag the device as RMA. */
+                       ras->is_rma = true;
+                       dev_warn(adev->dev,
+                                "User defined threshold is set, runtime service will be halt when threshold is reached\n");
+               }
+
+               return 0;
+       }
+
+       dev_dbg(adev->dev,
+               "Found existing EEPROM table with %u records\n",
+               control->ras_num_bad_pages);
+
+       /* Warn if we are at 90% of the threshold or above; the comparison is
+        * done in integer arithmetic (10 * count >= 9 * threshold).
+        */
+       if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+               dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d\n",
+                               control->ras_num_bad_pages,
+                               ras->bad_page_cnt_threshold);
+       return 0;
+}
+
+
 int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
 {
        struct amdgpu_device *adev = to_amdgpu_device(control);
@@ -1506,6 +1547,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
        int res = 0;
 
+       if (amdgpu_ras_smu_eeprom_supported(adev))
+               return amdgpu_ras_smu_eeprom_check(control);
+
        if (!__is_ras_eeprom_supported(adev))
                return 0;