git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: check and drop invalid bad page records
author YiPeng Chai <YiPeng.Chai@amd.com>
Tue, 12 May 2026 07:09:52 +0000 (15:09 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 19 May 2026 15:54:23 +0000 (11:54 -0400)
Check and drop invalid bad page records.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 6c644cfe6695d80cf61faf9c519f7613b52ee26b..3e1f8b15f75de01570639a36716e2166e1366a0f 100644 (file)
@@ -3093,6 +3093,25 @@ static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
                return  -EINVAL;
 }
 
+static bool __check_record_in_range(struct amdgpu_device *adev,
+                       struct eeprom_table_record *bps, int count)
+{ /* Returns true only when none of the first count records in bps has an out-of-range retired_page. */
+       int i;
+
+       for (i = 0; i < count; i++) {
+               if (bps[i].retired_page >=
+                       (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT)) { /* bound is real_vram_size >> AMDGPU_GPU_PAGE_SHIFT; NOTE(review): presumably the VRAM size in GPU pages - confirm */
+                       dev_warn(adev->dev,
+                               "Recorded address out of range: 0x%llx, 0x%llx, 0x%x, 0x%x\n",
+                               bps[i].address, bps[i].retired_page,
+                               bps[i].mem_channel, bps[i].mcumc_id);
+                       return false; /* stop at the first offending record; later records are not examined */
+               }
+       }
+
+       return true; /* every record passed the bound check (also the result when count <= 0) */
+}
+
 static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
                                        struct eeprom_table_record *bps, int count)
 {
@@ -3100,6 +3119,9 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data = con->eh_data;
 
+       if (!__check_record_in_range(adev, bps, count))
+               return 0;
+
        for (j = 0; j < count; j++) {
                if (!data->space_left &&
                    amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
@@ -5641,6 +5663,11 @@ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
        uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
        int ret = 0;
 
+       if (pfn >= (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT)) {
+               dev_warn(adev->dev, "Ignoring out-of-range bad page 0x%llx", start);
+               return 0;
+       }
+
        if (amdgpu_ras_check_critical_address(adev, start))
                return 0;