git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: Update boot time errors polling sequence
author Hawking Zhang <Hawking.Zhang@amd.com>
Mon, 29 Jan 2024 12:29:08 +0000 (20:29 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 31 Jan 2024 19:04:55 +0000 (14:04 -0500)
Update boot time errors polling sequence to align with
the latest firmware change.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index 79bf6bd428a5a46dbffe191a72da02a59e4e8266..084289db23f9a97e2230be2d23c866ae6e18145e 100644 (file)
@@ -4120,6 +4120,18 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
        u32 reg_data;
        int retry_loop;
 
+       reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+                  aqua_vanjaram_encode_ext_smn_addressing(instance);
+
+       for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
+               reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+               if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) {
+                       *boot_error = AMDGPU_RAS_BOOT_SUCEESS;
+                       return 0;
+               }
+               msleep(1);
+       }
+
        /* The pattern for smn addressing in other SOC could be different from
         * the one for aqua_vanjaram. We should revisit the code if the pattern
         * is changed. In such case, replace the aqua_vanjaram implementation
@@ -4127,7 +4139,7 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
        reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
                   aqua_vanjaram_encode_ext_smn_addressing(instance);
 
-       for (retry_loop = 0; retry_loop < 1000; retry_loop++) {
+       for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
                reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
                if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) {
                        *boot_error = reg_data;
index 0b6ffae1e8bb50bfe5886e5ac67cb81295b6c8ef..d10e5bb0e52f007cdbb380b44ae0b663d89544cd 100644 (file)
@@ -46,6 +46,11 @@ struct amdgpu_iv_entry;
 #define AMDGPU_RAS_GPU_ERR_HBM_ID(x)                   AMDGPU_GET_REG_FIELD(x, 13, 13)
 #define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x)              AMDGPU_GET_REG_FIELD(x, 31, 31)
 
+#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT   1000
+#define AMDGPU_RAS_BOOT_STEADY_STATUS          0xBA
+#define AMDGPU_RAS_BOOT_STATUS_MASK            0xFF
+#define AMDGPU_RAS_BOOT_SUCEESS                        0x80000000
+
 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS          (0x1 << 0)
 /* position of instance value in sub_block_index of
  * ta_ras_trigger_error_input, the sub block uses lower 12 bits