git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdgpu: Skip poison aca bank from UE channel
author Xiang Liu <xiang.liu@amd.com>
Wed, 30 Jul 2025 03:07:43 +0000 (11:07 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Nov 2025 20:34:09 +0000 (15:34 -0500)
[ Upstream commit 8e8e08c831f088ed581444c58a635c49ea1222ab ]

Avoid GFX poison consumption errors logged when fatal error occurs.

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c

index 9d6345146495fc8510b40b565feba4a0c851e091..a95f45d0631440a62f55e5cfa3e01872eb0c634f 100644 (file)
@@ -132,6 +132,27 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
                              idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
 }
 
+static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
+{
+
+       struct aca_hwip *hwip;
+       int hwid, mcatype;
+       u64 ipid;
+
+       if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
+               return false;
+
+       hwip = &aca_hwid_mcatypes[type];
+       if (!hwip->hwid)
+               return false;
+
+       ipid = bank->regs[ACA_REG_IDX_IPID];
+       hwid = ACA_REG__IPID__HARDWAREID(ipid);
+       mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+       return hwip->hwid == hwid && hwip->mcatype == mcatype;
+}
+
 static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
                                       int start, int count,
                                       struct aca_banks *banks, struct ras_query_context *qctx)
@@ -170,6 +191,15 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
 
                bank.type = type;
 
+               /*
+                * Poison being consumed when injecting a UE while running background workloads,
+                * which are unexpected.
+                */
+               if (type == ACA_SMU_TYPE_UE &&
+                   ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
+                   !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
+                       continue;
+
                aca_smu_bank_dump(adev, i, count, &bank, qctx);
 
                ret = aca_banks_add_bank(banks, &bank);
@@ -180,27 +210,6 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
        return 0;
 }
 
-static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
-{
-
-       struct aca_hwip *hwip;
-       int hwid, mcatype;
-       u64 ipid;
-
-       if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
-               return false;
-
-       hwip = &aca_hwid_mcatypes[type];
-       if (!hwip->hwid)
-               return false;
-
-       ipid = bank->regs[ACA_REG_IDX_IPID];
-       hwid = ACA_REG__IPID__HARDWAREID(ipid);
-       mcatype = ACA_REG__IPID__MCATYPE(ipid);
-
-       return hwip->hwid == hwid && hwip->mcatype == mcatype;
-}
-
 static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
 {
        const struct aca_bank_ops *bank_ops = handle->bank_ops;