From: Amber Lin Date: Fri, 13 Mar 2026 09:35:55 +0000 (-0400) Subject: drm/amdgpu: Fixup detect and reset X-Git-Tag: v7.2-rc1~141^2~24^2~164 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=640482525554cc08370f2355be61b9fdf2b066d4;p=thirdparty%2Flinux.git drm/amdgpu: Fixup detect and reset Identify hung queues by comparing doorbells shown in hqd_info from MES with doorbells stored in the driver to find matching queues. Suggested-by: Jonathan Kim Signed-off-by: Amber Lin Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 0d75d1aa60ec5..932518934f5c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -466,23 +466,35 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, &input); - if (r) { - dev_err(adev->dev, "failed to detect and reset\n"); - } else { - *hung_db_num = 0; - for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { - if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { - hung_db_array[i] = db_array[i]; - *hung_db_num += 1; - } + + if (r && detect_only) { + dev_err(adev->dev, "Failed to detect hung queues\n"); + return r; + } + + *hung_db_num = 0; + /* MES passes hung queues' doorbell to driver */ + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { + /* Finding hung queues where db_array[i] is a valid doorbell */ + if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { + hung_db_array[i] = db_array[i]; + *hung_db_num += 1; } + } - /* - * TODO: return HQD info for MES scheduled user compute queue reset cases - * stored in hung_db_array hqd info offset to full array size - */ + if (r && !hung_db_num) { + dev_err(adev->dev, "Failed to detect and reset hung queues\n"); + return r; } + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ + + if (r) + dev_err(adev->dev, "failed to reset\n"); + return r; }