git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: Fixup detect and reset
author Amber Lin <Amber.Lin@amd.com>
Fri, 13 Mar 2026 09:35:55 +0000 (05:35 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 17 Apr 2026 19:41:14 +0000 (15:41 -0400)
Identify hung queues by comparing doorbells shown in hqd_info from MES
with doorbells stored in the driver to find matching queues.

Suggested-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c

index 0d75d1aa60ec508f4c2c67f9394cd777e69f9255..932518934f5c2bdd52f59fd1a3753ec55b69bbb6 100644 (file)
@@ -466,23 +466,35 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
 
        r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
                                                          &input);
-       if (r) {
-               dev_err(adev->dev, "failed to detect and reset\n");
-       } else {
-               *hung_db_num = 0;
-               for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
-                       if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
-                               hung_db_array[i] = db_array[i];
-                               *hung_db_num += 1;
-                       }
+
+       if (r && detect_only) {
+               dev_err(adev->dev, "Failed to detect hung queues\n");
+               return r;
+       }
+
+       *hung_db_num = 0;
+       /* MES passes hung queues' doorbell to driver */
+       for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
+               /* Finding hung queues where db_array[i] is a valid doorbell */
+               if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
+                       hung_db_array[i] = db_array[i];
+                       *hung_db_num += 1;
                }
+       }
 
-               /*
-                * TODO: return HQD info for MES scheduled user compute queue reset cases
-                * stored in hung_db_array hqd info offset to full array size
-                */
+       if (r && !hung_db_num) {
+               dev_err(adev->dev, "Failed to detect and reset hung queues\n");
+               return r;
        }
 
+       /*
+        * TODO: return HQD info for MES scheduled user compute queue reset cases
+        * stored in hung_db_array hqd info offset to full array size
+        */
+
+       if (r)
+               dev_err(adev->dev, "failed to reset\n");
+
        return r;
 }