]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: remove deadlocks from amdgpu_userq_pre_reset
author: Christian König <christian.koenig@amd.com>
Mon, 20 Apr 2026 18:18:43 +0000 (20:18 +0200)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 11 May 2026 21:46:34 +0000 (17:46 -0400)
The purpose of a GPU reset is to make sure that fences can be signaled
again and that the signal and resume workers can make progress.

So waiting for the resume worker or for any fence in the GPU reset path
is utter nonsense and can deadlock the reset.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit fcd5f065eab46993af43442fd77ee8d9eb9c5bdf)

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c

index de140a8ed1354aa93979182b57f66f442642ebc5..692f7e3513dfca2f313eaeae7415943564ac8457 100644 (file)
@@ -1504,23 +1504,21 @@ void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
 {
        const struct amdgpu_userq_funcs *userq_funcs;
        struct amdgpu_usermode_queue *queue;
-       struct amdgpu_userq_mgr *uqm;
        unsigned long queue_id;
 
+       /* TODO: We probably need a new lock for the queue state */
        xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
-               uqm = queue->userq_mgr;
-               cancel_delayed_work_sync(&uqm->resume_work);
-               if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
-                       amdgpu_userq_wait_for_last_fence(queue);
-                       userq_funcs = adev->userq_funcs[queue->queue_type];
-                       userq_funcs->unmap(queue);
-                       /* just mark all queues as hung at this point.
-                        * if unmap succeeds, we could map again
-                        * in amdgpu_userq_post_reset() if vram is not lost
-                        */
-                       queue->state = AMDGPU_USERQ_STATE_HUNG;
-                       amdgpu_userq_fence_driver_force_completion(queue);
-               }
+               if (queue->state != AMDGPU_USERQ_STATE_MAPPED)
+                       continue;
+
+               userq_funcs = adev->userq_funcs[queue->queue_type];
+               userq_funcs->unmap(queue);
+               /* just mark all queues as hung at this point.
+                * if unmap succeeds, we could map again
+                * in amdgpu_userq_post_reset() if vram is not lost
+                */
+               queue->state = AMDGPU_USERQ_STATE_HUNG;
+               amdgpu_userq_fence_driver_force_completion(queue);
        }
 }