]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu/userq: make sure queue is valid in the hang_detect_work
authorSunil Khatri <sunil.khatri@amd.com>
Mon, 18 May 2026 14:28:08 +0000 (19:58 +0530)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 27 May 2026 15:58:31 +0000 (11:58 -0400)
Thread 1: Running amdgpu_userq_destroy which eventually remove
the queue from door bell and set userq_mgr = NULL.

Thread2: An interrupt might have scheduled the hang_detect_work
which still need userq_mgr to be valid but could get an NULL
ptrs.

To fix that make sure we cancel the hang_detect_work again before
setting userq_mgr to NULL.

Along with that we also need all the queue va to remain valid till
we could be running anything on the queue and hence moving the
userq_va post hang_detect handler is cancelled.

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 1a66ceb98b137d18d303b9889f0e7d8c4db73943)

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c

index 5da107dffccef6ff29ca863fe532a86ab32c2c0c..8d7dad1d30eb14c3e58339eae6c821013e5dbd65 100644 (file)
@@ -427,8 +427,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
        xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
        amdgpu_userq_fence_driver_free(queue);
        queue->fence_drv = NULL;
-       queue->userq_mgr = NULL;
-       list_del(&queue->userq_va_list);
 
        up_read(&adev->reset_domain->sem);
 }
@@ -619,11 +617,6 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
 
        /* Cancel any pending hang detection work and cleanup */
        cancel_delayed_work_sync(&queue->hang_detect_work);
-
-       amdgpu_bo_reserve(vm->root.bo, true);
-       amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
-       amdgpu_bo_unreserve(vm->root.bo);
-
        mutex_lock(&uq_mgr->userq_mutex);
        amdgpu_userq_wait_for_last_fence(queue);
 
@@ -635,6 +628,13 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_que
        amdgpu_userq_cleanup(queue);
        mutex_unlock(&uq_mgr->userq_mutex);
 
+       cancel_delayed_work_sync(&queue->hang_detect_work);
+       amdgpu_bo_reserve(vm->root.bo, true);
+       amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
+       amdgpu_bo_unreserve(vm->root.bo);
+       list_del(&queue->userq_va_list);
+       queue->userq_mgr = NULL;
+
        amdgpu_bo_reserve(queue->db_obj.obj, true);
        amdgpu_bo_unpin(queue->db_obj.obj);
        amdgpu_bo_unreserve(queue->db_obj.obj);