git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: Fix double deletion of validate_list
author Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Fri, 9 Jan 2026 20:26:36 +0000 (15:26 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 3 Feb 2026 22:24:21 +0000 (17:24 -0500)
If amdgpu_amdkfd_gpuvm_free_memory_of_gpu() fails after the kgd_mem has
been removed from the validate_list, the mem handle still lingers in the
KFD idr. This means that when the process is terminated,
kfd_process_free_outstanding_kfd_bos() will call
amdgpu_amdkfd_gpuvm_free_memory_of_gpu() again, resulting in a double
deletion of the list entry.

To avoid this -
 (a) Only delete the validate_list entry if it is still on a list, and
     re-initialize it (list_del_init) so a repeated call is harmless
 (b) Rearrange amdgpu_amdkfd_gpuvm_free_memory_of_gpu() such that it can
     be safely called again if it returns a failure the first time.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 6ba60345f45eaf7cb4f89105d26083a4b9fd1cba)

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index b1c24c8fa6862330920de902e903807ecfefabe7..a51e76623bada099269b06ff7ac5f12eabf8ea57 100644 (file)
@@ -1920,21 +1920,21 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
        /* Make sure restore workers don't access the BO any more */
        mutex_lock(&process_info->lock);
-       list_del(&mem->validate_list);
+       if (!list_empty(&mem->validate_list))
+               list_del_init(&mem->validate_list);
        mutex_unlock(&process_info->lock);
 
+       ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+       if (unlikely(ret))
+               return ret;
+
        /* Cleanup user pages and MMU notifiers */
        if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
                amdgpu_hmm_unregister(mem->bo);
-               mutex_lock(&process_info->notifier_lock);
                amdgpu_hmm_range_free(mem->range);
-               mutex_unlock(&process_info->notifier_lock);
+               mem->range = NULL;
        }
 
-       ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
-       if (unlikely(ret))
-               return ret;
-
        amdgpu_amdkfd_remove_eviction_fence(mem->bo,
                                        process_info->eviction_fence);
        pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,