From: Harish Kasiviswanathan Date: Fri, 9 Jan 2026 20:26:36 +0000 (-0500) Subject: drm/amdgpu: Fix double deletion of validate_list X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6b61a54e684006ca0d92d684a1d3c3a00f077d8f;p=thirdparty%2Fkernel%2Flinux.git drm/amdgpu: Fix double deletion of validate_list If amdgpu_amdkfd_gpuvm_free_memory_of_gpu() fails after kgd_mem is removed from validate_list, the mem handle still lingers in the KFD idr. This means when process is terminated, kfd_process_free_outstanding_kfd_bos() will call amdgpu_amdkfd_gpuvm_free_memory_of_gpu() again resulting in double deletion. To avoid this - (a) Check if list is empty before deleting it (b) Rearragne amdgpu_amdkfd_gpuvm_free_memory_of_gpu() such that it can be safely called again if it returns failure the first time. Signed-off-by: Harish Kasiviswanathan Reviewed-by: Philip Yang Signed-off-by: Alex Deucher (cherry picked from commit 6ba60345f45eaf7cb4f89105d26083a4b9fd1cba) --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b1c24c8fa686..a51e76623bad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1920,21 +1920,21 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Make sure restore workers don't access the BO any more */ mutex_lock(&process_info->lock); - list_del(&mem->validate_list); + if (!list_empty(&mem->validate_list)) + list_del_init(&mem->validate_list); mutex_unlock(&process_info->lock); + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); + if (unlikely(ret)) + return ret; + /* Cleanup user pages and MMU notifiers */ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { amdgpu_hmm_unregister(mem->bo); - mutex_lock(&process_info->notifier_lock); amdgpu_hmm_range_free(mem->range); - mutex_unlock(&process_info->notifier_lock); + mem->range = NULL; } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); - if (unlikely(ret)) - return ret; - amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,