]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdgpu: Avoid extra evict-restore process.
author Gang Ba <Gang.Ba@amd.com>
Tue, 8 Jul 2025 18:36:13 +0000 (14:36 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 28 Jul 2025 20:25:19 +0000 (16:25 -0400)
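If the VM belongs to another process (i.e. this is an fclose() after fork()),
the wait may enable signaling on the KFD eviction fence and cause the parent
process's queues to be evicted. Flush the VM's immediate and delayed scheduler
entities instead of waiting on the reservation object and last_unlocked fence.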

[677852.634569]  amdkfd_fence_enable_signaling+0x56/0x70 [amdgpu]
[677852.634814]  __dma_fence_enable_signaling+0x3e/0xe0
[677852.634820]  dma_fence_wait_timeout+0x3a/0x140
[677852.634825]  amddma_resv_wait_timeout+0x7f/0xf0 [amdkcl]
[677852.634831]  amdgpu_vm_wait_idle+0x2d/0x60 [amdgpu]
[677852.635026]  amdgpu_flush+0x34/0x50 [amdgpu]
[677852.635208]  filp_flush+0x38/0x90
[677852.635213]  filp_close+0x14/0x30
[677852.635216]  do_close_on_exec+0xdd/0x130
[677852.635221]  begin_new_exec+0x1da/0x490
[677852.635225]  load_elf_binary+0x307/0xea0
[677852.635231]  ? srso_alias_return_thunk+0x5/0xfbef5
[677852.635235]  ? ima_bprm_check+0xa2/0xd0
[677852.635240]  search_binary_handler+0xda/0x260
[677852.635245]  exec_binprm+0x58/0x1a0
[677852.635249]  bprm_execve.part.0+0x16f/0x210
[677852.635254]  bprm_execve+0x45/0x80
[677852.635257]  do_execveat_common.isra.0+0x190/0x200

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Gang Ba <Gang.Ba@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index d5c0637d739287b9eabcc2a474661059d312c711..5cacf5717016aaa64968be485805e0c48f88cb10 100644 (file)
@@ -2414,13 +2414,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
  */
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 {
-       timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
-                                       DMA_RESV_USAGE_BOOKKEEP,
-                                       true, timeout);
+       timeout = drm_sched_entity_flush(&vm->immediate, timeout);
        if (timeout <= 0)
                return timeout;
 
-       return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
+       return drm_sched_entity_flush(&vm->delayed, timeout);
 }
 
 static void amdgpu_vm_destroy_task_info(struct kref *kref)