+++ /dev/null
-From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001
-From: Liu01 Tong <Tong.Liu01@amd.com>
-Date: Mon, 11 Aug 2025 14:52:37 +0800
-Subject: drm/amdgpu: fix task hang from failed job submission during process kill
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Liu01 Tong <Tong.Liu01@amd.com>
-
-commit aa5fc4362fac9351557eb27c745579159a2e4520 upstream.
-
-During process kill, drm_sched_entity_flush() will kill the vm
-entities. Subsequent job submissions from this process will then fail,
-but the resources of these jobs are not released and their fences are
-never signalled, causing tasks to hang and time out.
-
-Fix this by checking the entity status in amdgpu_vm_ready() and by not
-submitting jobs to a stopped entity.
-
-v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in
-function amdgpu_cs_vm_handling().
-
-Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.")
-Signed-off-by: Liu01 Tong <Tong.Liu01@amd.com>
-Signed-off-by: Lin.Cao <lincao12@amd.com>
-Reviewed-by: Christian König <christian.koenig@amd.com>
-Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-(cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1)
-Cc: Jules Maselbas <jmaselbas@zdiv.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++----
- 2 files changed, 14 insertions(+), 4 deletions(-)
-
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
-@@ -1116,6 +1116,9 @@ static int amdgpu_cs_vm_handling(struct
- }
- }
-
-+ if (!amdgpu_vm_ready(vm))
-+ return -EINVAL;
-+
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
- if (r)
- return r;
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-@@ -543,11 +543,10 @@ int amdgpu_vm_validate(struct amdgpu_dev
- * Check if all VM PDs/PTs are ready for updates
- *
- * Returns:
-- * True if VM is not evicting.
-+ * True if VM is not evicting and all VM entities are not stopped
- */
- bool amdgpu_vm_ready(struct amdgpu_vm *vm)
- {
-- bool empty;
- bool ret;
-
- amdgpu_vm_eviction_lock(vm);
-@@ -555,10 +554,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *v
- amdgpu_vm_eviction_unlock(vm);
-
- spin_lock(&vm->status_lock);
-- empty = list_empty(&vm->evicted);
-+ ret &= list_empty(&vm->evicted);
- spin_unlock(&vm->status_lock);
-
-- return ret && empty;
-+ spin_lock(&vm->immediate.lock);
-+ ret &= !vm->immediate.stopped;
-+ spin_unlock(&vm->immediate.lock);
-+
-+ spin_lock(&vm->delayed.lock);
-+ ret &= !vm->delayed.stopped;
-+ spin_unlock(&vm->delayed.lock);
-+
-+ return ret;
- }
-
- /**