--- /dev/null
+From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001
+From: Liu01 Tong <Tong.Liu01@amd.com>
+Date: Mon, 11 Aug 2025 14:52:37 +0800
+Subject: drm/amdgpu: fix task hang from failed job submission during process kill
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Liu01 Tong <Tong.Liu01@amd.com>
+
+commit aa5fc4362fac9351557eb27c745579159a2e4520 upstream.
+
+During process kill, drm_sched_entity_flush() kills the VM entities.
+Subsequent job submissions from this process then fail, but the
+resources of these jobs are not released and their fences are never
+signalled, causing tasks to hang and time out.
+
+Fix by checking the entity status in amdgpu_vm_ready() and avoiding job
+submission to a stopped entity.
+
+v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in
+function amdgpu_cs_vm_handling().
+
+Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.")
+Signed-off-by: Liu01 Tong <Tong.Liu01@amd.com>
+Signed-off-by: Lin.Cao <lincao12@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1)
+Cc: Jules Maselbas <jmaselbas@zdiv.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++----
+ 2 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -1116,6 +1116,9 @@ static int amdgpu_cs_vm_handling(struct
+ }
+ }
+
++ if (!amdgpu_vm_ready(vm))
++ return -EINVAL;
++
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (r)
+ return r;
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -543,11 +543,10 @@ int amdgpu_vm_validate(struct amdgpu_dev
+ * Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+- * True if VM is not evicting.
++ * True if VM is not evicting and all VM entities are not stopped
+ */
+ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+ {
+- bool empty;
+ bool ret;
+
+ amdgpu_vm_eviction_lock(vm);
+@@ -555,10 +554,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *v
+ amdgpu_vm_eviction_unlock(vm);
+
+ spin_lock(&vm->status_lock);
+- empty = list_empty(&vm->evicted);
++ ret &= list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
+
+- return ret && empty;
++ spin_lock(&vm->immediate.lock);
++ ret &= !vm->immediate.stopped;
++ spin_unlock(&vm->immediate.lock);
++
++ spin_lock(&vm->delayed.lock);
++ ret &= !vm->delayed.stopped;
++ spin_unlock(&vm->delayed.lock);
++
++ return ret;
+ }
+
+ /**
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
- fs/fs-writeback.c | 9 +++++----
+ fs/fs-writeback.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
-diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
-index 2391b09f4cede..4ae226778d646 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
-@@ -2572,10 +2572,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+@@ -2572,10 +2572,6 @@ void __mark_inode_dirty(struct inode *in
wakeup_bdi = inode_io_list_move_locked(inode, wb,
dirty_list);
/*
* If this is the first dirty inode for this bdi,
* we have to wake-up the corresponding bdi thread
-@@ -2585,6 +2581,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+@@ -2585,6 +2581,11 @@ void __mark_inode_dirty(struct inode *in
if (wakeup_bdi &&
(wb->bdi->capabilities & BDI_CAP_WRITEBACK))
wb_wakeup_delayed(wb);
return;
}
}
---
-2.50.1
-