From: Greg Kroah-Hartman Date: Thu, 4 Sep 2025 16:10:14 +0000 (+0200) Subject: 6.12-stable patches X-Git-Tag: v5.4.299~62 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b62bd148650bfb25709ae2747265a35096cfc09f;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch --- diff --git a/queue-6.12/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch b/queue-6.12/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch new file mode 100644 index 0000000000..2d557cad28 --- /dev/null +++ b/queue-6.12/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch @@ -0,0 +1,84 @@ +From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001 +From: Liu01 Tong +Date: Mon, 11 Aug 2025 14:52:37 +0800 +Subject: drm/amdgpu: fix task hang from failed job submission during process kill +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Liu01 Tong + +commit aa5fc4362fac9351557eb27c745579159a2e4520 upstream. + +During process kill, drm_sched_entity_flush() will kill the vm +entities. The following job submissions of this process will fail, and +the resources of these jobs have not been released, nor have the fences +been signalled, causing tasks to hang and timeout. + +Fix by checking entity status in amdgpu_vm_ready() and avoid submitting jobs to +a stopped entity. + +v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in +function amdgpu_cs_vm_handling(). 
+ +Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.") +Signed-off-by: Liu01 Tong +Signed-off-by: Lin.Cao +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +(cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1) +Cc: Jules Maselbas +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++ + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++---- + 2 files changed, 14 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -1116,6 +1116,9 @@ static int amdgpu_cs_vm_handling(struct + } + } + ++ if (!amdgpu_vm_ready(vm)) ++ return -EINVAL; ++ + r = amdgpu_vm_clear_freed(adev, vm, NULL); + if (r) + return r; +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -543,11 +543,10 @@ int amdgpu_vm_validate(struct amdgpu_dev + * Check if all VM PDs/PTs are ready for updates + * + * Returns: +- * True if VM is not evicting. 
++ * True if VM is not evicting and all VM entities are not stopped + */ + bool amdgpu_vm_ready(struct amdgpu_vm *vm) + { +- bool empty; + bool ret; + + amdgpu_vm_eviction_lock(vm); +@@ -555,10 +554,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *v + amdgpu_vm_eviction_unlock(vm); + + spin_lock(&vm->status_lock); +- empty = list_empty(&vm->evicted); ++ ret &= list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); + +- return ret && empty; ++ spin_lock(&vm->immediate.lock); ++ ret &= !vm->immediate.stopped; ++ spin_unlock(&vm->immediate.lock); ++ ++ spin_lock(&vm->delayed.lock); ++ ret &= !vm->delayed.stopped; ++ spin_unlock(&vm->delayed.lock); ++ ++ return ret; + } + + /** diff --git a/queue-6.12/fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch b/queue-6.12/fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch index 868c2b5ee7..6713220799 100644 --- a/queue-6.12/fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch +++ b/queue-6.12/fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch @@ -71,14 +71,12 @@ Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- - fs/fs-writeback.c | 9 +++++---- + fs/fs-writeback.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) -diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c -index 2391b09f4cede..4ae226778d646 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c -@@ -2572,10 +2572,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2572,10 +2572,6 @@ void __mark_inode_dirty(struct inode *in wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); @@ -89,7 +87,7 @@ index 2391b09f4cede..4ae226778d646 100644 /* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread -@@ -2585,6 +2581,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) +@@ -2585,6 +2581,11 @@ void __mark_inode_dirty(struct inode *in if (wakeup_bdi && (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); @@ 
-101,6 +99,3 @@ index 2391b09f4cede..4ae226778d646 100644 return; } } --- -2.50.1 - diff --git a/queue-6.12/series b/queue-6.12/series index 928a7c7e30..ef205ef0df 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -13,3 +13,4 @@ bluetooth-hci_sync-avoid-adding-default-advertising-.patch drm-rockchip-vop2-make-vp-registers-nonvolatile.patch btrfs-zoned-skip-zone-finish-of-conventional-zones.patch fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch +drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch