git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 4 Sep 2025 16:10:14 +0000 (18:10 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 4 Sep 2025 16:10:14 +0000 (18:10 +0200)
added patches:
drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch

queue-6.12/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch [new file with mode: 0644]
queue-6.12/fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch
queue-6.12/series

diff --git a/queue-6.12/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch b/queue-6.12/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch
new file mode 100644 (file)
index 0000000..2d557ca
--- /dev/null
@@ -0,0 +1,84 @@
+From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001
+From: Liu01 Tong <Tong.Liu01@amd.com>
+Date: Mon, 11 Aug 2025 14:52:37 +0800
+Subject: drm/amdgpu: fix task hang from failed job submission during process kill
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Liu01 Tong <Tong.Liu01@amd.com>
+
+commit aa5fc4362fac9351557eb27c745579159a2e4520 upstream.
+
+During process kill, drm_sched_entity_flush() will kill the vm
+entities. The following job submissions of this process will fail, and
+the resources of these jobs have not been released, nor have the fences
+been signalled, causing tasks to hang and timeout.
+
+Fix by checking the entity status in amdgpu_vm_ready() and avoiding job
+submission to a stopped entity.
+
+v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in
+function amdgpu_cs_vm_handling().
+
+Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.")
+Signed-off-by: Liu01 Tong <Tong.Liu01@amd.com>
+Signed-off-by: Lin.Cao <lincao12@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1)
+Cc: Jules Maselbas <jmaselbas@zdiv.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |    3 +++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |   15 +++++++++++----
+ 2 files changed, 14 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+@@ -1116,6 +1116,9 @@ static int amdgpu_cs_vm_handling(struct
+               }
+       }
++      if (!amdgpu_vm_ready(vm))
++              return -EINVAL;
++
+       r = amdgpu_vm_clear_freed(adev, vm, NULL);
+       if (r)
+               return r;
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -543,11 +543,10 @@ int amdgpu_vm_validate(struct amdgpu_dev
+  * Check if all VM PDs/PTs are ready for updates
+  *
+  * Returns:
+- * True if VM is not evicting.
++ * True if VM is not evicting and all VM entities are not stopped
+  */
+ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+ {
+-      bool empty;
+       bool ret;
+       amdgpu_vm_eviction_lock(vm);
+@@ -555,10 +554,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *v
+       amdgpu_vm_eviction_unlock(vm);
+       spin_lock(&vm->status_lock);
+-      empty = list_empty(&vm->evicted);
++      ret &= list_empty(&vm->evicted);
+       spin_unlock(&vm->status_lock);
+-      return ret && empty;
++      spin_lock(&vm->immediate.lock);
++      ret &= !vm->immediate.stopped;
++      spin_unlock(&vm->immediate.lock);
++
++      spin_lock(&vm->delayed.lock);
++      ret &= !vm->delayed.stopped;
++      spin_unlock(&vm->delayed.lock);
++
++      return ret;
+ }
+ /**
index 868c2b5ee7163c8d6dd02fe7a5706b0f1000815b..6713220799ffb0f45742c89105aca1641c56b4f9 100644 (file)
@@ -71,14 +71,12 @@ Reviewed-by: Jan Kara <jack@suse.cz>
 Signed-off-by: Christian Brauner <brauner@kernel.org>
 Signed-off-by: Sasha Levin <sashal@kernel.org>
 ---
- fs/fs-writeback.c | 9 +++++----
+ fs/fs-writeback.c |    9 +++++----
  1 file changed, 5 insertions(+), 4 deletions(-)
 
-diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
-index 2391b09f4cede..4ae226778d646 100644
 --- a/fs/fs-writeback.c
 +++ b/fs/fs-writeback.c
-@@ -2572,10 +2572,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+@@ -2572,10 +2572,6 @@ void __mark_inode_dirty(struct inode *in
                        wakeup_bdi = inode_io_list_move_locked(inode, wb,
                                                               dirty_list);
  
@@ -89,7 +87,7 @@ index 2391b09f4cede..4ae226778d646 100644
                        /*
                         * If this is the first dirty inode for this bdi,
                         * we have to wake-up the corresponding bdi thread
-@@ -2585,6 +2581,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+@@ -2585,6 +2581,11 @@ void __mark_inode_dirty(struct inode *in
                        if (wakeup_bdi &&
                            (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
                                wb_wakeup_delayed(wb);
@@ -101,6 +99,3 @@ index 2391b09f4cede..4ae226778d646 100644
                        return;
                }
        }
--- 
-2.50.1
-
index 928a7c7e300a5b9a1b675ad77a51345de1f97392..ef205ef0dfbb690f66dc16d8dfd518a1c4e0b5b1 100644 (file)
@@ -13,3 +13,4 @@ bluetooth-hci_sync-avoid-adding-default-advertising-.patch
 drm-rockchip-vop2-make-vp-registers-nonvolatile.patch
 btrfs-zoned-skip-zone-finish-of-conventional-zones.patch
 fs-writeback-fix-use-after-free-in-__mark_inode_dirt.patch
+drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch