From: Greg Kroah-Hartman Date: Sat, 23 Aug 2025 11:21:51 +0000 (+0200) Subject: 6.16-stable patches X-Git-Tag: v6.16.3~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6c4f527c5f3bc36bd4278c6e1019a92038ef753d;p=thirdparty%2Fkernel%2Fstable-queue.git 6.16-stable patches added patches: drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch soc-qcom-mdt_loader-fix-error-return-values-in-mdt_header_valid.patch xfs-fix-frozen-file-system-assert-in-xfs_trans_alloc.patch --- diff --git a/queue-6.16/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch b/queue-6.16/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch new file mode 100644 index 0000000000..84764c1183 --- /dev/null +++ b/queue-6.16/drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch @@ -0,0 +1,83 @@ +From aa5fc4362fac9351557eb27c745579159a2e4520 Mon Sep 17 00:00:00 2001 +From: Liu01 Tong +Date: Mon, 11 Aug 2025 14:52:37 +0800 +Subject: drm/amdgpu: fix task hang from failed job submission during process kill +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Liu01 Tong + +commit aa5fc4362fac9351557eb27c745579159a2e4520 upstream. + +During process kill, drm_sched_entity_flush() will kill the vm +entities. Subsequent job submissions of this process will fail, and +the resources of these jobs are not released, nor are the fences +signalled, causing tasks to hang and time out. + +Fix by checking entity status in amdgpu_vm_ready() and avoiding job +submission to stopped entities. + +v2: add amdgpu_vm_ready() check before amdgpu_vm_clear_freed() in +function amdgpu_cs_vm_handling(). 
+ +Fixes: 1f02f2044bda ("drm/amdgpu: Avoid extra evict-restore process.") +Signed-off-by: Liu01 Tong +Signed-off-by: Lin.Cao +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +(cherry picked from commit f101c13a8720c73e67f8f9d511fbbeda95bcedb1) +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +++ + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++---- + 2 files changed, 14 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +@@ -1138,6 +1138,9 @@ static int amdgpu_cs_vm_handling(struct + } + } + ++ if (!amdgpu_vm_ready(vm)) ++ return -EINVAL; ++ + r = amdgpu_vm_clear_freed(adev, vm, NULL); + if (r) + return r; +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -654,11 +654,10 @@ int amdgpu_vm_validate(struct amdgpu_dev + * Check if all VM PDs/PTs are ready for updates + * + * Returns: +- * True if VM is not evicting. ++ * True if VM is not evicting and all VM entities are not stopped + */ + bool amdgpu_vm_ready(struct amdgpu_vm *vm) + { +- bool empty; + bool ret; + + amdgpu_vm_eviction_lock(vm); +@@ -666,10 +665,18 @@ bool amdgpu_vm_ready(struct amdgpu_vm *v + amdgpu_vm_eviction_unlock(vm); + + spin_lock(&vm->status_lock); +- empty = list_empty(&vm->evicted); ++ ret &= list_empty(&vm->evicted); + spin_unlock(&vm->status_lock); + +- return ret && empty; ++ spin_lock(&vm->immediate.lock); ++ ret &= !vm->immediate.stopped; ++ spin_unlock(&vm->immediate.lock); ++ ++ spin_lock(&vm->delayed.lock); ++ ret &= !vm->delayed.stopped; ++ spin_unlock(&vm->delayed.lock); ++ ++ return ret; + } + + /** diff --git a/queue-6.16/series b/queue-6.16/series index 1e5f4f4c7b..aaf91f4156 100644 --- a/queue-6.16/series +++ b/queue-6.16/series @@ -274,3 +274,6 @@ scsi-mpi3mr-drop-unnecessary-volatile-from-__iomem-pointers.patch scsi-mpi3mr-serialize-admin-queue-bar-writes-on-32-bit-systems.patch 
pci-rockchip-use-standard-pcie-definitions.patch pci-rockchip-set-target-link-speed-to-5.0-gt-s-before-retraining.patch +drm-amdgpu-fix-task-hang-from-failed-job-submission-during-process-kill.patch +soc-qcom-mdt_loader-fix-error-return-values-in-mdt_header_valid.patch +xfs-fix-frozen-file-system-assert-in-xfs_trans_alloc.patch diff --git a/queue-6.16/soc-qcom-mdt_loader-fix-error-return-values-in-mdt_header_valid.patch b/queue-6.16/soc-qcom-mdt_loader-fix-error-return-values-in-mdt_header_valid.patch new file mode 100644 index 0000000000..7d46c833af --- /dev/null +++ b/queue-6.16/soc-qcom-mdt_loader-fix-error-return-values-in-mdt_header_valid.patch @@ -0,0 +1,42 @@ +From 9f35ab0e53ccbea57bb9cbad8065e0406d516195 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Wed, 25 Jun 2025 10:22:41 -0500 +Subject: soc: qcom: mdt_loader: Fix error return values in mdt_header_valid() + +From: Dan Carpenter + +commit 9f35ab0e53ccbea57bb9cbad8065e0406d516195 upstream. + +This function is supposed to return true for valid headers and false for +invalid. In a couple places it returns -EINVAL instead which means the +invalid headers are counted as true. Change it to return false. 
+ +Fixes: 9f9967fed9d0 ("soc: qcom: mdt_loader: Ensure we don't read past the ELF header") +Signed-off-by: Dan Carpenter +Reviewed-by: Konrad Dybcio +Link: https://lore.kernel.org/r/db57c01c-bdcc-4a0f-95db-b0f2784ea91f@sabinyo.mountain +Signed-off-by: Bjorn Andersson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/soc/qcom/mdt_loader.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/soc/qcom/mdt_loader.c ++++ b/drivers/soc/qcom/mdt_loader.c +@@ -33,14 +33,14 @@ static bool mdt_header_valid(const struc + return false; + + if (ehdr->e_phentsize != sizeof(struct elf32_phdr)) +- return -EINVAL; ++ return false; + + phend = size_add(size_mul(sizeof(struct elf32_phdr), ehdr->e_phnum), ehdr->e_phoff); + if (phend > fw->size) + return false; + + if (ehdr->e_shentsize != sizeof(struct elf32_shdr)) +- return -EINVAL; ++ return false; + + shend = size_add(size_mul(sizeof(struct elf32_shdr), ehdr->e_shnum), ehdr->e_shoff); + if (shend > fw->size) diff --git a/queue-6.16/xfs-fix-frozen-file-system-assert-in-xfs_trans_alloc.patch b/queue-6.16/xfs-fix-frozen-file-system-assert-in-xfs_trans_alloc.patch new file mode 100644 index 0000000000..baea957088 --- /dev/null +++ b/queue-6.16/xfs-fix-frozen-file-system-assert-in-xfs_trans_alloc.patch @@ -0,0 +1,38 @@ +From 647b3d59c768d7638dd17c78c8044178364383ca Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Thu, 31 Jul 2025 07:19:41 -0700 +Subject: xfs: fix frozen file system assert in xfs_trans_alloc + +From: Christoph Hellwig + +commit 647b3d59c768d7638dd17c78c8044178364383ca upstream. + +Commit 83a80e95e797 ("xfs: decouple xfs_trans_alloc_empty from +xfs_trans_alloc") moved the assert for a frozen file system from its +place after the sb_start_intwrite call, which ensures it doesn't run on +frozen file systems, and thus allows it to be triggered incorrectly. + +Fix that by moving it back to where it belongs. 
+ +Fixes: 83a80e95e797 ("xfs: decouple xfs_trans_alloc_empty from xfs_trans_alloc") +Reported-by: Dave Chinner +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_trans.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -284,8 +284,8 @@ xfs_trans_alloc( + * by doing GFP_KERNEL allocations inside sb_start_intwrite(). + */ + retry: +- WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + tp = __xfs_trans_alloc(mp, flags); ++ WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + error = xfs_trans_reserve(tp, resp, blocks, rtextents); + if (error == -ENOSPC && want_retry) { + xfs_trans_cancel(tp);