From: Perry Yuan Date: Sun, 8 Feb 2026 16:42:08 +0000 (+0800) Subject: drm/amdkfd: suspend scheduler during PTL re-enabling X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=022f071db5eb4434824e52790542e648c9bcd77d;p=thirdparty%2Fkernel%2Flinux.git drm/amdkfd: suspend scheduler during PTL re-enabling Stop the scheduler before releasing the PTL disable request to ensure the GPU is quiescent during the PTL state transition. This prevents potential queue preemption failures and GPU resets caused by modifying PTL state while waves are executing v1->v2: only stop/start the scheduler when the PTL state actually needs to transition(Yifan) Signed-off-by: Perry Yuan Reviewed-by: Yifan Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 883de31df04d..bada73b48899 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1789,10 +1789,17 @@ static int kfd_ptl_control(struct kfd_process_device *pdd, bool enable) if (!pdd->dev->kfd2kgd || !pdd->dev->kfd2kgd->ptl_ctrl) return -EOPNOTSUPP; + if (adev->kfd.init_complete) + amdgpu_amdkfd_stop_sched(adev, pdd->dev->node_id); + ret = pdd->dev->kfd2kgd->ptl_ctrl(adev, PSP_PTL_PERF_MON_SET, &ptl_state, &pref_format1, &pref_format2); + + if (adev->kfd.init_complete) + amdgpu_amdkfd_start_sched(adev, pdd->dev->node_id); + return ret; } @@ -3322,6 +3329,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, struct kfd_process_device *pdd; struct kfd_dev *kfd; int status = -EINVAL; + struct amdgpu_ptl *ptl; if (!p) return -EINVAL; @@ -3334,13 +3342,22 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, return -EINVAL; kfd = pdd->dev->kfd; + ptl = &pdd->dev->adev->psp.ptl; mutex_lock(&kfd->profiler_lock); if (op == 1) { if (!kfd->profiler_process) { kfd->profiler_process = p; status = 0; - kfd_ptl_disable_request(pdd, p); + mutex_unlock(&kfd->profiler_lock); + if (ptl->hw_supported) { + status = kfd_ptl_disable_request(pdd, p); + if (status != 0) + dev_err(kfd_device, + "Failed to lock device %d for profiling, error %d\n", + gpu_id, status); + } + return status; } else if (kfd->profiler_process == p) { status = -EALREADY; } else { @@ -3349,7 +3366,16 @@ static inline uint32_t profile_lock_device(struct kfd_process *p, } else if (op == 0 && kfd->profiler_process == p) { kfd->profiler_process = NULL; status = 0; - kfd_ptl_disable_release(pdd, p); + mutex_unlock(&kfd->profiler_lock); + + if (ptl->hw_supported) { + status = kfd_ptl_disable_release(pdd, p); + if (status) + dev_err(kfd_device, + "Failed to unlock device %d for profiling, error %d\n", + gpu_id, status); + } + return status; } mutex_unlock(&kfd->profiler_lock);