From: Alex Deucher Date: Wed, 6 May 2026 20:50:42 +0000 (-0400) Subject: drm/amdkfd: always resume_all after suspend_all X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=56ae73c92e200e630c2bdf1e98c88b86c8483b37;p=thirdparty%2Flinux.git drm/amdkfd: always resume_all after suspend_all Need to restore any good queues even if the suspend_all failed for some. Always run remove_queue as that will schedule a GPU reset if removing the queue fails. v2: move resume_all after remove Fixes: eb067d65c33e ("drm/amdkfd: Update BadOpcode Interrupt handling with MES") Reviewed-by: Amber Lin Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5150511cefc5..2e010c1f8828 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -3264,32 +3264,24 @@ int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbel list_for_each_entry(q, &qpd->queues_list, list) { if (q->doorbell_id == doorbell_id && q->properties.is_active) { - ret = suspend_all_queues_mes(dqm); - if (ret) { - dev_err(dev, "Suspending all queues failed"); - goto out; - } + /* suspend all queues will save any good queues and mark the rest as bad */ + suspend_all_queues_mes(dqm); q->properties.is_evicted = true; q->properties.is_active = false; decrement_queue_count(dqm, qpd, q); + /* this will remove the bad queue and sched a GPU reset if needed */ ret = remove_queue_mes(dqm, q, qpd); - if (ret) { - dev_err(dev, "Removing bad queue failed"); - goto out; - } - - ret = resume_all_queues_mes(dqm); if (ret) - dev_err(dev, "Resuming all queues failed"); - + dev_err(dev, "Removing bad queue failed"); + /* resume the good queues */ + resume_all_queues_mes(dqm); break; } } } -out: dqm_unlock(dqm); kfd_unref_process(p); return ret;