git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/amdkfd: always resume_all after suspend_all
author: Alex Deucher <alexander.deucher@amd.com>
Wed, 6 May 2026 20:50:42 +0000 (16:50 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 4 Jun 2026 19:38:08 +0000 (15:38 -0400)
Need to restore any good queues even if the suspend_all
failed for some.  Always run remove_queue as that will
schedule a GPU reset if removing the queue fails.

v2: move resume_all after remove

Fixes: eb067d65c33e ("drm/amdkfd: Update BadOpcode Interrupt handling with MES")
Reviewed-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 5150511cefc599f99a1dd980b348829b22fae750..2e010c1f88285dfd62ffc714d2fe0b5748213610 100644 (file)
@@ -3264,32 +3264,24 @@ int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbel
 
                list_for_each_entry(q, &qpd->queues_list, list) {
                        if (q->doorbell_id == doorbell_id && q->properties.is_active) {
-                               ret = suspend_all_queues_mes(dqm);
-                               if (ret) {
-                                       dev_err(dev, "Suspending all queues failed");
-                                       goto out;
-                               }
+                               /* suspend all queues will save any good queues and mark the rest as bad */
+                               suspend_all_queues_mes(dqm);
 
                                q->properties.is_evicted = true;
                                q->properties.is_active = false;
                                decrement_queue_count(dqm, qpd, q);
 
+                               /* this will remove the bad queue and sched a GPU reset if needed */
                                ret = remove_queue_mes(dqm, q, qpd);
-                               if (ret) {
-                                       dev_err(dev, "Removing bad queue failed");
-                                       goto out;
-                               }
-
-                               ret = resume_all_queues_mes(dqm);
                                if (ret)
-                                       dev_err(dev, "Resuming all queues failed");
-
+                                       dev_err(dev, "Removing bad queue failed");
+                               /* resume the good queues */
+                               resume_all_queues_mes(dqm);
                                break;
                        }
                }
        }
 
-out:
        dqm_unlock(dqm);
        kfd_unref_process(p);
        return ret;