git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/msm: always recover the gpu
author: Anna Maniscalco <anna.maniscalco2000@gmail.com>
Tue, 10 Feb 2026 16:29:42 +0000 (17:29 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 17 May 2026 15:16:29 +0000 (17:16 +0200)
commit 01a0d6cd7032e9993feea19fadb03ef9d5b488f2 upstream.

Previously, in case there was no more work to do, recover worker
wouldn't trigger recovery and would instead rely on the gpu going to
sleep and then resuming when more work is submitted.

Recover_worker will first increment the fence of the hung ring so, if
there's only one job submitted to a ring and that causes a hang, it
will early out.

There's no guarantee that the gpu will suspend and resume before more
work is submitted and if the gpu is in a hung state it will stay in that
state and probably trigger a timeout again.

Just stop checking and always recover the gpu.

Signed-off-by: Anna Maniscalco <anna.maniscalco2000@gmail.com>
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.freedesktop.org/patch/704066/
Message-ID: <20260210-recovery_suspend_fix-v1-1-00ed9013da04@gmail.com>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/msm/msm_gpu.c

index 84d6c7f50c8df4e27b725f6f6269e5a1115e3ad2..d178bb9b813ae695f7b996ab04146109878e171b 100644 (file)
@@ -546,32 +546,30 @@ static void recover_worker(struct kthread_work *work)
                msm_update_fence(ring->fctx, fence);
        }
 
-       if (msm_gpu_active(gpu)) {
-               /* retire completed submits, plus the one that hung: */
-               retire_submits(gpu);
+       /* retire completed submits, plus the one that hung: */
+       retire_submits(gpu);
 
-               gpu->funcs->recover(gpu);
+       gpu->funcs->recover(gpu);
 
-               /*
-                * Replay all remaining submits starting with highest priority
-                * ring
-                */
-               for (i = 0; i < gpu->nr_rings; i++) {
-                       struct msm_ringbuffer *ring = gpu->rb[i];
-                       unsigned long flags;
+       /*
+        * Replay all remaining submits starting with highest priority
+        * ring
+        */
+       for (i = 0; i < gpu->nr_rings; i++) {
+               struct msm_ringbuffer *ring = gpu->rb[i];
+               unsigned long flags;
 
-                       spin_lock_irqsave(&ring->submit_lock, flags);
-                       list_for_each_entry(submit, &ring->submits, node) {
-                               /*
-                                * If the submit uses an unusable vm make sure
-                                * we don't actually run it
-                                */
-                               if (to_msm_vm(submit->vm)->unusable)
-                                       submit->nr_cmds = 0;
-                               gpu->funcs->submit(gpu, submit);
-                       }
-                       spin_unlock_irqrestore(&ring->submit_lock, flags);
+               spin_lock_irqsave(&ring->submit_lock, flags);
+               list_for_each_entry(submit, &ring->submits, node) {
+                       /*
+                        * If the submit uses an unusable vm make sure
+                        * we don't actually run it
+                        */
+                       if (to_msm_vm(submit->vm)->unusable)
+                               submit->nr_cmds = 0;
+                       gpu->funcs->submit(gpu, submit);
                }
+               spin_unlock_irqrestore(&ring->submit_lock, flags);
        }
 
        pm_runtime_put(&gpu->pdev->dev);