git.ipfire.org Git - thirdparty/linux.git/commitdiff
accel/amdxdna: Fix runtime suspend deadlock when there is pending job
author: Lizhi Hou <lizhi.hou@amd.com>
Tue, 10 Mar 2026 18:00:58 +0000 (11:00 -0700)
committer: Lizhi Hou <lizhi.hou@amd.com>
Tue, 10 Mar 2026 18:46:40 +0000 (11:46 -0700)
The runtime suspend callback drains the running job workqueue before
suspending the device. If a job is still executing and calls
pm_runtime_resume_and_get(), it can deadlock with the runtime suspend
path.

Fix this by moving pm_runtime_resume_and_get() from the job execution
routine to the job submission routine, ensuring the device is resumed
before the job is queued and avoiding the deadlock during runtime
suspend.

Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260310180058.336348-1-lizhi.hou@amd.com
drivers/accel/amdxdna/aie2_ctx.c
drivers/accel/amdxdna/amdxdna_ctx.c

index afee5e667f775a5a0c8d757e9e9820e55d9228c2..c0d348884f749488e2db143e25b13d0cd928572c 100644 (file)
@@ -165,7 +165,6 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
 
        trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
 
-       amdxdna_pm_suspend_put(job->hwctx->client->xdna);
        job->hwctx->priv->completed++;
        dma_fence_signal(fence);
 
@@ -290,19 +289,11 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
        struct dma_fence *fence;
        int ret;
 
-       ret = amdxdna_pm_resume_get(hwctx->client->xdna);
-       if (ret)
+       if (!hwctx->priv->mbox_chann)
                return NULL;
 
-       if (!hwctx->priv->mbox_chann) {
-               amdxdna_pm_suspend_put(hwctx->client->xdna);
-               return NULL;
-       }
-
-       if (!mmget_not_zero(job->mm)) {
-               amdxdna_pm_suspend_put(hwctx->client->xdna);
+       if (!mmget_not_zero(job->mm))
                return ERR_PTR(-ESRCH);
-       }
 
        kref_get(&job->refcnt);
        fence = dma_fence_get(job->fence);
@@ -333,7 +324,6 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
 
 out:
        if (ret) {
-               amdxdna_pm_suspend_put(hwctx->client->xdna);
                dma_fence_put(job->fence);
                aie2_job_put(job);
                mmput(job->mm);
index 666dfd7b2a805d776aa0b30e7b670366d58ef9fc..838430903a3eafb93ab02fab860d334d5d9d728e 100644 (file)
@@ -17,6 +17,7 @@
 #include "amdxdna_ctx.h"
 #include "amdxdna_gem.h"
 #include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
 
 #define MAX_HWCTX_ID           255
 #define MAX_ARG_COUNT          4095
@@ -445,6 +446,7 @@ put_shmem_bo:
 void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
 {
        trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
+       amdxdna_pm_suspend_put(job->hwctx->client->xdna);
        amdxdna_arg_bos_put(job);
        amdxdna_gem_put_obj(job->cmd_bo);
        dma_fence_put(job->fence);
@@ -482,6 +484,12 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
                goto cmd_put;
        }
 
+       ret = amdxdna_pm_resume_get(xdna);
+       if (ret) {
+               XDNA_ERR(xdna, "Resume failed, ret %d", ret);
+               goto put_bos;
+       }
+
        idx = srcu_read_lock(&client->hwctx_srcu);
        hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
        if (!hwctx) {
@@ -522,6 +530,8 @@ put_fence:
        dma_fence_put(job->fence);
 unlock_srcu:
        srcu_read_unlock(&client->hwctx_srcu, idx);
+       amdxdna_pm_suspend_put(xdna);
+put_bos:
        amdxdna_arg_bos_put(job);
 cmd_put:
        amdxdna_gem_put_obj(job->cmd_bo);