From f903b85ed0f14fc412d8a781d3fcc0c023dfcd7c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 17:11:38 -0400 Subject: [PATCH] drm/amdgpu: fix possible fence leaks from job structure MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If we don't end up initializing the fences, free them when we free the job. We can't set the hw_fence to NULL after emitting it because we need it in the cleanup path for the submit direct case. v2: take a reference to the fences if we emit them v3: handle non-job fence in error paths Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling") Reviewed-by: Jesse Zhang (v1) Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 19 +++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 39229ece83f83..586a58facca10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); - return -EINVAL; + r = -EINVAL; + goto free_fence; } if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); - return -EINVAL; + r = -EINVAL; + goto free_fence; } if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) && (!ring->funcs->secure_submission_supported)) { dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name); - return -EINVAL; + r = -EINVAL; + goto free_fence; } alloc_size = ring->funcs->emit_frame_size + num_ibs * @@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, r = amdgpu_ring_alloc(ring, alloc_size); if (r) { dev_err(adev->dev, "scheduling IB failed (%d).\n", r); - return r; + goto free_fence; } need_ctx_switch = ring->current_ctx != fence_ctx; @@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, return r; } *f = &af->base; + /* get a ref for the job */ + if (job) + dma_fence_get(*f); if (ring->funcs->insert_end) ring->funcs->insert_end(ring); @@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_commit(ring); return 0; + +free_fence: + if (!job) + kfree(af); + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9e0cd1e0afc37..7d8ef7ae10c23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); + if (job->hw_fence->base.ops) + dma_fence_put(&job->hw_fence->base); + else + kfree(job->hw_fence); + if (job->hw_vm_fence->base.ops) + dma_fence_put(&job->hw_vm_fence->base); + else + kfree(job->hw_vm_fence); + kfree(job); } @@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); + if (job->hw_fence->base.ops) + dma_fence_put(&job->hw_fence->base); + else + kfree(job->hw_fence); + if (job->hw_vm_fence->base.ops) + dma_fence_put(&job->hw_vm_fence->base); + else + kfree(job->hw_vm_fence); + kfree(job); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 453d3b576456c..700b4a776532c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (r) return r; fence = &job->hw_vm_fence->base; + /* get a ref for the job */ + dma_fence_get(fence); } if (vm_flush_needed) { -- 2.47.3