drm/amdgpu: fix possible fence leaks from job structure
Author:     Alex Deucher <alexander.deucher@amd.com>
AuthorDate: Wed, 22 Oct 2025 21:11:38 +0000 (17:11 -0400)
Commit:     Alex Deucher <alexander.deucher@amd.com>
CommitDate: Tue, 4 Nov 2025 16:53:59 +0000 (11:53 -0500)
If we don't end up initializing the fences, free them when
we free the job.  We can't set the hw_fence to NULL after
emitting it because we need it in the cleanup path for the
direct submit case.

v2: take a reference to the fences if we emit them
v3: handle non-job fence in error paths

Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com> (v1)
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
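
The rule the commit message describes: once a hw fence has been emitted it
is refcounted and may only be released with dma_fence_put(), while a fence
that was allocated but never initialized has no ops table and can only be
kfree()'d.  A minimal sketch of that rule, assuming amdgpu's struct
amdgpu_fence with an embedded dma_fence base; the helper name
free_hw_fence() is hypothetical, not part of amdgpu:

	static void free_hw_fence(struct amdgpu_fence *af)
	{
		/*
		 * dma_fence_init() is what stores the ops pointer, so a
		 * fence that was only allocated but never emitted still
		 * has ops == NULL and no live refcount.
		 */
		if (af->base.ops)
			dma_fence_put(&af->base);	/* drop the job's reference */
		else
			kfree(af);			/* never initialized, plain free */
	}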

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 39229ece83f83e0a4e343de864b67acdea7eac88..586a58facca10d36eb52db54b4fc53e663cf2f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 
        if (!ring->sched.ready) {
                dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
-               return -EINVAL;
+               r = -EINVAL;
+               goto free_fence;
        }
 
        if (vm && !job->vmid) {
                dev_err(adev->dev, "VM IB without ID\n");
-               return -EINVAL;
+               r = -EINVAL;
+               goto free_fence;
        }
 
        if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
            (!ring->funcs->secure_submission_supported)) {
                dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
-               return -EINVAL;
+               r = -EINVAL;
+               goto free_fence;
        }
 
        alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        r = amdgpu_ring_alloc(ring, alloc_size);
        if (r) {
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-               return r;
+               goto free_fence;
        }
 
        need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                return r;
        }
        *f = &af->base;
+       /* get a ref for the job */
+       if (job)
+               dma_fence_get(*f);
 
        if (ring->funcs->insert_end)
                ring->funcs->insert_end(ring);
@@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        amdgpu_ring_commit(ring);
 
        return 0;
+
+free_fence:
+       if (!job)
+               kfree(af);
+       return r;
 }
 
 /**
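
The hunks above change amdgpu_ib_schedule() so that every early exit taken
before the fence is emitted funnels through the free_fence label, which
frees the pre-allocated fence only when no job owns it; once the fence is
emitted, the added dma_fence_get() gives the job its own reference, to be
dropped later in the job-free paths.  A condensed sketch of that shape,
with the hypothetical sketch_schedule() standing in for the much larger
real function:

	static int sketch_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
				   struct amdgpu_fence *af, struct dma_fence **f)
	{
		int r;

		if (!ring->sched.ready) {
			r = -EINVAL;
			goto free_fence;	/* af not emitted yet */
		}

		/* ... build and emit the frame; af becomes a live fence ... */
		*f = &af->base;
		if (job)
			dma_fence_get(*f);	/* extra reference owned by the job */
		return 0;

	free_fence:
		if (!job)	/* a job-owned fence is freed with the job */
			kfree(af);
		return r;
	}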
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 9e0cd1e0afc373eafcb08b9f865e3b53e5b3ec1c..7d8ef7ae10c23a2dcfa9a3a50efc70ccba332d61 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
        amdgpu_sync_free(&job->explicit_sync);
 
+       if (job->hw_fence->base.ops)
+               dma_fence_put(&job->hw_fence->base);
+       else
+               kfree(job->hw_fence);
+       if (job->hw_vm_fence->base.ops)
+               dma_fence_put(&job->hw_vm_fence->base);
+       else
+               kfree(job->hw_vm_fence);
+
        kfree(job);
 }
 
@@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
        if (job->gang_submit != &job->base.s_fence->scheduled)
                dma_fence_put(job->gang_submit);
 
+       if (job->hw_fence->base.ops)
+               dma_fence_put(&job->hw_fence->base);
+       else
+               kfree(job->hw_fence);
+       if (job->hw_vm_fence->base.ops)
+               dma_fence_put(&job->hw_vm_fence->base);
+       else
+               kfree(job->hw_vm_fence);
+
        kfree(job);
 }
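
Both free paths, the scheduler callback amdgpu_job_free_cb() and the
direct amdgpu_job_free(), apply the same test: base.ops doubles as a
"was this fence ever emitted" flag, because dma_fence_init() is the only
point where the ops pointer gets stored.  A sketch of the two ends of that
invariant (lock, context and seqno left abstract; amdgpu_fence_ops stands
in for the driver's real ops table):

	/* Allocation: ops == NULL, refcount not armed, kfree() is safe. */
	struct amdgpu_fence *af = kzalloc(sizeof(*af), GFP_KERNEL);

	/*
	 * Emission: dma_fence_init() stores the ops pointer and sets the
	 * refcount to 1; from here on only dma_fence_put() may release it.
	 */
	dma_fence_init(&af->base, &amdgpu_fence_ops, &lock, context, seqno);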
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 453d3b576456c9e1dbba1ed1c63fbdba2560d178..700b4a776532caa1f31985b5bb89ae03440f07fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
                if (r)
                        return r;
                fence = &job->hw_vm_fence->base;
+               /* get a ref for the job */
+               dma_fence_get(fence);
        }
 
        if (vm_flush_needed) {
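
The reference taken here mirrors the one in amdgpu_ib_schedule() and pairs
with the dma_fence_put() added to the two job-free paths above; the
lifetime of a job-owned VM fence, condensed:

	fence = &job->hw_vm_fence->base;	/* emitted by the flush */
	dma_fence_get(fence);			/* reference owned by the job */
	/* ... later, when the job is torn down ... */
	dma_fence_put(&job->hw_vm_fence->base);	/* dropped in amdgpu_job_free*() */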