drm/amdgpu: clean up and unify hw fence handling
author     Alex Deucher <alexander.deucher@amd.com>
           Wed, 27 Aug 2025 15:34:14 +0000 (11:34 -0400)
committer  Alex Deucher <alexander.deucher@amd.com>
           Mon, 13 Oct 2025 18:14:35 +0000 (14:14 -0400)
Decouple the amdgpu fence from the amdgpu_job structure.
This lets us drop the separate fence ops that were needed for
the job-embedded fence and use a single set of ops for all
fences.  It also allows us to allocate the vm fence up front
when we allocate the job.

v2: Additional cleanup suggested by Christian
v3: Additional cleanups suggested by Christian
v4: Additional cleanups suggested by David and
    vm fence fix
v5: cast seqno (David)

Cc: David.Wu3@amd.com
Cc: christian.koenig@amd.com
Tested-by: David (Ming Qiang) Wu <David.Wu3@amd.com>
Reviewed-by: David (Ming Qiang) Wu <David.Wu3@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
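
The core of the change, condensed for orientation (this sketch is
editorial and abridged; only the fields touched by the patch are shown):

	/* Before: the hw fence was embedded in the job, which required a
	 * second set of dma_fence ops (amdgpu_job_fence_ops) and special
	 * casing in the job free paths.
	 */
	struct amdgpu_job {
		struct drm_sched_job	base;
		struct amdgpu_fence	hw_fence;	/* embedded */
		/* ... */
	};

	/* After: both hw fences are separate allocations made up front in
	 * amdgpu_job_alloc(), so the single amdgpu_fence_ops covers every
	 * fence and amdgpu_job_free()/amdgpu_job_free_cb() can simply
	 * kfree() the job.
	 */
	struct amdgpu_job {
		struct drm_sched_job	base;
		struct amdgpu_fence	*hw_fence;	/* job's hw fence */
		struct amdgpu_fence	*hw_vm_fence;	/* preallocated vm flush fence */
		/* ... */
	};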

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a70651050acff05bcb7c091fe7f425cf3bbd3864..0a62727e74f0f8ca4f63ae2744edf142002c7282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1902,7 +1902,7 @@ no_preempt:
                        continue;
                }
                job = to_amdgpu_job(s_job);
-               if (preempted && (&job->hw_fence.base) == fence)
+               if (preempted && (&job->hw_fence->base) == fence)
                        /* mark the job as preempted */
                        job->preemption_status |= AMDGPU_IB_PREEMPTED;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c7c999ae2a28f637a5e73fb95c6c4a13354c2e30..b6d2b9c0bac0b600579d83c23f358b3d61a31958 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5809,11 +5809,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
                if (!amdgpu_ring_sched_ready(ring))
                        continue;
 
-               /* Clear job fence from fence drv to avoid force_completion
-                * leave NULL and vm flush fence in fence drv
-                */
-               amdgpu_fence_driver_clear_job_fences(ring);
-
                /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
                amdgpu_fence_driver_force_completion(ring);
        }
@@ -6542,7 +6537,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         *
         * job->base holds a reference to parent fence
         */
-       if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+       if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
                job_signaled = true;
                dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
                goto skip_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 18a7829122d246c8be936abe9d4c930d2e81f1b0..1fe31d2f2706026f6d03de279ac8ea8e34a9df9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
  * Cast helper
  */
 static const struct dma_fence_ops amdgpu_fence_ops;
-static const struct dma_fence_ops amdgpu_job_fence_ops;
 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
        struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
 
-       if (__f->base.ops == &amdgpu_fence_ops ||
-           __f->base.ops == &amdgpu_job_fence_ops)
-               return __f;
-
-       return NULL;
+       return __f;
 }
 
 /**
@@ -98,51 +93,32 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
  * @ring: ring the fence is associated with
- * @f: resulting fence object
  * @af: amdgpu fence input
  * @flags: flags to pass into the subordinate .emit_fence() call
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
-                     struct amdgpu_fence *af, unsigned int flags)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+                     unsigned int flags)
 {
        struct amdgpu_device *adev = ring->adev;
        struct dma_fence *fence;
-       struct amdgpu_fence *am_fence;
        struct dma_fence __rcu **ptr;
        uint32_t seq;
        int r;
 
-       if (!af) {
-               /* create a separate hw fence */
-               am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL);
-               if (!am_fence)
-                       return -ENOMEM;
-       } else {
-               am_fence = af;
-       }
-       fence = &am_fence->base;
-       am_fence->ring = ring;
+       fence = &af->base;
+       af->ring = ring;
 
        seq = ++ring->fence_drv.sync_seq;
-       am_fence->seq = seq;
-       if (af) {
-               dma_fence_init(fence, &amdgpu_job_fence_ops,
-                              &ring->fence_drv.lock,
-                              adev->fence_context + ring->idx, seq);
-               /* Against remove in amdgpu_job_{free, free_cb} */
-               dma_fence_get(fence);
-       } else {
-               dma_fence_init(fence, &amdgpu_fence_ops,
-                              &ring->fence_drv.lock,
-                              adev->fence_context + ring->idx, seq);
-       }
+       dma_fence_init(fence, &amdgpu_fence_ops,
+                      &ring->fence_drv.lock,
+                      adev->fence_context + ring->idx, seq);
 
        amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                               seq, flags | AMDGPU_FENCE_FLAG_INT);
-       amdgpu_fence_save_wptr(fence);
+       amdgpu_fence_save_wptr(af);
        pm_runtime_get_noresume(adev_to_drm(adev)->dev);
        ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
        if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -167,8 +143,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
         */
        rcu_assign_pointer(*ptr, dma_fence_get(fence));
 
-       *f = fence;
-
        return 0;
 }
 
@@ -669,36 +643,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
        }
 }
 
-/**
- * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
- *
- * @ring: fence of the ring to be cleared
- *
- */
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
-{
-       int i;
-       struct dma_fence *old, **ptr;
-
-       for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
-               ptr = &ring->fence_drv.fences[i];
-               old = rcu_dereference_protected(*ptr, 1);
-               if (old && old->ops == &amdgpu_job_fence_ops) {
-                       struct amdgpu_job *job;
-
-                       /* For non-scheduler bad job, i.e. failed ib test, we need to signal
-                        * it right here or we won't be able to track them in fence_drv
-                        * and they will remain unsignaled during sa_bo free.
-                        */
-                       job = container_of(old, struct amdgpu_job, hw_fence.base);
-                       if (!job->base.s_fence && !dma_fence_is_signaled(old))
-                               dma_fence_signal(old);
-                       RCU_INIT_POINTER(*ptr, NULL);
-                       dma_fence_put(old);
-               }
-       }
-}
-
 /**
  * amdgpu_fence_driver_set_error - set error code on fences
  * @ring: the ring which contains the fences
@@ -755,7 +699,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
 /**
  * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
  *
- * @fence: fence of the ring to signal
+ * @af: fence of the ring to signal
  *
  */
 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
@@ -792,15 +736,13 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
        } while (last_seq != seq);
        spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
        /* signal the guilty fence */
-       amdgpu_fence_write(ring, af->seq);
+       amdgpu_fence_write(ring, (u32)af->base.seqno);
        amdgpu_fence_process(ring);
 }
 
-void amdgpu_fence_save_wptr(struct dma_fence *fence)
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
 {
-       struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base);
-
-       am_fence->wptr = am_fence->ring->wptr;
+       af->wptr = af->ring->wptr;
 }
 
 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
@@ -866,13 +808,6 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
        return (const char *)to_amdgpu_fence(f)->ring->name;
 }
 
-static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
-{
-       struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
-       return (const char *)to_amdgpu_ring(job->base.sched)->name;
-}
-
 /**
  * amdgpu_fence_enable_signaling - enable signalling on fence
  * @f: fence
@@ -889,23 +824,6 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
        return true;
 }
 
-/**
- * amdgpu_job_fence_enable_signaling - enable signalling on job fence
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_enable_signaling above, it
- * only handles the job embedded fence.
- */
-static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
-{
-       struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
-
-       if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
-               amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
-
-       return true;
-}
-
 /**
  * amdgpu_fence_free - free up the fence memory
  *
@@ -921,21 +839,6 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
        kfree(to_amdgpu_fence(f));
 }
 
-/**
- * amdgpu_job_fence_free - free up the job with embedded fence
- *
- * @rcu: RCU callback head
- *
- * Free up the job with embedded fence after the RCU grace period.
- */
-static void amdgpu_job_fence_free(struct rcu_head *rcu)
-{
-       struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
-
-       /* free job if fence has a parent job */
-       kfree(container_of(f, struct amdgpu_job, hw_fence.base));
-}
-
 /**
  * amdgpu_fence_release - callback that fence can be freed
  *
@@ -949,19 +852,6 @@ static void amdgpu_fence_release(struct dma_fence *f)
        call_rcu(&f->rcu, amdgpu_fence_free);
 }
 
-/**
- * amdgpu_job_fence_release - callback that job embedded fence can be freed
- *
- * @f: fence
- *
- * This is the simliar function with amdgpu_fence_release above, it
- * only handles the job embedded fence.
- */
-static void amdgpu_job_fence_release(struct dma_fence *f)
-{
-       call_rcu(&f->rcu, amdgpu_job_fence_free);
-}
-
 static const struct dma_fence_ops amdgpu_fence_ops = {
        .get_driver_name = amdgpu_fence_get_driver_name,
        .get_timeline_name = amdgpu_fence_get_timeline_name,
@@ -969,13 +859,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
        .release = amdgpu_fence_release,
 };
 
-static const struct dma_fence_ops amdgpu_job_fence_ops = {
-       .get_driver_name = amdgpu_fence_get_driver_name,
-       .get_timeline_name = amdgpu_job_fence_get_timeline_name,
-       .enable_signaling = amdgpu_job_fence_enable_signaling,
-       .release = amdgpu_job_fence_release,
-};
-
 /*
  * Fence debugfs
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7d9bcb72e8dd3c9fe29f8062f553439827bcdc27..39229ece83f83e0a4e343de864b67acdea7eac88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -149,17 +149,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        if (job) {
                vm = job->vm;
                fence_ctx = job->base.s_fence ?
-                       job->base.s_fence->scheduled.context : 0;
+                       job->base.s_fence->finished.context : 0;
                shadow_va = job->shadow_va;
                csa_va = job->csa_va;
                gds_va = job->gds_va;
                init_shadow = job->init_shadow;
-               af = &job->hw_fence;
+               af = job->hw_fence;
                /* Save the context of the job for reset handling.
                 * The driver needs this so it can skip the ring
                 * contents for guilty contexts.
                 */
-               af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+               af->context = fence_ctx;
+               /* the vm fence is also part of the job's context */
+               job->hw_vm_fence->context = fence_ctx;
        } else {
                vm = NULL;
                fence_ctx = 0;
@@ -167,7 +169,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                csa_va = 0;
                gds_va = 0;
                init_shadow = false;
-               af = NULL;
+               af = kzalloc(sizeof(*af), GFP_ATOMIC);
+               if (!af)
+                       return -ENOMEM;
        }
 
        if (!ring->sched.ready) {
@@ -289,7 +293,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
        }
 
-       r = amdgpu_fence_emit(ring, f, af, fence_flags);
+       r = amdgpu_fence_emit(ring, af, fence_flags);
        if (r) {
                dev_err(adev->dev, "failed to emit fence (%d)\n", r);
                if (job && job->vmid)
@@ -297,6 +301,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                amdgpu_ring_undo(ring);
                return r;
        }
+       *f = &af->base;
 
        if (ring->funcs->insert_end)
                ring->funcs->insert_end(ring);
@@ -317,7 +322,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
         * fence so we know what rings contents to backup
         * after we reset the queue.
         */
-       amdgpu_fence_save_wptr(*f);
+       amdgpu_fence_save_wptr(af);
 
        amdgpu_ring_ib_end(ring);
        amdgpu_ring_commit(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d020a890a0ea42e4e11189f207cdacc721ef29fb..e08d837668f134e2fb9b9622089a6f1e9acaaed4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -137,7 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
                   ring->funcs->reset) {
                dev_err(adev->dev, "Starting %s ring reset\n",
                        s_job->sched->name);
-               r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence);
+               r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
                if (!r) {
                        atomic_inc(&ring->adev->gpu_reset_counter);
                        dev_err(adev->dev, "Ring %s reset succeeded\n",
@@ -186,6 +186,9 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                     unsigned int num_ibs, struct amdgpu_job **job,
                     u64 drm_client_id)
 {
+       struct amdgpu_fence *af;
+       int r;
+
        if (num_ibs == 0)
                return -EINVAL;
 
@@ -193,6 +196,20 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        if (!*job)
                return -ENOMEM;
 
+       af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+       if (!af) {
+               r = -ENOMEM;
+               goto err_job;
+       }
+       (*job)->hw_fence = af;
+
+       af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+       if (!af) {
+               r = -ENOMEM;
+               goto err_fence;
+       }
+       (*job)->hw_vm_fence = af;
+
        (*job)->vm = vm;
 
        amdgpu_sync_create(&(*job)->explicit_sync);
@@ -204,6 +221,13 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        return drm_sched_job_init(&(*job)->base, entity, 1, owner,
                                  drm_client_id);
+
+err_fence:
+       kfree((*job)->hw_fence);
+err_job:
+       kfree(*job);
+
+       return r;
 }
 
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
@@ -251,11 +275,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
        struct dma_fence *f;
        unsigned i;
 
-       /* Check if any fences where initialized */
+       /* Check if any fences were initialized */
        if (job->base.s_fence && job->base.s_fence->finished.ops)
                f = &job->base.s_fence->finished;
-       else if (job->hw_fence.base.ops)
-               f = &job->hw_fence.base;
+       else if (job->hw_fence && job->hw_fence->base.ops)
+               f = &job->hw_fence->base;
        else
                f = NULL;
 
@@ -271,11 +295,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
        amdgpu_sync_free(&job->explicit_sync);
 
-       /* only put the hw fence if has embedded fence */
-       if (!job->hw_fence.base.ops)
-               kfree(job);
-       else
-               dma_fence_put(&job->hw_fence.base);
+       kfree(job);
 }
 
 void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -304,10 +324,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
        if (job->gang_submit != &job->base.s_fence->scheduled)
                dma_fence_put(job->gang_submit);
 
-       if (!job->hw_fence.base.ops)
-               kfree(job);
-       else
-               dma_fence_put(&job->hw_fence.base);
+       kfree(job);
 }
 
 struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 4a6487eb6cb59af54d977e0697a3e0862871c48c..7abf069d17d42357eb0c3c5bdbea0bed45d6cdc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -64,7 +64,8 @@ struct amdgpu_job {
        struct drm_sched_job    base;
        struct amdgpu_vm        *vm;
        struct amdgpu_sync      explicit_sync;
-       struct amdgpu_fence     hw_fence;
+       struct amdgpu_fence     *hw_fence;
+       struct amdgpu_fence     *hw_vm_fence;
        struct dma_fence        *gang_submit;
        uint32_t                preamble_status;
        uint32_t                preemption_status;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4b46e3c26ff39f040c2527887721e168aad12d7c..87b962df546078d9b5c9f5d84d8a84bb58aa6cc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -147,16 +147,14 @@ struct amdgpu_fence {
        u64                             wptr;
        /* fence context for resets */
        u64                             context;
-       uint32_t                        seq;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
-void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct dma_fence *fence);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -166,8 +164,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
-                     struct amdgpu_fence *af, unsigned int flags);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+                     unsigned int flags);
 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
                              uint32_t timeout);
 bool amdgpu_fence_process(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3a660ce7591338da12a56a2f3510455cc30b4a62..9309830821b7d9d5cd7747d15e171c061beed72f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -779,7 +779,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
        bool cleaner_shader_needed = false;
        bool pasid_mapping_needed = false;
        struct dma_fence *fence = NULL;
-       struct amdgpu_fence *af;
        unsigned int patch;
        int r;
 
@@ -842,12 +841,10 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
        }
 
        if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
-               r = amdgpu_fence_emit(ring, &fence, NULL, 0);
+               r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
                if (r)
                        return r;
-               /* this is part of the job's context */
-               af = container_of(fence, struct amdgpu_fence, base);
-               af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
+               fence = &job->hw_vm_fence->base;
        }
 
        if (vm_flush_needed) {
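
Net effect for callers of amdgpu_fence_emit(): the emit path no longer
allocates or returns the fence itself; callers pass in a preallocated
struct amdgpu_fence.  A rough, simplified sketch of the resulting flow
in amdgpu_ib_schedule(), with error handling elided (editorial summary,
not part of the patch):

	struct amdgpu_fence *af;

	if (job)
		af = job->hw_fence;	/* allocated in amdgpu_job_alloc() */
	else
		af = kzalloc(sizeof(*af), GFP_ATOMIC);	/* standalone fence, e.g. IB tests */

	r = amdgpu_fence_emit(ring, af, fence_flags);
	if (!r)
		*f = &af->base;		/* hand the dma_fence back to the caller */

The vm flush path likewise emits into the preallocated job->hw_vm_fence
instead of having amdgpu_fence_emit() allocate a fence internally.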