git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: rework ring reset backup and reemit v9
authorAlex Deucher <alexander.deucher@amd.com>
Fri, 16 Jan 2026 03:01:15 +0000 (22:01 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 23 Feb 2026 19:33:11 +0000 (14:33 -0500)
Store the start wptr and ib size in the IB fence. On queue
reset, save the ring contents of all IBs.

For reemit, reemit the entire IB state for non-guilty contexts.
For guilty contexts, replace the IB submission with nops, but reemit
the rest.  Split the reemit per fence and when we reemit, update the
wptr with the new values from reemit.  This allows us to reemit jobs
repeatedly as the wptrs get properly updated each time.

v2: further simplify the logic
v3: reemit vm state, not just vm fence
v4: just nop the IB and possibly the VM portion of the submission
v5: simplify the vm fence check
v6: split the vm and ib fences
v7: fix commit message
v8: use wptr rather than count_dw to calculate offsets
v9: fix missing documentation update spotted by the kernel test robot

Reviewed-by: Jesse Zhang <jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 07568516c506656033c21d595f33825a5d26e7d0..d209591e3710a99f557d925c893791ead72ea450 100644 (file)
@@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
        return seq;
 }
 
-static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
-{
-       af->fence_wptr_start = af->ring->wptr;
-}
-
-static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
-{
-       af->fence_wptr_end = af->ring->wptr;
-}
-
 /**
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
@@ -124,11 +114,9 @@ void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
                       &ring->fence_drv.lock,
                       adev->fence_context + ring->idx, seq);
 
-       amdgpu_fence_save_fence_wptr_start(af);
        amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                               seq, flags | AMDGPU_FENCE_FLAG_INT);
-       amdgpu_fence_save_fence_wptr_end(af);
-       amdgpu_fence_save_wptr(af);
+
        pm_runtime_get_noresume(adev_to_drm(adev)->dev);
        ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
        if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -240,7 +228,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 
        do {
                struct dma_fence *fence, **ptr;
-               struct amdgpu_fence *am_fence;
 
                ++last_seq;
                last_seq &= drv->num_fences_mask;
@@ -253,12 +240,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
                if (!fence)
                        continue;
 
-               /* Save the wptr in the fence driver so we know what the last processed
-                * wptr was.  This is required for re-emitting the ring state for
-                * queues that are reset but are not guilty and thus have no guilty fence.
-                */
-               am_fence = container_of(fence, struct amdgpu_fence, base);
-               drv->signalled_wptr = am_fence->wptr;
                dma_fence_signal(fence);
                dma_fence_put(fence);
                pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -707,25 +688,29 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
  */
 
 /**
- * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
+ * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
  *
- * @af: fence of the ring to update
+ * @ring: the ring to operate on
+ * @guilty_fence: fence of the ring to update
  *
  */
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+                                            struct amdgpu_fence *guilty_fence)
 {
        struct dma_fence *unprocessed;
        struct dma_fence __rcu **ptr;
        struct amdgpu_fence *fence;
-       struct amdgpu_ring *ring = af->ring;
        unsigned long flags;
        u32 seq, last_seq;
-       bool reemitted = false;
+       unsigned int i;
+       bool is_guilty_fence;
+       bool is_guilty_context;
 
        last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
        seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
 
-       /* mark all fences from the guilty context with an error */
+       ring->reemit = true;
+       amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
        spin_lock_irqsave(&ring->fence_drv.lock, flags);
        do {
                last_seq++;
@@ -737,39 +722,45 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
 
                if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
                        fence = container_of(unprocessed, struct amdgpu_fence, base);
+                       is_guilty_fence = fence == guilty_fence;
+                       is_guilty_context = fence->context == guilty_fence->context;
 
-                       if (fence->reemitted > 1)
-                               reemitted = true;
-                       else if (fence == af)
+                       /* mark all fences from the guilty context with an error */
+                       if (is_guilty_fence)
                                dma_fence_set_error(&fence->base, -ETIME);
-                       else if (fence->context == af->context)
+                       else if (is_guilty_context)
                                dma_fence_set_error(&fence->base, -ECANCELED);
+
+                       /* reemit the packet stream and update wptrs */
+                       fence->ib_wptr = ring->wptr;
+                       for (i = 0; i < fence->ib_dw_size; i++) {
+                               /* Skip the IB(s) for the guilty context. */
+                               if (is_guilty_context &&
+                                   i >= fence->skip_ib_dw_start_offset &&
+                                   i < fence->skip_ib_dw_end_offset)
+                                       amdgpu_ring_write(ring, ring->funcs->nop);
+                               else
+                                       amdgpu_ring_write(ring,
+                                                         ring->ring_backup[fence->backup_idx + i]);
+                       }
                }
                rcu_read_unlock();
        } while (last_seq != seq);
        spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
-
-       if (reemitted) {
-               /* if we've already reemitted once then just cancel everything */
-               amdgpu_fence_driver_force_completion(af->ring);
-               af->ring->ring_backup_entries_to_copy = 0;
-       }
-}
-
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
-{
-       af->wptr = af->ring->wptr;
+       amdgpu_ring_commit(ring);
+       ring->reemit = false;
 }
 
 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
-                                                  u64 start_wptr, u64 end_wptr)
+                                                  struct amdgpu_fence *af)
 {
-       unsigned int first_idx = start_wptr & ring->buf_mask;
-       unsigned int last_idx = end_wptr & ring->buf_mask;
+       unsigned int first_idx = af->ib_wptr & ring->buf_mask;
+       unsigned int dw_size = af->ib_dw_size;
        unsigned int i;
 
+       af->backup_idx = ring->ring_backup_entries_to_copy;
        /* Backup the contents of the ring buffer. */
-       for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+       for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
                ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
 }
 
@@ -779,12 +770,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
        struct dma_fence *unprocessed;
        struct dma_fence __rcu **ptr;
        struct amdgpu_fence *fence;
-       u64 wptr;
        u32 seq, last_seq;
 
        last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
        seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
-       wptr = ring->fence_drv.signalled_wptr;
        ring->ring_backup_entries_to_copy = 0;
 
        do {
@@ -798,21 +787,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
                if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
                        fence = container_of(unprocessed, struct amdgpu_fence, base);
 
-                       /* save everything if the ring is not guilty, otherwise
-                        * just save the content from other contexts.
-                        */
-                       if (!fence->reemitted &&
-                           (!guilty_fence || (fence->context != guilty_fence->context))) {
-                               amdgpu_ring_backup_unprocessed_command(ring, wptr,
-                                                                      fence->wptr);
-                       } else if (!fence->reemitted) {
-                               /* always save the fence */
-                               amdgpu_ring_backup_unprocessed_command(ring,
-                                                                      fence->fence_wptr_start,
-                                                                      fence->fence_wptr_end);
-                       }
-                       wptr = fence->wptr;
-                       fence->reemitted++;
+                       amdgpu_ring_backup_unprocessed_command(ring, fence);
                }
                rcu_read_unlock();
        } while (last_seq != seq);
index 276e0236db45621534bb494b4c06f7a3b51ee3e3..63f62c670df56e651b68ec54831f6ea339b7d937 100644 (file)
@@ -129,6 +129,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        struct amdgpu_ib *ib = &ibs[0];
        struct dma_fence *tmp = NULL;
        struct amdgpu_fence *af;
+       struct amdgpu_fence *vm_af;
        bool need_ctx_switch;
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
@@ -215,9 +216,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                dma_fence_put(tmp);
        }
 
-       if (job)
+       if (job) {
+               vm_af = job->hw_vm_fence;
+               /* VM sequence */
+               vm_af->ib_wptr = ring->wptr;
                amdgpu_vm_flush(ring, job, need_pipe_sync);
+               vm_af->ib_dw_size =
+                       amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr);
+       }
 
+       /* IB sequence */
+       af->ib_wptr = ring->wptr;
        amdgpu_ring_ib_begin(ring);
 
        if (ring->funcs->insert_start)
@@ -238,6 +247,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                cond_exec = amdgpu_ring_init_cond_exec(ring,
                                                       ring->cond_exe_gpu_addr);
 
+       /* Skip the IB for guilty contexts */
+       af->skip_ib_dw_start_offset =
+               amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
        amdgpu_device_flush_hdp(adev, ring);
 
        if (need_ctx_switch)
@@ -276,6 +288,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                amdgpu_ring_emit_frame_cntl(ring, false, secure);
 
        amdgpu_device_invalidate_hdp(adev, ring);
+       /* Skip the IB for guilty contexts */
+       af->skip_ib_dw_end_offset =
+               amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
 
        if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
                fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
@@ -312,13 +327,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                ring->funcs->emit_wave_limit(ring, false);
 
        amdgpu_ring_ib_end(ring);
-       /* Save the wptr associated with this fence.
-        * This must be last for resets to work properly
-        * as we need to save the wptr associated with this
-        * fence so we know what rings contents to backup
-        * after we reset the queue.
-        */
-       amdgpu_fence_save_wptr(af);
+
+       af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
 
        amdgpu_ring_commit(ring);
 
index 4638a686a84e758f8e120d2864059846ded90ea0..a345c3fb8ff4524c9b8b8044aa456d683770a688 100644 (file)
@@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
        ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
 
        /* Make sure we aren't trying to allocate more space
-        * than the maximum for one submission
+        * than the maximum for one submission.  Skip for reemit
+        * since we may be reemitting several submissions.
         */
-       if (WARN_ON_ONCE(ndw > ring->max_dw))
-               return -ENOMEM;
+       if (!ring->reemit) {
+               if (WARN_ON_ONCE(ndw > ring->max_dw))
+                       return -ENOMEM;
+       }
 
        ring->count_dw = ndw;
        ring->wptr_old = ring->wptr;
@@ -104,29 +107,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
        return 0;
 }
 
-/**
- * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
- *
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Allocate @ndw dwords in the ring buffer (all asics).
- * doesn't check the max_dw limit as we may be reemitting
- * several submissions.
- */
-static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
-{
-       /* Align requested size with padding so unlock_commit can
-        * pad safely */
-       ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
-
-       ring->count_dw = ndw;
-       ring->wptr_old = ring->wptr;
-
-       if (ring->funcs->begin_use)
-               ring->funcs->begin_use(ring);
-}
-
 /**
  * amdgpu_ring_insert_nop - insert NOP packets
  *
@@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
 int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
                                 struct amdgpu_fence *guilty_fence)
 {
-       unsigned int i;
        int r;
 
        /* verify that the ring is functional */
@@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       /* set an error on all fences from the context */
-       if (guilty_fence)
-               amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
-       /* Re-emit the non-guilty commands */
-       if (ring->ring_backup_entries_to_copy) {
-               amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
-               for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
-                       amdgpu_ring_write(ring, ring->ring_backup[i]);
-               amdgpu_ring_commit(ring);
-       }
+       /* set an error on all fences from the context and reemit */
+       amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
+
        return 0;
 }
 
index 5a82db0888f0ae1dfeda6b71ef97302e6d77d93c..ce5af137ee400e28d4191875931a5b9fbe5af84a 100644 (file)
@@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
        /* sync_seq is protected by ring emission lock */
        uint32_t                        sync_seq;
        atomic_t                        last_seq;
-       u64                             signalled_wptr;
        bool                            initialized;
        struct amdgpu_irq_src           *irq_src;
        unsigned                        irq_type;
@@ -146,23 +145,23 @@ struct amdgpu_fence {
        struct amdgpu_ring              *ring;
        ktime_t                         start_timestamp;
 
-       /* wptr for the total submission for resets */
-       u64                             wptr;
+       /* location and size of the IB */
+       u64                             ib_wptr;
+       unsigned int                    ib_dw_size;
+       unsigned int                    skip_ib_dw_start_offset;
+       unsigned int                    skip_ib_dw_end_offset;
        /* fence context for resets */
        u64                             context;
-       /* has this fence been reemitted */
-       unsigned int                    reemitted;
-       /* wptr for the fence for the submission */
-       u64                             fence_wptr_start;
-       u64                             fence_wptr_end;
+       /* idx for ring backups */
+       unsigned int                    backup_idx;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+                                            struct amdgpu_fence *guilty_fence);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -313,6 +312,7 @@ struct amdgpu_ring {
        /* backups for resets */
        uint32_t                *ring_backup;
        unsigned int            ring_backup_entries_to_copy;
+       bool                    reemit;
        unsigned                rptr_offs;
        u64                     rptr_gpu_addr;
        u32                     *rptr_cpu_addr;