git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: rework ring reset backup and reemit v9
authorAlex Deucher <alexander.deucher@amd.com>
Fri, 16 Jan 2026 03:01:15 +0000 (22:01 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 23 Feb 2026 19:33:11 +0000 (14:33 -0500)
Store the start wptr and ib size in the IB fence. On queue
reset, save the ring contents of all IBs.

For reemit, reemit the entire IB state for non-guilty contexts.
For guilty contexts, replace the IB submission with nops, but reemit
the rest.  Split the reemit per fence and when we reemit, update the
wptr with the new values from reemit.  This allows us to reemit jobs
repeatedly as the wptrs get properly updated each time.

v2: further simplify the logic
v3: reemit vm state, not just vm fence
v4: just nop the IB and possibly the VM portion of the submission
v5: simplify the vm fence check
v6: split the vm and ib fences
v7: fix commit message
v8: use wptr rather than count_dw to calculate offsets
v9: fix missing documentation update spotted by the kernel test robot

Reviewed-by: Jesse Zhang <jesse.zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 07568516c506656033c21d595f33825a5d26e7d0..d209591e3710a99f557d925c893791ead72ea450 100644 (file)
@@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
        return seq;
 }
 
-static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
-{
-       af->fence_wptr_start = af->ring->wptr;
-}
-
-static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
-{
-       af->fence_wptr_end = af->ring->wptr;
-}
-
 /**
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
@@ -124,11 +114,9 @@ void amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
                       &ring->fence_drv.lock,
                       adev->fence_context + ring->idx, seq);
 
-       amdgpu_fence_save_fence_wptr_start(af);
        amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                               seq, flags | AMDGPU_FENCE_FLAG_INT);
-       amdgpu_fence_save_fence_wptr_end(af);
-       amdgpu_fence_save_wptr(af);
+
        pm_runtime_get_noresume(adev_to_drm(adev)->dev);
        ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
        if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -240,7 +228,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 
        do {
                struct dma_fence *fence, **ptr;
-               struct amdgpu_fence *am_fence;
 
                ++last_seq;
                last_seq &= drv->num_fences_mask;
@@ -253,12 +240,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
                if (!fence)
                        continue;
 
-               /* Save the wptr in the fence driver so we know what the last processed
-                * wptr was.  This is required for re-emitting the ring state for
-                * queues that are reset but are not guilty and thus have no guilty fence.
-                */
-               am_fence = container_of(fence, struct amdgpu_fence, base);
-               drv->signalled_wptr = am_fence->wptr;
                dma_fence_signal(fence);
                dma_fence_put(fence);
                pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -707,25 +688,29 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
  */
 
 /**
- * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
+ * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
  *
- * @af: fence of the ring to update
+ * @ring: the ring to operate on
+ * @guilty_fence: fence of the ring to update
  *
  */
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+                                            struct amdgpu_fence *guilty_fence)
 {
        struct dma_fence *unprocessed;
        struct dma_fence __rcu **ptr;
        struct amdgpu_fence *fence;
-       struct amdgpu_ring *ring = af->ring;
        unsigned long flags;
        u32 seq, last_seq;
-       bool reemitted = false;
+       unsigned int i;
+       bool is_guilty_fence;
+       bool is_guilty_context;
 
        last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
        seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
 
-       /* mark all fences from the guilty context with an error */
+       ring->reemit = true;
+       amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
        spin_lock_irqsave(&ring->fence_drv.lock, flags);
        do {
                last_seq++;
@@ -737,39 +722,45 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
 
                if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
                        fence = container_of(unprocessed, struct amdgpu_fence, base);
+                       is_guilty_fence = fence == guilty_fence;
+                       is_guilty_context = fence->context == guilty_fence->context;
 
-                       if (fence->reemitted > 1)
-                               reemitted = true;
-                       else if (fence == af)
+                       /* mark all fences from the guilty context with an error */
+                       if (is_guilty_fence)
                                dma_fence_set_error(&fence->base, -ETIME);
-                       else if (fence->context == af->context)
+                       else if (is_guilty_context)
                                dma_fence_set_error(&fence->base, -ECANCELED);
+
+                       /* reemit the packet stream and update wptrs */
+                       fence->ib_wptr = ring->wptr;
+                       for (i = 0; i < fence->ib_dw_size; i++) {
+                               /* Skip the IB(s) for the guilty context. */
+                               if (is_guilty_context &&
+                                   i >= fence->skip_ib_dw_start_offset &&
+                                   i < fence->skip_ib_dw_end_offset)
+                                       amdgpu_ring_write(ring, ring->funcs->nop);
+                               else
+                                       amdgpu_ring_write(ring,
+                                                         ring->ring_backup[fence->backup_idx + i]);
+                       }
                }
                rcu_read_unlock();
        } while (last_seq != seq);
        spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
-
-       if (reemitted) {
-               /* if we've already reemitted once then just cancel everything */
-               amdgpu_fence_driver_force_completion(af->ring);
-               af->ring->ring_backup_entries_to_copy = 0;
-       }
-}
-
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
-{
-       af->wptr = af->ring->wptr;
+       amdgpu_ring_commit(ring);
+       ring->reemit = false;
 }
 
 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
-                                                  u64 start_wptr, u64 end_wptr)
+                                                  struct amdgpu_fence *af)
 {
-       unsigned int first_idx = start_wptr & ring->buf_mask;
-       unsigned int last_idx = end_wptr & ring->buf_mask;
+       unsigned int first_idx = af->ib_wptr & ring->buf_mask;
+       unsigned int dw_size = af->ib_dw_size;
        unsigned int i;
 
+       af->backup_idx = ring->ring_backup_entries_to_copy;
        /* Backup the contents of the ring buffer. */
-       for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+       for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
                ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
 }
 
@@ -779,12 +770,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
        struct dma_fence *unprocessed;
        struct dma_fence __rcu **ptr;
        struct amdgpu_fence *fence;
-       u64 wptr;
        u32 seq, last_seq;
 
        last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
        seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
-       wptr = ring->fence_drv.signalled_wptr;
        ring->ring_backup_entries_to_copy = 0;
 
        do {
@@ -798,21 +787,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
                if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
                        fence = container_of(unprocessed, struct amdgpu_fence, base);
 
-                       /* save everything if the ring is not guilty, otherwise
-                        * just save the content from other contexts.
-                        */
-                       if (!fence->reemitted &&
-                           (!guilty_fence || (fence->context != guilty_fence->context))) {
-                               amdgpu_ring_backup_unprocessed_command(ring, wptr,
-                                                                      fence->wptr);
-                       } else if (!fence->reemitted) {
-                               /* always save the fence */
-                               amdgpu_ring_backup_unprocessed_command(ring,
-                                                                      fence->fence_wptr_start,
-                                                                      fence->fence_wptr_end);
-                       }
-                       wptr = fence->wptr;
-                       fence->reemitted++;
+                       amdgpu_ring_backup_unprocessed_command(ring, fence);
                }
                rcu_read_unlock();
        } while (last_seq != seq);
index 276e0236db45621534bb494b4c06f7a3b51ee3e3..63f62c670df56e651b68ec54831f6ea339b7d937 100644 (file)
@@ -129,6 +129,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
        struct amdgpu_ib *ib = &ibs[0];
        struct dma_fence *tmp = NULL;
        struct amdgpu_fence *af;
+       struct amdgpu_fence *vm_af;
        bool need_ctx_switch;
        struct amdgpu_vm *vm;
        uint64_t fence_ctx;
@@ -215,9 +216,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                dma_fence_put(tmp);
        }
 
-       if (job)
+       if (job) {
+               vm_af = job->hw_vm_fence;
+               /* VM sequence */
+               vm_af->ib_wptr = ring->wptr;
                amdgpu_vm_flush(ring, job, need_pipe_sync);
+               vm_af->ib_dw_size =
+                       amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr);
+       }
 
+       /* IB sequence */
+       af->ib_wptr = ring->wptr;
        amdgpu_ring_ib_begin(ring);
 
        if (ring->funcs->insert_start)
@@ -238,6 +247,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                cond_exec = amdgpu_ring_init_cond_exec(ring,
                                                       ring->cond_exe_gpu_addr);
 
+       /* Skip the IB for guilty contexts */
+       af->skip_ib_dw_start_offset =
+               amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
        amdgpu_device_flush_hdp(adev, ring);
 
        if (need_ctx_switch)
@@ -276,6 +288,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                amdgpu_ring_emit_frame_cntl(ring, false, secure);
 
        amdgpu_device_invalidate_hdp(adev, ring);
+       /* Skip the IB for guilty contexts */
+       af->skip_ib_dw_end_offset =
+               amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
 
        if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
                fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
@@ -312,13 +327,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
                ring->funcs->emit_wave_limit(ring, false);
 
        amdgpu_ring_ib_end(ring);
-       /* Save the wptr associated with this fence.
-        * This must be last for resets to work properly
-        * as we need to save the wptr associated with this
-        * fence so we know what rings contents to backup
-        * after we reset the queue.
-        */
-       amdgpu_fence_save_wptr(af);
+
+       af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
 
        amdgpu_ring_commit(ring);
 
index 4638a686a84e758f8e120d2864059846ded90ea0..a345c3fb8ff4524c9b8b8044aa456d683770a688 100644 (file)
@@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
        ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
 
        /* Make sure we aren't trying to allocate more space
-        * than the maximum for one submission
+        * than the maximum for one submission.  Skip for reemit
+        * since we may be reemitting several submissions.
         */
-       if (WARN_ON_ONCE(ndw > ring->max_dw))
-               return -ENOMEM;
+       if (!ring->reemit) {
+               if (WARN_ON_ONCE(ndw > ring->max_dw))
+                       return -ENOMEM;
+       }
 
        ring->count_dw = ndw;
        ring->wptr_old = ring->wptr;
@@ -104,29 +107,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
        return 0;
 }
 
-/**
- * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
- *
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Allocate @ndw dwords in the ring buffer (all asics).
- * doesn't check the max_dw limit as we may be reemitting
- * several submissions.
- */
-static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
-{
-       /* Align requested size with padding so unlock_commit can
-        * pad safely */
-       ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
-
-       ring->count_dw = ndw;
-       ring->wptr_old = ring->wptr;
-
-       if (ring->funcs->begin_use)
-               ring->funcs->begin_use(ring);
-}
-
 /**
  * amdgpu_ring_insert_nop - insert NOP packets
  *
@@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
 int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
                                 struct amdgpu_fence *guilty_fence)
 {
-       unsigned int i;
        int r;
 
        /* verify that the ring is functional */
@@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       /* set an error on all fences from the context */
-       if (guilty_fence)
-               amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
-       /* Re-emit the non-guilty commands */
-       if (ring->ring_backup_entries_to_copy) {
-               amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
-               for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
-                       amdgpu_ring_write(ring, ring->ring_backup[i]);
-               amdgpu_ring_commit(ring);
-       }
+       /* set an error on all fences from the context and reemit */
+       amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
+
        return 0;
 }
 
index 5a82db0888f0ae1dfeda6b71ef97302e6d77d93c..ce5af137ee400e28d4191875931a5b9fbe5af84a 100644 (file)
@@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
        /* sync_seq is protected by ring emission lock */
        uint32_t                        sync_seq;
        atomic_t                        last_seq;
-       u64                             signalled_wptr;
        bool                            initialized;
        struct amdgpu_irq_src           *irq_src;
        unsigned                        irq_type;
@@ -146,23 +145,23 @@ struct amdgpu_fence {
        struct amdgpu_ring              *ring;
        ktime_t                         start_timestamp;
 
-       /* wptr for the total submission for resets */
-       u64                             wptr;
+       /* location and size of the IB */
+       u64                             ib_wptr;
+       unsigned int                    ib_dw_size;
+       unsigned int                    skip_ib_dw_start_offset;
+       unsigned int                    skip_ib_dw_end_offset;
        /* fence context for resets */
        u64                             context;
-       /* has this fence been reemitted */
-       unsigned int                    reemitted;
-       /* wptr for the fence for the submission */
-       u64                             fence_wptr_start;
-       u64                             fence_wptr_end;
+       /* idx for ring backups */
+       unsigned int                    backup_idx;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+                                            struct amdgpu_fence *guilty_fence);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -313,6 +312,7 @@ struct amdgpu_ring {
        /* backups for resets */
        uint32_t                *ring_backup;
        unsigned int            ring_backup_entries_to_copy;
+       bool                    reemit;
        unsigned                rptr_offs;
        u64                     rptr_gpu_addr;
        u32                     *rptr_cpu_addr;