return seq;
}
-static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
-{
- af->fence_wptr_start = af->ring->wptr;
-}
-
-static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
-{
- af->fence_wptr_end = af->ring->wptr;
-}
-
/**
* amdgpu_fence_emit - emit a fence on the requested ring
*
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
- amdgpu_fence_save_fence_wptr_start(af);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
- amdgpu_fence_save_fence_wptr_end(af);
- amdgpu_fence_save_wptr(af);
+
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
do {
struct dma_fence *fence, **ptr;
- struct amdgpu_fence *am_fence;
++last_seq;
last_seq &= drv->num_fences_mask;
if (!fence)
continue;
- /* Save the wptr in the fence driver so we know what the last processed
- * wptr was. This is required for re-emitting the ring state for
- * queues that are reset but are not guilty and thus have no guilty fence.
- */
- am_fence = container_of(fence, struct amdgpu_fence, base);
- drv->signalled_wptr = am_fence->wptr;
dma_fence_signal(fence);
dma_fence_put(fence);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
*/
/**
- * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
+ * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
*
- * @af: fence of the ring to update
+ * @ring: the ring to operate on
+ * @guilty_fence: the timed out fence, or NULL if no submission was guilty
+ *
+ * Set -ETIME on the guilty fence, -ECANCELED on other unsignaled fences from
+ * the same context, and reemit the backed up ring contents with the guilty
+ * context's IB packets replaced by NOPs.
 *
 */
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
{
struct dma_fence *unprocessed;
struct dma_fence __rcu **ptr;
struct amdgpu_fence *fence;
- struct amdgpu_ring *ring = af->ring;
unsigned long flags;
u32 seq, last_seq;
- bool reemitted = false;
+ unsigned int i;
+ bool is_guilty_fence;
+ bool is_guilty_context;
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
- /* mark all fences from the guilty context with an error */
+ ring->reemit = true;
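+ /* cannot fail: amdgpu_ring_alloc() skips the max_dw check while reemit is set */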
+ amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
spin_lock_irqsave(&ring->fence_drv.lock, flags);
do {
last_seq++;
if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
fence = container_of(unprocessed, struct amdgpu_fence, base);
- if (fence->reemitted > 1)
- reemitted = true;
- else if (fence == af)
+ is_guilty_fence = fence == guilty_fence;
+ /* guilty_fence may be NULL if the reset had no guilty submission */
+ is_guilty_context = guilty_fence &&
+ fence->context == guilty_fence->context;
+ /* mark all fences from the guilty context with an error */
+ if (is_guilty_fence)
dma_fence_set_error(&fence->base, -ETIME);
- else if (fence->context == af->context)
+ else if (is_guilty_context)
dma_fence_set_error(&fence->base, -ECANCELED);
+
+ /* reemit the packet stream; point ib_wptr at the reemitted
+ * copy so a later reset backs up from the new location
+ */
+ fence->ib_wptr = ring->wptr;
+ for (i = 0; i < fence->ib_dw_size; i++) {
+ /* Skip the IB(s) for the guilty context. */
+ if (is_guilty_context &&
+ i >= fence->skip_ib_dw_start_offset &&
+ i < fence->skip_ib_dw_end_offset)
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ else
+ amdgpu_ring_write(ring,
+ ring->ring_backup[fence->backup_idx + i]);
+ }
}
rcu_read_unlock();
} while (last_seq != seq);
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
-
- if (reemitted) {
- /* if we've already reemitted once then just cancel everything */
- amdgpu_fence_driver_force_completion(af->ring);
- af->ring->ring_backup_entries_to_copy = 0;
- }
-}
-
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
-{
- af->wptr = af->ring->wptr;
+ amdgpu_ring_commit(ring);
+ ring->reemit = false;
}
static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
- u64 start_wptr, u64 end_wptr)
+ struct amdgpu_fence *af)
{
- unsigned int first_idx = start_wptr & ring->buf_mask;
- unsigned int last_idx = end_wptr & ring->buf_mask;
+ unsigned int first_idx = af->ib_wptr & ring->buf_mask;
+ unsigned int dw_size = af->ib_dw_size;
unsigned int i;
+ af->backup_idx = ring->ring_backup_entries_to_copy;
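+ /* the reemit loop replays these dwords via ring_backup[backup_idx + i] */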
/* Backup the contents of the ring buffer. */
- for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+ for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
}
struct dma_fence *unprocessed;
struct dma_fence __rcu **ptr;
struct amdgpu_fence *fence;
- u64 wptr;
u32 seq, last_seq;
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
- wptr = ring->fence_drv.signalled_wptr;
ring->ring_backup_entries_to_copy = 0;
do {
if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
fence = container_of(unprocessed, struct amdgpu_fence, base);
- /* save everything if the ring is not guilty, otherwise
- * just save the content from other contexts.
- */
- if (!fence->reemitted &&
- (!guilty_fence || (fence->context != guilty_fence->context))) {
- amdgpu_ring_backup_unprocessed_command(ring, wptr,
- fence->wptr);
- } else if (!fence->reemitted) {
- /* always save the fence */
- amdgpu_ring_backup_unprocessed_command(ring,
- fence->fence_wptr_start,
- fence->fence_wptr_end);
- }
- wptr = fence->wptr;
- fence->reemitted++;
+ amdgpu_ring_backup_unprocessed_command(ring, fence);
}
rcu_read_unlock();
} while (last_seq != seq);
struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp = NULL;
struct amdgpu_fence *af;
+ struct amdgpu_fence *vm_af;
bool need_ctx_switch;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
dma_fence_put(tmp);
}
- if (job)
+ if (job) {
+ vm_af = job->hw_vm_fence;
+ /* record the start and dword size of the VM flush sequence for reemit */
+ vm_af->ib_wptr = ring->wptr;
amdgpu_vm_flush(ring, job, need_pipe_sync);
+ vm_af->ib_dw_size =
+ amdgpu_ring_get_dw_distance(ring, vm_af->ib_wptr, ring->wptr);
+ }
+ /* record where the IB submission starts for reemit */
+ af->ib_wptr = ring->wptr;
amdgpu_ring_ib_begin(ring);
if (ring->funcs->insert_start)
cond_exec = amdgpu_ring_init_cond_exec(ring,
ring->cond_exe_gpu_addr);
+ /* start of the dword range to NOP out when reemitting a guilty context */
+ af->skip_ib_dw_start_offset =
+ amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
amdgpu_device_flush_hdp(adev, ring);
if (need_ctx_switch)
amdgpu_ring_emit_frame_cntl(ring, false, secure);
amdgpu_device_invalidate_hdp(adev, ring);
+ /* end of the dword range to NOP out when reemitting a guilty context */
+ af->skip_ib_dw_end_offset =
+ amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
ring->funcs->emit_wave_limit(ring, false);
amdgpu_ring_ib_end(ring);
- /* Save the wptr associated with this fence.
- * This must be last for resets to work properly
- * as we need to save the wptr associated with this
- * fence so we know what rings contents to backup
- * after we reset the queue.
- */
- amdgpu_fence_save_wptr(af);
+
+ af->ib_dw_size = amdgpu_ring_get_dw_distance(ring, af->ib_wptr, ring->wptr);
amdgpu_ring_commit(ring);
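The hunks above lean on amdgpu_ring_get_dw_distance(), whose body falls
outside this excerpt. A minimal sketch of what it is assumed to compute,
i.e. the dword distance between two wptr values with the wrap at
ring->ptr_mask accounted for:

static unsigned int amdgpu_ring_get_dw_distance(struct amdgpu_ring *ring,
						u64 start_wptr, u64 end_wptr)
{
	/* sketch only: dwords from start_wptr up to end_wptr, modulo the
	 * ring's pointer mask to handle wptr wrap-around
	 */
	return (unsigned int)((end_wptr - start_wptr) & ring->ptr_mask);
}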
ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
/* Make sure we aren't trying to allocate more space
- * than the maximum for one submission
+ * than the maximum for one submission. Skip for reemit
+ * since we may be reemitting several submissions.
*/
- if (WARN_ON_ONCE(ndw > ring->max_dw))
- return -ENOMEM;
+ if (!ring->reemit && WARN_ON_ONCE(ndw > ring->max_dw))
+ return -ENOMEM;
ring->count_dw = ndw;
ring->wptr_old = ring->wptr;
return 0;
}
-/**
- * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
- *
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Allocate @ndw dwords in the ring buffer (all asics).
- * doesn't check the max_dw limit as we may be reemitting
- * several submissions.
- */
-static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
-{
- /* Align requested size with padding so unlock_commit can
- * pad safely */
- ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
-
- ring->count_dw = ndw;
- ring->wptr_old = ring->wptr;
-
- if (ring->funcs->begin_use)
- ring->funcs->begin_use(ring);
-}
-
/**
* amdgpu_ring_insert_nop - insert NOP packets
*
int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
struct amdgpu_fence *guilty_fence)
{
- unsigned int i;
int r;
/* verify that the ring is functional */
if (r)
return r;
- /* set an error on all fences from the context */
- if (guilty_fence)
- amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
- /* Re-emit the non-guilty commands */
- if (ring->ring_backup_entries_to_copy) {
- amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
- for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
- amdgpu_ring_write(ring, ring->ring_backup[i]);
- amdgpu_ring_commit(ring);
- }
+ /* set errors on the fences from the guilty context (if any) and reemit */
+ amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
+
return 0;
}
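For context, a hedged sketch of how an IP-specific reset path is expected
to use this helper pair; the queue reset step in the middle is hypothetical
and engine dependent:

static int example_ring_reset(struct amdgpu_ring *ring,
			      struct amdgpu_fence *guilty_fence)
{
	int r;

	/* back up the unprocessed commands before resetting the queue */
	r = amdgpu_ring_reset_helper_begin(ring, guilty_fence);
	if (r)
		return r;

	/* IP-specific queue reset would go here (hypothetical) */

	/* verify the ring, set fence errors and reemit the backup */
	return amdgpu_ring_reset_helper_end(ring, guilty_fence);
}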
/* sync_seq is protected by ring emission lock */
uint32_t sync_seq;
atomic_t last_seq;
- u64 signalled_wptr;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
struct amdgpu_ring *ring;
ktime_t start_timestamp;
- /* wptr for the total submission for resets */
- u64 wptr;
+ /* start wptr and dword size of the whole submission */
+ u64 ib_wptr;
+ unsigned int ib_dw_size;
+ /* dword range (relative to ib_wptr) to NOP out on reemit */
+ unsigned int skip_ib_dw_start_offset;
+ unsigned int skip_ib_dw_end_offset;
/* fence context for resets */
u64 context;
- /* has this fence been reemitted */
- unsigned int reemitted;
- /* wptr for the fence for the submission */
- u64 fence_wptr_start;
- u64 fence_wptr_end;
+ /* start index of this submission in the ring backup buffer */
+ unsigned int backup_idx;
};
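For orientation, how the new bookkeeping fields relate, as implied by the
hunks above (offsets are in dwords, relative to ib_wptr):

/*
 * ib_wptr                                        ib_wptr + ib_dw_size
 *   |-- setup --|======= IB packets =======|-- fence, etc. --|
 *               ^                          ^
 *   skip_ib_dw_start_offset    skip_ib_dw_end_offset
 *
 * On reemit, a guilty context has the [start, end) dword range replaced
 * with NOPs; everything else is replayed verbatim from
 * ring_backup[backup_idx .. backup_idx + ib_dw_size).
 */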
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
/* backups for resets */
uint32_t *ring_backup;
unsigned int ring_backup_entries_to_copy;
+ /* true while backed up submissions are being reemitted after a reset */
+ bool reemit;
unsigned rptr_offs;
u64 rptr_gpu_addr;
u32 *rptr_cpu_addr;