struct amdgpu_job *job = to_amdgpu_job(s_job);
struct amdgpu_task_info *ti;
struct amdgpu_device *adev = ring->adev;
- bool set_error = false;
int idx, r;
if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
if (unlikely(adev->debug_disable_gpu_ring_reset)) {
dev_err(adev->dev, "Ring reset disabled by debug mask\n");
} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
- bool is_guilty;
-
dev_err(adev->dev, "Starting %s ring reset\n",
s_job->sched->name);
drm_sched_wqueue_stop(&ring->sched);
- /* for engine resets, we need to reset the engine,
- * but individual queues may be unaffected.
- * check here to make sure the accounting is correct.
- */
- if (ring->funcs->is_guilty)
- is_guilty = ring->funcs->is_guilty(ring);
- else
- is_guilty = true;
-
- if (is_guilty) {
- dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
- set_error = true;
- }
-
r = amdgpu_ring_reset(ring, job->vmid, NULL);
if (!r) {
- if (is_guilty)
- atomic_inc(&ring->adev->gpu_reset_counter);
+ atomic_inc(&ring->adev->gpu_reset_counter);
drm_sched_wqueue_start(&ring->sched);
dev_err(adev->dev, "Ring %s reset succeeded\n",
ring->sched.name);
dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
}
- if (!set_error)
- dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
struct amdgpu_reset_context reset_context;
return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
}
-static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t instance_id = ring->me;
-
- return sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
-}
-
-static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t instance_id = ring->me;
-
- if (!adev->sdma.has_page_queue)
- return false;
-
- return sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
-}
-
static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
unsigned int vmid,
struct amdgpu_fence *timedout_fence)
{
- bool is_guilty = ring->funcs->is_guilty(ring);
struct amdgpu_device *adev = ring->adev;
u32 id = ring->me;
int r;
amdgpu_amdkfd_suspend(adev, true);
r = amdgpu_sdma_reset_engine(adev, id);
amdgpu_amdkfd_resume(adev, true);
- if (r)
- return r;
-
- if (is_guilty)
- amdgpu_fence_driver_force_completion(ring);
-
- return 0;
+ return r;
}
static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
.reset = sdma_v4_4_2_reset_queue,
- .is_guilty = sdma_v4_4_2_ring_is_guilty,
};
static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
.reset = sdma_v4_4_2_reset_queue,
- .is_guilty = sdma_v4_4_2_page_ring_is_guilty,
};
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)