From: Christian König Date: Tue, 5 May 2026 13:40:04 +0000 (+0200) Subject: drm/amdgpu: deprecate guilty handling X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=182bdd59be41595e211ac98406d3637fc6141017;p=thirdparty%2Flinux.git drm/amdgpu: deprecate guilty handling The guilty handling tried to establish a second way of signaling problems with the GPU back to userspace. This caused quite a bunch of issues we had to work around, especially lifetime issues with the drm_sched_entity. Just drop the handling altogether and use the dma_fence based approach instead. v2: fix reversed condition in entity check (Alex) Reviewed-by: Alex Deucher Signed-off-by: Christian König Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 32af8cce3df8..c42ae3e6fdd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -60,11 +60,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, if (!p->ctx) return -EINVAL; - if (atomic_read(&p->ctx->guilty)) { - amdgpu_ctx_put(p->ctx); - return -ECANCELED; - } - amdgpu_sync_create(&p->sync); drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 7af86a32c0c5..0d7f6cd74f79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -255,7 +255,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, } r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds, - &ctx->guilty); + NULL); if (r) goto error_free_entity; @@ -579,6 +579,27 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, #define AMDGPU_RAS_COUNTE_DELAY_MS 3000 +static bool amdgpu_ctx_guilty(struct amdgpu_ctx *ctx) +{ + int i, j, r; + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { + 
struct amdgpu_ctx_entity *ctx_entity; + + ctx_entity = ctx->entities[i][j]; + if (!ctx_entity) + continue; + + r = drm_sched_entity_error(&ctx_entity->entity); + if (r == -ETIME) + return true; + } + } + + return false; +} + static int amdgpu_ctx_query2(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id, union drm_amdgpu_ctx_out *out) @@ -607,7 +628,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; - if (atomic_read(&ctx->guilty)) + if (amdgpu_ctx_guilty(ctx)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; if (amdgpu_in_reset(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index cf8d700a22fe..e444b2088d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -50,7 +50,6 @@ struct amdgpu_ctx { int32_t init_priority; int32_t override_priority; uint32_t stable_pstate; - atomic_t guilty; bool preamble_presented; uint64_t generation; unsigned long ras_counter_ce; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f18e46502829..942f0251c748 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5112,12 +5112,12 @@ link_reset_failed: int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, r = 0; struct amdgpu_job *job = NULL; struct dma_fence *fence = NULL; struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); + int i, r; if (reset_context->reset_req_dev == adev) job = reset_context->job; @@ -5143,9 +5143,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_fence_driver_isr_toggle(adev, false); - if (job && job->vm) - 
drm_sched_increase_karma(&job->base); - r = amdgpu_reset_prepare_hwcontext(adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ if (r == -EOPNOTSUPP)