From: Tvrtko Ursulin Date: Fri, 21 Feb 2025 10:50:34 +0000 (+0000) Subject: drm/amdgpu: Pop jobs from the queue more robustly X-Git-Tag: v6.15-rc1~120^2~14^2~28 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=80b6ef8ae25ade45e6418df3ddf699a5a10a7ca4;p=thirdparty%2Fkernel%2Flinux.git drm/amdgpu: Pop jobs from the queue more robustly Replace a copy of DRM scheduler's to_drm_sched_job with a copy of a newly added drm_sched_entity_queue_pop. This allows breaking the hidden dependency that queue_node has to be the first element in struct drm_sched_job. A comment is also added with a reference to the mailing list discussion explaining the copied helper will be removed when the whole broken amdgpu_job_stop_all_jobs_on_sched is removed. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Cc: Zhang, Hawking Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250221105038.79665-3-tvrtko.ursulin@igalia.com --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 100f044759435..1899c601c95c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -411,8 +411,24 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) return fence; } -#define to_drm_sched_job(sched_job) \ - container_of((sched_job), struct drm_sched_job, queue_node) +/* + * This is a duplicate function from DRM scheduler sched_internal.h. + * Plan is to remove it when amdgpu_job_stop_all_jobs_on_sched is removed, due + * latter being incorrect and racy. + * + * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/ + */ +static struct drm_sched_job * +drm_sched_entity_queue_pop(struct drm_sched_entity *entity) +{ + struct spsc_node *node; + + node = spsc_queue_pop(&entity->job_queue); + if (!node) + return NULL; + + return container_of(node, struct drm_sched_job, queue_node); +} void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) { @@ -425,7 +441,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { - while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { + while ((s_job = drm_sched_entity_queue_pop(s_entity))) { struct drm_sched_fence *s_fence = s_job->s_fence; dma_fence_signal(&s_fence->scheduled);