git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/sched: Remove racy hack from drm_sched_fini()
authorPhilipp Stanner <phasta@kernel.org>
Thu, 8 Jan 2026 08:30:20 +0000 (09:30 +0100)
committerPhilipp Stanner <phasta@kernel.org>
Fri, 27 Feb 2026 09:02:01 +0000 (10:02 +0100)
drm_sched_fini() contained a hack to work around a race in amdgpu.
According to AMD, the hack should not be necessary anymore. In case
there should have been undetected users,

commit 975ca62a014c ("drm/sched: Add warning for removing hack in drm_sched_fini()")

had added a warning one release cycle ago.

Thus, it can be derived that the hack can be safely removed by now.

Remove the hack.

Acked-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Philipp Stanner <phasta@kernel.org>
Link: https://patch.msgid.link/20260108083019.63532-2-phasta@kernel.org
drivers/gpu/drm/scheduler/sched_main.c

index e6ee35406165a7d8850db2bb78164231c5e48af7..13fa55aed3daab96084a14ba99b818a6b708382b 100644 (file)
@@ -1418,48 +1418,12 @@ static void drm_sched_cancel_remaining_jobs(struct drm_gpu_scheduler *sched)
  */
 void drm_sched_fini(struct drm_gpu_scheduler *sched)
 {
-       struct drm_sched_entity *s_entity;
        int i;
 
        drm_sched_wqueue_stop(sched);
 
-       for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
-               struct drm_sched_rq *rq = sched->sched_rq[i];
-
-               spin_lock(&rq->lock);
-               list_for_each_entry(s_entity, &rq->entities, list) {
-                       /*
-                        * Prevents reinsertion and marks job_queue as idle,
-                        * it will be removed from the rq in drm_sched_entity_fini()
-                        * eventually
-                        *
-                        * FIXME:
-                        * This lacks the proper spin_lock(&s_entity->lock) and
-                        * is, therefore, a race condition. Most notably, it
-                        * can race with drm_sched_entity_push_job(). The lock
-                        * cannot be taken here, however, because this would
-                        * lead to lock inversion -> deadlock.
-                        *
-                        * The best solution probably is to enforce the life
-                        * time rule of all entities having to be torn down
-                        * before their scheduler. Then, however, locking could
-                        * be dropped alltogether from this function.
-                        *
-                        * For now, this remains a potential race in all
-                        * drivers that keep entities alive for longer than
-                        * the scheduler.
-                        *
-                        * The READ_ONCE() is there to make the lockless read
-                        * (warning about the lockless write below) slightly
-                        * less broken...
-                        */
-                       if (!READ_ONCE(s_entity->stopped))
-                               dev_warn(sched->dev, "Tearing down scheduler with active entities!\n");
-                       s_entity->stopped = true;
-               }
-               spin_unlock(&rq->lock);
+       for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++)
                kfree(sched->sched_rq[i]);
-       }
 
        /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
        wake_up_all(&sched->job_scheduled);