git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/sched: Fix dynamic job-flow control race
author Rob Clark <robdclark@chromium.org>
Fri, 13 Sep 2024 20:23:01 +0000 (13:23 -0700)
committer Danilo Krummrich <dakr@kernel.org>
Mon, 23 Sep 2024 23:14:16 +0000 (01:14 +0200)
Fixes a race condition reported here: https://github.com/AsahiLinux/linux/issues/309#issuecomment-2238968609

The whole premise of lockless access to a single-producer-single-
consumer queue is that there is just a single producer and single
consumer.  That means we can't call drm_sched_can_queue() (which is
about queueing more work to the hw, not to the spsc queue) from
anywhere other than the consumer (wq).

This call in the producer is just an optimization to avoid scheduling
the consuming worker if it cannot yet queue more work to the hw.  It
is safe to drop this optimization to avoid the race condition.

Suggested-by: Asahi Lina <lina@asahilina.net>
Fixes: a78422e9dff3 ("drm/sched: implement dynamic job-flow control")
Closes: https://github.com/AsahiLinux/linux/issues/309
Cc: stable@vger.kernel.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Danilo Krummrich <dakr@kernel.org>
Tested-by: Janne Grunau <j@jannau.net>
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20240913202301.16772-1-robdclark@gmail.com
drivers/gpu/drm/scheduler/sched_entity.c
drivers/gpu/drm/scheduler/sched_main.c
include/drm/gpu_scheduler.h

index 58c8161289fea9dfdb07c5b65a5b3ec682d918a7..567e5ace6d0c4d6bd74a31cdd98a9006cf29ece2 100644 (file)
@@ -380,7 +380,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
                container_of(cb, struct drm_sched_entity, cb);
 
        drm_sched_entity_clear_dep(f, cb);
-       drm_sched_wakeup(entity->rq->sched, entity);
+       drm_sched_wakeup(entity->rq->sched);
 }
 
 /**
@@ -612,7 +612,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
                if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
                        drm_sched_rq_update_fifo(entity, submit_ts);
 
-               drm_sched_wakeup(entity->rq->sched, entity);
+               drm_sched_wakeup(entity->rq->sched);
        }
 }
 EXPORT_SYMBOL(drm_sched_entity_push_job);
index 7e90c9f95611a00acb4a2f4fc551fdac6b02c520..a124d5e77b5e86887c40807a83bc512e4a925ad7 100644 (file)
@@ -1022,15 +1022,12 @@ EXPORT_SYMBOL(drm_sched_job_cleanup);
 /**
  * drm_sched_wakeup - Wake up the scheduler if it is ready to queue
  * @sched: scheduler instance
- * @entity: the scheduler entity
  *
  * Wake up the scheduler if we can queue jobs.
  */
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched,
-                     struct drm_sched_entity *entity)
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
 {
-       if (drm_sched_can_queue(sched, entity))
-               drm_sched_run_job_queue(sched);
+       drm_sched_run_job_queue(sched);
 }
 
 /**
index 5acc64954a8830117773442eb670068bf5f327c2..e28bc649b5c9b7e2ad6957c9415cb300c6e72b63 100644 (file)
@@ -574,7 +574,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 
 void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity);
+void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);