]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
Revert "drm/nouveau: Remove waitque for sched teardown"
authorPhilipp Stanner <phasta@kernel.org>
Mon, 1 Sep 2025 08:31:08 +0000 (10:31 +0200)
committerDanilo Krummrich <dakr@kernel.org>
Wed, 3 Sep 2025 21:16:59 +0000 (23:16 +0200)
This reverts:

commit bead88002227 ("drm/nouveau: Remove waitque for sched teardown")
commit 5f46f5c7af8c ("drm/nouveau: Add new callback for scheduler teardown")

from the drm/sched teardown leak fix series:

https://lore.kernel.org/dri-devel/20250710125412.128476-2-phasta@kernel.org/

The aforementioned series removed a blocking waitqueue from
nouveau_sched_fini(). It was mistakenly assumed that this waitqueue only
prevents jobs from leaking, which the series fixed.

The waitqueue, however, also guarantees that all VM_BIND related jobs
are finished in order, cleaning up mappings in the GPU's MMU. These jobs
must be executed sequentially. Without the waitqueue, this is no longer
guaranteed, because entity and scheduler teardown can race with each
other.

Revert all patches related to the waitqueue removal.

Fixes: bead88002227 ("drm/nouveau: Remove waitque for sched teardown")
Suggested-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Philipp Stanner <phasta@kernel.org>
Link: https://lore.kernel.org/r/20250901083107.10206-2-phasta@kernel.org
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
drivers/gpu/drm/nouveau/nouveau_fence.c
drivers/gpu/drm/nouveau/nouveau_fence.h
drivers/gpu/drm/nouveau/nouveau_sched.c
drivers/gpu/drm/nouveau/nouveau_sched.h
drivers/gpu/drm/nouveau/nouveau_uvmm.c

index 9f345a0087175cbd1cc5cf96f23fedf168b5db56..869d4335c0f45cb9d6e2319bf10d10bc83ff716c 100644 (file)
@@ -240,21 +240,6 @@ nouveau_fence_emit(struct nouveau_fence *fence)
        return ret;
 }
 
-void
-nouveau_fence_cancel(struct nouveau_fence *fence)
-{
-       struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
-       unsigned long flags;
-
-       spin_lock_irqsave(&fctx->lock, flags);
-       if (!dma_fence_is_signaled_locked(&fence->base)) {
-               dma_fence_set_error(&fence->base, -ECANCELED);
-               if (nouveau_fence_signal(fence))
-                       nvif_event_block(&fctx->event);
-       }
-       spin_unlock_irqrestore(&fctx->lock, flags);
-}
-
 bool
 nouveau_fence_done(struct nouveau_fence *fence)
 {
index 9957a919bd38e776cf20c2dbe7062d7617bcf338..183dd43ecfff4ab3c8858899bc835089fe1b655d 100644 (file)
@@ -29,7 +29,6 @@ void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *);
 bool nouveau_fence_done(struct nouveau_fence *);
-void nouveau_fence_cancel(struct nouveau_fence *fence);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
 int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
 
index 0cc0bc9f9952b1cfbc4015cb2c292fbf383459c9..e60f7892f5ce9aff0c5fa1908c1a0445891927ed 100644 (file)
@@ -11,7 +11,6 @@
 #include "nouveau_exec.h"
 #include "nouveau_abi16.h"
 #include "nouveau_sched.h"
-#include "nouveau_chan.h"
 
 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS           10000
 
@@ -122,9 +121,11 @@ nouveau_job_done(struct nouveau_job *job)
 {
        struct nouveau_sched *sched = job->sched;
 
-       spin_lock(&sched->job_list.lock);
+       spin_lock(&sched->job.list.lock);
        list_del(&job->entry);
-       spin_unlock(&sched->job_list.lock);
+       spin_unlock(&sched->job.list.lock);
+
+       wake_up(&sched->job.wq);
 }
 
 void
@@ -305,9 +306,9 @@ nouveau_job_submit(struct nouveau_job *job)
        }
 
        /* Submit was successful; add the job to the schedulers job list. */
-       spin_lock(&sched->job_list.lock);
-       list_add(&job->entry, &sched->job_list.head);
-       spin_unlock(&sched->job_list.lock);
+       spin_lock(&sched->job.list.lock);
+       list_add(&job->entry, &sched->job.list.head);
+       spin_unlock(&sched->job.list.lock);
 
        drm_sched_job_arm(&job->base);
        job->done_fence = dma_fence_get(&job->base.s_fence->finished);
@@ -392,23 +393,10 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
        nouveau_job_fini(job);
 }
 
-static void
-nouveau_sched_cancel_job(struct drm_sched_job *sched_job)
-{
-       struct nouveau_fence *fence;
-       struct nouveau_job *job;
-
-       job = to_nouveau_job(sched_job);
-       fence = to_nouveau_fence(job->done_fence);
-
-       nouveau_fence_cancel(fence);
-}
-
 static const struct drm_sched_backend_ops nouveau_sched_ops = {
        .run_job = nouveau_sched_run_job,
        .timedout_job = nouveau_sched_timedout_job,
        .free_job = nouveau_sched_free_job,
-       .cancel_job = nouveau_sched_cancel_job,
 };
 
 static int
@@ -458,8 +446,9 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
                goto fail_sched;
 
        mutex_init(&sched->mutex);
-       spin_lock_init(&sched->job_list.lock);
-       INIT_LIST_HEAD(&sched->job_list.head);
+       spin_lock_init(&sched->job.list.lock);
+       INIT_LIST_HEAD(&sched->job.list.head);
+       init_waitqueue_head(&sched->job.wq);
 
        return 0;
 
@@ -493,12 +482,16 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
        return 0;
 }
 
+
 static void
 nouveau_sched_fini(struct nouveau_sched *sched)
 {
        struct drm_gpu_scheduler *drm_sched = &sched->base;
        struct drm_sched_entity *entity = &sched->entity;
 
+       rmb(); /* for list_empty to work without lock */
+       wait_event(sched->job.wq, list_empty(&sched->job.list.head));
+
        drm_sched_entity_fini(entity);
        drm_sched_fini(drm_sched);
 
index b98c3f0bef302990a9280202ac1160fc81ebf137..20cd1da8db73c3c9b1e7365da2cd408a6e9de312 100644 (file)
@@ -103,9 +103,12 @@ struct nouveau_sched {
        struct mutex mutex;
 
        struct {
-               struct list_head head;
-               spinlock_t lock;
-       } job_list;
+               struct {
+                       struct list_head head;
+                       spinlock_t lock;
+               } list;
+               struct wait_queue_head wq;
+       } job;
 };
 
 int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
index ddfc46bc1b3e264bf9599d91d5d2ab5d01afc8c9..48f105239f42d8ffa3cefd253bd83d52dbb3255f 100644 (file)
@@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
        u64 end = addr + range;
 
 again:
-       spin_lock(&sched->job_list.lock);
-       list_for_each_entry(__job, &sched->job_list.head, entry) {
+       spin_lock(&sched->job.list.lock);
+       list_for_each_entry(__job, &sched->job.list.head, entry) {
                struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
 
                list_for_each_op(op, &bind_job->ops) {
@@ -1030,7 +1030,7 @@ again:
 
                                if (!(end <= op_addr || addr >= op_end)) {
                                        nouveau_uvmm_bind_job_get(bind_job);
-                                       spin_unlock(&sched->job_list.lock);
+                                       spin_unlock(&sched->job.list.lock);
                                        wait_for_completion(&bind_job->complete);
                                        nouveau_uvmm_bind_job_put(bind_job);
                                        goto again;
@@ -1038,7 +1038,7 @@ again:
                        }
                }
        }
-       spin_unlock(&sched->job_list.lock);
+       spin_unlock(&sched->job.list.lock);
 }
 
 static int