drm/amdgpu: move scheduler wqueue handling into callbacks
author Alex Deucher <alexander.deucher@amd.com>
Mon, 16 Jun 2025 21:45:05 +0000 (17:45 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 30 Jun 2025 15:58:22 +0000 (11:58 -0400)
Move the scheduler wqueue stopping and starting into
the ring reset callbacks.  On some IPs we have to reset
an engine which may have multiple queues.  Move the wqueue
handling into the backend so we can handle them as needed
based on the type of reset available.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
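
For illustration, a minimal sketch of the callback pattern this change establishes, modeled on the jpeg_v2_0 hunk below. The callback signature is abbreviated, and foo_ring_reset()/foo_engine_stop()/foo_engine_start() are hypothetical placeholders for whatever IP-specific reset sequence a given backend performs; engines with multiple queues (e.g. an SDMA instance with both a gfx and a page ring) stop and restart each affected ring's wqueue here instead of relying on the common timeout handler.

	/* Hypothetical per-IP ring reset callback; not from the patch itself. */
	static int foo_ring_reset(struct amdgpu_ring *ring)
	{
		int r;

		/* The backend now quiesces its own scheduler work queue(s). */
		drm_sched_wqueue_stop(&ring->sched);

		/* Placeholders for the IP-specific engine/queue reset sequence. */
		foo_engine_stop(ring->adev);
		foo_engine_start(ring->adev);

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;

		/* Ring is alive again: flush stale fences and resume scheduling. */
		amdgpu_fence_driver_force_completion(ring);
		drm_sched_wqueue_start(&ring->sched);
		return 0;
	}

With the wqueue start/stop owned by the callback, amdgpu_job_timedout() only calls amdgpu_ring_reset() and bumps gpu_reset_counter on success, as the first hunk shows.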
19 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 3b7d3844a74bc69017692b2513ca6a9b31c454ea..f0b7080dccb8d455a1957bc59b6beeb327ca8639 100644
@@ -135,17 +135,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
        } else if (amdgpu_gpu_recovery && ring->funcs->reset) {
                dev_err(adev->dev, "Starting %s ring reset\n",
                        s_job->sched->name);
-
-               /*
-                * Stop the scheduler to prevent anybody else from touching the
-                * ring buffer.
-                */
-               drm_sched_wqueue_stop(&ring->sched);
-
                r = amdgpu_ring_reset(ring, job->vmid, NULL);
                if (!r) {
                        atomic_inc(&ring->adev->gpu_reset_counter);
-                       drm_sched_wqueue_start(&ring->sched);
                        dev_err(adev->dev, "Ring %s reset succeeded\n",
                                ring->sched.name);
                        drm_dev_wedged_event(adev_to_drm(adev),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index cf5733d5d26dd009271e9e04fa7d68ac0442d292..7e26a44dcc1fd1306d892e4f9e16efc9c1c1d88c 100644
@@ -554,22 +554,16 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
        struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
        struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
        struct amdgpu_ring *page_ring = &sdma_instance->page;
-       bool gfx_sched_stopped = false, page_sched_stopped = false;
 
        mutex_lock(&sdma_instance->engine_reset_mutex);
        /* Stop the scheduler's work queue for the GFX and page rings if they are running.
        * This ensures that no new tasks are submitted to the queues while
        * the reset is in progress.
        */
-       if (!amdgpu_ring_sched_ready(gfx_ring)) {
-               drm_sched_wqueue_stop(&gfx_ring->sched);
-               gfx_sched_stopped = true;
-       }
+       drm_sched_wqueue_stop(&gfx_ring->sched);
 
-       if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) {
+       if (adev->sdma.has_page_queue)
                drm_sched_wqueue_stop(&page_ring->sched);
-               page_sched_stopped = true;
-       }
 
        if (sdma_instance->funcs->stop_kernel_queue) {
                sdma_instance->funcs->stop_kernel_queue(gfx_ring);
@@ -596,12 +590,9 @@ exit:
         * to be submitted to the queues after the reset is complete.
         */
        if (!ret) {
-               if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) {
-                       drm_sched_wqueue_start(&gfx_ring->sched);
-               }
-               if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) {
+               drm_sched_wqueue_start(&gfx_ring->sched);
+               if (adev->sdma.has_page_queue)
                        drm_sched_wqueue_start(&page_ring->sched);
-               }
        }
        mutex_unlock(&sdma_instance->engine_reset_mutex);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 4d0ee3ffe98587380cd8542edfc96f0e7926324d..8c377ecbb8a75d73e0d33acce7b070593afa49b0 100644
@@ -9540,6 +9540,8 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        spin_lock_irqsave(&kiq->ring_lock, flags);
 
        if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) {
@@ -9581,6 +9583,7 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
@@ -9600,6 +9603,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        spin_lock_irqsave(&kiq->ring_lock, flags);
 
        if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -9658,6 +9663,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 39f4dd18c277bd0775602ae7aff11c6f8993ee07..37dcec2d078415849eaf8c08b2edd5bc81b55cac 100644
@@ -6821,6 +6821,8 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(adev))
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
        if (r) {
 
@@ -6846,6 +6848,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
@@ -6989,6 +6992,8 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(adev))
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
        if (r) {
                dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
@@ -7012,6 +7017,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 964fa3f2e2719911ceadf8fbfa850f1fe8f297ac..e4fc42470cf3ef2c08f87db28b88080751c25441 100644
@@ -5317,6 +5317,8 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(adev))
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
        if (r) {
                dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
@@ -5341,6 +5343,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
@@ -5437,6 +5440,8 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(adev))
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
        if (r) {
                dev_warn(adev->dev, "fail(%d) to reset kcq  and try pipe reset\n", r);
@@ -5460,6 +5465,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 95e319974f221efd20448b85a67f791d2a448e26..76ba664efecb3e9f9c72511b1859ca43d9fb64a2 100644
@@ -7187,6 +7187,8 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        spin_lock_irqsave(&kiq->ring_lock, flags);
 
        if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -7247,6 +7249,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 8bfee17a826e266930c12d4543b78946e8d088cf..daed0f187bda5b252c0a350ba75e28ae8eb3da81 100644
@@ -3567,6 +3567,8 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        spin_lock_irqsave(&kiq->ring_lock, flags);
 
        if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -3625,6 +3627,7 @@ pipe_reset:
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 6621a7b1f29fccd14745b45e7e5e23d3b109f513..781a5a8a836140d5e836a164b951d7014becae09 100644
@@ -770,12 +770,14 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
 {
        int r;
 
+       drm_sched_wqueue_stop(&ring->sched);
        jpeg_v2_0_stop(ring->adev);
        jpeg_v2_0_start(ring->adev);
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 44a5c0e82ca432a0459ea04a9c2b4311d3cab665..5be9cdcae32c18bdc8dfc1dcc4ebfad63d4323f8 100644
@@ -649,12 +649,14 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
 {
        int r;
 
+       drm_sched_wqueue_stop(&ring->sched);
        jpeg_v2_5_stop_inst(ring->adev, ring->me);
        jpeg_v2_5_start_inst(ring->adev, ring->me);
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index e813af4eedd210634ec7177c7df456ae1296b09d..a24bd833d644274b1fccecc8e6daa510a703c423 100644
@@ -561,12 +561,14 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
 {
        int r;
 
+       drm_sched_wqueue_stop(&ring->sched);
        jpeg_v3_0_stop(ring->adev);
        jpeg_v3_0_start(ring->adev);
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 190f0742d70161be6f715426252b039341ee0f1c..1d4edd77837d0104a480f65dc059428b64ab27b1 100644
@@ -729,12 +729,14 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(ring->adev))
                return -EINVAL;
 
+       drm_sched_wqueue_stop(&ring->sched);
        jpeg_v4_0_stop(ring->adev);
        jpeg_v4_0_start(ring->adev);
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 04755b7a62d9bb9179fb9c55f32ad6752a68d2ef..78441f8fce972c91f39c15924b6ed4deb5016043 100644
@@ -1152,12 +1152,14 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(ring->adev))
                return -EOPNOTSUPP;
 
+       drm_sched_wqueue_stop(&ring->sched);
        jpeg_v4_0_3_core_stall_reset(ring);
        jpeg_v4_0_3_start_jrbc(ring);
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
index e7f942dc714a7941c451c1683a93b3d13e733bdf..6f8a16da9d608abeaa7466aabb8c00a9315502d3 100644
@@ -843,12 +843,14 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
        if (amdgpu_sriov_vf(ring->adev))
                return -EOPNOTSUPP;
 
+       drm_sched_wqueue_stop(&ring->sched);
        jpeg_v5_0_1_core_stall_reset(ring);
        jpeg_v5_0_1_init_jrbc(ring);
        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index c6cb7ff15caab4b636d9f8b6e1e5be98f55f7a20..cac0882770fd5321dfbba07454a858ea4542da61 100644
@@ -1570,6 +1570,8 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
                return -EINVAL;
        }
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
        if (r)
                return r;
@@ -1578,6 +1580,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index b00c63812899db131e010d9b131b40024ed7be4b..99a080bad2a3d0e86fd06b9695d429d73b680d2c 100644
@@ -822,6 +822,8 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
                return -EINVAL;
        }
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
        if (r)
                return r;
@@ -830,6 +832,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 6c25e9fc4f0f90be1ab375511f2f0e52bcb3c515..eec9133e1b2c4fe20ef8e6f9679002dbdc2f46a1 100644
@@ -1978,6 +1978,7 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
 
+       drm_sched_wqueue_stop(&ring->sched);
        vcn_v4_0_stop(vinst);
        vcn_v4_0_start(vinst);
 
@@ -1985,6 +1986,7 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 1e1dd61b774ec0ebe584d819865306c29d5b6cf9..d8fd32c1e38ec27cf4249c4e59cb27c3512d1ca8 100644
@@ -1609,6 +1609,8 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
 
+       drm_sched_wqueue_stop(&ring->sched);
+
        vcn_inst = GET_INST(VCN, ring->me);
        r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
 
@@ -1626,6 +1628,7 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 9c02446bb1a5457b15bc0910e3f809a446c92407..7e37ddea63550d89b0e0a7933735cd26feec989c 100644
@@ -1476,6 +1476,7 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
 
+       drm_sched_wqueue_stop(&ring->sched);
        vcn_v4_0_5_stop(vinst);
        vcn_v4_0_5_start(vinst);
 
@@ -1483,6 +1484,7 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index c8924f97cf58a9d1065aa59d410e1ea5977d5abf..47c0bcc9e7d801e4ca9ff9f64cb9004ea9e901b5 100644
@@ -1203,6 +1203,7 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
 
+       drm_sched_wqueue_stop(&ring->sched);
        vcn_v5_0_0_stop(vinst);
        vcn_v5_0_0_start(vinst);
 
@@ -1210,6 +1211,7 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
        if (r)
                return r;
        amdgpu_fence_driver_force_completion(ring);
+       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }