git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdgpu: move force completion into ring resets
author: Alex Deucher <alexander.deucher@amd.com>
Thu, 29 May 2025 16:58:53 +0000 (12:58 -0400)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 15 Aug 2025 14:38:43 +0000 (16:38 +0200)
[ Upstream commit 2dee58ca471dae05c473270d0fb74efe01a78ccb ]

Move the force completion handling into each ring
reset function so that each engine can determine
whether or not it needs to force completion on the
jobs in the ring.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Stable-dep-of: 14b2d71a9a24 ("drm/amdgpu/gfx10: fix KGQ reset sequence")
Signed-off-by: Sasha Levin <sashal@kernel.org>
21 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c

index 9ea3bce01faf08eb8dadabca3ae8e7d64df6f72c..3528a27c7c1ddd924ddf3c87d53ee6a92850905f 100644 (file)
@@ -161,10 +161,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 
                r = amdgpu_ring_reset(ring, job->vmid);
                if (!r) {
-                       if (is_guilty) {
+                       if (is_guilty)
                                atomic_inc(&ring->adev->gpu_reset_counter);
-                               amdgpu_fence_driver_force_completion(ring);
-                       }
                        drm_sched_wqueue_start(&ring->sched);
                        dev_err(adev->dev, "Ring %s reset succeeded\n",
                                ring->sched.name);
index 75ea071744eb5e23606fd648e60b6bb14412ece1..777e383d75e2578b9ef5c2b74751018d25a3c93c 100644 (file)
@@ -9575,7 +9575,11 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
@@ -9647,7 +9651,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
index ec9b84f92d4670b6ac01c37fe1b4e44b7990a700..e632e97d63be0277238132aed60df48c4176e242 100644 (file)
@@ -6840,7 +6840,11 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
@@ -7000,7 +7004,11 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
index 1234c8d64e20d9e8d929bab7440ab9861acc44a4..50f04c2c0b8c0c4eb3bbe276629ac8e53c399643 100644 (file)
@@ -5335,7 +5335,11 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
@@ -5448,7 +5452,11 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
index ad9be3656653bbb195358c2ddc5fcf6bc20d54b8..23f9981815612486e5b2daf059887959e040939f 100644 (file)
@@ -7286,7 +7286,12 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
                DRM_ERROR("fail to remap queue\n");
                return r;
        }
-       return amdgpu_ring_test_ring(ring);
+
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
index c233edf605694c8661b9270a71c5ba437a2f0f2d..264b37e856965537802f051c880e8158b7195542 100644 (file)
@@ -3619,7 +3619,12 @@ pipe_reset:
                dev_err(adev->dev, "fail to remap queue\n");
                return r;
        }
-       return amdgpu_ring_test_ring(ring);
+
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 enum amdgpu_gfx_cp_ras_mem_id {
index 4cde8a8bcc837aae0ea293b2f585f8b84529c29f..49620fbf6c7a25a9908309e633773bbfe049b80a 100644 (file)
@@ -766,9 +766,15 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
 
 static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       int r;
+
        jpeg_v2_0_stop(ring->adev);
        jpeg_v2_0_start(ring->adev);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
index 8b39e114f3be144e83d70f2fc2a5c511246c9799..98ae9c0e83f7ba1ce3243b9379bc567aa945670e 100644 (file)
@@ -645,9 +645,15 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
 
 static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       int r;
+
        jpeg_v2_5_stop_inst(ring->adev, ring->me);
        jpeg_v2_5_start_inst(ring->adev, ring->me);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
index 2f8510c2986b9ac73cb294d89e379a4b2f6a350f..7fb5994303652159dc5d45cc45cc5847843edb04 100644 (file)
@@ -557,9 +557,15 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
 
 static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       int r;
+
        jpeg_v3_0_stop(ring->adev);
        jpeg_v3_0_start(ring->adev);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
index f17ec5414fd69d726b489eeb5d57a6b8ff6d3589..a6612c942b939b8d15de71d93dc3d8cb262b7fb5 100644 (file)
@@ -722,12 +722,18 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
 
 static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       int r;
+
        if (amdgpu_sriov_vf(ring->adev))
                return -EINVAL;
 
        jpeg_v4_0_stop(ring->adev);
        jpeg_v4_0_start(ring->adev);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
index 79e342d5ab28d87f39e43afdc9dd0c0e97bf937d..90d773dbe337cded81fb99ff510fb82eac3b553b 100644 (file)
@@ -1145,12 +1145,18 @@ static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring)
 
 static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       int r;
+
        if (amdgpu_sriov_vf(ring->adev))
                return -EOPNOTSUPP;
 
        jpeg_v4_0_3_core_stall_reset(ring);
        jpeg_v4_0_3_start_jrbc(ring);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
index 3b6f65a256464a949788ebaae7b1dda727de1c97..7cad77a968f160283efbfe644720afc8af521c81 100644 (file)
@@ -836,12 +836,18 @@ static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring)
 
 static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       int r;
+
        if (amdgpu_sriov_vf(ring->adev))
                return -EOPNOTSUPP;
 
        jpeg_v5_0_1_core_stall_reset(ring);
        jpeg_v5_0_1_init_jrbc(ring);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
index 5de2f047c534a387e89a84fedc47b3f4aaf565c1..9f0ad11994317768b63f27a4217858bb7f4d66c0 100644 (file)
@@ -1674,6 +1674,7 @@ static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring)
 
 static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
 {
+       bool is_guilty = ring->funcs->is_guilty(ring);
        struct amdgpu_device *adev = ring->adev;
        u32 id = ring->me;
        int r;
@@ -1684,8 +1685,13 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
        amdgpu_amdkfd_suspend(adev, true);
        r = amdgpu_sdma_reset_engine(adev, id);
        amdgpu_amdkfd_resume(adev, true);
+       if (r)
+               return r;
 
-       return r;
+       if (is_guilty)
+               amdgpu_fence_driver_force_completion(ring);
+
+       return 0;
 }
 
 static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
@@ -1729,8 +1735,8 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
 static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
-       u32 inst_mask;
-       int i;
+       u32 inst_mask, tmp_mask;
+       int i, r;
 
        inst_mask = 1 << ring->me;
        udelay(50);
@@ -1747,7 +1753,24 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
                return -ETIMEDOUT;
        }
 
-       return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       if (r)
+               return r;
+
+       tmp_mask = inst_mask;
+       for_each_inst(i, tmp_mask) {
+               ring = &adev->sdma.instance[i].ring;
+
+               amdgpu_fence_driver_force_completion(ring);
+
+               if (adev->sdma.has_page_queue) {
+                       struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+                       amdgpu_fence_driver_force_completion(page);
+               }
+       }
+
+       return r;
 }
 
 static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
index 37f4b5b4a098ff4311845d5e8548012de8882c69..b43d6cb8a0d4ecc3e56850f42f490558d64f684c 100644 (file)
@@ -1616,7 +1616,10 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
 
        r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
        amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
index 0b40411b92a0b8a031a7926a6ebc7da412be4802..a88aa53e887c2adcb62bc5e224204bc69cb16e57 100644 (file)
@@ -1532,7 +1532,10 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
        r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
 
        amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
index a9bdf8d61d6ce75794b24b2cd25e0703e94cb2f5..041bca58add556d0599c890a7945419c35df4fec 100644 (file)
@@ -1572,7 +1572,11 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
        if (r)
                return r;
 
-       return sdma_v6_0_gfx_resume_instance(adev, i, true);
+       r = sdma_v6_0_gfx_resume_instance(adev, i, true);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
index 86903eccbd4e5764331d1ce7d413f86560435340..b4167f23c02dd1dd749cbbe3ff0b33b0435c4a2b 100644 (file)
@@ -824,7 +824,11 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
        if (r)
                return r;
 
-       return sdma_v7_0_gfx_resume_instance(adev, i, true);
+       r = sdma_v7_0_gfx_resume_instance(adev, i, true);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 /**
index b5071f77f78d233510587f8e9013d7e194b67cd0..46c329a1b2f5f070dcb34a57726b88596ed49277 100644 (file)
@@ -1971,6 +1971,7 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
 
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
@@ -1978,7 +1979,11 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
        vcn_v4_0_stop(vinst);
        vcn_v4_0_start(vinst);
 
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
index 5a33140f5723519ac9d2b3b6726a92aee3b57d7c..faba11166efb6b297b806dd984384987d098c5ea 100644 (file)
@@ -1621,8 +1621,10 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
        vcn_v4_0_3_hw_init_inst(vinst);
        vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
        r = amdgpu_ring_test_helper(ring);
-
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
index 16ade84facc789e1311c8bf08e54b0a3b046f121..af29a8e141a4f4cbb8211d6cee477649cbadf704 100644 (file)
@@ -1469,6 +1469,7 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
 
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
@@ -1476,7 +1477,11 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
        vcn_v4_0_5_stop(vinst);
        vcn_v4_0_5_start(vinst);
 
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
index f8e3f0b882da563f614c60f9df2a04e9b512791c..216324f6da85f9f2720c556c661437442b7cd08b 100644 (file)
@@ -1196,6 +1196,7 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
 
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
@@ -1203,7 +1204,11 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
        vcn_v5_0_0_stop(vinst);
        vcn_v5_0_0_start(vinst);
 
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {