drm/amdgpu: move force completion into ring resets
author Alex Deucher <alexander.deucher@amd.com>
Thu, 29 May 2025 16:58:53 +0000 (12:58 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 30 Jun 2025 15:57:29 +0000 (11:57 -0400)
Move the force completion handling into each ring
reset function so that each engine can determine
whether or not it needs to force completion on the
jobs in the ring.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
21 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c

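Illustrative sketch (not part of the patch): the common shape the per-engine reset callbacks below converge on. The stop/reset/start sequence is engine-specific and only hinted at here; the helper names (amdgpu_ring_test_helper(), amdgpu_fence_driver_force_completion()) are the ones used in the hunks that follow, while example_ring_reset() itself is a made-up placeholder.

static int example_ring_reset(struct amdgpu_ring *ring,
                              unsigned int vmid,
                              struct amdgpu_fence *timedout_fence)
{
        int r;

        /* engine-specific stop/reset/start sequence goes here */

        r = amdgpu_ring_test_helper(ring);
        if (r)
                return r;

        /*
         * Reset and ring test succeeded: signal (force-complete) the
         * fences of the jobs that were queued on this ring. This used
         * to be done unconditionally for the guilty ring in
         * amdgpu_job_timedout(); each engine now decides when it is
         * appropriate.
         */
        amdgpu_fence_driver_force_completion(ring);
        return 0;
}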
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 93413be59e08f8dfc808d461d488e47cbd2c7177..177f04491a11bdef6637059c6c8aa3768ba8cb36 100644
@@ -161,10 +161,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 
                r = amdgpu_ring_reset(ring, job->vmid, NULL);
                if (!r) {
-                       if (is_guilty) {
+                       if (is_guilty)
                                atomic_inc(&ring->adev->gpu_reset_counter);
-                               amdgpu_fence_driver_force_completion(ring);
-                       }
                        drm_sched_wqueue_start(&ring->sched);
                        dev_err(adev->dev, "Ring %s reset succeeded\n",
                                ring->sched.name);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 4b2af95203b26732db05f34e5b53c220dd9e592f..4d0ee3ffe98587380cd8542edfc96f0e7926324d 100644
@@ -9577,7 +9577,11 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
@@ -9650,7 +9654,11 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8d5c0ab016d2b03d94e5e21b54c47a8a346d18ea..39f4dd18c277bd0775602ae7aff11c6f8993ee07 100644
@@ -6842,7 +6842,11 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
@@ -7004,7 +7008,11 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 795bd353a9ceaa222dc89144b5d490c40a2b1d93..964fa3f2e2719911ceadf8fbfa850f1fe8f297ac 100644
@@ -5337,7 +5337,11 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
@@ -5452,7 +5456,11 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
                return r;
        }
 
-       return amdgpu_ring_test_ring(ring);
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d91e0423c4820631ffc9a7bcc30446e20096600a..95e319974f221efd20448b85a67f791d2a448e26 100644
@@ -7242,7 +7242,12 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
                DRM_ERROR("fail to remap queue\n");
                return r;
        }
-       return amdgpu_ring_test_ring(ring);
+
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 272f0f8e41d453de39e69e9be55e1b876bcfcc93..8bfee17a826e266930c12d4543b78946e8d088cf 100644
@@ -3620,7 +3620,12 @@ pipe_reset:
                dev_err(adev->dev, "fail to remap queue\n");
                return r;
        }
-       return amdgpu_ring_test_ring(ring);
+
+       r = amdgpu_ring_test_ring(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 enum amdgpu_gfx_cp_ras_mem_id {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 93eb71d2ce304a2b20b0a7f280123c35842f849f..6621a7b1f29fccd14745b45e7e5e23d3b109f513 100644
@@ -768,9 +768,15 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
                                unsigned int vmid,
                                struct amdgpu_fence *timedout_fence)
 {
+       int r;
+
        jpeg_v2_0_stop(ring->adev);
        jpeg_v2_0_start(ring->adev);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 45b8702d20ad943ced5b987ad6acaa860013d87e..44a5c0e82ca432a0459ea04a9c2b4311d3cab665 100644
@@ -647,9 +647,15 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
                                unsigned int vmid,
                                struct amdgpu_fence *timedout_fence)
 {
+       int r;
+
        jpeg_v2_5_stop_inst(ring->adev, ring->me);
        jpeg_v2_5_start_inst(ring->adev, ring->me);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 9bd0ae4a24a1ae8bf614601c31e9f752630b0952..e813af4eedd210634ec7177c7df456ae1296b09d 100644
@@ -559,9 +559,15 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
                                unsigned int vmid,
                                struct amdgpu_fence *timedout_fence)
 {
+       int r;
+
        jpeg_v3_0_stop(ring->adev);
        jpeg_v3_0_start(ring->adev);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 4fc1ef85b2945bdb0888bb94ea0b76d3230fb7f7..190f0742d70161be6f715426252b039341ee0f1c 100644
@@ -724,12 +724,18 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring,
                                unsigned int vmid,
                                struct amdgpu_fence *timedout_fence)
 {
+       int r;
+
        if (amdgpu_sriov_vf(ring->adev))
                return -EINVAL;
 
        jpeg_v4_0_stop(ring->adev);
        jpeg_v4_0_start(ring->adev);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index bdc7612ba56a515c7a9822ef304e50362424c6e0..04755b7a62d9bb9179fb9c55f32ad6752a68d2ef 100644
@@ -1147,12 +1147,18 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
                                  unsigned int vmid,
                                  struct amdgpu_fence *timedout_fence)
 {
+       int r;
+
        if (amdgpu_sriov_vf(ring->adev))
                return -EOPNOTSUPP;
 
        jpeg_v4_0_3_core_stall_reset(ring);
        jpeg_v4_0_3_start_jrbc(ring);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
index aee4f50a0f52e71e6b0b11bc91c194cab127ba83..e7f942dc714a7941c451c1683a93b3d13e733bdf 100644
@@ -838,12 +838,18 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring,
                                  unsigned int vmid,
                                  struct amdgpu_fence *timedout_fence)
 {
+       int r;
+
        if (amdgpu_sriov_vf(ring->adev))
                return -EOPNOTSUPP;
 
        jpeg_v5_0_1_core_stall_reset(ring);
        jpeg_v5_0_1_init_jrbc(ring);
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index b8828432c98a3a8315c8275844524123a86aac09..c13ae87d4e6481a96add2012a811c1af7c3a3f9e 100644
@@ -1679,6 +1679,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
                                   unsigned int vmid,
                                   struct amdgpu_fence *timedout_fence)
 {
+       bool is_guilty = ring->funcs->is_guilty(ring);
        struct amdgpu_device *adev = ring->adev;
        u32 id = ring->me;
        int r;
@@ -1689,8 +1690,13 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring,
        amdgpu_amdkfd_suspend(adev, true);
        r = amdgpu_sdma_reset_engine(adev, id);
        amdgpu_amdkfd_resume(adev, true);
+       if (r)
+               return r;
 
-       return r;
+       if (is_guilty)
+               amdgpu_fence_driver_force_completion(ring);
+
+       return 0;
 }
 
 static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
@@ -1734,8 +1740,8 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
 static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
-       u32 inst_mask;
-       int i;
+       u32 inst_mask, tmp_mask;
+       int i, r;
 
        inst_mask = 1 << ring->me;
        udelay(50);
@@ -1752,7 +1758,24 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
                return -ETIMEDOUT;
        }
 
-       return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       r = sdma_v4_4_2_inst_start(adev, inst_mask, true);
+       if (r)
+               return r;
+
+       tmp_mask = inst_mask;
+       for_each_inst(i, tmp_mask) {
+               ring = &adev->sdma.instance[i].ring;
+
+               amdgpu_fence_driver_force_completion(ring);
+
+               if (adev->sdma.has_page_queue) {
+                       struct amdgpu_ring *page = &adev->sdma.instance[i].page;
+
+                       amdgpu_fence_driver_force_completion(page);
+               }
+       }
+
+       return r;
 }
 
 static int sdma_v4_4_2_soft_reset_engine(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 2fd72c85cf0148d3d06865c2476da3b5e63ddf25..4d72b085b3dd77c185545ff0aea20ff1f134a8a7 100644
@@ -1618,7 +1618,10 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
 
        r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
        amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 21421f1bd209584070d0c3b7c0cdf4eda4ffab46..42a25150f83ac2a7a048515a5f374a1e147f2933 100644
@@ -1534,7 +1534,10 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
        r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
 
        amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 2455965387e07e83d706ede7337a3d93de145f2b..c6cb7ff15caab4b636d9f8b6e1e5be98f55f7a20 100644
@@ -1574,7 +1574,11 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       return sdma_v6_0_gfx_resume_instance(adev, i, true);
+       r = sdma_v6_0_gfx_resume_instance(adev, i, true);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 6210be7e4a52f6bae31451708b994bc6ba4a5c6e..b00c63812899db131e010d9b131b40024ed7be4b 100644
@@ -826,7 +826,11 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       return sdma_v7_0_gfx_resume_instance(adev, i, true);
+       r = sdma_v7_0_gfx_resume_instance(adev, i, true);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 244359fa4aacb1e2bb5053210b97552480073884..6c25e9fc4f0f90be1ab375511f2f0e52bcb3c515 100644
@@ -1973,6 +1973,7 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
 
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
@@ -1980,7 +1981,11 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring,
        vcn_v4_0_stop(vinst);
        vcn_v4_0_start(vinst);
 
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 75c07ebf7fe4b6a14f8523ef9a630fcc85abc1ec..1e1dd61b774ec0ebe584d819865306c29d5b6cf9 100644
@@ -1623,8 +1623,10 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        vcn_v4_0_3_hw_init_inst(vinst);
        vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
        r = amdgpu_ring_test_helper(ring);
-
-       return r;
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 731f7762c3e0abb92f73794d0734f9d1f6110857..9c02446bb1a5457b15bc0910e3f809a446c92407 100644
@@ -1471,6 +1471,7 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
 
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
@@ -1478,7 +1479,11 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring,
        vcn_v4_0_5_stop(vinst);
        vcn_v4_0_5_start(vinst);
 
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index f975994b3ff44fc1fa3d318dc547be30f920b8dd..c8924f97cf58a9d1065aa59d410e1ea5977d5abf 100644
@@ -1198,6 +1198,7 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
 
        if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
                return -EOPNOTSUPP;
@@ -1205,7 +1206,11 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring,
        vcn_v5_0_0_stop(vinst);
        vcn_v5_0_0_start(vinst);
 
-       return amdgpu_ring_test_helper(ring);
+       r = amdgpu_ring_test_helper(ring);
+       if (r)
+               return r;
+       amdgpu_fence_driver_force_completion(ring);
+       return 0;
 }
 
 static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {