git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: move reset debug disable handling
authorAlex Deucher <alexander.deucher@amd.com>
Tue, 14 Oct 2025 21:01:05 +0000 (17:01 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 4 Nov 2025 16:33:54 +0000 (11:33 -0500)
Move everything to the supported reset masks rather than
having explicit misc checks for this.

Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
12 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c

index e08d837668f134e2fb9b9622089a6f1e9acaaed4..9e0cd1e0afc373eafcb08b9f865e3b53e5b3ec1c 100644 (file)
@@ -130,11 +130,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
        }
 
        /* attempt a per ring reset */
-       if (unlikely(adev->debug_disable_gpu_ring_reset)) {
-               dev_err(adev->dev, "Ring reset disabled by debug mask\n");
-       } else if (amdgpu_gpu_recovery &&
-                  amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
-                  ring->funcs->reset) {
+       if (amdgpu_gpu_recovery &&
+           amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+           ring->funcs->reset) {
                dev_err(adev->dev, "Starting %s ring reset\n",
                        s_job->sched->name);
                r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
index 43f769fed810ebce56994c094920d049c416b37a..bf1b90a341d8d7a826ab08dbe7e54011a9a3f740 100644 (file)
@@ -468,9 +468,6 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
        ktime_t deadline;
        bool ret;
 
-       if (unlikely(ring->adev->debug_disable_soft_recovery))
-               return false;
-
        deadline = ktime_add_us(ktime_get(), 10000);
 
        if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
index 751732f3e8839ee3622d597b46f35745485e682e..d75b9940f24874aaee983a37c1186e998f2e3b10 100644 (file)
@@ -4956,7 +4956,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
                amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
        adev->gfx.compute_supported_reset =
                amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
-       if (!amdgpu_sriov_vf(adev)) {
+       if (!amdgpu_sriov_vf(adev) &&
+           !adev->debug_disable_gpu_ring_reset) {
                adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
        }
index 252517ce5d5af41b645232f047eaef086d43aa87..02d7cfae22bde9a9f1a6e813151b80da33388811 100644 (file)
@@ -1821,13 +1821,15 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
        case IP_VERSION(11, 0, 3):
                if ((adev->gfx.me_fw_version >= 2280) &&
                    (adev->gfx.mec_fw_version >= 2410) &&
-                   !amdgpu_sriov_vf(adev)) {
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                }
                break;
        default:
-               if (!amdgpu_sriov_vf(adev)) {
+               if (!amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                }
index 35d5a7e99a7c67b032a18db59c741180aab53713..d01d2712cf5703767a2bd5443583a4ab81c81f6f 100644 (file)
@@ -1548,7 +1548,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
        case IP_VERSION(12, 0, 1):
                if ((adev->gfx.me_fw_version >= 2660) &&
                    (adev->gfx.mec_fw_version >= 2920) &&
-                   !amdgpu_sriov_vf(adev)) {
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                }
index f1a2efc2a8d0a4e2c9c969a5cc58ea41f5fe18cc..0148d7ff34d99a9c8f8d507a19fdb1e4a9f139c3 100644 (file)
@@ -2409,7 +2409,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
                amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
        adev->gfx.compute_supported_reset =
                amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
-       if (!amdgpu_sriov_vf(adev))
+       if (!amdgpu_sriov_vf(adev) && !adev->debug_disable_gpu_ring_reset)
                adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
 
        r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
index e0b50c690f8cb9605f70b4b0361b9cd392316ab6..c4c551ef6b874da8baa0495d0d66936d45778aba 100644 (file)
@@ -1149,14 +1149,16 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
        case IP_VERSION(9, 4, 3):
        case IP_VERSION(9, 4, 4):
                if ((adev->gfx.mec_fw_version >= 155) &&
-                   !amdgpu_sriov_vf(adev)) {
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
                }
                break;
        case IP_VERSION(9, 5, 0):
                if ((adev->gfx.mec_fw_version >= 21) &&
-                   !amdgpu_sriov_vf(adev)) {
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset) {
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                        adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
                }
index 36b1ca73c2ed323a85b8bdbf3adb54bd0fecbcce..a1443990d5c60d499dde5f1427e5a7fa09d3d70d 100644 (file)
@@ -2361,11 +2361,15 @@ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
        switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
        case IP_VERSION(9, 4, 3):
        case IP_VERSION(9, 4, 4):
-               if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+               if ((adev->gfx.mec_fw_version >= 0xb0) &&
+                   amdgpu_dpm_reset_sdma_is_supported(adev) &&
+                   !adev->debug_disable_gpu_ring_reset)
                        adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                break;
        case IP_VERSION(9, 5, 0):
-               if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+               if ((adev->gfx.mec_fw_version >= 0xf) &&
+                   amdgpu_dpm_reset_sdma_is_supported(adev) &&
+                   !adev->debug_disable_gpu_ring_reset)
                        adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                break;
        default:
index 7dc67a22a7a01d9df65cb93f91acdcbd7c1f85fa..8ddc4df06a1fdecf7b8ce349ce10ba87cca63d83 100644 (file)
@@ -1429,7 +1429,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
        case IP_VERSION(5, 0, 2):
        case IP_VERSION(5, 0, 5):
                if ((adev->sdma.instance[0].fw_version >= 35) &&
-                   !amdgpu_sriov_vf(adev))
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset)
                        adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                break;
        default:
index 3bd44c24f692dbe155353afc307cd0b14d780837..c6a619514a8ad955105f8de1033de7ec9f0e9abc 100644 (file)
@@ -1348,12 +1348,14 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
        case IP_VERSION(5, 2, 3):
        case IP_VERSION(5, 2, 4):
                if ((adev->sdma.instance[0].fw_version >= 76) &&
-                   !amdgpu_sriov_vf(adev))
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset)
                        adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                break;
        case IP_VERSION(5, 2, 5):
                if ((adev->sdma.instance[0].fw_version >= 34) &&
-                   !amdgpu_sriov_vf(adev))
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset)
                        adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                break;
        default:
index db6e41967f126bc9019546179111f9c340c6f016..0ceeb19df2e5d7ad3f955bd3835e5997bbd6eac6 100644 (file)
@@ -1356,7 +1356,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
        case IP_VERSION(6, 0, 2):
        case IP_VERSION(6, 0, 3):
                if ((adev->sdma.instance[0].fw_version >= 21) &&
-                   !amdgpu_sriov_vf(adev))
+                   !amdgpu_sriov_vf(adev) &&
+                   !adev->debug_disable_gpu_ring_reset)
                        adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
                break;
        default:
index 326ecc8d37d21d0c634837758822c32b1043392b..2b81344dcd668a7b386c8ae96df90a7499d24184 100644 (file)
@@ -1337,7 +1337,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
 
        adev->sdma.supported_reset =
                amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
-       if (!amdgpu_sriov_vf(adev))
+       if (!amdgpu_sriov_vf(adev) &&
+           !adev->debug_disable_gpu_ring_reset)
                adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
 
        r = amdgpu_sdma_sysfs_reset_mask_init(adev);