git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
drm/amdgpu/gfx9: manually control gfxoff for CS on RV
author: Alex Deucher <alexander.deucher@amd.com>
Tue, 28 Jan 2025 16:55:22 +0000 (11:55 -0500)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Feb 2025 12:34:22 +0000 (04:34 -0800)
commit b35eb9128ebeec534eed1cefd6b9b1b7282cf5ba upstream.

When mesa started using compute queues more often
we started seeing additional hangs with compute queues.
Disabling gfxoff seems to mitigate that.  Manually
control gfxoff and gfx pg with command submissions to avoid
any issues related to gfxoff.  KFD already does the same
thing for these chips.

v2: limit to compute
v3: limit to APUs
v4: limit to Raven/PCO
v5: only update the compute ring_funcs
v6: Disable GFX PG
v7: adjust order

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Suggested-by: Błażej Szczygieł <mumei6102@gmail.com>
Suggested-by: Sergey Kovalenko <seryoga.engineering@gmail.com>
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3861
Link: https://lists.freedesktop.org/archives/amd-gfx/2025-January/119116.html
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 6.12.x
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index 0b6f09f2cc9bd01acf69ffc7dbd8878a4edfd8fd..d28258bb6d298548b09c1f9980ccd2a428a4a1ba 100644 (file)
@@ -7439,6 +7439,34 @@ static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
        amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
 }
 
+static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+
+       /* Raven and PCO APUs seem to have stability issues
+        * with compute and gfxoff and gfx pg.  Ungate (disable) gfx pg
+        * here; gfx_v9_0_ring_end_use_compute() gates it again.
+        */
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+               gfx_v9_0_set_powergating_state(adev, AMD_PG_STATE_UNGATE);
+}
+
+static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       /* Raven and PCO APUs seem to have stability issues
+        * with compute and gfxoff and gfx pg.  Allow gfx pg again here,
+        * undoing the ungate done in gfx_v9_0_ring_begin_use_compute().
+        */
+       if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+               gfx_v9_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+       amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
        .name = "gfx_v9_0",
        .early_init = gfx_v9_0_early_init,
@@ -7615,8 +7643,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
        .emit_wave_limit = gfx_v9_0_emit_wave_limit,
        .reset = gfx_v9_0_reset_kcq,
        .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
-       .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
-       .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+       .begin_use = gfx_v9_0_ring_begin_use_compute,
+       .end_use = gfx_v9_0_ring_end_use_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {