git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu/vcn4.0.3: rework reset handling
author: Jesse.Zhang <Jesse.Zhang@amd.com>
Tue, 20 Jan 2026 02:23:35 +0000 (10:23 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 21 Jan 2026 19:17:48 +0000 (14:17 -0500)
Resetting VCN resets the entire tile, including jpeg.
When resetting the VCN, we need to ensure that the JPEG data blocks are accessible, and we also need to handle the JPEG queues.
Add a helper function to restore the JPEG queue during the VCN reset.

v2: split the jpeg helper in two: in the top helper we stop the sched workqueues and attempt to wait for any outstanding fences;
    then, in the bottom helper, we force completion, re-init the rings, and restart the sched workqueues (Alex)

v3: merge patches 1 and 2 into one patch (Alex)

Signed-off-by: Jesse Zhang <jesse.zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c

index 50ed7fb0e941c6f5dd225907bc1204bba3b66e10..4b4aa95536242306f5def65a89626ee025c22aa8 100644 (file)
@@ -1145,10 +1145,18 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring,
                                  unsigned int vmid,
                                  struct amdgpu_fence *timedout_fence)
 {
+       struct amdgpu_device *adev = ring->adev;
+       struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       int r;
+
+       /* take the vcn reset mutex here because resetting VCN will reset jpeg as well */
+       /* NOTE(review): this serializes against vcn_v4_0_3_ring_reset(), which takes
+        * the same per-instance engine_reset_mutex before touching JPEG state; keep
+        * this the outermost lock here to preserve a single lock order — confirm no
+        * other path takes jpeg locks before engine_reset_mutex.
+        */
+       mutex_lock(&vinst->engine_reset_mutex);
        amdgpu_ring_reset_helper_begin(ring, timedout_fence);
        jpeg_v4_0_3_core_stall_reset(ring);
        jpeg_v4_0_3_start_jrbc(ring);
-       return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+       /* capture the result so the mutex is released on every path */
+       r = amdgpu_ring_reset_helper_end(ring, timedout_fence);
+       mutex_unlock(&vinst->engine_reset_mutex);
+       return r;
 }
 
 static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
index 0ce85dbd7abb17acb1958869b3b10db7d7717be7..dd247abce1ab0d2b1f257ff0cc55db4c3f6c48b2 100644 (file)
@@ -1637,6 +1637,60 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
        }
 }
 
+/* Top half of the JPEG-over-VCN-reset handling: quiesce every JPEG ring on
+ * this instance before the VCN (whole tile) reset.  Stops the scheduler
+ * workqueues and best-effort waits for outstanding fences; anything still
+ * pending after the timeout is forced to completion in the bottom helper.
+ * Always returns 0.
+ */
+static int vcn_v4_0_3_reset_jpeg_pre_helper(struct amdgpu_device *adev, int inst)
+{
+       struct amdgpu_ring *ring;
+       uint32_t wait_seq;
+       int i;
+
+       for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) {
+               ring = &adev->jpeg.inst[inst].ring_dec[i];
+
+               drm_sched_wqueue_stop(&ring->sched);
+               /* Get the last emitted fence sequence.  Note: sync_seq is the
+                * last emitted seqno; last_seq only tracks what has already
+                * signalled, so polling on it would be a no-op.
+                */
+               wait_seq = ring->fence_drv.sync_seq;
+               /* nothing was ever emitted on this ring, nothing to wait for */
+               if (!wait_seq)
+                       continue;
+
+               /* if jobs are still pending after the timeout,
+                * we'll handle them in the bottom helper
+                */
+               amdgpu_fence_wait_polling(ring, wait_seq, adev->video_timeout);
+       }
+
+       return 0;
+}
+
+/* Bottom half of the JPEG-over-VCN-reset handling: after the VCN reset,
+ * force completion of any fences left over from the pre helper, re-arm the
+ * doorbells, re-test each ring and restart its scheduler workqueue.
+ *
+ * The sched workqueue is restarted unconditionally — it was stopped in the
+ * pre helper, and an early return on a ring-test failure would otherwise
+ * leave this ring (and every remaining ring) permanently wedged.  The first
+ * error is recorded and returned after all rings have been restored.
+ */
+static int vcn_v4_0_3_reset_jpeg_post_helper(struct amdgpu_device *adev, int inst)
+{
+       struct amdgpu_ring *ring;
+       int i, r = 0, ret;
+
+       for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) {
+               ring = &adev->jpeg.inst[inst].ring_dec[i];
+               /* Force completion of any remaining jobs */
+               amdgpu_fence_driver_force_completion(ring);
+
+               /* re-arm the doorbell; the tile-wide reset cleared it */
+               if (ring->use_doorbell)
+                       WREG32_SOC15_OFFSET(
+                               VCN, GET_INST(VCN, inst),
+                               regVCN_JPEG_DB_CTRL,
+                               (ring->pipe ? (ring->pipe - 0x15) : 0),
+                               ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+                               VCN_JPEG_DB_CTRL__EN_MASK);
+
+               ret = amdgpu_ring_test_helper(ring);
+               if (ret && !r)
+                       r = ret;
+
+               /* always restart the sched stopped in the pre helper */
+               drm_sched_wqueue_start(&ring->sched);
+
+               DRM_DEV_DEBUG(adev->dev, "JPEG ring %d (inst %d) restored and sched restarted\n",
+                     i, inst);
+       }
+       return r;
+}
+
static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
                                 unsigned int vmid,
                                 struct amdgpu_fence *timedout_fence)
@@ -1645,7 +1699,19 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        int vcn_inst;
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+       bool pg_state = false;
+       /* pg_state == true: JPEG was gated on entry and must be re-gated on exit */
+
+       /* take the vcn reset mutex here because resetting VCN will reset jpeg as well */
+       mutex_lock(&vinst->engine_reset_mutex);
+       mutex_lock(&adev->jpeg.jpeg_pg_lock);
+       /* Ensure JPEG is powered on during reset if currently gated */
+       if (adev->jpeg.cur_state == AMD_PG_STATE_GATE) {
+               amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+                                                      AMD_PG_STATE_UNGATE);
+               pg_state = true;
+       }
 
+       /* top helper: stop the JPEG scheds and wait for outstanding fences */
+       vcn_v4_0_3_reset_jpeg_pre_helper(adev, ring->me);
        amdgpu_ring_reset_helper_begin(ring, timedout_fence);
 
        vcn_inst = GET_INST(VCN, ring->me);
@@ -1653,7 +1719,12 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
 
        if (r) {
                DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
-               return r;
+               /* Restore JPEG power gating state if it was originally gated */
+               if (pg_state)
+                       amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+                                                              AMD_PG_STATE_GATE);
+               mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+               goto unlock;
        }
 
        /* This flag is not set for VF, assumed to be disabled always */
@@ -1662,7 +1733,25 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring,
        vcn_v4_0_3_hw_init_inst(vinst);
        vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
 
-       return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+       r = amdgpu_ring_reset_helper_end(ring, timedout_fence);
+       if (r) {
+               if (pg_state)
+                       amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+                                                              AMD_PG_STATE_GATE);
+               mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+               goto unlock;
+       }
+
+       /* bottom helper: force fence completion, re-init the JPEG rings and
+        * restart their scheds.
+        * NOTE(review): the pg-restore + jpeg_pg_lock unlock sequence is
+        * repeated on three exit paths; a second cleanup label would
+        * consolidate it — confirm against the elided hunk context first.
+        */
+       r = vcn_v4_0_3_reset_jpeg_post_helper(adev, ring->me);
+       if (pg_state)
+               amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+                                                      AMD_PG_STATE_GATE);
+       mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+
+unlock:
+       mutex_unlock(&vinst->engine_reset_mutex);
+
+       return r;
 }
 
 static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {