From fab47d2db5ca10c726459753224dd296506e0f97 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Wed, 14 Jan 2026 10:51:52 +0800 Subject: [PATCH] drm/amdgpu/vcn5.0.1: rework reset handling Resetting VCN resets the entire tile, including jpeg. When resetting the VCN, we need to ensure that JPEG data blocks are accessible and we also need to handle the JPEG queue. Add a helper function to restore the JPEG queue during the VCN reset. v2: split the jpeg helper in two, in the top helper we can stop the sched workqueues and attempt to wait for any outstanding fences. Then in the bottom helper, we can force completion, re-init the rings, and restart the sched workqueues (Alex) v3: merge patches 4 and 5 into one patch (Alex) Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 11 ++- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 93 +++++++++++++++++++++++- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index ab0bf880d3d8a..edecbfe66c79a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -844,10 +844,19 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; + + /* take the vcn reset mutex here because resetting VCN will reset jpeg as well */ + mutex_lock(&vinst->engine_reset_mutex); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - return amdgpu_ring_reset_helper_end(ring, timedout_fence); + r = amdgpu_ring_reset_helper_end(ring, timedout_fence); + mutex_unlock(&vinst->engine_reset_mutex); + return r; } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 8bd457dea4cff..c28c6aff17aaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -1301,6 +1301,59 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_1_reset_jpeg_pre_helper(struct amdgpu_device *adev, int inst) +{ + struct amdgpu_ring *ring; + uint32_t wait_seq = 0; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + ring = &adev->jpeg.inst[inst].ring_dec[i]; + + drm_sched_wqueue_stop(&ring->sched); + wait_seq = atomic_read(&ring->fence_drv.last_seq); + if (wait_seq) + continue; + + /* if Jobs are still pending after timeout, + * We'll handle them in the bottom helper + */ + amdgpu_fence_wait_polling(ring, wait_seq, adev->video_timeout); + } + + return 0; +} + +static int vcn_v5_0_1_reset_jpeg_post_helper(struct amdgpu_device *adev, int inst) +{ + struct amdgpu_ring *ring; + int i, r = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + ring = &adev->jpeg.inst[inst].ring_dec[i]; + /* Force completion of any remaining jobs */ + amdgpu_fence_driver_force_completion(ring); + + if (ring->use_doorbell) + WREG32_SOC15_OFFSET( + VCN, GET_INST(VCN, inst), + regVCN_JPEG_DB_CTRL, + (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + drm_sched_wqueue_start(&ring->sched); + + DRM_DEV_DEBUG(adev->dev, "JPEG ring %d (inst %d) restored and sched restarted\n", + i, inst); + } + return 0; +} + static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) @@ -1309,6 +1362,18 @@ static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, int vcn_inst; struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + bool pg_state = false; + + /* take the vcn reset mutex here because resetting VCN will reset jpeg as well */ + mutex_lock(&vinst->engine_reset_mutex); + vcn_v5_0_1_reset_jpeg_pre_helper(adev, ring->me); + mutex_lock(&adev->jpeg.jpeg_pg_lock); + /* Ensure JPEG is powered on during reset if currently gated */ + if (adev->jpeg.cur_state == AMD_PG_STATE_GATE) { + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_UNGATE); + pg_state = true; + } amdgpu_ring_reset_helper_begin(ring, timedout_fence); @@ -1317,13 +1382,37 @@ static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, if (r) { DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); - return r; + /* Restore JPEG power gating state if it was originally gated */ + if (pg_state) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + goto unlock; } vcn_v5_0_1_hw_init_inst(adev, ring->me); vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram); - return amdgpu_ring_reset_helper_end(ring, timedout_fence); + r = amdgpu_ring_reset_helper_end(ring, timedout_fence); + if (r) { + if (pg_state) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + goto unlock; + } + + if (pg_state) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + + r = vcn_v5_0_1_reset_jpeg_post_helper(adev, ring->me); + +unlock: + mutex_unlock(&vinst->engine_reset_mutex); + + return r; } static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { -- 2.47.3