From: Jesse.Zhang Date: Wed, 14 Jan 2026 02:51:52 +0000 (+0800) Subject: drm/amdgpu/vcn5.0.1: rework reset handling X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fab47d2db5ca10c726459753224dd296506e0f97;p=thirdparty%2Fkernel%2Flinux.git drm/amdgpu/vcn5.0.1: rework reset handling Resetting VCN resets the entire tile, including jpeg. When resetting the VCN, we need to ensure that JPEG data blocks are accessible and we also need to handle the JPEG queue. Add a helper function to restore the JPEG queue during the VCN reset. v2: split the jpeg helper in two, in the top helper we can stop the sched workqueues and attempt to wait for any outstanding fences. Then in the bottom helper, we can force completion, re-init the rings, and restart the sched workqueues (Alex) v3: merge patches 4 and 5 into one patch (Alex) Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index ab0bf880d3d8..edecbfe66c79 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -844,10 +844,19 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + int r; + + /* take the vcn reset mutex here because resetting VCN will reset jpeg as well */ + mutex_lock(&vinst->engine_reset_mutex); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - return amdgpu_ring_reset_helper_end(ring, timedout_fence); + r = amdgpu_ring_reset_helper_end(ring, timedout_fence); + mutex_unlock(&vinst->engine_reset_mutex); + return r; } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 8bd457dea4cf..c28c6aff17aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -1301,6 +1301,59 @@ static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v5_0_1_reset_jpeg_pre_helper(struct amdgpu_device *adev, int inst) +{ + struct amdgpu_ring *ring; + uint32_t wait_seq = 0; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + ring = &adev->jpeg.inst[inst].ring_dec[i]; + + drm_sched_wqueue_stop(&ring->sched); + wait_seq = atomic_read(&ring->fence_drv.last_seq); + if (wait_seq) + continue; + + /* if Jobs are still pending after timeout, + * We'll handle them in the bottom helper + */ + amdgpu_fence_wait_polling(ring, wait_seq, adev->video_timeout); + } + + return 0; +} + +static int vcn_v5_0_1_reset_jpeg_post_helper(struct amdgpu_device *adev, int inst) +{ + struct amdgpu_ring *ring; + int i, r = 0; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + ring = &adev->jpeg.inst[inst].ring_dec[i]; + /* Force completion of any remaining jobs */ + amdgpu_fence_driver_force_completion(ring); + + if (ring->use_doorbell) + WREG32_SOC15_OFFSET( + VCN, GET_INST(VCN, inst), + regVCN_JPEG_DB_CTRL, + (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + drm_sched_wqueue_start(&ring->sched); + + DRM_DEV_DEBUG(adev->dev, "JPEG ring %d (inst %d) restored and sched restarted\n", + i, inst); + } + return 0; +} + static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) @@ -1309,6 +1362,18 @@ static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, int vcn_inst; struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; + bool pg_state = false; + + /* take the vcn reset mutex here because resetting VCN will reset jpeg as well */ + mutex_lock(&vinst->engine_reset_mutex); + vcn_v5_0_1_reset_jpeg_pre_helper(adev, ring->me); + mutex_lock(&adev->jpeg.jpeg_pg_lock); + /* Ensure JPEG is powered on during reset if currently gated */ + if (adev->jpeg.cur_state == AMD_PG_STATE_GATE) { + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_UNGATE); + pg_state = true; + } amdgpu_ring_reset_helper_begin(ring, timedout_fence); @@ -1317,13 +1382,37 @@ static int vcn_v5_0_1_ring_reset(struct amdgpu_ring *ring, if (r) { DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r); - return r; + /* Restore JPEG power gating state if it was originally gated */ + if (pg_state) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + goto unlock; } vcn_v5_0_1_hw_init_inst(adev, ring->me); vcn_v5_0_1_start_dpg_mode(vinst, vinst->indirect_sram); - return amdgpu_ring_reset_helper_end(ring, timedout_fence); + r = amdgpu_ring_reset_helper_end(ring, timedout_fence); + if (r) { + if (pg_state) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + goto unlock; + } + + if (pg_state) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG, + AMD_PG_STATE_GATE); + mutex_unlock(&adev->jpeg.jpeg_pg_lock); + + r = vcn_v5_0_1_reset_jpeg_post_helper(adev, ring->me); + +unlock: + mutex_unlock(&vinst->engine_reset_mutex); + + return r; } static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {