From: YiPeng Chai Date: Thu, 30 Oct 2025 08:49:14 +0000 (+0800) Subject: drm/amd/ras: Add sriov ras preprocessing before gpu reset X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=73c6c22694430089a6f39f1677fbf598785376d3;p=thirdparty%2Fkernel%2Flinux.git drm/amd/ras: Add sriov ras preprocessing before gpu reset The SR-IOV host may clear all VF commands registered to its auto update list during VF reset, so set blocks_ecc.auto_update_actived to false before VF reset; after VF reset is complete, the RAS_CMD__GET_ALL_BLOCK_ECC_STATUS command will be re-registered to the auto update list of the SR-IOV host. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index 4ce337b6e0e8..cb7fbc791c3c 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -642,6 +642,9 @@ int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev, int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return amdgpu_virt_ras_pre_reset(adev); + if (!amdgpu_ras_mgr_is_ready(adev)) { RAS_DEV_ERR(adev, "Invalid ras suspend!\n"); return -EPERM; @@ -653,6 +656,9 @@ int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev) int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return amdgpu_virt_ras_post_reset(adev); + if (!amdgpu_ras_mgr_is_ready(adev)) { RAS_DEV_ERR(adev, "Invalid ras resume!\n"); return -EPERM; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c index b8d5482e704f..895b68785849 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c @@ -413,3 +413,18 @@ int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev) return 0; } + +int 
amdgpu_virt_ras_pre_reset(struct amdgpu_device *adev) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + struct amdgpu_virt_ras_cmd *virt_ras = + (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd; + + virt_ras->blocks_ecc.auto_update_actived = false; + return 0; +} + +int amdgpu_virt_ras_post_reset(struct amdgpu_device *adev) +{ + return 0; +} diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h index ae7bf67b3a3b..03c3cf8363ca 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h @@ -49,5 +49,6 @@ int amdgpu_virt_ras_hw_init(struct amdgpu_device *adev); int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev); int amdgpu_virt_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd); - +int amdgpu_virt_ras_pre_reset(struct amdgpu_device *adev); +int amdgpu_virt_ras_post_reset(struct amdgpu_device *adev); #endif