git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amd/ras: Add sriov ras preprocessing before gpu reset
author: YiPeng Chai <YiPeng.Chai@amd.com>
Thu, 30 Oct 2025 08:49:14 +0000 (16:49 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 8 Dec 2025 18:56:34 +0000 (13:56 -0500)
The SR-IOV host may clear all VF commands registered to its auto
update list during a VF reset. Set blocks_ecc.auto_update_actived
to false before the VF reset; after the VF reset is complete, the
RAS_CMD__GET_ALL_BLOCK_ECC_STATUS command will be re-registered
to the auto update list of the SR-IOV host.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h

index 4ce337b6e0e858f792c4160f2f6e7bae201c7b70..cb7fbc791c3cf5978a6f16bb4e8f27f4dadc93dd 100644 (file)
@@ -642,6 +642,9 @@ int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev,
 
 int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev)
 {
+       if (amdgpu_sriov_vf(adev))
+               return amdgpu_virt_ras_pre_reset(adev);
+
        if (!amdgpu_ras_mgr_is_ready(adev)) {
                RAS_DEV_ERR(adev, "Invalid ras suspend!\n");
                return -EPERM;
@@ -653,6 +656,9 @@ int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev)
 
 int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev)
 {
+       if (amdgpu_sriov_vf(adev))
+               return amdgpu_virt_ras_post_reset(adev);
+
        if (!amdgpu_ras_mgr_is_ready(adev)) {
                RAS_DEV_ERR(adev, "Invalid ras resume!\n");
                return -EPERM;
index b8d5482e704f629f7a8d3f498e36170e01a4b44f..895b68785849c349251a52356fb196990595fad6 100644 (file)
@@ -413,3 +413,18 @@ int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev)
 
        return 0;
 }
+
+int amdgpu_virt_ras_pre_reset(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
+       struct amdgpu_virt_ras_cmd *virt_ras =
+               (struct amdgpu_virt_ras_cmd *)ras_mgr->virt_ras_cmd;
+
+       virt_ras->blocks_ecc.auto_update_actived = false;
+       return 0;
+}
+
+int amdgpu_virt_ras_post_reset(struct amdgpu_device *adev)
+{
+       return 0;
+}
index ae7bf67b3a3b20f077ea36eb2e1da81c88a3190e..03c3cf8363ca495cd798e0bdafd26fa8d66b50d7 100644 (file)
@@ -49,5 +49,6 @@ int amdgpu_virt_ras_hw_init(struct amdgpu_device *adev);
 int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev);
 int amdgpu_virt_ras_handle_cmd(struct ras_core_context *ras_core,
                struct ras_cmd_ctx *cmd);
-
+int amdgpu_virt_ras_pre_reset(struct amdgpu_device *adev);
+int amdgpu_virt_ras_post_reset(struct amdgpu_device *adev);
 #endif