]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amd/pm: implement dpm sdma reset function
author Jiadong Zhu <Jiadong.Zhu@amd.com>
Fri, 27 Sep 2024 09:55:26 +0000 (17:55 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 10 Dec 2024 15:26:45 +0000 (10:26 -0500)
Implement sdma soft reset by sending MSG_ResetSDMA on smu 13.0.6.

v2: Add firmware version for the reset message.
v3: Add ip version check. Print inst_mask on failure.

Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/pm/amdgpu_dpm.c
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

index 9dc82f4d7c937aa6e021db4212d8deed025c4fbf..9e7a652d119bcac8056e532469d85c8b0b3da6a5 100644 (file)
@@ -700,6 +700,21 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
        return ret;
 }
 
+int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask)
+{
+       struct smu_context *smu = adev->powerplay.pp_handle;
+       int ret;
+       /*
+        * Ask the SMU to reset SDMA, passing inst_mask through unchanged to
+        * smu_reset_sdma(). Returns -EOPNOTSUPP when is_support_sw_smu() is
+        * false; otherwise returns whatever smu_reset_sdma() returns.
+        */
+       if (!is_support_sw_smu(adev))
+               return -EOPNOTSUPP;
+       /* adev->pm.mutex is held for the duration of the SMU call. */
+       mutex_lock(&adev->pm.mutex);
+       ret = smu_reset_sdma(smu, inst_mask);
+       mutex_unlock(&adev->pm.mutex);
+
+       return ret;
+}
+
 int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
                                  enum pp_clock_type type,
                                  uint32_t *min,
index 363af8990aa25762cc2d8d0eaf848d4ec292e9ad..b64aea7cd18892c23886126b38865582b0aaf889 100644 (file)
@@ -601,5 +601,6 @@ int amdgpu_dpm_set_pm_policy(struct amdgpu_device *adev, int policy_type,
                             int policy_level);
 ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev,
                                      enum pp_pm_policy p_type, char *buf);
+int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask);
 
 #endif
index 21bd635bcdfc1564961a747a4043b290045e8e23..6d22c7783898b06e4cd9b1be35f13ad38415a33a 100644 (file)
@@ -3895,3 +3895,13 @@ int smu_send_rma_reason(struct smu_context *smu)
 
        return ret;
 }
+
+int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
+{
+       int ret = 0;
+       /*
+        * Dispatch to the ASIC-specific reset_sdma callback when one is
+        * installed in smu->ppt_funcs, and return its result.
+        *
+        * NOTE(review): when ppt_funcs or the callback is missing, nothing
+        * is sent and 0 is returned, so callers cannot tell "reset done"
+        * from "reset not implemented" -- confirm this is intended.
+        */
+       if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma)
+               ret = smu->ppt_funcs->reset_sdma(smu, inst_mask);
+
+       return ret;
+}
index 3925815358ce98120038dfbf84d388cfddaf7baf..c73f84cb511193c209326e70033e641d3a6b2caf 100644 (file)
@@ -1372,6 +1372,11 @@ struct pptable_funcs {
         */
        int (*send_rma_reason)(struct smu_context *smu);
 
+       /**
+        * @reset_sdma: message SMU to soft reset the sdma instances in inst_mask.
+        */
+       int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask);
+
        /**
         * @get_ecc_table:  message SMU to get ECC INFO table.
         */
@@ -1631,6 +1636,7 @@ void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
 int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
 int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
 int smu_send_rma_reason(struct smu_context *smu);
+int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask);
 int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type,
                      int level);
 ssize_t smu_get_pm_policy_info(struct smu_context *smu,
index a299dc4a807149d3e661adc04d96904686c1eb91..e4cd6a0d13dad8f0d1d176acbc5d4d0b96c75e3d 100644 (file)
        __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \
        __SMU_DUMMY_MAP(SelectPstatePolicy), \
        __SMU_DUMMY_MAP(MALLPowerController), \
-       __SMU_DUMMY_MAP(MALLPowerState),
+       __SMU_DUMMY_MAP(MALLPowerState), \
+       __SMU_DUMMY_MAP(ResetSDMA),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index ab3c93ddce46ff2292de6434455bf52173ae4e02..8344f54e6674bfcd410435224c946611bdc813ab 100644 (file)
@@ -193,6 +193,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
        MSG_MAP(SelectPLPDMode,                      PPSMC_MSG_SelectPLPDMode,                  0),
        MSG_MAP(RmaDueToBadPageThreshold,            PPSMC_MSG_RmaDueToBadPageThreshold,        0),
        MSG_MAP(SelectPstatePolicy,                  PPSMC_MSG_SelectPstatePolicy,              0),
+       MSG_MAP(ResetSDMA,                           PPSMC_MSG_ResetSDMA,                       0),
 };
 
 // clang-format on
@@ -2716,6 +2717,27 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
        return ret;
 }
 
+static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
+{
+       struct amdgpu_device *adev = smu->adev;
+       int ret = 0;
+       /*
+        * Send SMU_MSG_ResetSDMA with inst_mask as the message parameter.
+        * Returns 0 when the message is skipped by the gate below; otherwise
+        * returns the result of smu_cmn_send_smc_msg_with_param(), logging
+        * an error that includes inst_mask when that result is non-zero.
+        */
+       /*
+        * The message is only sent on non-APU parts whose MP1 IP version is
+        * 13.0.6 and whose PMFW is 85.121.00 (0x00557900) or newer.
+        * NOTE(review): every other configuration returns 0 without
+        * resetting anything -- confirm callers expect success here.
+        */
+       if ((adev->flags & AMD_IS_APU) ||
+           amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6) ||
+           smu->smc_fw_version < 0x00557900)
+               return 0;
+
+       ret = smu_cmn_send_smc_msg_with_param(smu,
+                                             SMU_MSG_ResetSDMA, inst_mask, NULL);
+       if (ret)
+               dev_err(smu->adev->dev,
+                       "failed to send ResetSDMA event with mask 0x%x\n",
+                       inst_mask);
+
+       return ret;
+}
+
 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
 {
        struct smu_context *smu = adev->powerplay.pp_handle;
@@ -3385,6 +3407,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
        .i2c_fini = smu_v13_0_6_i2c_control_fini,
        .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
        .send_rma_reason = smu_v13_0_6_send_rma_reason,
+       .reset_sdma = smu_v13_0_6_reset_sdma,
 };
 
 void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)