]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amd/pm: implement ras_smu_drv interface for smu v13.0.12
author Gangliang Xie <ganglxie@amd.com>
Fri, 12 Sep 2025 04:43:35 +0000 (12:43 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 4 Nov 2025 16:53:58 +0000 (11:53 -0500)
implement ras_smu_drv interface for smu v13.0.12

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h

index 40c0bf85f1d3491693bcdacbae6e18835654b886..3c0b36dd37bf8d7de1723f0fbef4ad1acabba29b 100644 (file)
@@ -503,6 +503,32 @@ struct ras_critical_region {
        uint64_t size;
 };
 
+/*
+ * RAS EEPROM table version, packed into one 32-bit word as two 16-bit
+ * bit-fields.  'minor' is declared first.
+ * NOTE(review): presumably this overlays the raw uint32_t returned through
+ * get_ras_table_version(); bit-field placement is ABI dependent - confirm.
+ */
+struct ras_eeprom_table_version {
+       uint32_t minor    : 16;
+       uint32_t major    : 16;
+};
+
+/*
+ * Callbacks for accessing the RAS EEPROM table through the SMU.
+ * Every callback returns an int status and takes the owning amdgpu device.
+ *
+ * get_ras_table_version: store the table version word in *table_version.
+ * get_badpage_count:     store the bad page count in *count; 'timeout' is
+ *                        passed through to the implementation (the
+ *                        smu v13.0.12 one treats it as a retry window in
+ *                        milliseconds).
+ * get_badpage_mca_addr:  store the 64-bit MCA address of record 'index'
+ *                        in *mca_addr.
+ * set_timestamp:         hand a timestamp to the SMU.
+ * get_timestamp:         store the timestamp of record 'index' in *timestamp.
+ * get_badpage_ipid:      store the 64-bit IPID of record 'index' in *ipid.
+ * erase_ras_table:       request a table erase; *result receives the value
+ *                        reported back by the implementation.
+ */
+struct ras_eeprom_smu_funcs {
+       int (*get_ras_table_version)(struct amdgpu_device *adev,
+                                                       uint32_t *table_version);
+       int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout);
+       int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr);
+       int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp);
+       int (*get_timestamp)(struct amdgpu_device *adev,
+                                                       uint16_t index, uint64_t *timestamp);
+       int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid);
+       int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result);
+};
+
+/*
+ * Feature bits for the RAS/SMU interface.
+ * NOTE(review): presumably these are the bits reported through
+ * ras_smu_drv.ras_smu_feature_flags() - confirm against the callers.
+ */
+enum ras_smu_feature_flags {
+       /* bit 0: RAS EEPROM feature */
+       RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
+};
+
+/*
+ * RAS interface exported by an SMU implementation: the EEPROM access
+ * callbacks plus a hook that writes a 64-bit feature mask to *flags.
+ */
+struct ras_smu_drv {
+       const struct ras_eeprom_smu_funcs *smu_eeprom_funcs;
+       void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags);
+};
+
 struct amdgpu_ras {
        void *ras_mgr;
        /* ras infrastructure */
index f2e3cae43fda2913b448227d8aeb92289ad80864..24aaef1494a468ac80f483e5dfc4f93e0cae223b 100644 (file)
@@ -34,6 +34,7 @@
 #include "amdgpu_fru_eeprom.h"
 #include <linux/pci.h>
 #include "smu_cmn.h"
+#include "amdgpu_ras.h"
 
 #undef MP1_Public
 #undef smnMP1_FIRMWARE_FLAGS
@@ -925,3 +926,131 @@ const struct smu_temp_funcs smu_v13_0_12_temp_funcs = {
        .temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported,
        .get_temp_metrics = smu_v13_0_12_get_temp_metrics,
 };
+
+/*
+ * Send SMU_MSG_GetRASTableVersion (parameter 0) to the SMU and let it
+ * store the reply in *table_version.
+ *
+ * Returns the status of smu_cmn_send_smc_msg_with_param().
+ */
+static int smu_v13_0_12_get_ras_table_version(struct amdgpu_device *adev,
+                                             uint32_t *table_version)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       int rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetRASTableVersion,
+                                            0, table_version);
+       return rc;
+}
+
+/*
+ * Read the bad page count from the SMU into *count.
+ *
+ * While the SMU answers -EBUSY (eeprom is not ready) the request is
+ * re-issued every 10 ms until 'timeout' milliseconds have elapsed since
+ * entry.  The message is always sent at least once, even for timeout == 0.
+ *
+ * Returns the status of the last message sent; -EBUSY if the retry window
+ * expired.
+ */
+static int smu_v13_0_12_get_badpage_count(struct amdgpu_device *adev, uint32_t *count,
+                                         uint32_t timeout)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       uint64_t deadline_ms;
+       int rc;
+
+       deadline_ms = (uint64_t)ktime_to_ms(ktime_get()) + timeout;
+
+       for (;;) {
+               rc = smu_cmn_send_smc_msg_with_param(smu_ctx,
+                               SMU_MSG_GetBadPageCount, 0, count);
+               /* any answer other than "eeprom is not ready" is final */
+               if (rc != -EBUSY)
+                       break;
+
+               mdelay(10);
+               if ((uint64_t)ktime_to_ms(ktime_get()) >= deadline_ms)
+                       break;
+       }
+
+       return rc;
+}
+
+/*
+ * Send SMU_MSG_SetTimestamp to the SMU.
+ *
+ * The message parameter is 32 bits wide, so only the low 32 bits of
+ * 'timestamp' are transmitted; the upper half is dropped by the cast.
+ * No reply value is requested, hence the NULL read argument.
+ *
+ * Returns the status of smu_cmn_send_smc_msg_with_param().
+ */
+static int smu_v13_0_12_set_timestamp(struct amdgpu_device *adev, uint64_t timestamp)
+{
+       struct smu_context *smu = adev->powerplay.pp_handle;
+
+       return smu_cmn_send_smc_msg_with_param(smu,
+                       SMU_MSG_SetTimestamp, (uint32_t)timestamp, NULL);
+}
+
+/*
+ * Send SMU_MSG_GetTimestamp with 'index' as the parameter and widen the
+ * 32-bit reply into *timestamp.
+ *
+ * *timestamp is written only on success.  Returns the status of
+ * smu_cmn_send_smc_msg_with_param().
+ */
+static int smu_v13_0_12_get_timestamp(struct amdgpu_device *adev,
+                                     uint16_t index, uint64_t *timestamp)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       uint32_t raw;
+       int rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetTimestamp,
+                                            index, &raw);
+       if (rc)
+               return rc;
+
+       *timestamp = raw;
+       return 0;
+}
+
+/*
+ * Read the 64-bit IPID for entry 'index' with two SMU_MSG_GetBadPageIpid
+ * messages.  Bits 15:0 of the parameter carry the index; the value OR-ed
+ * in at bit 16 selects the half: 1 for the low dword, 2 for the high dword.
+ *
+ * *ipid is written only when both messages succeed.  Returns the status of
+ * the first failing message, or 0.
+ */
+static int smu_v13_0_12_get_badpage_ipid(struct amdgpu_device *adev,
+                                        uint16_t index, uint64_t *ipid)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       uint32_t sel_lo = index | (1 << 16);
+       uint32_t sel_hi = index | (2 << 16);
+       uint32_t lo, hi;
+       int rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageIpid,
+                                            sel_lo, &lo);
+       if (rc)
+               return rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageIpid,
+                                            sel_hi, &hi);
+       if (rc)
+               return rc;
+
+       *ipid = ((uint64_t)hi << 32) | lo;
+       return 0;
+}
+
+/*
+ * Send SMU_MSG_EraseRasTable (parameter 0) to the SMU; the reply value is
+ * stored in *result.
+ *
+ * Returns the status of smu_cmn_send_smc_msg_with_param().
+ */
+static int smu_v13_0_12_erase_ras_table(struct amdgpu_device *adev,
+                                       uint32_t *result)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       int rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_EraseRasTable,
+                                            0, result);
+       return rc;
+}
+
+/*
+ * Read the 64-bit MCA address for entry 'index' with two
+ * SMU_MSG_GetBadPageMcaAddr messages.  Bits 15:0 of the parameter carry the
+ * index; the value OR-ed in at bit 16 selects the half: 1 for the low
+ * dword, 2 for the high dword.
+ *
+ * *mca_addr is written only when both messages succeed.  Returns the status
+ * of the first failing message, or 0.
+ */
+static int smu_v13_0_12_get_badpage_mca_addr(struct amdgpu_device *adev,
+                                            uint16_t index, uint64_t *mca_addr)
+{
+       struct smu_context *smu_ctx = adev->powerplay.pp_handle;
+       uint32_t sel_lo = index | (1 << 16);
+       uint32_t sel_hi = index | (2 << 16);
+       uint32_t lo, hi;
+       int rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageMcaAddr,
+                                            sel_lo, &lo);
+       if (rc)
+               return rc;
+
+       rc = smu_cmn_send_smc_msg_with_param(smu_ctx, SMU_MSG_GetBadPageMcaAddr,
+                                            sel_hi, &hi);
+       if (rc)
+               return rc;
+
+       *mca_addr = ((uint64_t)hi << 32) | lo;
+       return 0;
+}
+
+/*
+ * smu v13.0.12 implementation of the RAS EEPROM callbacks; every slot of
+ * struct ras_eeprom_smu_funcs is populated.
+ */
+static const struct ras_eeprom_smu_funcs smu_v13_0_12_eeprom_smu_funcs = {
+       .get_ras_table_version = smu_v13_0_12_get_ras_table_version,
+       .get_badpage_count = smu_v13_0_12_get_badpage_count,
+       .get_badpage_mca_addr = smu_v13_0_12_get_badpage_mca_addr,
+       .set_timestamp = smu_v13_0_12_set_timestamp,
+       .get_timestamp = smu_v13_0_12_get_timestamp,
+       .get_badpage_ipid = smu_v13_0_12_get_badpage_ipid,
+       .erase_ras_table = smu_v13_0_12_erase_ras_table,
+};
+
+/*
+ * Report the RAS SMU feature mask for smu v13.0.12.
+ *
+ * Currently no feature bit is advertised: *flags is cleared to 0.
+ * A NULL 'flags' pointer is tolerated and ignored; 'adev' is unused.
+ */
+static void smu_v13_0_12_ras_smu_feature_flags(struct amdgpu_device *adev, uint64_t *flags)
+{
+       if (flags)
+               *flags = 0ULL;
+}
+
+/*
+ * RAS interface object for smu v13.0.12: bundles the EEPROM callback table
+ * with the feature-flag query.  Non-static; declared extern in
+ * smu_v13_0_6_ppt.h.
+ */
+const struct ras_smu_drv smu_v13_0_12_ras_smu_drv = {
+       .smu_eeprom_funcs = &smu_v13_0_12_eeprom_smu_funcs,
+       .ras_smu_feature_flags = smu_v13_0_12_ras_smu_feature_flags,
+};
index ba865ae7eca2a436c6ae24fb4de19b858904bf24..ecec7af8a64f9180898038cd432dd28e2fc5a07a 100644 (file)
@@ -105,6 +105,7 @@ int smu_v13_0_12_get_npm_data(struct smu_context *smu,
 extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
 extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
 extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
+/* RAS SMU interface object, defined in smu_v13_0_12_ppt.c */
+extern const struct ras_smu_drv smu_v13_0_12_ras_smu_drv;
 
 #if defined(SWSMU_CODE_LAYER_L2)
 #include "smu_cmn.h"