From 7f34ddf77d30959fcc24cd279074f7e0d4f732df Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 23 Jul 2025 19:04:17 +0800 Subject: [PATCH] drm/amdgpu: add ras_eeprom_read_idx interface PMFW will manage RAS eeprom data by itself, add new interface to read eeprom data via PMFW, we can read part of records by setting index. v2: use IPID parse interface. pa is not used and set it to a fixed value. v3: optimize the null pointer check for IPID parse interface. Signed-off-by: Tao Zhou Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 51 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 4 ++ 2 files changed, 55 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index d7e2a81bc2743..9854238ce7bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -970,6 +970,50 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, return res; } +int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, u32 rec_idx, + const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint64_t ts, end_idx; + int i, ret; + u64 mca, ipid; + + if (!amdgpu_ras_smu_eeprom_supported(adev)) + return 0; + + if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse) + return -EOPNOTSUPP; + + end_idx = rec_idx + num; + for (i = rec_idx; i < end_idx; i++) { + ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, i, &mca); + if (ret) + return ret; + + ret = amdgpu_ras_smu_get_badpage_ipid(adev, i, &ipid); + if (ret) + return ret; + + ret = amdgpu_ras_smu_get_timestamp(adev, i, &ts); + if (ret) + return ret; + + record[i - rec_idx].address = mca; + /* retired_page (pa) is unused now */ + record[i - rec_idx].retired_page = 0x1ULL; + record[i - rec_idx].ts = ts; + record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + record[i - rec_idx].cu = 0; + + adev->umc.ras->mca_ipid_parse(adev, ipid, NULL, + (uint32_t *)&(record[i - rec_idx].mem_channel), + (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL); + } + + return 0; +} + /** * amdgpu_ras_eeprom_read -- read EEPROM * @control: pointer to control structure @@ -991,6 +1035,9 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, u8 *buf, *pp; u32 g0, g1; + if (amdgpu_ras_smu_eeprom_supported(adev)) + return amdgpu_ras_eeprom_read_idx(control, record, 0, num); + if (!__is_ras_eeprom_supported(adev)) return 0; @@ -1162,6 +1209,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf, int res = -EFAULT; size_t data_len; + /* pmfw manages eeprom data by itself */ + if (amdgpu_ras_smu_eeprom_supported(adev)) + return 0; + mutex_lock(&control->ras_tbl_mutex); /* We want *pos - data_len > 0, which means there's diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index cfbd402ddea2f..e881007f715b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -186,6 +186,10 @@ int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev, int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev, uint32_t *result); +int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, u32 rec_idx, + const u32 num); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; -- 2.47.3