From: Gangliang Xie
Date: Mon, 15 Dec 2025 06:19:34 +0000 (+0800)
Subject: drm/amd/ras: add read func for pmfw eeprom
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=42c46be2ec30df732cea4d4682e8f70795f21cee;p=thirdparty%2Flinux.git

drm/amd/ras: add read func for pmfw eeprom

add read func for pmfw eeprom, and adapt address converting for bad
pages loaded from pmfw eeprom

v2: change label 'Out' to 'out'

Signed-off-by: Tao Zhou
Signed-off-by: Gangliang Xie
Reviewed-by: Tao Zhou
Signed-off-by: Alex Deucher
---

diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h b/drivers/gpu/drm/amd/ras/rascore/ras.h
index 4ceb72d24e355..04f9e09884aa9 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
@@ -241,6 +241,7 @@ struct ras_bank_ecc {
 	uint64_t status;
 	uint64_t ipid;
 	uint64_t addr;
+	uint64_t ts;
 };
 
 struct ras_bank_ecc_node {
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
index 01ad6ebab9022..572872ef367b3 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
@@ -241,7 +241,10 @@ static int ras_core_eeprom_recovery(struct ras_core_context *ras_core)
 	int count;
 	int ret;
 
-	count = ras_eeprom_get_record_count(ras_core);
+	if (ras_fw_eeprom_supported(ras_core))
+		count = ras_fw_eeprom_get_record_count(ras_core);
+	else
+		count = ras_eeprom_get_record_count(ras_core);
 	if (!count)
 		return 0;
 
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
index 580dd7b09d00d..79494ad16ee5a 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
@@ -259,3 +259,98 @@ int ras_fw_eeprom_append(struct ras_core_context *ras_core,
 	mutex_unlock(&control->ras_tbl_mutex);
 	return 0;
 }
+
+/**
+ * ras_fw_eeprom_read_idx - read a range of bad page records from pmfw eeprom
+ * @ras_core: ras core context
+ * @record_umc: optional output array with room for @num entries, may be NULL
+ * @ras_ecc: optional output array with room for @num entries, may be NULL
+ * @rec_idx: index of the first record to read
+ * @num: number of records to read
+ *
+ * For every index in [@rec_idx, @rec_idx + @num) the MCA address, IPID and
+ * timestamp are fetched and stored at position (index - @rec_idx) of each
+ * non-NULL output array. When @record_umc is given, the IPID is also parsed
+ * through the UMC mca_ipid_parse callback and the bad channel bitmap is
+ * updated. The whole read runs with ras_tbl_mutex held.
+ *
+ * Return: 0 on success (including @num == 0), -EOPNOTSUPP when the UMC
+ * mca_ipid_parse callback is not available, otherwise the error returned by
+ * the first fetch that fails.
+ */
+int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
+			struct eeprom_umc_record *record_umc,
+			struct ras_bank_ecc *ras_ecc,
+			u32 rec_idx, const u32 num)
+{
+	struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
+	/* ret must start at 0: an empty range never enters the loop below */
+	int i, ret = 0, end_idx;
+	u64 mca, ipid, ts;
+
+	if (!ras_core->ras_umc.ip_func ||
+	    !ras_core->ras_umc.ip_func->mca_ipid_parse)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&control->ras_tbl_mutex);
+
+	end_idx = rec_idx + num;
+	for (i = rec_idx; i < end_idx; i++) {
+		ret = ras_fw_get_badpage_mca_addr(ras_core, i, &mca);
+		if (ret)
+			goto out;
+
+		ret = ras_fw_get_badpage_ipid(ras_core, i, &ipid);
+		if (ret)
+			goto out;
+
+		ret = ras_fw_get_timestamp(ras_core, i, &ts);
+		if (ret)
+			goto out;
+
+		if (record_umc) {
+			record_umc[i - rec_idx].address = mca;
+			/* retired_page (pa) is unused now */
+			record_umc[i - rec_idx].retired_row_pfn = 0x1ULL;
+			record_umc[i - rec_idx].ts = ts;
+			record_umc[i - rec_idx].err_type = RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+			ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, ipid,
+				(uint32_t *)&(record_umc[i - rec_idx].cu),
+				(uint32_t *)&(record_umc[i - rec_idx].mem_channel),
+				(uint32_t *)&(record_umc[i - rec_idx].mcumc_id), NULL);
+
+			/* update bad channel bitmap; shift an unsigned 1 so the top bit is well defined */
+			if ((record_umc[i - rec_idx].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+				!(control->bad_channel_bitmap & (1ULL << record_umc[i - rec_idx].mem_channel))) {
+				control->bad_channel_bitmap |= 1ULL << record_umc[i - rec_idx].mem_channel;
+				control->update_channel_flag = true;
+			}
+		}
+
+		if (ras_ecc) {
+			ras_ecc[i - rec_idx].addr = mca;
+			ras_ecc[i - rec_idx].ipid = ipid;
+			ras_ecc[i - rec_idx].ts = ts;
+		}
+
+	}
+
+out:
+	mutex_unlock(&control->ras_tbl_mutex);
+	return ret;
+}
+
+/**
+ * ras_fw_eeprom_get_record_count - number of records in the pmfw eeprom table
+ * @ras_core: ras core context, may be NULL
+ *
+ * Return: ras_fw_eeprom.ras_num_recs, or 0 when @ras_core is NULL.
+ */
+uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core)
+{
+	if (!ras_core)
+		return 0;
+
+	return ras_core->ras_fw_eeprom.ras_num_recs;
+}
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
index b94d3c9703e34..353977a2371eb 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
@@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core);
 bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core);
 int ras_fw_eeprom_append(struct ras_core_context *ras_core,
 			   struct eeprom_umc_record *record, const u32 num);
+int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
+			struct eeprom_umc_record *record_umc,
+			struct ras_bank_ecc *ras_ecc,
+			u32 rec_idx, const u32 num);
+uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core);
 
 #endif
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
index f7c2cb0a8a0c2..23118f41eb96f 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
@@ -448,17 +448,27 @@ int ras_umc_load_bad_pages(struct ras_core_context *ras_core)
 	uint32_t ras_num_recs;
 	int ret;
 
-	ras_num_recs = ras_eeprom_get_record_count(ras_core);
-	/* no bad page record, skip eeprom access */
-	if (!ras_num_recs ||
-	    ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
-		return 0;
+	if (ras_fw_eeprom_supported(ras_core)) {
+		ras_num_recs = ras_fw_eeprom_get_record_count(ras_core);
+		/* no bad page record, skip eeprom access */
+		if (!ras_num_recs ||
+		    ras_core->ras_fw_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
+			return 0;
+	} else {
+		ras_num_recs = ras_eeprom_get_record_count(ras_core);
+		if (!ras_num_recs ||
+		    ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
+			return 0;
+	}
 
 	bps = kzalloc_objs(*bps, ras_num_recs);
 	if (!bps)
 		return -ENOMEM;
 
-	ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
+	if (ras_fw_eeprom_supported(ras_core))
+		ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs);
+	else
+		ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
 	if (ret) {
 		RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table records!");
 	} else {
@@ -486,7 +496,10 @@ static int ras_umc_save_bad_pages(struct ras_core_context *ras_core)
 	if (!data->bps)
 		return 0;
 
-	eeprom_record_num = ras_eeprom_get_record_count(ras_core);
+	if (ras_fw_eeprom_supported(ras_core))
+		eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core);
+	else
+		eeprom_record_num = ras_eeprom_get_record_count(ras_core);
 	mutex_lock(&ras_umc->umc_lock);
 	save_count = data->count - eeprom_record_num;
 	/* only new entries are saved */
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
index e2792b239bea9..53dc59e4de0c2 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
@@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct ras_core_context *ras_co
 	uint64_t pa = 0;
 	int ret = 0;
 
-	if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) {
+	if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) && !ras_fw_eeprom_supported(ras_core)) {
 		record->cur_nps_retired_row_pfn = EEPROM_RECORD_UMC_ADDR_PFN(record);
 	} else {
 		ret = convert_eeprom_record_to_nps_addr(ras_core,