From d3336c935ecebb0185cce50848d9fdea665085f2 Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Tue, 2 Dec 2025 16:09:10 +0800 Subject: [PATCH] drm/amd/ras: Support physical address conversion Support converting a physical address to the corresponding pages in the current NPS mode in uniras. Signed-off-by: Jinzhou Su Reviewed-by: YiPeng Chai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c | 15 ++++++++ .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h | 2 ++ drivers/gpu/drm/amd/ras/rascore/ras.h | 2 ++ drivers/gpu/drm/amd/ras/rascore/ras_core.c | 23 +++++++++++++ drivers/gpu/drm/amd/ras/rascore/ras_umc.c | 34 +++++++++++++------ drivers/gpu/drm/amd/ras/rascore/ras_umc.h | 3 ++ 6 files changed, 68 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index b86638fe0f32c..59c0636259202 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -671,3 +671,18 @@ int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev) amdgpu_ras_process_post_reset(adev); return 0; } + +int amdgpu_ras_mgr_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t addr, uint64_t *nps_page_addr, uint32_t max_page_count) +{ + struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); + + if (!amdgpu_ras_mgr_is_ready(adev)) + return -EPERM; + + if (!nps_page_addr || !max_page_count) + return -EINVAL; + + return ras_core_convert_soc_pa_to_cur_nps_pages(ras_mgr->ras_core, + addr, nps_page_addr, max_page_count); +} diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h index 080ac84fc5a5a..23c411c982319 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -81,4 +81,6 @@ int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev, void *output, uint32_t out_size); int amdgpu_ras_mgr_pre_reset(struct amdgpu_device *adev); 
int amdgpu_ras_mgr_post_reset(struct amdgpu_device *adev); +int amdgpu_ras_mgr_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t addr, uint64_t *nps_page_addr, uint32_t max_page_count); #endif diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h b/drivers/gpu/drm/amd/ras/rascore/ras.h index 3396b2e0949df..71d807e8d2a7b 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras.h @@ -367,4 +367,6 @@ int ras_core_event_notify(struct ras_core_context *ras_core, enum ras_notify_event event_id, void *data); int ras_core_get_device_system_info(struct ras_core_context *ras_core, struct device_system_info *dev_info); +int ras_core_convert_soc_pa_to_cur_nps_pages(struct ras_core_context *ras_core, + uint64_t soc_pa, uint64_t *page_pfn, uint32_t max_pages); #endif diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c index 01122b55c98ab..7e70560b590bd 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c @@ -601,3 +601,26 @@ int ras_core_get_device_system_info(struct ras_core_context *ras_core, return -RAS_CORE_NOT_SUPPORTED; } + +int ras_core_convert_soc_pa_to_cur_nps_pages(struct ras_core_context *ras_core, + uint64_t soc_pa, uint64_t *page_pfn, uint32_t max_pages) +{ + struct eeprom_umc_record record; + uint32_t cur_nps_mode; + int count = 0; + + if (!ras_core || !page_pfn || !max_pages) + return -EINVAL; + + cur_nps_mode = ras_core_get_curr_nps_mode(ras_core); + if (!cur_nps_mode || cur_nps_mode > AMDGPU_NPS8_PARTITION_MODE) + return -EINVAL; + + memset(&record, 0, sizeof(record)); + record.cur_nps_retired_row_pfn = RAS_ADDR_TO_PFN(soc_pa); + + count = ras_umc_convert_record_to_nps_pages(ras_core, + &record, cur_nps_mode, page_pfn, max_pages); + + return count; +} diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c index a0fdc3fda7616..b19c26f6feaf7 100644 --- 
a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c @@ -154,22 +154,36 @@ int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core) return 0; } +int ras_umc_convert_record_to_nps_pages(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps, + uint64_t *page_pfn, uint32_t max_pages) +{ + int count = 0; + struct ras_umc *ras_umc = &ras_core->ras_umc; + + if (!page_pfn || !max_pages) + return -EINVAL; + + if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) + count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core, + record, nps, page_pfn, max_pages); + + return count; +} + static void ras_umc_reserve_eeprom_record(struct ras_core_context *ras_core, struct eeprom_umc_record *record) { - struct ras_umc *ras_umc = &ras_core->ras_umc; uint64_t page_pfn[16]; int count = 0, i; memset(page_pfn, 0, sizeof(page_pfn)); - if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) { - count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core, + count = ras_umc_convert_record_to_nps_pages(ras_core, record, record->cur_nps, page_pfn, ARRAY_SIZE(page_pfn)); - if (count <= 0) { - RAS_DEV_ERR(ras_core->dev, - "Fail to convert error address! count:%d\n", count); - return; - } + if (count <= 0) { + RAS_DEV_ERR(ras_core->dev, + "Fail to convert error address! 
count:%d\n", count); + return; } /* Reserve memory */ @@ -367,10 +381,8 @@ static int ras_umc_update_eeprom_ram_data(struct ras_core_context *ras_core, } memset(page_pfn, 0, sizeof(page_pfn)); - if (ras_umc->ip_func && ras_umc->ip_func->eeprom_record_to_nps_pages) - count = ras_umc->ip_func->eeprom_record_to_nps_pages(ras_core, + count = ras_umc_convert_record_to_nps_pages(ras_core, bps, bps->cur_nps, page_pfn, ARRAY_SIZE(page_pfn)); - if (count > 0) { for (j = 0; j < count; j++) { bps->cur_nps_retired_row_pfn = page_pfn[j]; diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.h b/drivers/gpu/drm/amd/ras/rascore/ras_umc.h index 7d9e779d8c4c6..3aeb04977d53b 100644 --- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.h +++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.h @@ -163,4 +163,7 @@ int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr); int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core, uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa); +int ras_umc_convert_record_to_nps_pages(struct ras_core_context *ras_core, + struct eeprom_umc_record *record, uint32_t nps, + uint64_t *page_pfn, uint32_t max_pages); #endif -- 2.47.3