From 527e3d40339b228f1c5a81ef17b4d883ead18530 Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Wed, 22 Oct 2025 15:11:42 +0800 Subject: [PATCH] drm/amd/ras: Add CPER ring read for uniras Read CPER raw data from debugfs node "/sys/kernel/debug/dri/*/ amdgpu_ring_cper". Signed-off-by: Xiang Liu Reviewed-by: Tao Zhou Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 5a7bf0661dbfb..011fa47480840 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -770,7 +770,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) "Saved bad pages %d reaches threshold value %d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); - if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev)) + if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) && + amdgpu_cper_generate_bp_threshold_record(adev)) dev_warn(adev->dev, "fail to generate bad page threshold cper records\n"); if ((amdgpu_bad_page_threshold != -1) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index bf1b90a341d8d..cd8873c6931af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -33,6 +33,7 @@ #include #include "amdgpu.h" +#include "amdgpu_ras_mgr.h" #include "atom.h" /* @@ -495,6 +496,61 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, */ #if defined(CONFIG_DEBUG_FS) +static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf, + size_t size, loff_t *offset) +{ + const uint8_t ring_header_size = 12; + struct amdgpu_ring *ring = file_inode(f)->i_private; + struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) = + kzalloc(sizeof(struct ras_cmd_cper_snapshot_req), GFP_KERNEL); + struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) = + kzalloc(sizeof(struct ras_cmd_cper_snapshot_rsp), GFP_KERNEL); + struct ras_cmd_cper_record_req *record_req __free(kfree) = + kzalloc(sizeof(struct ras_cmd_cper_record_req), GFP_KERNEL); + struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) = + kzalloc(sizeof(struct ras_cmd_cper_record_rsp), GFP_KERNEL); + uint8_t *ring_header __free(kfree) = + kzalloc(ring_header_size, GFP_KERNEL); + uint32_t total_cper_num; + uint64_t start_cper_id; + int r; + + if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp || + !ring_header) + return -ENOMEM; + + if (!(*offset)) { + if (copy_to_user(buf, ring_header, ring_header_size)) + return -EFAULT; + buf += ring_header_size; + } + + r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, + RAS_CMD__GET_CPER_SNAPSHOT, + snapshot_req, sizeof(struct ras_cmd_cper_snapshot_req), + snapshot_rsp, sizeof(struct ras_cmd_cper_snapshot_rsp)); + if (r || !snapshot_rsp->total_cper_num) + return r; + + start_cper_id = snapshot_rsp->start_cper_id; + total_cper_num = snapshot_rsp->total_cper_num; + + record_req->buf_ptr = (uint64_t)(uintptr_t)buf; + record_req->buf_size = size; + record_req->cper_start_id = start_cper_id + *offset; + record_req->cper_num = total_cper_num; + r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD, + record_req, sizeof(struct ras_cmd_cper_record_req), + record_rsp, sizeof(struct ras_cmd_cper_record_rsp)); + if (r) + return r; + + r = *offset ? record_rsp->real_data_size : record_rsp->real_data_size + ring_header_size; + (*offset) += record_rsp->real_cper_num; + + return r; +} + /* Layout of file is 12 bytes consisting of * - rptr * - wptr @@ -511,6 +567,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, loff_t i; int r; + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev)) + return amdgpu_ras_cper_debugfs_read(f, buf, size, pos); + if (*pos & 3 || size & 3) return -EINVAL; -- 2.47.3