]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amd/ras: Add CPER ring read for uniras
author Xiang Liu <xiang.liu@amd.com>
Wed, 22 Oct 2025 07:11:42 +0000 (15:11 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 4 Nov 2025 16:33:54 +0000 (11:33 -0500)
Read CPER raw data from debugfs node "/sys/kernel/debug/dri/*/
amdgpu_ring_cper".

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

index 5a7bf0661dbfbec0580a0a34812f5a7aedd53bc7..011fa474808406559cb32fd371e6cb0711cba2fd 100644 (file)
@@ -770,7 +770,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
                        "Saved bad pages %d reaches threshold value %d\n",
                        control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
 
-               if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+               if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+                   amdgpu_cper_generate_bp_threshold_record(adev))
                        dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
 
                if ((amdgpu_bad_page_threshold != -1) &&
index bf1b90a341d8d7a826ab08dbe7e54011a9a3f740..cd8873c6931af1f4f209f44c10e3a10b2e6aa9d2 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
+#include "amdgpu_ras_mgr.h"
 #include "atom.h"
 
 /*
@@ -495,6 +496,61 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
  */
 #if defined(CONFIG_DEBUG_FS)
 
+/*
+ * amdgpu_ras_cper_debugfs_read - read CPER records through the RAS manager
+ *
+ * @f: debugfs file; its inode's i_private is the struct amdgpu_ring
+ * @buf: user buffer receiving the data
+ * @size: capacity of @buf in bytes
+ * @offset: read cursor, see below
+ *
+ * On the first read (*offset == 0) a zero-filled 12-byte ring header is
+ * written in front of the record data.
+ *
+ * Note that *offset is NOT a byte position here: it is added to the
+ * snapshot's start_cper_id to pick the first record to fetch, and it is
+ * advanced by the number of records the RAS manager reports as returned.
+ *
+ * Returns the number of bytes produced (record data, plus the header on the
+ * first read), 0 when the snapshot reports no records, -EINVAL when the
+ * first read cannot hold the header, -ENOMEM or -EFAULT on allocation or
+ * user-copy failure, or the error returned by the RAS command handler.
+ */
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+                                           size_t size, loff_t *offset)
+{
+       const uint8_t ring_header_size = 12;
+       struct amdgpu_ring *ring = file_inode(f)->i_private;
+       /* All scratch buffers are released automatically on every return path. */
+       struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
+               kzalloc(sizeof(struct ras_cmd_cper_snapshot_req), GFP_KERNEL);
+       struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
+               kzalloc(sizeof(struct ras_cmd_cper_snapshot_rsp), GFP_KERNEL);
+       struct ras_cmd_cper_record_req *record_req __free(kfree) =
+               kzalloc(sizeof(struct ras_cmd_cper_record_req), GFP_KERNEL);
+       struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
+               kzalloc(sizeof(struct ras_cmd_cper_record_rsp), GFP_KERNEL);
+       uint8_t *ring_header __free(kfree) =
+               kzalloc(ring_header_size, GFP_KERNEL);
+       uint32_t total_cper_num;
+       uint64_t start_cper_id;
+       int r;
+
+       if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
+           !ring_header)
+               return -ENOMEM;
+
+       if (!(*offset)) {
+               /* Never write the header past the end of a short user buffer. */
+               if (size < ring_header_size)
+                       return -EINVAL;
+               if (copy_to_user(buf, ring_header, ring_header_size))
+                       return -EFAULT;
+               buf += ring_header_size;
+               /* The header consumed part of the buffer; shrink what is left. */
+               size -= ring_header_size;
+       }
+
+       r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+                                         RAS_CMD__GET_CPER_SNAPSHOT,
+                                         snapshot_req, sizeof(struct ras_cmd_cper_snapshot_req),
+                                         snapshot_rsp, sizeof(struct ras_cmd_cper_snapshot_rsp));
+       /* No records: r is 0 here, which the caller sees as end of file. */
+       if (r || !snapshot_rsp->total_cper_num)
+               return r;
+
+       start_cper_id = snapshot_rsp->start_cper_id;
+       total_cper_num = snapshot_rsp->total_cper_num;
+
+       /*
+        * The user pointer is handed to the RAS manager as an integer.
+        * NOTE(review): the handler presumably copies the records to user
+        * space itself and bounds its writes by buf_size - confirm.
+        */
+       record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+       record_req->buf_size = size;
+       record_req->cper_start_id = start_cper_id + *offset;
+       record_req->cper_num = total_cper_num;
+       r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+                                         record_req, sizeof(struct ras_cmd_cper_record_req),
+                                         record_rsp, sizeof(struct ras_cmd_cper_record_rsp));
+       if (r)
+               return r;
+
+       /* Account for the header bytes only on the read that emitted them. */
+       r = *offset ? record_rsp->real_data_size : record_rsp->real_data_size + ring_header_size;
+       /* Advance the cursor in units of records, not bytes. */
+       (*offset) += record_rsp->real_cper_num;
+
+       return r;
+}
+
+
 /* Layout of file is 12 bytes consisting of
  * - rptr
  * - wptr
@@ -511,6 +567,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
        loff_t i;
        int r;
 
+       if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev))
+               return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
+
        if (*pos & 3 || size & 3)
                return -EINVAL;