From 6ef93f62533e4176f0aa94d125d742b778cee07e Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam
Date: Fri, 5 Dec 2025 17:45:10 +0530
Subject: [PATCH] drm/amd/ras: Reduce stack usage in
 amdgpu_virt_ras_get_cper_records()
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

amdgpu_virt_ras_get_cper_records() was using a large stack array
of ras_log_info pointers. This contributed to the frame size
warning on this function.

Replace the fixed-size stack array:

    struct ras_log_info *trace[MAX_RECORD_PER_BATCH];

with a heap-allocated array using kcalloc().

We free the trace buffer together with out_buf on all exit
paths. If allocation of trace or out_buf fails, we return a
generic RAS error code.

This reduces stack usage and keeps the runtime behaviour
unchanged.

Fixes: stack frame size: 1112 bytes (limit: 1024)

Cc: Tao Zhou
Cc: Hawking Zhang
Cc: Christian König
Cc: Alex Deucher
Signed-off-by: Srinivasan Shanmugam
Reviewed-by: Tao Zhou
Signed-off-by: Alex Deucher
---
 .../drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
index 5e90a187155b3..a754795938647 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
@@ -183,7 +183,7 @@ static int amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core,
 		(struct ras_cmd_cper_record_rsp *)cmd->output_buff_raw;
 	struct ras_log_batch_overview *overview = &virt_ras->batch_mgr.batch_overview;
 	struct ras_cmd_batch_trace_record_rsp *rsp_cache = &virt_ras->batch_mgr.batch_trace;
-	struct ras_log_info *trace[MAX_RECORD_PER_BATCH] = {0};
+	struct ras_log_info **trace;
 	uint32_t offset = 0, real_data_len = 0;
 	uint64_t batch_id;
 	uint8_t *out_buf;
@@ -195,9 +195,15 @@ static int amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core,
 	if (!req->buf_size || !req->buf_ptr || !req->cper_num)
 		return RAS_CMD__ERROR_INVALID_INPUT_DATA;
 
+	trace = kcalloc(MAX_RECORD_PER_BATCH, sizeof(*trace), GFP_KERNEL);
+	if (!trace)
+		return RAS_CMD__ERROR_GENERIC;
+
 	out_buf = kzalloc(req->buf_size, GFP_KERNEL);
-	if (!out_buf)
+	if (!out_buf) {
+		kfree(trace);
 		return RAS_CMD__ERROR_GENERIC;
+	}
 
 	memset(out_buf, 0, req->buf_size);
 
@@ -205,8 +211,9 @@ static int amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core,
 		batch_id = req->cper_start_id + i;
 		if (batch_id >= overview->last_batch_id)
 			break;
-		count = amdgpu_virt_ras_get_batch_records(ras_core, batch_id, trace,
-					ARRAY_SIZE(trace), rsp_cache);
+		count = amdgpu_virt_ras_get_batch_records(ras_core, batch_id,
+							  trace, MAX_RECORD_PER_BATCH,
+							  rsp_cache);
 		if (count > 0) {
 			ret = ras_cper_generate_cper(ras_core, trace, count,
 					&out_buf[offset], req->buf_size - offset, &real_data_len);
@@ -220,6 +227,7 @@ static int amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core,
 	if ((ret && (ret != -ENOMEM)) ||
 	    copy_to_user(u64_to_user_ptr(req->buf_ptr), out_buf, offset)) {
 		kfree(out_buf);
+		kfree(trace);
 		return RAS_CMD__ERROR_GENERIC;
 	}
 
@@ -231,6 +239,7 @@ static int amdgpu_virt_ras_get_cper_records(struct ras_core_context *ras_core,
 	cmd->output_size = sizeof(struct ras_cmd_cper_record_rsp);
 
 	kfree(out_buf);
+	kfree(trace);
 
 	return RAS_CMD__SUCCESS;
 }
-- 
2.47.3