From: YiPeng Chai Date: Thu, 30 Oct 2025 07:07:11 +0000 (+0800) Subject: drm/amd/ras: sriov supports handling VF ras commands. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1855fc6b09bc8f42c29cc4b6bf576dd08d2a6564;p=thirdparty%2Fkernel%2Flinux.git drm/amd/ras: sriov supports handling VF ras commands. Add basic framework code to sriov to handle VF ras commands. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/Makefile b/drivers/gpu/drm/amd/ras/ras_mgr/Makefile index 5e5a2cfa40688..6759635aadc26 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/Makefile +++ b/drivers/gpu/drm/amd/ras/ras_mgr/Makefile @@ -23,6 +23,7 @@ RAS_MGR_FILES = amdgpu_ras_sys.o \ amdgpu_ras_eeprom_i2c.o \ amdgpu_ras_mp1_v13_0.o \ amdgpu_ras_cmd.o \ + amdgpu_virt_ras_cmd.o \ amdgpu_ras_process.o \ amdgpu_ras_nbio_v7_9.o diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c index 78419b7f7729a..cb6498c308345 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c @@ -28,6 +28,7 @@ #include "ras_sys.h" #include "amdgpu_ras_cmd.h" #include "amdgpu_ras_mgr.h" +#include "amdgpu_virt_ras_cmd.h" /* inject address is 52 bits */ #define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) @@ -252,6 +253,7 @@ int amdgpu_ras_handle_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx int amdgpu_ras_submit_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx *cmd) { + struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev; struct ras_core_context *cmd_core = ras_core; int timeout = 60; int res; @@ -259,6 +261,9 @@ int amdgpu_ras_submit_cmd(struct ras_core_context *ras_core, struct ras_cmd_ctx cmd->cmd_res = RAS_CMD__ERROR_INVALID_CMD; cmd->output_size = 0; + if (amdgpu_sriov_vf(adev)) + return amdgpu_virt_ras_handle_cmd(cmd_core, cmd); + if 
(!ras_core_is_enabled(cmd_core)) return RAS_CMD__ERROR_ACCESS_DENIED; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index afe8135b62586..4ce337b6e0e85 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -27,6 +27,7 @@ #include "ras_sys.h" #include "amdgpu_ras_mgr.h" #include "amdgpu_ras_cmd.h" +#include "amdgpu_virt_ras_cmd.h" #include "amdgpu_ras_process.h" #include "amdgpu_ras_eeprom_i2c.h" #include "amdgpu_ras_mp1_v13_0.h" @@ -316,6 +317,16 @@ static int amdgpu_ras_mgr_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_ras_process_init(adev); ras_core_sw_init(ras_mgr->ras_core); amdgpu_ras_mgr_init_event_mgr(ras_mgr->ras_core); + + if (amdgpu_sriov_vf(adev)) { + ret = amdgpu_virt_ras_sw_init(adev); + if (ret) { + RAS_DEV_ERR(adev, + "Virt ras sw_init failed! ret:%d\n", ret); + goto err; + } + } + return 0; err: @@ -335,6 +346,9 @@ static int amdgpu_ras_mgr_sw_fini(struct amdgpu_ip_block *ip_block) if (!ras_mgr) return 0; + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_ras_sw_fini(adev); + amdgpu_ras_process_fini(adev); ras_core_sw_fini(ras_mgr->ras_core); ras_core_destroy(ras_mgr->ras_core); @@ -359,9 +373,13 @@ static int amdgpu_ras_mgr_hw_init(struct amdgpu_ip_block *ip_block) if (!ras_mgr || !ras_mgr->ras_core) return -EINVAL; - ret = ras_core_hw_init(ras_mgr->ras_core); + if (amdgpu_sriov_vf(adev)) + ret = amdgpu_virt_ras_hw_init(adev); + else + ret = ras_core_hw_init(ras_mgr->ras_core); + if (ret) { - RAS_DEV_ERR(adev, "Failed to initialize ras core!\n"); + RAS_DEV_ERR(adev, "Failed to initialize hw_init!, ret:%d\n", ret); return ret; } @@ -385,7 +403,10 @@ static int amdgpu_ras_mgr_hw_fini(struct amdgpu_ip_block *ip_block) if (!ras_mgr || !ras_mgr->ras_core) return -EINVAL; - ras_core_hw_fini(ras_mgr->ras_core); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_ras_hw_fini(adev); + else + ras_core_hw_fini(ras_mgr->ras_core); 
ras_mgr->ras_is_ready = false; @@ -423,9 +444,6 @@ int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable) if (!ras_mgr || !ras_mgr->ras_core) return -EPERM; - if (amdgpu_sriov_vf(adev)) - return -EPERM; - RAS_DEV_INFO(adev, "Enable amdgpu unified ras!"); return ras_core_set_status(ras_mgr->ras_core, enable); } @@ -437,9 +455,6 @@ bool amdgpu_uniras_enabled(struct amdgpu_device *adev) if (!ras_mgr || !ras_mgr->ras_core) return false; - if (amdgpu_sriov_vf(adev)) - return false; - return ras_core_is_enabled(ras_mgr->ras_core); } @@ -603,7 +618,7 @@ int amdgpu_ras_mgr_handle_ras_cmd(struct amdgpu_device *adev, uint32_t ctx_buf_size = PAGE_SIZE; int ret; - if (!amdgpu_ras_mgr_is_ready(adev)) + if (!amdgpu_sriov_vf(adev) && !amdgpu_ras_mgr_is_ready(adev)) return -EPERM; cmd_ctx = kzalloc(ctx_buf_size, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h index 8fb7eb4b8f132..080ac84fc5a5a 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -50,6 +50,7 @@ struct amdgpu_ras_mgr { struct ras_core_context *ras_core; struct delayed_work retire_page_dwork; struct ras_event_manager ras_event_mgr; + void *virt_ras_cmd; uint64_t last_poison_consumption_seqno; bool ras_is_ready; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c new file mode 100644 index 0000000000000..8c4be1af76b23 --- /dev/null +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include /* NOTE(review): header name missing here - apparently lost when this text was extracted; restore it from the original commit */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "ras_sys.h"
+#include "amdgpu_ras_cmd.h"
+#include "amdgpu_virt_ras_cmd.h"
+#include "amdgpu_ras_mgr.h"
+/**
+ * amdgpu_virt_ras_remote_ioctl_cmd - send a RAS command through a VRAM buffer
+ * @ras_core: RAS core context; ras_core->dev is used as the amdgpu device
+ * @cmd: command context; sizeof(*cmd) bytes of it are copied into the buffer
+ * @output_data: destination for the reply payload, may be NULL
+ * @output_size: capacity of @output_data in bytes
+ *
+ * A kernel BO of ALIGN(sizeof(*cmd) + @output_size, AMDGPU_GPU_PAGE_SIZE)
+ * bytes is created in VRAM, zeroed and seeded with @cmd. Its offset from
+ * adev->gmc.vram_start and its length are handed to
+ * amdgpu_virt_send_remote_ras_cmd(), and the reply is then read back from
+ * the same buffer. The BO is freed on every path that follows a successful
+ * allocation.
+ *
+ * When the reply carries cmd_res == 0, @cmd->cmd_res and @cmd->output_size
+ * are updated from it. The payload is copied to @output_data only if it is
+ * non-empty, fits in @output_size and @output_data is not NULL; otherwise
+ * nothing is copied, but @cmd->output_size still holds the size found in
+ * the reply.
+ *
+ * NOTE(review): presumably the reply is written by the host side of the
+ * SR-IOV link - confirm against amdgpu_virt_send_remote_ras_cmd().
+ *
+ * Return: 0 on success, the error returned by amdgpu_bo_create_kernel() or
+ * amdgpu_virt_send_remote_ras_cmd(), or the non-zero cmd_res found in the
+ * reply buffer.
+ */
+static int amdgpu_virt_ras_remote_ioctl_cmd(struct ras_core_context *ras_core,
+		struct ras_cmd_ctx *cmd, void *output_data, uint32_t output_size)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)ras_core->dev;
+	uint32_t mem_len = ALIGN(sizeof(*cmd) + output_size, AMDGPU_GPU_PAGE_SIZE);
+	struct ras_cmd_ctx *rcmd;
+	struct amdgpu_bo *rcmd_bo = NULL;
+	uint64_t mc_addr = 0;
+	void *cpu_addr = NULL;
+	int ret = 0;
+	/* Scratch VRAM BO that carries the request and, afterwards, the reply. */
+	ret = amdgpu_bo_create_kernel(adev, mem_len, PAGE_SIZE,
+		AMDGPU_GEM_DOMAIN_VRAM, &rcmd_bo, &mc_addr, (void **)&cpu_addr);
+	if (ret)
+		return ret;
+	/* Clear the whole buffer, then place the command context at its start. */
+	rcmd = (struct ras_cmd_ctx *)cpu_addr;
+	memset(rcmd, 0, mem_len);
+	memcpy(rcmd, cmd, sizeof(*cmd));
+	/* The buffer is identified by its offset from the start of VRAM. */
+	ret = amdgpu_virt_send_remote_ras_cmd(ras_core->dev,
+			mc_addr - adev->gmc.vram_start, mem_len);
+	if (!ret) {
+		if (rcmd->cmd_res) {
+			ret = rcmd->cmd_res;
+			goto out;
+		}
+		/* cmd_res is 0 here; mirror the reply header into the caller's context. */
+		cmd->cmd_res = rcmd->cmd_res;
+		cmd->output_size = rcmd->output_size;
+		if (rcmd->output_size && (rcmd->output_size <= output_size) && output_data)
+			memcpy(output_data, rcmd->output_buff_raw, rcmd->output_size);
+	}
+
+out:
+	amdgpu_bo_free_kernel(&rcmd_bo, &mc_addr, &cpu_addr);
+
+	return ret;
+}
+/*
+ * Commands that are handled locally instead of being forwarded. The table
+ * has no entries here, so amdgpu_virt_ras_handle_cmd() forwards every
+ * command through amdgpu_virt_ras_remote_ioctl_cmd().
+ */
+static struct ras_cmd_func_map amdgpu_virt_ras_cmd_maps[] = {
+
+};
+/**
+ * amdgpu_virt_ras_handle_cmd - dispatch a RAS command on an SR-IOV VF
+ * @ras_core: RAS core context
+ * @cmd: command context; cmd_id selects the handler, cmd_res receives the
+ *       handler's result
+ *
+ * Looks @cmd->cmd_id up in amdgpu_virt_ras_cmd_maps and calls the matching
+ * handler. A command without an entry is forwarded with
+ * amdgpu_virt_ras_remote_ioctl_cmd(), using @cmd->output_buff_raw and
+ * @cmd->output_buf_size as the reply destination. The handler's result is
+ * stored in @cmd->cmd_res and is not propagated through the return value.
+ *
+ * Return: RAS_CMD__SUCCESS_EXEED_BUFFER when @cmd->output_size ends up larger
+ * than @cmd->output_buf_size, RAS_CMD__SUCCESS otherwise.
+ */
+int amdgpu_virt_ras_handle_cmd(struct ras_core_context *ras_core,
+		struct ras_cmd_ctx *cmd)
+{
+	struct ras_cmd_func_map *ras_cmd = NULL;
+	int i, res;
+	/* Prefer a handler from the local table, if one matches cmd_id. */
+	for (i = 0; i < ARRAY_SIZE(amdgpu_virt_ras_cmd_maps); i++) {
+		if (cmd->cmd_id == amdgpu_virt_ras_cmd_maps[i].cmd_id) {
+			ras_cmd = &amdgpu_virt_ras_cmd_maps[i];
+			break;
+		}
+	}
+
+	if (ras_cmd)
+		res = ras_cmd->func(ras_core, cmd, NULL);
+	else
+		res = amdgpu_virt_ras_remote_ioctl_cmd(ras_core, cmd,
+				cmd->output_buff_raw, cmd->output_buf_size);
+	/* The handler's result is reported through cmd_res, not the return value. */
+	cmd->cmd_res = res;
+
+	if (cmd->output_size > cmd->output_buf_size) {
+		RAS_DEV_ERR(ras_core->dev,
+			"Output data size 0x%x exceeds buffer size 0x%x!\n",
+			cmd->output_size, cmd->output_buf_size);
+		return RAS_CMD__SUCCESS_EXEED_BUFFER;
+	}
+
+	return RAS_CMD__SUCCESS;
+}
+/**
+ * amdgpu_virt_ras_sw_init - allocate the virt RAS command state
+ * @adev: amdgpu device
+ *
+ * Stores a zeroed struct amdgpu_virt_ras_cmd in ras_mgr->virt_ras_cmd. The
+ * context returned by amdgpu_ras_mgr_get_context() is dereferenced without
+ * a NULL check.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+int amdgpu_virt_ras_sw_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
+
+	ras_mgr->virt_ras_cmd = kzalloc(sizeof(struct amdgpu_virt_ras_cmd), GFP_KERNEL);
+	if (!ras_mgr->virt_ras_cmd)
+		return -ENOMEM;
+
+	return 0;
+}
+/**
+ * amdgpu_virt_ras_sw_fini - release the virt RAS command state
+ * @adev: amdgpu device
+ *
+ * Frees ras_mgr->virt_ras_cmd and resets the pointer to NULL. The context
+ * returned by amdgpu_ras_mgr_get_context() is dereferenced without a NULL
+ * check.
+ *
+ * Return: always 0.
+ */
+int amdgpu_virt_ras_sw_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
+
+	kfree(ras_mgr->virt_ras_cmd);
+	ras_mgr->virt_ras_cmd = NULL;
+
+	return 0;
+}
+/**
+ * amdgpu_virt_ras_hw_init - hardware init hook for virt RAS
+ * @adev: amdgpu device (unused)
+ *
+ * Return: always 0; the function does nothing else here.
+ */
+int amdgpu_virt_ras_hw_init(struct amdgpu_device *adev)
+{
+	return 0;
+}
+/**
+ * amdgpu_virt_ras_hw_fini - hardware teardown hook for virt RAS
+ * @adev: amdgpu device (unused)
+ *
+ * Return: always 0; the function does nothing else here.
+ */
+int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev)
+{
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h
new file mode 100644
index 0000000000000..7e3a612eaeb08
--- /dev/null
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __AMDGPU_VIRT_RAS_CMD_H__
+#define __AMDGPU_VIRT_RAS_CMD_H__
+#include "ras.h"
+/*
+ * State for RAS command handling on an SR-IOV VF. One instance is allocated
+ * by amdgpu_virt_ras_sw_init() and kept in ras_mgr->virt_ras_cmd. The
+ * structure has no members in this patch.
+ */
+struct amdgpu_virt_ras_cmd {
+};
+/*
+ * Software/hardware init and teardown hooks plus the command entry point.
+ * In this patch the RAS manager and amdgpu_ras_submit_cmd() call them only
+ * when amdgpu_sriov_vf() is true for the device.
+ */
+int amdgpu_virt_ras_sw_init(struct amdgpu_device *adev); /* allocates ras_mgr->virt_ras_cmd; 0 or -ENOMEM */
+int amdgpu_virt_ras_sw_fini(struct amdgpu_device *adev); /* frees ras_mgr->virt_ras_cmd; returns 0 */
+int amdgpu_virt_ras_hw_init(struct amdgpu_device *adev); /* no-op here; returns 0 */
+int amdgpu_virt_ras_hw_fini(struct amdgpu_device *adev); /* no-op here; returns 0 */
+int amdgpu_virt_ras_handle_cmd(struct ras_core_context *ras_core,
+		struct ras_cmd_ctx *cmd); /* result goes to cmd->cmd_res; returns a RAS_CMD__SUCCESS* code */
+
+#endif /* __AMDGPU_VIRT_RAS_CMD_H__ */