From: YiPeng Chai Date: Sun, 28 Sep 2025 06:25:27 +0000 (+0800) Subject: drm/amdgpu: Intercept ras interrupts to ras module X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3d72d2e5f4c47d88f292c915ce31ef14092df3f2;p=thirdparty%2Flinux.git drm/amdgpu: Intercept ras interrupts to ras module Intercept ras interrupts to ras module. V2: Change function names in ras module. Signed-off-by: YiPeng Chai Reviewed-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index a2879d2b7c8ec..644f79f3c9afb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,7 @@ #include "amdgpu_ras.h" #include "amdgpu_umc.h" #include "amdgpu_reset.h" +#include "amdgpu_ras_mgr.h" /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -746,6 +747,20 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset) { + + if (amdgpu_uniras_enabled(adev)) { + struct ras_ih_info ih_info; + + memset(&ih_info, 0, sizeof(ih_info)); + ih_info.block = block; + ih_info.pasid = pasid; + ih_info.reset = reset; + ih_info.pasid_fn = pasid_fn; + ih_info.data = data; + amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info); + return; + } + amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e0ee211508607..9aa4b93ac6afd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -41,6 +41,7 @@ #include "atom.h" #include "amdgpu_reset.h" #include "amdgpu_psp.h" +#include "amdgpu_ras_mgr.h" #ifdef CONFIG_X86_MCE_AMD #include @@ -2241,6 +2242,11 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) return; + if (amdgpu_uniras_enabled(adev)) { + amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL); + return; + } + if (adev->nbio.ras && adev->nbio.ras->handle_ras_controller_intr_no_bifring) adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev); @@ -2411,6 +2417,16 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_manager *obj; struct ras_ih_data *data; + if (amdgpu_uniras_enabled(adev)) { + struct ras_ih_info ih_info; + + memset(&ih_info, 0, sizeof(ih_info)); + ih_info.block = info->head.block; + memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry)); + + return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info); + } + obj = amdgpu_ras_find_obj(adev, &info->head); if (!obj) return -EINVAL; diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c index e66d915831a96..a8d02bd42f908 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c @@ -381,7 +381,7 @@ static const struct amd_ip_funcs __maybe_unused ras_v1_0_ip_funcs = { .hw_fini = amdgpu_ras_mgr_hw_fini, }; -int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable) +int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); @@ -395,7 +395,7 @@ int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable) return ras_core_set_status(ras_mgr->ras_core, enable); } -bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev) +bool amdgpu_uniras_enabled(struct amdgpu_device *adev) { struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev); diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h index fa761de381c19..8d6a1873b6669 100644 --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h @@ -56,8 +56,8 @@ struct amdgpu_ras_mgr { struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context( struct amdgpu_device *adev); -int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable); -bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev); +int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable); +bool amdgpu_uniras_enabled(struct amdgpu_device *adev); int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data); int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data); int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data);