git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdgpu: Intercept ras interrupts to ras module
author YiPeng Chai <YiPeng.Chai@amd.com>
Sun, 28 Sep 2025 06:25:27 +0000 (14:25 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 20 Oct 2025 22:18:26 +0000 (18:18 -0400)
When unified RAS (uniras) is enabled, intercept RAS interrupts and
forward them to the RAS manager module.

V2:
  Change function names in ras module.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c
drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h

index a2879d2b7c8ec12317287e66eb85c8363afd8433..644f79f3c9afb9a57f4b445bb9cd1b8cd9046e4b 100644 (file)
@@ -36,6 +36,7 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_umc.h"
 #include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
 
 /* Total memory size in system memory and all GPU VRAM. Used to
  * estimate worst case amount of memory to reserve for page tables
@@ -746,6 +747,20 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad
                                enum amdgpu_ras_block block, uint16_t pasid,
                                pasid_notify pasid_fn, void *data, uint32_t reset)
 {
+
+       if (amdgpu_uniras_enabled(adev)) {
+               struct ras_ih_info ih_info;
+
+               memset(&ih_info, 0, sizeof(ih_info));
+               ih_info.block = block;
+               ih_info.pasid = pasid;
+               ih_info.reset = reset;
+               ih_info.pasid_fn = pasid_fn;
+               ih_info.data = data;
+               amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
+               return;
+       }
+
        amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
 }
 
index e0ee211508607e4170e7d7c84be00ed5277d3221..9aa4b93ac6afd05dd2131457768e00f85ff60d2b 100644 (file)
@@ -41,6 +41,7 @@
 #include "atom.h"
 #include "amdgpu_reset.h"
 #include "amdgpu_psp.h"
+#include "amdgpu_ras_mgr.h"
 
 #ifdef CONFIG_X86_MCE_AMD
 #include <asm/mce.h>
@@ -2241,6 +2242,11 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
            amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
                return;
 
+       if (amdgpu_uniras_enabled(adev)) {
+               amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
+               return;
+       }
+
        if (adev->nbio.ras &&
            adev->nbio.ras->handle_ras_controller_intr_no_bifring)
                adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
@@ -2411,6 +2417,16 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
        struct ras_manager *obj;
        struct ras_ih_data *data;
 
+       if (amdgpu_uniras_enabled(adev)) {
+               struct ras_ih_info ih_info;
+
+               memset(&ih_info, 0, sizeof(ih_info));
+               ih_info.block = info->head.block;
+               memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
+
+               return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
+       }
+
        obj = amdgpu_ras_find_obj(adev, &info->head);
        if (!obj)
                return -EINVAL;
index e66d915831a96ff169456e68c51ca6474de5d28f..a8d02bd42f9085cc9f80a828438a41bdf36ab9f2 100644 (file)
@@ -381,7 +381,7 @@ static const struct amd_ip_funcs __maybe_unused ras_v1_0_ip_funcs = {
        .hw_fini = amdgpu_ras_mgr_hw_fini,
 };
 
-int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable)
+int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable)
 {
        struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
 
@@ -395,7 +395,7 @@ int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable)
        return ras_core_set_status(ras_mgr->ras_core, enable);
 }
 
-bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev)
+bool amdgpu_uniras_enabled(struct amdgpu_device *adev)
 {
        struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
 
index fa761de381c1983ced51279997f5b8c9331f9b7a..8d6a1873b6669c3b34fd76dfff843253d8ebaf52 100644 (file)
@@ -56,8 +56,8 @@ struct amdgpu_ras_mgr {
 
 struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context(
                        struct amdgpu_device *adev);
-int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable);
-bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev);
+int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable);
+bool amdgpu_uniras_enabled(struct amdgpu_device *adev);
 int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data);
 int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data);
 int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data);