From 4108c2be12ebe78accb40d43f4713a7cc481b18a Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 8 May 2025 14:36:35 +0800 Subject: [PATCH] drm/amdgpu: fix fence fallback timer expired error IH is not working after switching a new gpu index for the first time. During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO) may not work.The access could be blocked by nBIF protection as VF isn't in exclusive access mode. Exclusive access is enabled now, disable/enable MSIX so that QEMU reprograms MSIX table. call amdgpu_restore_msix on resume to restore msix table. Signed-off-by: Samuel Zhang Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 13708dc7706a5..b16f7c4beb22b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5109,6 +5109,13 @@ static inline int amdgpu_virt_resume(struct amdgpu_device *adev) int r; unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id; + /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO) + * may not work. The access could be blocked by nBIF protection as VF isn't in + * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX + * so that QEMU reprograms MSIX table. + */ + amdgpu_restore_msix(adev); + r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 5517451fc75d6..56d973faca418 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -242,7 +242,7 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) return true; } -static void amdgpu_restore_msix(struct amdgpu_device *adev) +void amdgpu_restore_msix(struct amdgpu_device *adev) { u16 ctrl; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 04c0b4fa17a4e..9f0417456abda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -146,5 +146,6 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev); int amdgpu_irq_add_domain(struct amdgpu_device *adev); void amdgpu_irq_remove_domain(struct amdgpu_device *adev); unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id); +void amdgpu_restore_msix(struct amdgpu_device *adev); #endif -- 2.47.2