From ea89b305b6364ea8190b5ee108b9d122e2886f34 Mon Sep 17 00:00:00 2001
From: Jay Cornwall
Date: Fri, 16 Jan 2026 12:25:22 -0600
Subject: [PATCH] drm/amdkfd: Fix scalar load ordering in gfx12.1 trap handler

Scalar loads may arrive out-of-order with respect to KMCNT. The
affected code expects the two loads to arrive in-order.

Signed-off-by: Jay Cornwall
Reviewed-by: Lancelot Six
Cc: Joseph Greathouse
Cc: Vladimir Indic
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h         | 8 ++++----
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 6281b2f9faee..453c08845d74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4638,8 +4638,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
 	0x01ffffff, 0xb8fbf811,
 	0xbf0d847b, 0xbfa20078,
 	0xf4003eb6, 0xf8000000,
-	0xf4003bb6, 0xf8000008,
-	0xbfc70001, 0x8b76ff7a,
+	0xbfc70000, 0xf4003bb6,
+	0xf8000008, 0x8b76ff7a,
 	0x80000000, 0xbfa20027,
 	0x9376ff7a, 0x00060019,
 	0x81f9a376, 0xbf0b8179,
@@ -4717,8 +4717,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
 	0xb980f821, 0x00000000,
 	0xbf0d847b, 0xbfa20078,
 	0xf4003eb6, 0xf8000000,
-	0xf4003bb6, 0xf8000008,
-	0xbfc70001, 0x8b76ff7a,
+	0xbfc70000, 0xf4003bb6,
+	0xf8000008, 0x8b76ff7a,
 	0x80000000, 0xbfa20027,
 	0x9376ff7a, 0x00060019,
 	0x81f9a376, 0xbf0b8179,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index fc2c09139d2e..ae76ea91a3cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -1355,8 +1355,8 @@ function fixup_vgpr_bank_selection
 	// ttmp[0:1]: {7b'0} PC[56:0]
 	// ttmp2, 3, 10, 13, 14, 15: free
 	s_load_b64 [ttmp14, ttmp15], [ttmp0, ttmp1], 0 scope:SCOPE_CU // Load the 2 instruction DW we are returning to
+	s_wait_kmcnt 0
 	s_load_b64 [ttmp2, ttmp3], [ttmp0, ttmp1], 8 scope:SCOPE_CU // Load the next 2 instruction DW, just in case
-	s_wait_kmcnt 1
 	s_and_b32 ttmp10, ttmp14, 0x80000000 // Check bit 31 in the first DWORD
 	// SCC set if ttmp10 is != 0, i.e. if bit 31 == 1
 	s_cbranch_scc1 L_FIXUP_NOT_VOP12C // If bit 31 is 1, we are not VOP1, VOP2, or VOP3C
-- 
2.47.3