From 91689b5a7ce43e3742be6a249c7fc73be26b7d5f Mon Sep 17 00:00:00 2001
From: Mukul Joshi
Date: Thu, 9 Jan 2025 22:04:08 -0500
Subject: [PATCH] drm/amdkfd: Update CWSR area calculations for GFX 12.1

Update the SGPR, VGPR, HWREG size and number of waves supported
for GFX 12.1 CWSR memory limits. The CU calculation changed in
topology, as a result, the values need to be updated.

Signed-off-by: Mukul Joshi
Reviewed-by: Feifei Xu
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 63 ++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 80c4fa2b0975d..56c97189e7f12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -392,12 +392,20 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
 	return 0;
 }
 
-#define SGPR_SIZE_PER_CU 0x4000
-#define LDS_SIZE_PER_CU 0x10000
-#define HWREG_SIZE_PER_CU 0x1000
 #define DEBUGGER_BYTES_ALIGN 64
 #define DEBUGGER_BYTES_PER_WAVE 32
 
+static u32 kfd_get_sgpr_size_per_cu(u32 gfxv)
+{
+	u32 sgpr_size = 0x4000;
+
+	if (gfxv == 120500 ||
+	    gfxv == 120501)
+		sgpr_size = 0x8000;
+
+	return sgpr_size;
+}
+
 static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
 {
 	u32 vgpr_size = 0x40000;
@@ -413,14 +421,53 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
 	    gfxv == 120000 || /* GFX_VERSION_GFX1200 */
 	    gfxv == 120001) /* GFX_VERSION_GFX1201 */
 		vgpr_size = 0x60000;
+	else if (gfxv == 120500 || /* GFX_VERSION_GFX1250 */
+		 gfxv == 120501) /* GFX_VERSION_GFX1251 */
+		vgpr_size = 0x80000;
 
 	return vgpr_size;
 }
 
+static u32 kfd_get_hwreg_size_per_cu(u32 gfxv)
+{
+	u32 hwreg_size = 0x1000;
+
+	if (gfxv == 120500 || gfxv == 120501)
+		hwreg_size = 0x8000;
+
+	return hwreg_size;
+}
+
+static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props)
+{
+	u32 lds_size = 0x10000;
+
+	if (gfxv == 90500 || gfxv == 120500 || gfxv == 120501)
+		lds_size = props->lds_size_in_kb << 10;
+
+	return lds_size;
+}
+
+static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num)
+{
+	u32 wave_num = 0;
+
+	if (gfxv < 100100)
+		wave_num = min(cu_num * 40,
+				props->array_count / props->simd_arrays_per_engine * 512);
+	else if (gfxv < 120500)
+		wave_num = cu_num * 32;
+	else if (gfxv <= 120501)
+		wave_num = cu_num * 64;
+
+	WARN_ON(wave_num == 0);
+
+	return wave_num;
+}
+
 #define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
-	(kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
-	 (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
-	 HWREG_SIZE_PER_CU)
+	(kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\
+	 kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv))
 
 #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
 	((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
@@ -440,9 +487,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
 		return;
 
 	cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
-	wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */
-		    min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
-		    : cu_num * 32;
+	wave_num = get_num_waves(props, gfxv, cu_num);
 
 	wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
 	ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
-- 
2.47.3