From 7f925450379fe7fcaecafd38d6fa67aaa499cd6d Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 25 Jul 2019 11:26:45 +0100 Subject: [PATCH] Detect number of GPU compute units. 2019-09-10 Andrew Stubbs libgomp/ * plugin/plugin-gcn.c (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Define. (dump_hsa_agent_info): Dump compute unit count. (get_cu_count): New function. (parse_target_attributes): Use get_cu_count for default gdims. (gcn_exec): Likewise. (cherry picked from openacc-gcc-9-branch commit 4bc3e873bcaa3b671dc7c6afbfaa02a4459daae6) --- libgomp/ChangeLog.omp | 8 ++++++++ libgomp/plugin/plugin-gcn.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index d743975e0b5d..980185751f6a 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,11 @@ +2019-09-10 Andrew Stubbs + + * plugin/plugin-gcn.c (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Define. + (dump_hsa_agent_info): Dump compute unit count. + (get_cu_count): New function. + (parse_target_attributes): Use get_cu_count for default gdims. + (gcn_exec): Likewise. + 2019-09-10 Andrew Stubbs * plugin/plugin-gcn.c (obstack_chunk_alloc): Delete. diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 6c00c81b588b..9d03e4f9f5b4 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -44,6 +44,11 @@ #include "oacc-int.h" #include +/* Additional definitions not in HSA 1.1. + FIXME: this needs to be updated in hsa.h for upstream, but the only source + right now is the ROCr source which may cause license issues. */ +#define HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT 0xA002 + /* These probably won't be in elf.h for a while. */ #define R_AMDGPU_NONE 0 #define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */ @@ -845,6 +850,14 @@ dump_hsa_agent_info (hsa_agent_t agent, void *data __attribute__((unused))) else HSA_DEBUG ("HSA_AGENT_INFO_DEVICE: FAILED\n"); + uint32_t cu_count; + status = hsa_fns.hsa_agent_get_info_fn + (agent, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count); + if (status == HSA_STATUS_SUCCESS) + HSA_DEBUG ("HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: %u\n", cu_count); + else + HSA_DEBUG ("HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: FAILED\n"); + uint32_t size; status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &size); @@ -2449,6 +2462,18 @@ init_kernel (struct kernel_info *kernel) "mutex"); } +static int +get_cu_count (struct agent_info *agent) +{ + uint32_t cu_count; + hsa_status_t status = hsa_fns.hsa_agent_get_info_fn + (agent->id, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count); + if (status == HSA_STATUS_SUCCESS) + return cu_count; + else + return 64; /* The usual number for older devices. */ +} + /* Calculate the maximum grid size for OMP threads / OACC workers. This depends on the kernel's resource usage levels. */ @@ -2527,8 +2552,8 @@ parse_target_attributes (void **input, } def->ndim = 3; - /* Fiji has 64 CUs. */ - def->gdims[0] = (gcn_teams > 0) ? gcn_teams : 64; + /* Fiji has 64 CUs, but Vega20 has 60. */ + def->gdims[0] = (gcn_teams > 0) ? gcn_teams : get_cu_count (agent); /* Each thread is 64 work items wide. */ def->gdims[1] = 64; /* A work group can have 16 wavefronts. */ @@ -3308,7 +3333,7 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, problem size, so let's do a reasonable number of single-worker gangs. 64 gangs matches a typical Fiji device. */ - if (dims[0] == 0) dims[0] = 64; /* Gangs. */ + if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */ if (dims[1] == 0) dims[1] = 16; /* Workers. */ /* The incoming dimensions are expressed in terms of gangs, workers, and -- 2.47.2