writes a new AMD GPU object file and the ABI version needs to be the
same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
- Note that Fiji is only suppored with LLVM <= 17 as version 3 is no longer
+ Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer
supported in LLVM >= 18. */
/* Spec fragment selecting the --amdhsa-code-object-version option: version 3
   for -march=fiji, version 4 otherwise (no -march, or any other -march).
   NOTE(review): reading assumes GCC driver spec syntax ("S:X; T:Y" inside a
   %{...} construct) -- confirm at the macro's use site.  */
#define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
"!march=*|march=*:--amdhsa-code-object-version=4"
/**
* String containing the ROCr build identifier.
*/
- HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
+ HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
+
+ HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
} hsa_system_info_t;
/**
such that the next reverse offload region is only executed after the previous
one returned.
@item OpenMP code that has a @code{requires} directive with
- @code{unified_shared_memory} will remove any GCN device from the list of
- available devices (``host fallback'').
+ @code{unified_shared_memory} is only supported if all AMD GPUs have the
+ @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for
+ discrete GPUs, this may require setting the @code{HSA_XNACK} environment
+ variable to @samp{1}; for systems with both an APU and a discrete GPU that
+ does not support XNACK, consider using @code{ROCR_VISIBLE_DEVICES} to
+ enable only the APU. If unified shared memory is not supported, all AMD GPU
+ devices are removed from the list of available devices (``host fallback'').
@item The available stack size can be changed using the @code{GCN_STACK_SIZE}
environment variable; the default is 32 kiB per thread.
@item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the
if (hsa_context.agent_count > 0
&& ((omp_requires_mask
& ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+ | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
| GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
return -1;
+ /* Check whether host page access is supported; this is a system-wide
+ property (covering all GPUs supported by HSA). While intrinsically true
+ for APUs, it requires XNACK support for discrete GPUs. */
+ if (hsa_context.agent_count > 0
+ && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+ {
+ bool b;
+ hsa_system_info_t type = HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT;
+ hsa_status_t status = hsa_fns.hsa_system_get_info_fn (type, &b);
+ if (status != HSA_STATUS_SUCCESS)
+ GOMP_PLUGIN_error ("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT "
+ "failed");
+ if (!b)
+ return -1;
+ }
+
return hsa_context.agent_count;
}