From 89cb5556074b85c41ae529c0c7cfca044f71e3c7 Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung
Date: Wed, 22 Jun 2022 07:43:05 -0700
Subject: [PATCH] libgomp, nvptx: Update bundled CUDA header file

This updates the bundled cuda.h header file to include some new API calls
and constants that are now used in the code.

This patch should be included when the "libgomp, nvptx: low-latency memory
allocator" or "openmp: Add support for 'target_device' context selector set"
patches are upstreamed.

2022-06-21  Kwok Cheung Yeung

	include/
	* cuda/cuda.h (CUdevice_attribute): Add definitions for
	CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR and
	CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR.
	(CUmemAttach_flags): New.
	(CUpointer_attribute): New.
	(cuMemAllocManaged): New prototype.
	(cuPointerGetAttribute): New prototype.

	libgomp/
	* plugin/cuda-lib.def (cuMemAllocManaged): Add new call.
	(cuPointerGetAttribute): Likewise.
---
 include/ChangeLog.omp       | 10 ++++++++++
 include/cuda/cuda.h         | 12 ++++++++++++
 libgomp/ChangeLog.omp       |  5 +++++
 libgomp/plugin/cuda-lib.def |  2 ++
 4 files changed, 29 insertions(+)

diff --git a/include/ChangeLog.omp b/include/ChangeLog.omp
index e36407a91471..4c63f1107eef 100644
--- a/include/ChangeLog.omp
+++ b/include/ChangeLog.omp
@@ -1,3 +1,13 @@
+2022-06-21  Kwok Cheung Yeung
+
+	* cuda/cuda.h (CUdevice_attribute): Add definitions for
+	CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR and
+	CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR.
+	(CUmemAttach_flags): New.
+	(CUpointer_attribute): New.
+	(cuMemAllocManaged): New prototype.
+	(cuPointerGetAttribute): New prototype.
+
 2021-02-02  Chung-Lin Tang
 
 	* gomp-constants.h (GOMP_REQUIRES_UNIFIED_ADDRESS): New symbol.
diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 5c813ad2cf81..bbd9fd21b965 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -73,9 +73,19 @@ typedef enum {
   CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
   CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
 } CUdevice_attribute;
 
+typedef enum {
+  CU_MEM_ATTACH_GLOBAL = 0x1
+} CUmemAttach_flags;
+
+typedef enum {
+  CU_POINTER_ATTRIBUTE_IS_MANAGED = 8
+} CUpointer_attribute;
+
 enum {
   CU_EVENT_DEFAULT = 0,
   CU_EVENT_DISABLE_TIMING = 2
@@ -156,6 +166,7 @@ CUresult cuMemGetInfo (size_t *, size_t *);
 CUresult cuMemAlloc (CUdeviceptr *, size_t);
 #define cuMemAllocHost cuMemAllocHost_v2
 CUresult cuMemAllocHost (void **, size_t);
+CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
 CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t);
 #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
 CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream);
@@ -182,6 +193,7 @@ CUresult cuModuleLoadData (CUmodule *, const void *);
 CUresult cuModuleUnload (CUmodule);
 CUresult cuOccupancyMaxPotentialBlockSize(int *, int *, CUfunction,
                                           CUoccupancyB2DSize, size_t, int);
+CUresult cuPointerGetAttribute(void *, CUpointer_attribute, CUdeviceptr);
 typedef void (*CUstreamCallback)(CUstream, CUresult, void *);
 CUresult cuStreamAddCallback(CUstream, CUstreamCallback, void *, unsigned int);
 CUresult cuStreamCreate (CUstream *, unsigned);
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index d52cf78c8219..4c1b878ccbb6 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,8 @@
+2022-06-21  Kwok Cheung Yeung
+
+	* plugin/cuda-lib.def (cuMemAllocManaged): Add new call.
+	(cuPointerGetAttribute): Likewise.
+
 2021-11-16  Frederik Harwath
 
 	* testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Adjust.
diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index cd91b39b1d27..b6d03290f352 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -29,6 +29,7 @@ CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2)
 CUDA_ONE_CALL (cuLinkDestroy)
 CUDA_ONE_CALL (cuMemAlloc)
 CUDA_ONE_CALL (cuMemAllocHost)
+CUDA_ONE_CALL (cuMemAllocManaged)
 CUDA_ONE_CALL (cuMemcpy)
 CUDA_ONE_CALL (cuMemcpyDtoDAsync)
 CUDA_ONE_CALL (cuMemcpyDtoH)
@@ -46,6 +47,7 @@ CUDA_ONE_CALL (cuModuleLoad)
 CUDA_ONE_CALL (cuModuleLoadData)
 CUDA_ONE_CALL (cuModuleUnload)
 CUDA_ONE_CALL_MAYBE_NULL (cuOccupancyMaxPotentialBlockSize)
+CUDA_ONE_CALL (cuPointerGetAttribute)
 CUDA_ONE_CALL (cuStreamAddCallback)
 CUDA_ONE_CALL (cuStreamCreate)
 CUDA_ONE_CALL (cuStreamDestroy)
-- 
2.47.2