]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
amdgcn: libgomp plugin USM implementation devel/omp/gcc-11
authorAndrew Stubbs <ams@codesourcery.com>
Mon, 20 Jun 2022 14:51:15 +0000 (15:51 +0100)
committerAndrew Stubbs <ams@codesourcery.com>
Mon, 27 Jun 2022 16:28:04 +0000 (17:28 +0100)
Implement the Unified Shared Memory API calls in the GCN plugin.

The allocate and free calls are pretty straightforward because all "target" memory
allocations are compatible with USM, on the right hardware.  However, there's
no known way to check what memory region was used, after the fact, so we use a
splay tree to record allocations so we can answer "is_usm_ptr" later.

libgomp/ChangeLog:

* plugin/plugin-gcn.c (struct usm_splay_tree_key_s): New.
(usm_splay_compare): New.
(splay_tree_prefix): New.
(GOMP_OFFLOAD_usm_alloc): New.
(GOMP_OFFLOAD_usm_free): New.
(GOMP_OFFLOAD_is_usm_ptr): New.
(GOMP_OFFLOAD_supported_features): Move into the OpenMP API fold.
Add GOMP_REQUIRES_UNIFIED_ADDRESS and
GOMP_REQUIRES_UNIFIED_SHARED_MEMORY.
(gomp_fatal): New.
(splay_tree_c): New.
* testsuite/lib/libgomp.exp (check_effective_target_omp_usm): New.
* testsuite/libgomp.c++/usm-1.C: Use dg-require-effective-target.
* testsuite/libgomp.c-c++-common/requires-1.c: Likewise.
* testsuite/libgomp.c/usm-1.c: Likewise.
* testsuite/libgomp.c/usm-2.c: Likewise.
* testsuite/libgomp.c/usm-3.c: Likewise.
* testsuite/libgomp.c/usm-4.c: Likewise.
* testsuite/libgomp.c/usm-5.c: Likewise.
* testsuite/libgomp.c/usm-6.c: Likewise.

libgomp/plugin/plugin-gcn.c
libgomp/testsuite/lib/libgomp.exp
libgomp/testsuite/libgomp.c++/usm-1.C
libgomp/testsuite/libgomp.c-c++-common/requires-1.c
libgomp/testsuite/libgomp.c/usm-1.c
libgomp/testsuite/libgomp.c/usm-2.c
libgomp/testsuite/libgomp.c/usm-3.c
libgomp/testsuite/libgomp.c/usm-4.c
libgomp/testsuite/libgomp.c/usm-5.c
libgomp/testsuite/libgomp.c/usm-6.c

index 969683ea1d28c506cdc9525cee0fcc045479bb9e..f0af1d341c6576c193f575c7b4a8cef0c51d60fc 100644 (file)
@@ -3825,6 +3825,89 @@ GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
   return !isa || isa_code (isa) == agent->device_isa;
 }
 
+/* Use a splay tree to track USM allocations.  */
+
+typedef struct usm_splay_tree_node_s *usm_splay_tree_node;
+typedef struct usm_splay_tree_s *usm_splay_tree;
+typedef struct usm_splay_tree_key_s *usm_splay_tree_key;
+
+struct usm_splay_tree_key_s {
+  void *addr;
+  size_t size;
+};
+
+static inline int
+usm_splay_compare (usm_splay_tree_key x, usm_splay_tree_key y)
+{
+  if ((x->addr <= y->addr && x->addr + x->size > y->addr)
+      || (y->addr <= x->addr && y->addr + y->size > x->addr))
+    return 0;
+
+  return (x->addr > y->addr ? 1 : -1);
+}
+
+#define splay_tree_prefix usm
+#include "../splay-tree.h"
+
+static struct usm_splay_tree_s usm_map = { NULL };
+
+/* Allocate memory suitable for Unified Shared Memory.
+
+   In fact, AMD memory need only be "coarse grained", which target
+   allocations already are.  We do need to track allocations so that
+   GOMP_OFFLOAD_is_usm_ptr can look them up.  */
+
+void *
+GOMP_OFFLOAD_usm_alloc (int device, size_t size)
+{
+  void *ptr = GOMP_OFFLOAD_alloc (device, size);
+
+  usm_splay_tree_node node = malloc (sizeof (struct usm_splay_tree_node_s));
+  node->key.addr = ptr;
+  node->key.size = size;
+  node->left = NULL;
+  node->right = NULL;
+  usm_splay_tree_insert (&usm_map, node);
+
+  return ptr;
+}
+
+/* Free memory allocated via GOMP_OFFLOAD_usm_alloc.  */
+
+bool
+GOMP_OFFLOAD_usm_free (int device, void *ptr)
+{
+  struct usm_splay_tree_key_s key = { ptr, 1 };
+  usm_splay_tree_key node = usm_splay_tree_lookup (&usm_map, &key);
+  if (node)
+    {
+      usm_splay_tree_remove (&usm_map, &key);
+      free (node);
+    }
+
+  return GOMP_OFFLOAD_free (device, ptr);
+}
+
+/* True if the memory was allocated via GOMP_OFFLOAD_usm_alloc.  */
+
+bool
+GOMP_OFFLOAD_is_usm_ptr (void *ptr)
+{
+  struct usm_splay_tree_key_s key = { ptr, 1 };
+  return usm_splay_tree_lookup (&usm_map, &key);
+}
+
+/* Indicate which GOMP_REQUIRES_* features are supported.  */
+
+bool
+GOMP_OFFLOAD_supported_features (unsigned int *mask)
+{
+  *mask &= ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+             | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY);
+
+  return (*mask == 0);
+}
+
 /* }}} */
 /* {{{ OpenACC Plugin API  */
 
@@ -4126,12 +4209,19 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data)
   free (data);
 }
 
-/* Indicate which GOMP_REQUIRES_* features are supported, currently none.  */
+/* }}} */
+/* {{{ USM splay tree */
 
-bool
-GOMP_OFFLOAD_supported_features (unsigned int *mask)
-{
-  return (*mask == 0);
-}
+/* Include this now so that splay-tree.c doesn't include it later.  This
+   avoids a conflict with splay_tree_prefix.  */
+#include "libgomp.h"
 
-/* }}} */
+/* This allows splay-tree.c to call gomp_fatal in this context.  The splay
+   tree code doesn't use the variadic arguments right now.  */
+#define gomp_fatal(MSG, ...) GOMP_PLUGIN_fatal (MSG)
+
+/* Include the splay tree code inline, with the prefixes added.  */
+#define splay_tree_prefix usm
+#define splay_tree_c
+#include "../splay-tree.h"
+/* }}}  */
index 83d130769af7e24f736b404e01553a0711e52d16..d93411bd79974b25e657a8cd28a2c3d923df1ec0 100644 (file)
@@ -537,3 +537,25 @@ int main() {
     return 0;
 } } "-lcuda -lcudart" ]
 }
+
+# return 1 if OpenMP Unified Shared Memory is supported
+
+proc check_effective_target_omp_usm { } {
+    if { [libgomp_check_effective_target_offload_target "nvptx"] } {
+       return 1
+    }
+
+    if { [libgomp_check_effective_target_offload_target "amdgcn"] } {
+       return [check_no_compiler_messages omp_usm executable {
+           #pragma omp requires unified_shared_memory
+          int main () {
+            #pragma omp target
+              ;
+            return 0;
+          }
+       }]
+    }
+
+    return 0
+}
+
index fea25e5f10b174accf0663bcf1a8ec76cdb39b47..6e88f90d61fdec4846ba70e7de5ca79a7bfc52ad 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+/* { dg-require-effective-target omp_usm } */
 #include <stdint.h>
 
 #pragma omp requires unified_shared_memory
index 02585adfb6fdad8d65a7a146552cbe16c8658cd9..0dd40bc0f59ea427af7043579cf64f9347f92726 100644 (file)
@@ -1,4 +1,5 @@
 /* { dg-additional-sources requires-1-aux.c } */
+/* { dg-require-effective-target omp_usm } */
 
 #pragma omp requires unified_shared_memory
 
index 1b35f19c45b19f256415424517de8aef6ba89343..e73f1816f9ac6a6e61da111856c89e191fc8e0f3 100644 (file)
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target omp_usm } */
 
 #include <omp.h>
 #include <stdint.h>
index 689cee7e4568d10b3e063115fe178cf945faa14f..31f2bae7145f6229553daa157354941c5b48465e 100644 (file)
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target omp_usm } */
 
 #include <omp.h>
 #include <stdint.h>
index 2ca66afe93f3581b7a18d6a33b4f935e4aaba5c7..2c78a0d8ced0b807251bbd1ffb5c3da231da6322 100644 (file)
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target omp_usm } */
 
 #include <omp.h>
 #include <stdint.h>
index 753908c84409f326d560d3c45d1872fc597867bc..1ac5498f73f9f50fbd43ad896c06637eb9451ad4 100644 (file)
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target omp_usm } */
 
 #include <omp.h>
 #include <stdint.h>
index 4d8b3cf71b1c29ae3df2d64fcb52c46c5a95a45c..563397f941a6dd47090866854f4c6d1e3041b945 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
+/* { dg-require-effective-target omp_usm } */
 
 #include <omp.h>
 #include <stdint.h>
index c207140092a7f0537b203b67c352506c9e27e8d1..bd14f8197b3e18f547e921b45edaab23df8a1131 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */
+/* { dg-require-effective-target omp_usm } */
 
 #include <stdint.h>
 #include <stdlib.h>