]> git.ipfire.org Git - people/ms/gcc.git/commitdiff
libgomp: Add offload_device_gcn check, add requires-4a.c test
author Tobias Burnus <tobias@codesourcery.com>
Thu, 20 Oct 2022 11:07:37 +0000 (13:07 +0200)
committer Tobias Burnus <tobias@codesourcery.com>
Thu, 20 Oct 2022 11:07:37 +0000 (13:07 +0200)
Duplicate libgomp.c-c++-common/requires-4.c (as ...-4a.c) but
using heap-allocated instead of static memory for a variable.

This change and the added offload_device_gcn check prepare for
pseudo-USM, where the device hardware cannot access all host
memory but only managed and pinned memory; for those, requires-4.c
will fail and the new check permits adding
  target { ! { offload_device_nvptx || offload_device_gcn } }
to requires-4.c; however, it has not been added yet as pseudo-USM
support is not yet on mainline. (Review is pending for the USM
patches.)

include/ChangeLog:

* gomp-constants.h (GOMP_DEVICE_HSA): Comment out unused define.

libgomp/ChangeLog:

* testsuite/lib/libgomp.exp (check_effective_target_offload_device_gcn):
New.
* testsuite/libgomp.c-c++-common/on_device_arch.h (device_arch_gcn,
on_device_arch_gcn): New.
* testsuite/libgomp.c-c++-common/requires-4a.c: New test; copied from
requires-4.c but using heap-allocated memory.

(cherry picked from commit 12d9f5afbd2660862045acd41cb65a77e35bea4d)

include/ChangeLog.omp
include/gomp-constants.h
libgomp/ChangeLog.omp
libgomp/testsuite/lib/libgomp.exp
libgomp/testsuite/libgomp.c-c++-common/on_device_arch.h
libgomp/testsuite/libgomp.c-c++-common/requires-4a.c [new file with mode: 0644]

index 141ac55cf7a3d9b74a421e6a0a7213ca279a7b44..4a72c8b9af50be653c6263098bb9063accb00331 100644 (file)
@@ -1,3 +1,10 @@
+2022-10-20  Tobias Burnus  <tobias@codesourcery.com>
+
+       Backport from mainline:
+       2022-10-20  Tobias Burnus  <tobias@codesourcery.com>
+
+       * gomp-constants.h (GOMP_DEVICE_HSA): Comment out unused define.
+
 2022-08-31  Tobias Burnus  <tobias@codesourcery.com>
 
        Revert:
index 918414ec218eacb2f1504618cfa3f38388fd3423..dd753a8210644690e8f95a0bc219c440e84532b8 100644 (file)
@@ -257,7 +257,7 @@ enum gomp_map_kind
 #define GOMP_DEVICE_NOT_HOST           4
 #define GOMP_DEVICE_NVIDIA_PTX         5
 #define GOMP_DEVICE_INTEL_MIC          6
-#define GOMP_DEVICE_HSA                        7
+/* #define GOMP_DEVICE_HSA             7 removed.  */
 #define GOMP_DEVICE_GCN                        8
 
 /* We have a compatibility issue.  OpenMP 5.2 introduced
index 570aabf82c4d11282196603cba98e6a3ead072b7..cb884aa0c83cdabf94971df49156b547b420f2b5 100644 (file)
@@ -1,3 +1,15 @@
+2022-10-20  Tobias Burnus  <tobias@codesourcery.com>
+
+       Backport from mainline:
+       2022-10-20  Tobias Burnus  <tobias@codesourcery.com>
+
+       * testsuite/lib/libgomp.exp (check_effective_target_offload_device_gcn):
+       New.
+       * testsuite/libgomp.c-c++-common/on_device_arch.h (device_arch_gcn,
+       on_device_arch_gcn): New.
+       * testsuite/libgomp.c-c++-common/requires-4a.c: New test; copied from
+       requires-4.c but using heap-allocated memory.
+
 2022-10-17  Tobias Burnus  <tobias@codesourcery.com>
 
        Backport from mainline:
index c98c1edf57b45aa2c92f6b1005753941ac9a4915..ffdcfd85b72b5bce4c34c5c1bf00c8fe5069625c 100644 (file)
@@ -414,6 +414,18 @@ proc check_effective_target_offload_device_nvptx { } {
     } ]
 }
 
+# Return 1 if using a GCN offload device.
+proc check_effective_target_offload_device_gcn { } {
+    return [check_runtime_nocache offload_device_gcn {
+      #include <omp.h>
+      #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
+      int main ()
+       {
+         return !on_device_arch_gcn ();
+       }
+    } ]
+}
+
 # Return 1 if at least one Nvidia GPU is accessible.
 
 proc check_effective_target_openacc_nvidia_accel_present { } {
index f92743b04d7c25e5a120d25658a2ea05cf09680b..6f66dbd784c5f37cbf230ea4edd8f5c7d0b16dea 100644 (file)
@@ -7,6 +7,12 @@ device_arch_nvptx (void)
   return GOMP_DEVICE_NVIDIA_PTX;
 }
 
+/* static */ int
+device_arch_gcn (void)
+{
+  return GOMP_DEVICE_GCN;
+}
+
 /* static */ int
 device_arch_intel_mic (void)
 {
@@ -14,6 +20,7 @@ device_arch_intel_mic (void)
 }
 
 #pragma omp declare variant (device_arch_nvptx) match(construct={target},device={arch(nvptx)})
+#pragma omp declare variant (device_arch_gcn) match(construct={target},device={arch(gcn)})
 #pragma omp declare variant (device_arch_intel_mic) match(construct={target},device={arch(intel_mic)})
 /* static */ int
 device_arch (void)
@@ -37,6 +44,12 @@ on_device_arch_nvptx ()
   return on_device_arch (GOMP_DEVICE_NVIDIA_PTX);
 }
 
+int
+on_device_arch_gcn ()
+{
+  return on_device_arch (GOMP_DEVICE_GCN);
+}
+
 int
 on_device_arch_intel_mic ()
 {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c b/libgomp/testsuite/libgomp.c-c++-common/requires-4a.c
new file mode 100644 (file)
index 0000000..4fb9783
--- /dev/null
@@ -0,0 +1,39 @@
+/* { dg-additional-options "-flto" } */
+/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
+/* { dg-additional-sources requires-4-aux.c } */
+
+/* Same as requires-4.c, but uses heap memory for 'a'.  */
+
+/* Check no diagnostic by device-compiler's or host compiler's lto1.
+   Other file uses: 'requires reverse_offload', but that's inactive as
+   there are no declare target directives, device constructs nor device routines  */
+
+/* Depending on offload device capabilities, it may print something like the
+   following (only) if GOMP_DEBUG=1:
+   "devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled"
+   and in that case does host-fallback execution.
+
+   No offload devices support USM at present, so we may verify host-fallback
+   execution by presence of separate memory spaces.  */
+
+#pragma omp requires unified_address,unified_shared_memory
+
+int *a;
+extern void foo (void);
+
+int
+main (void)
+{
+  a = (int *) __builtin_calloc (sizeof (int), 10);
+  #pragma omp target map(to: a)
+  for (int i = 0; i < 10; i++)
+    a[i] = i;
+
+  for (int i = 0; i < 10; i++)
+    if (a[i] != i)
+      __builtin_abort ();
+
+  foo ();
+  __builtin_free (a);
+  return 0;
+}