libgomp: Add no-target-region rev offload test + fix plugin-nvptx

author Tobias Burnus <tobias@codesourcery.com>

Fri, 25 Nov 2022 12:48:17 +0000 (13:48 +0100)

committer Tobias Burnus <tobias@codesourcery.com>

Fri, 25 Nov 2022 12:48:17 +0000 (13:48 +0100)
author Tobias Burnus <tobias@codesourcery.com>
Fri, 25 Nov 2022 12:48:17 +0000 (13:48 +0100)
committer Tobias Burnus <tobias@codesourcery.com>
Fri, 25 Nov 2022 12:48:17 +0000 (13:48 +0100)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c

index 0768fca350bbcd3c996695d0c2627be40f2f1efe..e803f083591a43ee31c29257c167ad29f028b5ee 100644 (file)
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1390,7 +1390,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
    else if (rev_fn_table)
      {
        CUdeviceptr var;
-      size_t bytes, i;
+      size_t bytes;
+      unsigned int i;
        r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module,
                              "$offload_func_table");
        if (r != CUDA_SUCCESS)
@@ -1413,12 +1414,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
  
    if (rev_fn_table && *rev_fn_table && dev->rev_data == NULL)
      {
-      /* cuMemHostAlloc memory is accessible on the device, if unified-shared
-        address is supported; this is assumed - see comment in
-        nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING.   */
-      CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data,
-                       sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP);
-      CUdeviceptr dp = (CUdeviceptr) dev->rev_data;
+      /* Get the on-device GOMP_REV_OFFLOAD_VAR variable.  It should be
+        available but it might be not.  One reason could be: if the user code
+        has 'omp target device(ancestor:1)' in pure hostcode, GOMP_target_ext
+        is not called on the device and, hence, it and GOMP_REV_OFFLOAD_VAR
+        are not linked in.  */
        CUdeviceptr device_rev_offload_var;
        size_t device_rev_offload_size;
        CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal,
@@ -1426,11 +1426,23 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
                                       &device_rev_offload_size, module,
                                       XSTRING (GOMP_REV_OFFLOAD_VAR));
        if (r != CUDA_SUCCESS)
-       GOMP_PLUGIN_fatal ("cuModuleGetGlobal error - GOMP_REV_OFFLOAD_VAR: %s", cuda_error (r));
-      r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp,
-                            sizeof (dp));
-      if (r != CUDA_SUCCESS)
-       GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
+       {
+         free (*rev_fn_table);
+         *rev_fn_table = NULL;
+       }
+      else
+       {
+         /* cuMemHostAlloc memory is accessible on the device, if
+            unified-shared address is supported; this is assumed - see comment
+            in nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. */
+         CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data,
+                           sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP);
+         CUdeviceptr dp = (CUdeviceptr) dev->rev_data;
+         r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp,
+                                sizeof (dp));
+         if (r != CUDA_SUCCESS)
+           GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
+       }
      }
  
    nvptx_set_clocktick (module, dev);
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c

new file mode 100644 (file)

index 0000000..33bd384
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run }  */
+/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
+
+#pragma omp requires reverse_offload
+
+int
+main ()
+{
+  int A[10];
+  int y;
+
+  for (int i = 0; i < 10; i++)
+    A[i] = 2*i;
+
+  y = 42;
+
+  /* Pointlessly copy to the default device.  */
+  #pragma omp target data map(to: A)
+  {
+    /* Not enclosed in a target region (= i.e. running on the host); the
+       following is valid - it runs on the current device (= host).  */
+    #pragma omp target device ( ancestor:1 ) firstprivate(y) map(to: A)
+    {
+      if (y != 42)
+       __builtin_abort ();
+      for (int i = 0; i < 10; i++)
+       if (A[i] != 2*i)
+         __builtin_abort ();
+      for (int i = 0; i < 10; i++)
+       if (A[i] != 2*i)
+         A[i] = 4*i;
+      y = 31;
+    }
+
+    if (y != 42)
+      __builtin_abort ();
+    for (int i = 0; i < 10; i++)
+      if (A[i] != 2*i)
+       __builtin_abort ();
+  }
+
+  if (y != 42)
+    __builtin_abort ();
+  for (int i = 0; i < 10; i++)
+    if (A[i] != 2*i)
+      __builtin_abort ();
+
+  return 0;
+}
author	Tobias Burnus <tobias@codesourcery.com>
	Fri, 25 Nov 2022 12:48:17 +0000 (13:48 +0100)
committer	Tobias Burnus <tobias@codesourcery.com>
	Fri, 25 Nov 2022 12:48:17 +0000 (13:48 +0100)
libgomp/plugin/plugin-nvptx.c		patch \| blob \| blame \| history
libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c	[new file with mode: 0644]	patch \| blob