]> git.ipfire.org Git - thirdparty/gcc.git/blobdiff - libgomp/oacc-parallel.c
Daily bump.
[thirdparty/gcc.git] / libgomp / oacc-parallel.c
index a3007428a86d4c3c3c9c3bbe74e7b1b5b09f5bc6..16cf3948e2d09bdb591f7fde3fbbac3b66795656 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
 
    Contributed by Mentor Embedded.
 
 
 #include "openacc.h"
 #include "libgomp.h"
-#include "libgomp_g.h"
 #include "gomp-constants.h"
 #include "oacc-int.h"
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>  /* For PRIu64.  */
+#endif
 #include <string.h>
 #include <stdarg.h>
 #include <assert.h>
 
-static int
-find_pset (int pos, size_t mapnum, unsigned short *kinds)
-{
-  if (pos + 1 >= mapnum)
-    return 0;
-
-  unsigned char kind = kinds[pos+1] & 0xff;
 
-  return kind == GOMP_MAP_TO_PSET;
-}
+/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
+   continue to support the following two legacy values.  */
+_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
+               "legacy GOMP_DEVICE_ICV broken");
+_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
+               == GOACC_FLAG_HOST_FALLBACK,
+               "legacy GOMP_DEVICE_HOST_FALLBACK broken");
 
 
-/* Ensure that the target device for DEVICE_TYPE is initialised (and that
-   plugins have been loaded if appropriate).  The ACC_dev variable for the
-   current thread will be set appropriately for the given device type on
-   return.  */
+/* Handle the mapping pairs that are presented when a
+   deviceptr clause is used with Fortran.  */
 
-attribute_hidden void
-select_acc_device (int device_type)
+static void
+handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
+                    unsigned short *kinds)
 {
-  goacc_lazy_initialize ();
+  int i;
 
-  if (device_type == GOMP_DEVICE_HOST_FALLBACK)
-    return;
+  for (i = 0; i < mapnum; i++)
+    {
+      unsigned short kind1 = kinds[i] & 0xff;
 
-  if (device_type == acc_device_none)
-    device_type = acc_device_host;
+      /* Handle Fortran deviceptr clause.  */
+      if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
+       {
+         unsigned short kind2;
 
-  if (device_type >= 0)
-    {
-      /* NOTE: this will go badly if the surrounding data environment is set up
-         to use a different device type.  We'll just have to trust that users
-        know what they're doing...  */
-      acc_set_device_type (device_type);
+         if (i < (signed)mapnum - 1)
+           kind2 = kinds[i + 1] & 0xff;
+         else
+           kind2 = 0xffff;
+
+         if (sizes[i] == sizeof (void *))
+           continue;
+
+         /* At this point, we're dealing with a Fortran deviceptr.
+            If the next element is not what we're expecting, then
+            this is an instance where the deviceptr variable was
+            not used within the region and the pointer was removed
+            by the gimplifier.  */
+         if (kind2 == GOMP_MAP_POINTER
+             && sizes[i + 1] == 0
+             && hostaddrs[i] == *(void **)hostaddrs[i + 1])
+           {
+             kinds[i+1] = kinds[i];
+             sizes[i+1] = sizeof (void *);
+           }
+
+         /* Invalidate the entry.  */
+         hostaddrs[i] = NULL;
+       }
     }
 }
 
-static void goacc_wait (int async, int num_waits, va_list ap);
+
+/* Launch a possibly offloaded function with FLAGS.  FN is the host fn
+   address.  MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
+   blocks to be copied to/from the device.  Variadic arguments are
+   keyed optional parameters terminated with a zero.  */
 
 void
-GOACC_parallel (int device, void (*fn) (void *),
-               size_t mapnum, void **hostaddrs, size_t *sizes,
-               unsigned short *kinds,
-               int num_gangs, int num_workers, int vector_length,
-               int async, int num_waits, ...)
+GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
+                     size_t mapnum, void **hostaddrs, size_t *sizes,
+                     unsigned short *kinds, ...)
 {
-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
+
   va_list ap;
   struct goacc_thread *thr;
   struct gomp_device_descr *acc_dev;
-  struct target_mem_desc *tgt;
-  void **devaddrs;
   unsigned int i;
   struct splay_tree_key_s k;
   splay_tree_key tgt_fn_key;
   void (*tgt_fn);
+  int async = GOMP_ASYNC_SYNC;
+  unsigned dims[GOMP_DIM_MAX];
+  unsigned tag;
+
+#ifdef HAVE_INTTYPES_H
+  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
+             __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
+#else
+  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
+             __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
+#endif
+  goacc_lazy_initialize ();
 
-  if (num_gangs != 1)
-    gomp_fatal ("num_gangs (%d) different from one is not yet supported",
-               num_gangs);
-  if (num_workers != 1)
-    gomp_fatal ("num_workers (%d) different from one is not yet supported",
-               num_workers);
+  thr = goacc_thread ();
+  acc_dev = thr->dev;
 
-  gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
-             __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
+  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
-  select_acc_device (device);
+  acc_prof_info prof_info;
+  if (profiling_p)
+    {
+      thr->prof_info = &prof_info;
+
+      prof_info.event_type = acc_ev_compute_construct_start;
+      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+      prof_info.version = _ACC_PROF_INFO_VERSION;
+      prof_info.device_type = acc_device_type (acc_dev->type);
+      prof_info.device_number = acc_dev->target_id;
+      prof_info.thread_id = -1;
+      prof_info.async = async;
+      prof_info.async_queue = prof_info.async;
+      prof_info.src_file = NULL;
+      prof_info.func_name = NULL;
+      prof_info.line_no = -1;
+      prof_info.end_line_no = -1;
+      prof_info.func_line_no = -1;
+      prof_info.func_end_line_no = -1;
+    }
+  acc_event_info compute_construct_event_info;
+  if (profiling_p)
+    {
+      compute_construct_event_info.other_event.event_type
+       = prof_info.event_type;
+      compute_construct_event_info.other_event.valid_bytes
+       = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+      compute_construct_event_info.other_event.parent_construct
+       = acc_construct_parallel;
+      compute_construct_event_info.other_event.implicit = 0;
+      compute_construct_event_info.other_event.tool_info = NULL;
+    }
+  acc_api_info api_info;
+  if (profiling_p)
+    {
+      thr->api_info = &api_info;
+
+      api_info.device_api = acc_device_api_none;
+      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+      api_info.device_type = prof_info.device_type;
+      api_info.vendor = -1;
+      api_info.device_handle = NULL;
+      api_info.context_handle = NULL;
+      api_info.async_handle = NULL;
+    }
 
-  thr = goacc_thread ();
-  acc_dev = thr->dev;
+  if (profiling_p)
+    goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                             &api_info);
+
+  handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
 
   /* Host fallback if "if" clause is false or if the current device is set to
      the host.  */
-  if (host_fallback)
+  if ((flags & GOACC_FLAG_HOST_FALLBACK)
+      /* TODO: a proper pthreads based "multi-core CPU" local device
+        implementation. Currently, this is still the same as host-fallback.  */
+      || (flags & GOACC_FLAG_LOCAL_DEVICE))
     {
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
       goacc_save_and_set_bind (acc_device_host);
       fn (hostaddrs);
       goacc_restore_bind ();
-      return;
+      goto out_prof;
     }
   else if (acc_device_type (acc_dev->type) == acc_device_host)
     {
       fn (hostaddrs);
-      return;
+      goto out_prof;
     }
 
-  va_start (ap, num_waits);
-  
-  if (num_waits > 0)
-    goacc_wait (async, num_waits, ap);
+  /* Default: let the runtime choose.  */
+  for (i = 0; i != GOMP_DIM_MAX; i++)
+    dims[i] = 0;
 
-  va_end (ap);
+  va_start (ap, kinds);
+  /* TODO: This will need amending when device_type is implemented.  */
+  while ((tag = va_arg (ap, unsigned)) != 0)
+    {
+      if (GOMP_LAUNCH_DEVICE (tag))
+       gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
+                   GOMP_LAUNCH_DEVICE (tag));
+
+      switch (GOMP_LAUNCH_CODE (tag))
+       {
+       case GOMP_LAUNCH_DIM:
+         {
+           unsigned mask = GOMP_LAUNCH_OP (tag);
+
+           for (i = 0; i != GOMP_DIM_MAX; i++)
+             if (mask & GOMP_DIM_MASK (i))
+               dims[i] = va_arg (ap, unsigned);
+         }
+         break;
+
+       case GOMP_LAUNCH_ASYNC:
+         {
+           /* Small constant values are encoded in the operand.  */
+           async = GOMP_LAUNCH_OP (tag);
+
+           if (async == GOMP_LAUNCH_OP_MAX)
+             async = va_arg (ap, unsigned);
+
+           if (profiling_p)
+             {
+               prof_info.async = async;
+               prof_info.async_queue = prof_info.async;
+             }
+
+           break;
+         }
 
-  acc_dev->openacc.async_set_async_func (async);
+       case GOMP_LAUNCH_WAIT:
+         {
+           unsigned num_waits = GOMP_LAUNCH_OP (tag);
+           goacc_wait (async, num_waits, &ap);
+           break;
+         }
 
+       default:
+         gomp_fatal ("unrecognized offload code '%d',"
+                     " libgomp is too old", GOMP_LAUNCH_CODE (tag));
+       }
+    }
+  va_end (ap);
+  
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
       k.host_end = k.host_start + 1;
-      gomp_mutex_lock (&acc_dev->mem_map.lock);
-      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
-      gomp_mutex_unlock (&acc_dev->mem_map.lock);
+      gomp_mutex_lock (&acc_dev->lock);
+      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
+      gomp_mutex_unlock (&acc_dev->lock);
 
       if (tgt_fn_key == NULL)
        gomp_fatal ("target function wasn't mapped");
 
-      tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
+      tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
     }
   else
     tgt_fn = (void (*)) fn;
 
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-                      false);
+  acc_event_info enter_exit_data_event_info;
+  if (profiling_p)
+    {
+      prof_info.event_type = acc_ev_enter_data_start;
+      enter_exit_data_event_info.other_event.event_type
+       = prof_info.event_type;
+      enter_exit_data_event_info.other_event.valid_bytes
+       = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+      enter_exit_data_event_info.other_event.parent_construct
+       = compute_construct_event_info.other_event.parent_construct;
+      enter_exit_data_event_info.other_event.implicit = 1;
+      enter_exit_data_event_info.other_event.tool_info = NULL;
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
+    }
 
-  devaddrs = gomp_alloca (sizeof (void *) * mapnum);
-  for (i = 0; i < mapnum; i++)
-    devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
-                           + tgt->list[i]->tgt_offset);
+  goacc_aq aq = get_goacc_asyncqueue (async);
 
-  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
-                             num_gangs, num_workers, vector_length, async,
-                             tgt);
+  struct target_mem_desc *tgt
+    = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true,
+                     GOMP_MAP_VARS_TARGET);
+
+  if (profiling_p)
+    {
+      prof_info.event_type = acc_ev_enter_data_end;
+      enter_exit_data_event_info.other_event.event_type
+       = prof_info.event_type;
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
+    }
 
-  /* If running synchronously, unmap immediately.  */
-  if (async < acc_async_noval)
-    gomp_unmap_vars (tgt, true);
+  void **devaddrs = (void **) tgt->tgt_start;
+  if (aq == NULL)
+    acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
+                               tgt);
   else
+    acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+                                     dims, tgt, aq);
+
+  if (profiling_p)
     {
-      gomp_copy_from_async (tgt);
-      acc_dev->openacc.register_async_cleanup_func (tgt);
+      prof_info.event_type = acc_ev_exit_data_start;
+      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+      enter_exit_data_event_info.other_event.tool_info = NULL;
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
     }
 
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
+  /* If running synchronously (aq == NULL), this will unmap immediately.  */
+  goacc_unmap_vars (tgt, true, aq);
+
+  if (profiling_p)
+    {
+      prof_info.event_type = acc_ev_exit_data_end;
+      enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
+    }
+
+ out_prof:
+  if (profiling_p)
+    {
+      prof_info.event_type = acc_ev_compute_construct_end;
+      compute_construct_event_info.other_event.event_type
+       = prof_info.event_type;
+      goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                               &api_info);
+
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
 }
 
+/* Legacy entry point (GCC 5).  Only provide host fallback execution.  */
+
 void
-GOACC_data_start (int device, size_t mapnum,
+GOACC_parallel (int flags_m, void (*fn) (void *),
+               size_t mapnum, void **hostaddrs, size_t *sizes,
+               unsigned short *kinds,
+               int num_gangs, int num_workers, int vector_length,
+               int async, int num_waits, ...)
+{
+  goacc_save_and_set_bind (acc_device_host);
+  fn (hostaddrs);
+  goacc_restore_bind ();
+}
+
+void
+GOACC_data_start (int flags_m, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned short *kinds)
 {
-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
+
   struct target_mem_desc *tgt;
 
-  gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
-             __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
+#ifdef HAVE_INTTYPES_H
+  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
+             __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
+#else
+  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
+             __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
+#endif
 
-  select_acc_device (device);
+  goacc_lazy_initialize ();
 
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+  acc_prof_info prof_info;
+  if (profiling_p)
+    {
+      thr->prof_info = &prof_info;
+
+      prof_info.event_type = acc_ev_enter_data_start;
+      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+      prof_info.version = _ACC_PROF_INFO_VERSION;
+      prof_info.device_type = acc_device_type (acc_dev->type);
+      prof_info.device_number = acc_dev->target_id;
+      prof_info.thread_id = -1;
+      prof_info.async = acc_async_sync; /* Always synchronous.  */
+      prof_info.async_queue = prof_info.async;
+      prof_info.src_file = NULL;
+      prof_info.func_name = NULL;
+      prof_info.line_no = -1;
+      prof_info.end_line_no = -1;
+      prof_info.func_line_no = -1;
+      prof_info.func_end_line_no = -1;
+    }
+  acc_event_info enter_data_event_info;
+  if (profiling_p)
+    {
+      enter_data_event_info.other_event.event_type
+       = prof_info.event_type;
+      enter_data_event_info.other_event.valid_bytes
+       = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+      enter_data_event_info.other_event.parent_construct = acc_construct_data;
+      for (int i = 0; i < mapnum; ++i)
+       if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
+           || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
+         {
+           /* If there is one such data mapping kind, then this is actually an
+              OpenACC 'host_data' construct.  (GCC maps the OpenACC
+              'host_data' construct to the OpenACC 'data' construct.)  Apart
+              from artificial test cases (such as an OpenACC 'host_data'
+              construct's (implicit) device initialization when no device
+              data has been set up before...), no meaningful events can
+              really be generated from OpenACC 'host_data' constructs,
+              though.  */
+           enter_data_event_info.other_event.parent_construct
+             = acc_construct_host_data;
+           break;
+         }
+      enter_data_event_info.other_event.implicit = 0;
+      enter_data_event_info.other_event.tool_info = NULL;
+    }
+  acc_api_info api_info;
+  if (profiling_p)
+    {
+      thr->api_info = &api_info;
+
+      api_info.device_api = acc_device_api_none;
+      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+      api_info.device_type = prof_info.device_type;
+      api_info.vendor = -1;
+      api_info.device_handle = NULL;
+      api_info.context_handle = NULL;
+      api_info.async_handle = NULL;
+    }
+
+  if (profiling_p)
+    goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
+
   /* Host fallback or 'do nothing'.  */
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
-      || host_fallback)
+      || (flags & GOACC_FLAG_HOST_FALLBACK)
+      || (flags & GOACC_FLAG_LOCAL_DEVICE))
     {
-      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
+      tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0);
       tgt->prev = thr->mapped_data;
       thr->mapped_data = tgt;
 
-      return;
+      goto out_prof;
     }
 
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-                      false);
+  tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds,
+                       true, 0);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
   tgt->prev = thr->mapped_data;
   thr->mapped_data = tgt;
+
+ out_prof:
+  if (profiling_p)
+    {
+      prof_info.event_type = acc_ev_enter_data_end;
+      enter_data_event_info.other_event.event_type = prof_info.event_type;
+      goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
+
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
 }
 
 void
 GOACC_data_end (void)
 {
   struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
   struct target_mem_desc *tgt = thr->mapped_data;
 
-  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
-  thr->mapped_data = tgt->prev;
-  gomp_unmap_vars (tgt, true);
-  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
-}
-
-void
-GOACC_enter_exit_data (int device, size_t mapnum,
-                      void **hostaddrs, size_t *sizes, unsigned short *kinds,
-                      int async, int num_waits, ...)
-{
-  struct goacc_thread *thr;
-  struct gomp_device_descr *acc_dev;
-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
-  bool data_enter = false;
-  size_t i;
-
-  select_acc_device (device);
+  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
-  thr = goacc_thread ();
-  acc_dev = thr->dev;
-
-  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
-      || host_fallback)
-    return;
-
-  if (num_waits > 0)
+  acc_prof_info prof_info;
+  if (profiling_p)
     {
-      va_list ap;
-
-      va_start (ap, num_waits);
-
-      goacc_wait (async, num_waits, ap);
-
-      va_end (ap);
+      thr->prof_info = &prof_info;
+
+      prof_info.event_type = acc_ev_exit_data_start;
+      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+      prof_info.version = _ACC_PROF_INFO_VERSION;
+      prof_info.device_type = acc_device_type (acc_dev->type);
+      prof_info.device_number = acc_dev->target_id;
+      prof_info.thread_id = -1;
+      prof_info.async = acc_async_sync; /* Always synchronous.  */
+      prof_info.async_queue = prof_info.async;
+      prof_info.src_file = NULL;
+      prof_info.func_name = NULL;
+      prof_info.line_no = -1;
+      prof_info.end_line_no = -1;
+      prof_info.func_line_no = -1;
+      prof_info.func_end_line_no = -1;
     }
-
-  acc_dev->openacc.async_set_async_func (async);
-
-  /* Determine if this is an "acc enter data".  */
-  for (i = 0; i < mapnum; ++i)
+  acc_event_info exit_data_event_info;
+  if (profiling_p)
     {
-      unsigned char kind = kinds[i] & 0xff;
-
-      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
-       continue;
-
-      if (kind == GOMP_MAP_FORCE_ALLOC
-         || kind == GOMP_MAP_FORCE_PRESENT
-         || kind == GOMP_MAP_FORCE_TO)
-       {
-         data_enter = true;
-         break;
-       }
+      exit_data_event_info.other_event.event_type
+       = prof_info.event_type;
+      exit_data_event_info.other_event.valid_bytes
+       = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+      exit_data_event_info.other_event.parent_construct = acc_construct_data;
+      exit_data_event_info.other_event.implicit = 0;
+      exit_data_event_info.other_event.tool_info = NULL;
+    }
+  acc_api_info api_info;
+  if (profiling_p)
+    {
+      thr->api_info = &api_info;
+
+      api_info.device_api = acc_device_api_none;
+      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+      api_info.device_type = prof_info.device_type;
+      api_info.vendor = -1;
+      api_info.device_handle = NULL;
+      api_info.context_handle = NULL;
+      api_info.async_handle = NULL;
+    }
 
-      if (kind == GOMP_MAP_FORCE_DEALLOC
-         || kind == GOMP_MAP_FORCE_FROM)
-       break;
+  if (profiling_p)
+    goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
 
-      gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
-                     kind);
-    }
+  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
+  thr->mapped_data = tgt->prev;
+  goacc_unmap_vars (tgt, true, NULL);
+  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 
-  if (data_enter)
+  if (profiling_p)
     {
-      for (i = 0; i < mapnum; i++)
-       {
-         unsigned char kind = kinds[i] & 0xff;
-
-         /* Scan for PSETs.  */
-         int psets = find_pset (i, mapnum, kinds);
+      prof_info.event_type = acc_ev_exit_data_end;
+      exit_data_event_info.other_event.event_type = prof_info.event_type;
+      goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
 
-         if (!psets)
-           {
-             switch (kind)
-               {
-               case GOMP_MAP_POINTER:
-                 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
-                                       &kinds[i]);
-                 break;
-               case GOMP_MAP_FORCE_ALLOC:
-                 acc_create (hostaddrs[i], sizes[i]);
-                 break;
-               case GOMP_MAP_FORCE_PRESENT:
-                 acc_present_or_copyin (hostaddrs[i], sizes[i]);
-                 break;
-               case GOMP_MAP_FORCE_TO:
-                 acc_present_or_copyin (hostaddrs[i], sizes[i]);
-                 break;
-               default:
-                 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
-                             kind);
-                 break;
-               }
-           }
-         else
-           {
-             gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
-             /* Increment 'i' by two because OpenACC requires fortran
-                arrays to be contiguous, so each PSET is associated with
-                one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
-                one MAP_POINTER.  */
-             i += 2;
-           }
-       }
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
     }
-  else
-    for (i = 0; i < mapnum; ++i)
-      {
-       unsigned char kind = kinds[i] & 0xff;
+}
 
-       int psets = find_pset (i, mapnum, kinds);
+void
+GOACC_update (int flags_m, size_t mapnum,
+             void **hostaddrs, size_t *sizes, unsigned short *kinds,
+             int async, int num_waits, ...)
+{
+  int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
 
-       if (!psets)
-         {
-           switch (kind)
-             {
-             case GOMP_MAP_POINTER:
-               gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
-                                        == GOMP_MAP_FORCE_FROM,
-                                        async, 1);
-               break;
-             case GOMP_MAP_FORCE_DEALLOC:
-               acc_delete (hostaddrs[i], sizes[i]);
-               break;
-             case GOMP_MAP_FORCE_FROM:
-               acc_copyout (hostaddrs[i], sizes[i]);
-               break;
-             default:
-               gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
-                           kind);
-               break;
-             }
-         }
-       else
-         {
-           gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
-                                    == GOMP_MAP_FORCE_FROM, async, 3);
-           /* See the above comment.  */
-           i += 2;
-         }
-      }
+  size_t i;
 
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
-}
+  goacc_lazy_initialize ();
 
-static void
-goacc_wait (int async, int num_waits, va_list ap)
-{
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
-  int i;
 
-  assert (num_waits >= 0);
+  bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
-  if (async == acc_async_sync && num_waits == 0)
+  acc_prof_info prof_info;
+  if (profiling_p)
     {
-      acc_wait_all ();
-      return;
+      thr->prof_info = &prof_info;
+
+      prof_info.event_type = acc_ev_update_start;
+      prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+      prof_info.version = _ACC_PROF_INFO_VERSION;
+      prof_info.device_type = acc_device_type (acc_dev->type);
+      prof_info.device_number = acc_dev->target_id;
+      prof_info.thread_id = -1;
+      prof_info.async = async;
+      prof_info.async_queue = prof_info.async;
+      prof_info.src_file = NULL;
+      prof_info.func_name = NULL;
+      prof_info.line_no = -1;
+      prof_info.end_line_no = -1;
+      prof_info.func_line_no = -1;
+      prof_info.func_end_line_no = -1;
     }
-
-  if (async == acc_async_sync && num_waits)
+  acc_event_info update_event_info;
+  if (profiling_p)
     {
-      for (i = 0; i < num_waits; i++)
-        {
-          int qid = va_arg (ap, int);
-
-          if (acc_async_test (qid))
-            continue;
-
-          acc_wait (qid);
-        }
-      return;
+      update_event_info.other_event.event_type
+       = prof_info.event_type;
+      update_event_info.other_event.valid_bytes
+       = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+      update_event_info.other_event.parent_construct = acc_construct_update;
+      update_event_info.other_event.implicit = 0;
+      update_event_info.other_event.tool_info = NULL;
     }
-
-  if (async == acc_async_noval && num_waits == 0)
+  acc_api_info api_info;
+  if (profiling_p)
     {
-      acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
-      return;
+      thr->api_info = &api_info;
+
+      api_info.device_api = acc_device_api_none;
+      api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+      api_info.device_type = prof_info.device_type;
+      api_info.vendor = -1;
+      api_info.device_handle = NULL;
+      api_info.context_handle = NULL;
+      api_info.async_handle = NULL;
     }
 
-  for (i = 0; i < num_waits; i++)
-    {
-      int qid = va_arg (ap, int);
+  if (profiling_p)
+    goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
-      if (acc_async_test (qid))
-       continue;
+  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+      || (flags & GOACC_FLAG_HOST_FALLBACK))
+    {
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
 
-      /* If we're waiting on the same asynchronous queue as we're launching on,
-         the queue itself will order work as required, so there's no need to
-        wait explicitly.  */
-      if (qid != async)
-       acc_dev->openacc.async_wait_async_func (qid, async);
+      goto out_prof;
     }
-}
 
-void
-GOACC_update (int device, size_t mapnum,
-             void **hostaddrs, size_t *sizes, unsigned short *kinds,
-             int async, int num_waits, ...)
-{
-  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
-  size_t i;
-
-  select_acc_device (device);
-
-  struct goacc_thread *thr = goacc_thread ();
-  struct gomp_device_descr *acc_dev = thr->dev;
-
-  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
-      || host_fallback)
-    return;
-
-  if (num_waits > 0)
+  if (num_waits)
     {
       va_list ap;
 
       va_start (ap, num_waits);
-
-      goacc_wait (async, num_waits, ap);
-
+      goacc_wait (async, num_waits, &ap);
       va_end (ap);
     }
 
-  acc_dev->openacc.async_set_async_func (async);
-
+  bool update_device = false;
   for (i = 0; i < mapnum; ++i)
     {
       unsigned char kind = kinds[i] & 0xff;
@@ -447,12 +646,49 @@ GOACC_update (int device, size_t mapnum,
        case GOMP_MAP_TO_PSET:
          break;
 
+       case GOMP_MAP_ALWAYS_POINTER:
+         if (update_device)
+           {
+             /* Save the contents of the host pointer.  */
+             void *dptr = acc_deviceptr (hostaddrs[i-1]);
+             uintptr_t t = *(uintptr_t *) hostaddrs[i];
+
+             /* Update the contents of the host pointer to reflect
+                the value of the allocated device memory in the
+                previous pointer.  */
+             *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
+             /* TODO: verify that we really cannot use acc_update_device_async
+                here.  */
+             acc_update_device (hostaddrs[i], sizeof (uintptr_t));
+
+             /* Restore the host pointer.  */
+             *(uintptr_t *) hostaddrs[i] = t;
+             update_device = false;
+           }
+         break;
+
+       case GOMP_MAP_TO:
+         if (!acc_is_present (hostaddrs[i], sizes[i]))
+           {
+             update_device = false;
+             break;
+           }
+         /* Fallthru  */
        case GOMP_MAP_FORCE_TO:
-         acc_update_device (hostaddrs[i], sizes[i]);
+         update_device = true;
+         acc_update_device_async (hostaddrs[i], sizes[i], async);
          break;
 
+       case GOMP_MAP_FROM:
+         if (!acc_is_present (hostaddrs[i], sizes[i]))
+           {
+             update_device = false;
+             break;
+           }
+         /* Fallthru  */
        case GOMP_MAP_FORCE_FROM:
-         acc_update_self (hostaddrs[i], sizes[i]);
+         update_device = false;
+         acc_update_self_async (hostaddrs[i], sizes[i], async);
          break;
 
        default:
@@ -461,20 +697,20 @@ GOACC_update (int device, size_t mapnum,
        }
     }
 
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
-}
-
-void
-GOACC_wait (int async, int num_waits, ...)
-{
-  va_list ap;
+ out_prof:
+  if (profiling_p)
+    {
+      prof_info.event_type = acc_ev_update_end;
+      update_event_info.other_event.event_type = prof_info.event_type;
+      goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
-  va_start (ap, num_waits);
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+}
 
-  goacc_wait (async, num_waits, ap);
 
-  va_end (ap);
-}
+/* Legacy entry point (GCC 5).  */
 
 int
 GOACC_get_num_threads (void)
@@ -482,6 +718,8 @@ GOACC_get_num_threads (void)
   return 1;
 }
 
+/* Legacy entry point (GCC 5).  */
+
 int
 GOACC_get_thread_num (void)
 {