]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
OpenACC profiling-interface fixes for asynchronous operations
authorJulian Brown <julian@codesourcery.com>
Wed, 11 Sep 2019 03:34:45 +0000 (20:34 -0700)
committerKwok Cheung Yeung <kcy@codesourcery.com>
Thu, 22 Apr 2021 17:00:38 +0000 (10:00 -0700)
libgomp/
* oacc-host.c (host_openacc_async_queue_callback): Invoke callback
function immediately.
* oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
queue_async_prof_dispatch): New.
(GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
profile-event dispatches.
(GOACC_update): Likewise.
* oacc-mem.c (GOACC_enter_exit_data): Call queue_async_prof_dispatch
for asynchronous profile-event dispatches.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
(cb_compute_construct_start): Remove/fix TODO.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
(cb_exit_data_start): Tweak expected state values.
(cb_exit_data_end): Likewise.
(cb_compute_construct_start): Remove/fix TODO.
(cb_compute_construct_end): Don't do adjustments for
acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
(cb_compute_construct_end): Tweak expected state values.
(cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
launch-enqueue operations to happen synchronously with respect to
profiling events on async streams.
(main): Tweak expected state values.
* testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
operations for async-safety.

libgomp/ChangeLog.omp
libgomp/oacc-host.c
libgomp/oacc-mem.c
libgomp/oacc-parallel.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c

index 3ea7389fc56707ca75d68498ac2c519df9a7428a..ff5b8dc785def425ed495ae42097c562d6add3cf 100644 (file)
@@ -1,3 +1,30 @@
+2019-09-17  Julian Brown  <julian@codesourcery.com>
+
+       * oacc-host.c (host_openacc_async_queue_callback): Invoke callback
+       function immediately.
+       * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
+       queue_async_prof_dispatch): New.
+       (GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
+       profile-event dispatches.
+       (GOACC_update): Likewise.
+       * oacc-mem.c (GOACC_enter_exit_data): Call queue_async_prof_dispatch
+       for asynchronous profile-event dispatches.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+       (cb_compute_construct_start): Remove/fix TODO.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+       (cb_exit_data_start): Tweak expected state values.
+       (cb_exit_data_end): Likewise.
+       (cb_compute_construct_start): Remove/fix TODO.
+       (cb_compute_construct_end): Don't do adjustments for
+       acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
+       (cb_compute_construct_end): Tweak expected state values.
+       (cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
+       launch-enqueue operations to happen synchronously with respect to
+       profiling events on async streams.
+       (main): Tweak expected state values.
+       * testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
+       operations for async-safety.
+
 2019-09-17  Julian Brown  <julian@codesourcery.com>
 
        * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (check): Skip
index f3bbd2b9c61324eb8688589fffa4ad6f8332a6af..1cbff4caacec82c26582af31355f33097a5aa369 100644 (file)
@@ -204,10 +204,9 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)),
 static void
 host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
                                   __attribute__ ((unused)),
-                                  void (*callback_fn)(void *)
-                                  __attribute__ ((unused)),
-                                  void *userptr __attribute__ ((unused)))
+                                  void (*callback_fn)(void *), void *userptr)
 {
+  callback_fn (userptr);
 }
 
 static struct goacc_asyncqueue *
index 405574dfa2b5ba6275929ee56c7ff179aa14f53b..2636fb8f166d0ced77abba40829e583c9284f5ad 100644 (file)
@@ -1320,6 +1320,12 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
   gomp_mutex_unlock (&acc_dev->lock);
 }
 
+struct async_prof_callback_info *
+queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
+                          acc_prof_info *prof_info, acc_event_info *event_info,
+                          acc_api_info *api_info,
+                          struct async_prof_callback_info *prev_info);
+
 void
 GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
                       size_t *sizes, unsigned short *kinds, int async,
@@ -1331,6 +1337,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
   struct gomp_device_descr *acc_dev;
   bool data_enter = false;
   size_t i;
+  struct async_prof_callback_info *data_start_info = NULL;
 
   goacc_lazy_initialize ();
 
@@ -1419,9 +1426,19 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
       api_info.async_handle = NULL;
     }
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
   if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                             &api_info);
+    {
+      if (aq)
+       data_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &enter_exit_data_event_info, &api_info,
+                                      NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
+    }
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -1441,8 +1458,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
       va_end (ap);
     }
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
   if (data_enter)
     goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
   else
@@ -1454,8 +1469,13 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
       prof_info.event_type
        = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &enter_exit_data_event_info, &api_info,
+                                  data_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
index 72b25fd17a2f273911b392b029ec554fdc482b16..9ac9a81469cba8575997eaaa2745fa38b4aba457 100644 (file)
@@ -259,6 +259,62 @@ handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
 }
 
 
+struct async_prof_callback_info {
+  acc_prof_info prof_info;
+  acc_event_info event_info;
+  acc_api_info api_info;
+  struct async_prof_callback_info *start_info;
+};
+
+static void
+async_prof_dispatch (void *ptr)
+{
+  struct async_prof_callback_info *info
+    = (struct async_prof_callback_info *) ptr;
+
+  if (info->start_info)
+    {
+      /* The TOOL_INFO must be preserved from a start event to the
+        corresponding end event.  Copy that here.  */
+      void *tool_info = info->start_info->event_info.other_event.tool_info;
+      info->event_info.other_event.tool_info = tool_info;
+    }
+
+  goacc_profiling_dispatch (&info->prof_info, &info->event_info,
+                           &info->api_info);
+
+  /* The async_prof_dispatch function is (so far) always used for start/end
+     profiling event pairs: the start and end parts are queued, then each is
+     dispatched (or the dispatches might be interleaved before the end part is
+     queued).
+     In any case, it's not safe to delete either info structure before the
+     whole bracketed event is complete.  */
+
+  if (info->start_info)
+    {
+      free (info->start_info);
+      free (info);
+    }
+}
+
+struct async_prof_callback_info *
+queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
+                          acc_prof_info *prof_info, acc_event_info *event_info,
+                          acc_api_info *api_info,
+                          struct async_prof_callback_info *prev_info)
+{
+  struct async_prof_callback_info *info = malloc (sizeof (*info));
+
+  info->prof_info = *prof_info;
+  info->event_info = *event_info;
+  info->api_info = *api_info;
+  info->start_info = prev_info;
+
+  devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch,
+                                             (void *) info);
+  return info;
+}
+
 /* Launch a possibly offloaded function with FLAGS.  FN is the host fn
    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
    blocks to be copied to/from the device.  Varadic arguments are
@@ -284,6 +340,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
   unsigned dims[GOMP_DIM_MAX];
   unsigned tag;
   struct goacc_ncarray_info *nca_info = NULL;
+  struct async_prof_callback_info *comp_start_info = NULL,
+                                 *data_start_info = NULL;
 
 #ifdef HAVE_INTTYPES_H
   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
@@ -345,31 +403,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       api_info.async_handle = NULL;
     }
 
-  if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-                             &api_info);
-
   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
 
-  /* Host fallback if "if" clause is false or if the current device is set to
-     the host.  */
-  if (flags & GOACC_FLAG_HOST_FALLBACK)
-    {
-      prof_info.device_type = acc_device_host;
-      api_info.device_type = prof_info.device_type;
-      goacc_save_and_set_bind (acc_device_host);
-      fn (hostaddrs);
-      goacc_restore_bind ();
-      goto out_prof;
-    }
-  else if (acc_device_type (acc_dev->type) == acc_device_host)
-    {
-      fn (hostaddrs);
-      goto out_prof;
-    }
-  else if (profiling_p)
-    api_info.device_api = acc_device_api_cuda;
-
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
     dims[i] = 0;
@@ -402,11 +437,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
            if (async == GOMP_LAUNCH_OP_MAX)
              async = va_arg (ap, unsigned);
 
-           if (profiling_p)
-             {
-               prof_info.async = async;
-               prof_info.async_queue = prof_info.async;
-             }
+           /* Set async number in profiling data, unless the device is the
+              host or we're doing host fallback.  */
+           if (profiling_p
+               && !(flags & GOACC_FLAG_HOST_FALLBACK)
+               && acc_device_type (acc_dev->type) != acc_device_host)
+             prof_info.async = prof_info.async_queue = async;
 
            break;
          }
@@ -434,6 +470,39 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 
   va_end (ap);
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  if (profiling_p)
+    {
+      if (aq)
+       comp_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &compute_construct_event_info,
+                                      &api_info, NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                                 &api_info);
+    }
+
+  /* Host fallback if "if" clause is false or if the current device is set to
+     the host.  */
+  if (flags & GOACC_FLAG_HOST_FALLBACK)
+    {
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
+      goacc_save_and_set_bind (acc_device_host);
+      fn (hostaddrs);
+      goacc_restore_bind ();
+      goto out_prof;
+    }
+  else if (acc_device_type (acc_dev->type) == acc_device_host)
+    {
+      fn (hostaddrs);
+      goto out_prof;
+    }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
+
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -462,12 +531,16 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
        = compute_construct_event_info.other_event.parent_construct;
       enter_exit_data_event_info.other_event.implicit = 1;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       data_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &enter_exit_data_event_info, &api_info,
+                                      NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
   tgt = gomp_map_vars_openacc (acc_dev, aq, mapnum, hostaddrs, sizes, kinds,
                               nca_info);
   free (nca_info);
@@ -486,8 +559,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_enter_data_end;
       enter_exit_data_event_info.other_event.event_type
        = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &enter_exit_data_event_info, &api_info,
+                                  data_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
   
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
@@ -506,8 +584,14 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_exit_data_start;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       data_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &enter_exit_data_event_info, &api_info,
+                                      NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
   /* If running synchronously, unmap immediately.  */
@@ -520,8 +604,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
     {
       prof_info.event_type = acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &enter_exit_data_event_info, &api_info,
+                                  data_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
  out_prof:
@@ -530,8 +619,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_compute_construct_end;
       compute_construct_event_info.other_event.event_type
        = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &compute_construct_event_info, &api_info,
+                                  comp_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                                 &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
@@ -787,6 +881,8 @@ GOACC_update (int flags_m, size_t mapnum,
 
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  goacc_aq aq = NULL;
+  struct async_prof_callback_info *update_start_info = NULL;
 
   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
@@ -836,7 +932,15 @@ GOACC_update (int flags_m, size_t mapnum,
     }
 
   if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+    {
+      aq = get_goacc_asyncqueue (async);
+      if (aq)
+       update_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &update_event_info, &api_info, NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+    }
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -923,7 +1027,11 @@ GOACC_update (int flags_m, size_t mapnum,
     {
       prof_info.event_type = acc_ev_update_end;
       update_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info,
+                                  &api_info, update_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
index c0982461da5240d09fffca6528c854f7332a6d16..fd7705d6b45b2c66d1f85d393691eb6f22281e77 100644 (file)
@@ -159,7 +159,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
+  if (acc_device_type == acc_device_host)
+    assert (prof_info->async == acc_async_sync);
+  else
+    assert (prof_info->async == acc_async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
index f35c52af572a3b96abc581947fef2765ebcd37a4..c4e49745eaddcd51b91d48dcebbf51917983d4f4 100644 (file)
@@ -284,9 +284,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 7
+  assert (state == 5
 #if ASYNC_EXIT_DATA
-         || state == 107
+         || state == 105
 #endif
          );
   STATE_OP (state, ++);
@@ -340,9 +340,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 8
+  assert (state == 6
 #if ASYNC_EXIT_DATA
-         || state == 108
+         || state == 106
 #endif
          );
   STATE_OP (state, ++);
@@ -426,7 +426,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
+  if (acc_device_type == acc_device_host)
+    assert (prof_info->async == acc_async_sync);
+  else
+    assert (prof_info->async == acc_async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
@@ -467,9 +470,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
     {
       /* Compensate for the missing 'acc_ev_enter_data_end'.  */
       state += 1;
-      /* Compensate for the missing 'acc_ev_enqueue_launch_start' and
-        'acc_ev_enqueue_launch_end'.  */
-      state += 2;
       /* Compensate for the missing 'acc_ev_exit_data_start' and
         'acc_ev_exit_data_end'.  */
       state += 2;
@@ -482,8 +482,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
       state += 2;
     }
 #endif
-  assert (state == 9
-         || state == 109);
+  assert (state == 7
+         || state == 107);
   STATE_OP (state, ++);
 
   assert (tool_info != NULL);
@@ -537,17 +537,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
 
   assert (acc_device_type != acc_device_host);
 
-  assert (state == 5
-         || state == 105);
-  STATE_OP (state, ++);
-
-  assert (tool_info != NULL);
-  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-  assert (tool_info->nested == NULL);
-  tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
-  assert (tool_info->nested != NULL);
-  tool_info->nested->nested = NULL;
-
   assert (prof_info->event_type == acc_ev_enqueue_launch_start);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -591,13 +580,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
-
-  tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type;
-  tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name);
-  tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs;
-  tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers;
-  tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length;
-  event_info->other_event.tool_info = tool_info->nested;
 }
 
 static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@@ -606,19 +588,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
 
   assert (acc_device_type != acc_device_host);
 
-  assert (state == 6
-         || state == 106);
-  STATE_OP (state, ++);
-
-  assert (tool_info != NULL);
-  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-  assert (tool_info->nested != NULL);
-  assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start);
-  assert (tool_info->nested->event_info.launch_event.kernel_name != NULL);
-  assert (tool_info->nested->event_info.launch_event.num_gangs >= 1);
-  assert (tool_info->nested->event_info.launch_event.num_workers >= 1);
-  assert (tool_info->nested->event_info.launch_event.vector_length >= 1);
-
   assert (prof_info->event_type == acc_ev_enqueue_launch_end);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -638,12 +607,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
   assert (event_info->launch_event.parent_construct == acc_construct_parallel);
   assert (event_info->launch_event.implicit == 1);
-  assert (event_info->launch_event.tool_info == tool_info->nested);
   assert (event_info->launch_event.kernel_name != NULL);
-  assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0);
-  assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs);
-  assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers);
-  assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length);
 
   if (acc_device_type == acc_device_host)
     assert (api_info->device_api == acc_device_api_none);
@@ -657,10 +621,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
-
-  free ((void *) tool_info->nested->event_info.launch_event.kernel_name);
-  free (tool_info->nested);
-  tool_info->nested = NULL;
 }
 
 
@@ -707,7 +667,7 @@ int main()
     }
     assert (state_init == 4);
   }
-  assert (state == 10);
+  assert (state == 8);
 
   STATE_OP (state, = 100);
 
@@ -723,7 +683,7 @@ int main()
 #pragma acc wait
     assert (state_init == 104);
   }
-  assert (state == 110);
+  assert (state == 108);
 
   return 0;
 }
index 54497237b0c08229e45bcbaa4860208c9c21ac8c..baa3ac83f04adb7d6980ef395e9b81cdd1f1687b 100644 (file)
@@ -22,10 +22,10 @@ main (int argc, char **argv)
 
   acc_copyin_async (h, N, async);
 
-  memset (h, 0, N);
-
   acc_wait (async);
 
+  memset (h, 0, N);
+
   acc_copyout_async (h, N, async + 1);
 
   acc_wait (async + 1);