]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
[og9] OpenACC profiling-interface fixes for asynchronous operations
authorJulian Brown <julian@codesourcery.com>
Wed, 11 Sep 2019 03:34:45 +0000 (20:34 -0700)
committerThomas Schwinge <thomas@codesourcery.com>
Tue, 3 Mar 2020 11:51:25 +0000 (12:51 +0100)
libgomp/
* oacc-host.c (host_openacc_async_queue_callback): Invoke callback
function immediately.
* oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
queue_async_prof_dispatch): New.
(GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
profile-event dispatches.
(GOACC_enter_exit_data): Likewise.
(GOACC_update): Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
(cb_compute_construct_start): Remove/fix TODO.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
(cb_exit_data_start): Tweak expected state values.
(cb_exit_data_end): Likewise.
(cb_compute_construct_start): Remove/fix TODO.
(cb_compute_construct_end): Don't do adjustments for
acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
(cb_compute_construct_end): Tweak expected state values.
(cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
launch-enqueue operations to happen synchronously with respect to
profiling events on async streams.
(main): Tweak expected state values.
* testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
operations for async-safety.

(cherry picked from openacc-gcc-9-branch commit
9bd8ebbc1a1e7a3263571105b3c1237080e9eea7)

libgomp/ChangeLog.omp
libgomp/oacc-host.c
libgomp/oacc-parallel.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c

index 41e05e9c61b26d4d9387408f52d3e3d6e1543367..5f39fae6f5177f143b6a34fdb2a661aad0205373 100644 (file)
@@ -1,3 +1,29 @@
+2019-09-17  Julian Brown  <julian@codesourcery.com>
+
+       * oacc-host.c (host_openacc_async_queue_callback): Invoke callback
+       function immediately.
+       * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
+       queue_async_prof_dispatch): New.
+       (GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
+       profile-event dispatches.
+       (GOACC_enter_exit_data): Likewise.
+       (GOACC_update): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+       (cb_compute_construct_start): Remove/fix TODO.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+       (cb_exit_data_start): Tweak expected state values.
+       (cb_exit_data_end): Likewise.
+       (cb_compute_construct_start): Remove/fix TODO.
+       (cb_compute_construct_end): Don't do adjustments for
+       acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
+       (cb_compute_construct_end): Tweak expected state values.
+       (cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
+       launch-enqueue operations to happen synchronously with respect to
+       profiling events on async streams.
+       (main): Tweak expected state values.
+       * testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
+       operations for async-safety.
+
 2019-09-17  Julian Brown  <julian@codesourcery.com>
 
        * target.c (gomp_map_vars_internal): Remove read of uninitialised
index 21f73302f035e0012715c69b45ef54231f7a7f0e..0231b597114d60c942eac84cd1f43efa56456ba6 100644 (file)
@@ -250,10 +250,9 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)),
 static void
 host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
                                   __attribute__ ((unused)),
-                                  void (*callback_fn)(void *)
-                                  __attribute__ ((unused)),
-                                  void *userptr __attribute__ ((unused)))
+                                  void (*callback_fn)(void *), void *userptr)
 {
+  callback_fn (userptr);
 }
 
 static struct goacc_asyncqueue *
index 1bd0775f226a40e71e1eee037b97fe671724332c..0c9cb3c461cf1754353d74d8b1be77ba52004916 100644 (file)
@@ -169,6 +169,62 @@ goacc_call_host_fn (void (*fn) (void *), size_t mapnum, void **hostaddrs,
   fn (hostaddrs);
 }
 
+struct async_prof_callback_info {
+  acc_prof_info prof_info;
+  acc_event_info event_info;
+  acc_api_info api_info;
+  struct async_prof_callback_info *start_info;
+};
+
+static void
+async_prof_dispatch (void *ptr)
+{
+  struct async_prof_callback_info *info
+    = (struct async_prof_callback_info *) ptr;
+
+  if (info->start_info)
+    {
+      /* The TOOL_INFO must be preserved from a start event to the
+        corresponding end event.  Copy that here.  */
+      void *tool_info = info->start_info->event_info.other_event.tool_info;
+      info->event_info.other_event.tool_info = tool_info;
+    }
+
+  goacc_profiling_dispatch (&info->prof_info, &info->event_info,
+                           &info->api_info);
+
+  /* The async_prof_dispatch function is (so far) always used for start/end
+     profiling event pairs: the start and end parts are queued, then each is
+     dispatched (or the dispatches might be interleaved before the end part is
+     queued).
+     In any case, it's not safe to delete either info structure before the
+     whole bracketed event is complete.  */
+
+  if (info->start_info)
+    {
+      free (info->start_info);
+      free (info);
+    }
+}
+
+static struct async_prof_callback_info *
+queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
+                          acc_prof_info *prof_info, acc_event_info *event_info,
+                          acc_api_info *api_info,
+                          struct async_prof_callback_info *prev_info)
+{
+  struct async_prof_callback_info *info = malloc (sizeof (*info));
+
+  info->prof_info = *prof_info;
+  info->event_info = *event_info;
+  info->api_info = *api_info;
+  info->start_info = prev_info;
+
+  devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch,
+                                             (void *) info);
+  return info;
+}
+
 /* Launch a possibly offloaded function with FLAGS.  FN is the host fn
    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
    blocks to be copied to/from the device.  Varadic arguments are
@@ -194,6 +250,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
   unsigned dims[GOMP_DIM_MAX];
   unsigned tag;
   bool args_exploded = false;
+  struct async_prof_callback_info *comp_start_info = NULL,
+                                 *data_start_info = NULL;
 
 #ifdef HAVE_INTTYPES_H
   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
@@ -255,10 +313,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
       api_info.async_handle = NULL;
     }
 
-  if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-                             &api_info);
-
   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
 
   /* Default: let the runtime choose.  */
@@ -294,11 +348,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
            if (async == GOMP_LAUNCH_OP_MAX)
              async = va_arg (ap, unsigned);
 
-           if (profiling_p)
-             {
-               prof_info.async = async;
-               prof_info.async_queue = prof_info.async;
-             }
+           /* Set async number in profiling data, unless the device is the
+              host or we're doing host fallback.  */
+           if (profiling_p
+               && !(flags & GOACC_FLAG_HOST_FALLBACK)
+               && acc_device_type (acc_dev->type) != acc_device_host)
+             prof_info.async = prof_info.async_queue = async;
 
            break;
          }
@@ -321,6 +376,20 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
     }
   va_end (ap);
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  if (profiling_p)
+    {
+      if (aq)
+       comp_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &compute_construct_event_info,
+                                      &api_info, NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                                 &api_info);
+    }
+
   /* Host fallback if "if" clause is false or if the current device is set to
      the host.  */
   if (flags & GOACC_FLAG_HOST_FALLBACK)
@@ -368,12 +437,16 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
        = compute_construct_event_info.other_event.parent_construct;
       enter_exit_data_event_info.other_event.implicit = 1;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       data_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &enter_exit_data_event_info, &api_info,
+                                      NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
   tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
                             true, GOMP_MAP_VARS_OPENACC);
 
@@ -391,8 +464,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
       prof_info.event_type = acc_ev_enter_data_end;
       enter_exit_data_event_info.other_event.event_type
        = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &enter_exit_data_event_info, &api_info,
+                                  data_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
@@ -423,8 +501,14 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
       prof_info.event_type = acc_ev_exit_data_start;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       data_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &enter_exit_data_event_info, &api_info,
+                                      NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
   /* If running synchronously, unmap immediately.  */
@@ -437,8 +521,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
     {
       prof_info.event_type = acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &enter_exit_data_event_info, &api_info,
+                                  data_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
     }
 
 #ifdef RC_CHECKING
@@ -453,8 +542,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum,
       prof_info.event_type = acc_ev_compute_construct_end;
       compute_construct_event_info.other_event.event_type
        = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &compute_construct_event_info, &api_info,
+                                  comp_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                                 &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
@@ -697,6 +791,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
   struct gomp_device_descr *acc_dev;
   bool data_enter = false;
   size_t i;
+  struct async_prof_callback_info *data_start_info = NULL;
 
   goacc_lazy_initialize ();
 
@@ -806,9 +901,19 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
       api_info.async_handle = NULL;
     }
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
   if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                             &api_info);
+    {
+      if (aq)
+       data_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &enter_exit_data_event_info, &api_info,
+                                      NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
+    }
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -867,7 +972,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
                case GOMP_MAP_STRUCT:
                  {
                    int elems = sizes[i];
-                   goacc_aq aq = get_goacc_asyncqueue (async);
                    gomp_map_vars_async (acc_dev, aq, elems + 1, &hostaddrs[i],
                                         NULL, &sizes[i], &kinds[i], true,
                                         GOMP_MAP_VARS_OPENACC_ENTER_DATA);
@@ -890,7 +994,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
                                           &sizes[i], &kinds[i]);
              else
                {
-                 goacc_aq aq = get_goacc_asyncqueue (async);
                  for (int j = 0; j < 2; j++)
                    gomp_map_vars_async (acc_dev, aq,
                                         (j == 0 || pointer == 2) ? 1 : 2,
@@ -1003,7 +1106,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
                case GOMP_MAP_STRUCT:
                  {
                    int elems = sizes[i];
-                   goacc_aq aq = get_goacc_asyncqueue (async);
                    for (int j = 1; j <= elems; j++)
                      {
                        struct splay_tree_key_s k;
@@ -1067,8 +1169,13 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
       prof_info.event_type
        = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                               &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                  &enter_exit_data_event_info, &api_info,
+                                  data_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                                 &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
@@ -1120,6 +1227,8 @@ GOACC_update (int flags_m, size_t mapnum,
 
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  goacc_aq aq = NULL;
+  struct async_prof_callback_info *update_start_info = NULL;
 
   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
@@ -1169,7 +1278,15 @@ GOACC_update (int flags_m, size_t mapnum,
     }
 
   if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+    {
+      aq = get_goacc_asyncqueue (async);
+      if (aq)
+       update_start_info
+         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+                                      &update_event_info, &api_info, NULL);
+      else
+       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+    }
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -1257,7 +1374,11 @@ GOACC_update (int flags_m, size_t mapnum,
     {
       prof_info.event_type = acc_ev_update_end;
       update_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+      if (aq)
+       queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info,
+                                  &api_info, update_start_info);
+      else
+       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
index cf980f1baec5a6c61b368eac789f699d0f9055b0..1af53cb72b9c1dd30645d3243bec1f993ee5ec5a 100644 (file)
@@ -159,7 +159,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
+  if (acc_device_type == acc_device_host)
+    assert (prof_info->async == acc_async_sync);
+  else
+    assert (prof_info->async == acc_async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
index 5d39251159233cda8350e4fa0edc1376693e4ccd..0cb0369168b490362c99e3e6e334d7a5f17f7821 100644 (file)
@@ -284,9 +284,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 7
+  assert (state == 5
 #if ASYNC_EXIT_DATA
-         || state == 107
+         || state == 105
 #endif
          );
   STATE_OP (state, ++);
@@ -340,9 +340,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 8
+  assert (state == 6
 #if ASYNC_EXIT_DATA
-         || state == 108
+         || state == 106
 #endif
          );
   STATE_OP (state, ++);
@@ -426,7 +426,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
+  if (acc_device_type == acc_device_host)
+    assert (prof_info->async == acc_async_sync);
+  else
+    assert (prof_info->async == acc_async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
@@ -467,9 +470,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
     {
       /* Compensate for the missing 'acc_ev_enter_data_end'.  */
       state += 1;
-      /* Compensate for the missing 'acc_ev_enqueue_launch_start' and
-        'acc_ev_enqueue_launch_end'.  */
-      state += 2;
       /* Compensate for the missing 'acc_ev_exit_data_start' and
         'acc_ev_exit_data_end'.  */
       state += 2;
@@ -482,8 +482,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
       state += 2;
     }
 #endif
-  assert (state == 9
-         || state == 109);
+  assert (state == 7
+         || state == 107);
   STATE_OP (state, ++);
 
   assert (tool_info != NULL);
@@ -537,17 +537,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
 
   assert (acc_device_type != acc_device_host);
 
-  assert (state == 5
-         || state == 105);
-  STATE_OP (state, ++);
-
-  assert (tool_info != NULL);
-  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-  assert (tool_info->nested == NULL);
-  tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
-  assert (tool_info->nested != NULL);
-  tool_info->nested->nested = NULL;
-
   assert (prof_info->event_type == acc_ev_enqueue_launch_start);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -591,13 +580,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
-
-  tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type;
-  tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name);
-  tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs;
-  tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers;
-  tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length;
-  event_info->other_event.tool_info = tool_info->nested;
 }
 
 static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@@ -606,19 +588,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
 
   assert (acc_device_type != acc_device_host);
 
-  assert (state == 6
-         || state == 106);
-  STATE_OP (state, ++);
-
-  assert (tool_info != NULL);
-  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-  assert (tool_info->nested != NULL);
-  assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start);
-  assert (tool_info->nested->event_info.launch_event.kernel_name != NULL);
-  assert (tool_info->nested->event_info.launch_event.num_gangs >= 1);
-  assert (tool_info->nested->event_info.launch_event.num_workers >= 1);
-  assert (tool_info->nested->event_info.launch_event.vector_length >= 1);
-
   assert (prof_info->event_type == acc_ev_enqueue_launch_end);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -638,12 +607,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
   assert (event_info->launch_event.parent_construct == acc_construct_parallel);
   assert (event_info->launch_event.implicit == 1);
-  assert (event_info->launch_event.tool_info == tool_info->nested);
   assert (event_info->launch_event.kernel_name != NULL);
-  assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0);
-  assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs);
-  assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers);
-  assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length);
 
   if (acc_device_type == acc_device_host)
     assert (api_info->device_api == acc_device_api_none);
@@ -657,10 +621,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
-
-  free ((void *) tool_info->nested->event_info.launch_event.kernel_name);
-  free (tool_info->nested);
-  tool_info->nested = NULL;
 }
 
 
@@ -707,7 +667,7 @@ int main()
     }
     assert (state_init == 4);
   }
-  assert (state == 10);
+  assert (state == 8);
 
   STATE_OP (state, = 100);
 
@@ -723,7 +683,7 @@ int main()
 #pragma acc wait
     assert (state_init == 104);
   }
-  assert (state == 110);
+  assert (state == 108);
 
   return 0;
 }
index 54497237b0c08229e45bcbaa4860208c9c21ac8c..baa3ac83f04adb7d6980ef395e9b81cdd1f1687b 100644 (file)
@@ -22,10 +22,10 @@ main (int argc, char **argv)
 
   acc_copyin_async (h, N, async);
 
-  memset (h, 0, N);
-
   acc_wait (async);
 
+  memset (h, 0, N);
+
   acc_copyout_async (h, N, async + 1);
 
   acc_wait (async + 1);