]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Revert "OpenACC profiling-interface fixes for asynchronous operations"
authorThomas Schwinge <thomas@codesourcery.com>
Thu, 2 Mar 2023 10:28:24 +0000 (11:28 +0100)
committerThomas Schwinge <thomas@codesourcery.com>
Fri, 10 Mar 2023 11:22:25 +0000 (12:22 +0100)
There is occasional execution failure; these changes need to be reviewed.

This reverts og12 commit 719f93c8618a134f90b5b661ab70c918d659ad05.

libgomp/
* oacc-host.c: Revert
"OpenACC profiling-interface fixes for asynchronous operations"
changes.
* oacc-mem.c: Likewise.
* oacc-parallel.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
Likewise.

libgomp/ChangeLog.omp
libgomp/oacc-host.c
libgomp/oacc-mem.c
libgomp/oacc-parallel.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c

index d55b050392016db24640aaee7575e13533bccb87..0e984754bb01b2275d277fc99e60ce39dc5b6ae5 100644 (file)
@@ -1,5 +1,14 @@
 2023-03-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+       * oacc-host.c: Revert
+       "OpenACC profiling-interface fixes for asynchronous operations"
+       changes.
+       * oacc-mem.c: Likewise.
+       * oacc-parallel.c: Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
+       Likewise.
+
        * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Revert
        "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c"
        changes.
index afc99c5a3741006d8e15db44aac57acd2c7c323c..94792abe5ceef7214bd921dabc782c046efd6541 100644 (file)
@@ -215,9 +215,10 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)),
 static void
 host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
                                   __attribute__ ((unused)),
-                                  void (*callback_fn)(void *), void *userptr)
+                                  void (*callback_fn)(void *)
+                                  __attribute__ ((unused)),
+                                  void *userptr __attribute__ ((unused)))
 {
-  callback_fn (userptr);
 }
 
 static struct goacc_asyncqueue *
index 4b7d306f4024bf1e0c7579732a2335902baf2d49..6fb8be98542ccf2bad663100ca26b88c62b5ab7c 100644 (file)
@@ -1431,12 +1431,6 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
   gomp_mutex_unlock (&acc_dev->lock);
 }
 
-struct async_prof_callback_info *
-queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
-                          acc_prof_info *prof_info, acc_event_info *event_info,
-                          acc_api_info *api_info,
-                          struct async_prof_callback_info *prev_info);
-
 static void
 goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
                                size_t *sizes, unsigned short *kinds,
@@ -1447,7 +1441,6 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
 
   struct goacc_thread *thr;
   struct gomp_device_descr *acc_dev;
-  struct async_prof_callback_info *data_start_info = NULL;
 
   goacc_lazy_initialize ();
 
@@ -1503,19 +1496,9 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
       api_info.async_handle = NULL;
     }
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
   if (profiling_p)
-    {
-      if (aq)
-       data_start_info
-         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                      &enter_exit_data_event_info, &api_info,
-                                      NULL);
-      else
-       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                                 &api_info);
-    }
+    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                             &api_info);
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -1529,6 +1512,8 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
   if (num_waits)
     goacc_wait (async, num_waits, ap);
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
   if (data_enter)
     goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
   else
@@ -1540,13 +1525,8 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
       prof_info.event_type
        = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      if (aq)
-       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                  &enter_exit_data_event_info, &api_info,
-                                  data_start_info);
-      else
-       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                                 &api_info);
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
index 81e8eba422531058e8430106958dc776e051079e..d66bc882a5f0b202ace366e2704975bf8b0f36fb 100644 (file)
@@ -259,62 +259,6 @@ handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
 }
 
 
-struct async_prof_callback_info {
-  acc_prof_info prof_info;
-  acc_event_info event_info;
-  acc_api_info api_info;
-  struct async_prof_callback_info *start_info;
-};
-
-static void
-async_prof_dispatch (void *ptr)
-{
-  struct async_prof_callback_info *info
-    = (struct async_prof_callback_info *) ptr;
-
-  if (info->start_info)
-    {
-      /* The TOOL_INFO must be preserved from a start event to the
-        corresponding end event.  Copy that here.  */
-      void *tool_info = info->start_info->event_info.other_event.tool_info;
-      info->event_info.other_event.tool_info = tool_info;
-    }
-
-  goacc_profiling_dispatch (&info->prof_info, &info->event_info,
-                           &info->api_info);
-
-  /* The async_prof_dispatch function is (so far) always used for start/end
-     profiling event pairs: the start and end parts are queued, then each is
-     dispatched (or the dispatches might be interleaved before the end part is
-     queued).
-     In any case, it's not safe to delete either info structure before the
-     whole bracketed event is complete.  */
-
-  if (info->start_info)
-    {
-      free (info->start_info);
-      free (info);
-    }
-}
-
-struct async_prof_callback_info *
-queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
-                          acc_prof_info *prof_info, acc_event_info *event_info,
-                          acc_api_info *api_info,
-                          struct async_prof_callback_info *prev_info)
-{
-  struct async_prof_callback_info *info = malloc (sizeof (*info));
-
-  info->prof_info = *prof_info;
-  info->event_info = *event_info;
-  info->api_info = *api_info;
-  info->start_info = prev_info;
-
-  devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch,
-                                             (void *) info);
-  return info;
-}
-
 /* Launch a possibly offloaded function with FLAGS.  FN is the host fn
    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
    blocks to be copied to/from the device.  Varadic arguments are
@@ -340,8 +284,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
   unsigned dims[GOMP_DIM_MAX];
   unsigned tag;
   struct goacc_ncarray_info *nca_info = NULL;
-  struct async_prof_callback_info *comp_start_info = NULL,
-                                 *data_start_info = NULL;
 
 #ifdef HAVE_INTTYPES_H
   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
@@ -403,8 +345,31 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       api_info.async_handle = NULL;
     }
 
+  if (profiling_p)
+    goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                             &api_info);
+
   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
 
+  /* Host fallback if "if" clause is false or if the current device is set to
+     the host.  */
+  if (flags & GOACC_FLAG_HOST_FALLBACK)
+    {
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
+      goacc_save_and_set_bind (acc_device_host);
+      fn (hostaddrs);
+      goacc_restore_bind ();
+      goto out_prof;
+    }
+  else if (acc_device_type (acc_dev->type) == acc_device_host)
+    {
+      fn (hostaddrs);
+      goto out_prof;
+    }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
+
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
     dims[i] = 0;
@@ -437,12 +402,11 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
            if (async == GOMP_LAUNCH_OP_MAX)
              async = va_arg (ap, unsigned);
 
-           /* Set async number in profiling data, unless the device is the
-              host or we're doing host fallback.  */
-           if (profiling_p
-               && !(flags & GOACC_FLAG_HOST_FALLBACK)
-               && acc_device_type (acc_dev->type) != acc_device_host)
-             prof_info.async = prof_info.async_queue = async;
+           if (profiling_p)
+             {
+               prof_info.async = async;
+               prof_info.async_queue = prof_info.async;
+             }
 
            break;
          }
@@ -470,39 +434,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 
   va_end (ap);
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
-  if (profiling_p)
-    {
-      if (aq)
-       comp_start_info
-         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                      &compute_construct_event_info,
-                                      &api_info, NULL);
-      else
-       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-                                 &api_info);
-    }
-
-  /* Host fallback if "if" clause is false or if the current device is set to
-     the host.  */
-  if (flags & GOACC_FLAG_HOST_FALLBACK)
-    {
-      prof_info.device_type = acc_device_host;
-      api_info.device_type = prof_info.device_type;
-      goacc_save_and_set_bind (acc_device_host);
-      fn (hostaddrs);
-      goacc_restore_bind ();
-      goto out_prof;
-    }
-  else if (acc_device_type (acc_dev->type) == acc_device_host)
-    {
-      fn (hostaddrs);
-      goto out_prof;
-    }
-  else if (profiling_p)
-    api_info.device_api = acc_device_api_cuda;
-
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -531,16 +462,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
        = compute_construct_event_info.other_event.parent_construct;
       enter_exit_data_event_info.other_event.implicit = 1;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      if (aq)
-       data_start_info
-         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                      &enter_exit_data_event_info, &api_info,
-                                      NULL);
-      else
-       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                                 &api_info);
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
     }
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
   tgt = gomp_map_vars_openacc (acc_dev, aq, mapnum, hostaddrs, sizes, kinds,
                               nca_info);
   free (nca_info);
@@ -550,13 +477,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_enter_data_end;
       enter_exit_data_event_info.other_event.event_type
        = prof_info.event_type;
-      if (aq)
-       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                  &enter_exit_data_event_info, &api_info,
-                                  data_start_info);
-      else
-       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                                 &api_info);
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
     }
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
@@ -575,14 +497,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_exit_data_start;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      if (aq)
-       data_start_info
-         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                      &enter_exit_data_event_info, &api_info,
-                                      NULL);
-      else
-       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                                 &api_info);
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
     }
 
   /* If running synchronously (aq == NULL), this will unmap immediately.  */
@@ -592,13 +508,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
     {
       prof_info.event_type = acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      if (aq)
-       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                  &enter_exit_data_event_info, &api_info,
-                                  data_start_info);
-      else
-       goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-                                 &api_info);
+      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+                               &api_info);
     }
 
  out_prof:
@@ -607,13 +518,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_compute_construct_end;
       compute_construct_event_info.other_event.event_type
        = prof_info.event_type;
-      if (aq)
-       queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                  &compute_construct_event_info, &api_info,
-                                  comp_start_info);
-      else
-       goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-                                 &api_info);
+      goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+                               &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
@@ -851,8 +757,6 @@ GOACC_update (int flags_m, size_t mapnum,
 
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
-  goacc_aq aq = NULL;
-  struct async_prof_callback_info *update_start_info = NULL;
 
   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
@@ -902,15 +806,7 @@ GOACC_update (int flags_m, size_t mapnum,
     }
 
   if (profiling_p)
-    {
-      aq = get_goacc_asyncqueue (async);
-      if (aq)
-       update_start_info
-         = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
-                                      &update_event_info, &api_info, NULL);
-      else
-       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
-    }
+    goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -997,11 +893,7 @@ GOACC_update (int flags_m, size_t mapnum,
     {
       prof_info.event_type = acc_ev_update_end;
       update_event_info.other_event.event_type = prof_info.event_type;
-      if (aq)
-       queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info,
-                                  &api_info, update_start_info);
-      else
-       goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+      goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
index a33fac7556cb02d6a33ee8038b4e00e9b2ad42b9..91b373216c939e5faea492f6f227aaaadeccfc8e 100644 (file)
@@ -172,10 +172,7 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  if (acc_device_type == acc_device_host)
-    assert (prof_info->async == acc_async_sync);
-  else
-    assert (prof_info->async == acc_async);
+  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
index 663f7f724d5bd082c8342d4f8c807221dd1f5cb0..28a47ccc27df5c9549c314e593b26ff74a240bce 100644 (file)
@@ -316,9 +316,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 5
+  assert (state == 7
 #if ASYNC_EXIT_DATA
-         || state == 105
+         || state == 107
 #endif
          );
   STATE_OP (state, ++);
@@ -372,9 +372,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 6
+  assert (state == 8
 #if ASYNC_EXIT_DATA
-         || state == 106
+         || state == 108
 #endif
          );
   STATE_OP (state, ++);
@@ -458,10 +458,7 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  if (acc_device_type == acc_device_host)
-    assert (prof_info->async == acc_async_sync);
-  else
-    assert (prof_info->async == acc_async);
+  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
@@ -502,6 +499,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
     {
       /* Compensate for the missing 'acc_ev_enter_data_end'.  */
       state += 1;
+      /* Compensate for the missing 'acc_ev_enqueue_launch_start' and
+        'acc_ev_enqueue_launch_end'.  */
+      state += 2;
       /* Compensate for the missing 'acc_ev_exit_data_start' and
         'acc_ev_exit_data_end'.  */
       state += 2;
@@ -514,8 +514,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
       state += 2;
     }
 #endif
-  assert (state == 7
-         || state == 107);
+  assert (state == 9
+         || state == 109);
   STATE_OP (state, ++);
 
   assert (tool_info != NULL);
@@ -569,6 +569,17 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
 
   assert (acc_device_type != acc_device_host);
 
+  assert (state == 5
+         || state == 105);
+  STATE_OP (state, ++);
+
+  assert (tool_info != NULL);
+  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
+  assert (tool_info->nested == NULL);
+  tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
+  assert (tool_info->nested != NULL);
+  tool_info->nested->nested = NULL;
+
   assert (prof_info->event_type == acc_ev_enqueue_launch_start);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -612,6 +623,13 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
+
+  tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type;
+  tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name);
+  tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs;
+  tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers;
+  tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length;
+  event_info->other_event.tool_info = tool_info->nested;
 }
 
 static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@@ -620,6 +638,19 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
 
   assert (acc_device_type != acc_device_host);
 
+  assert (state == 6
+         || state == 106);
+  STATE_OP (state, ++);
+
+  assert (tool_info != NULL);
+  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
+  assert (tool_info->nested != NULL);
+  assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start);
+  assert (tool_info->nested->event_info.launch_event.kernel_name != NULL);
+  assert (tool_info->nested->event_info.launch_event.num_gangs >= 1);
+  assert (tool_info->nested->event_info.launch_event.num_workers >= 1);
+  assert (tool_info->nested->event_info.launch_event.vector_length >= 1);
+
   assert (prof_info->event_type == acc_ev_enqueue_launch_end);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -639,7 +670,12 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
   assert (event_info->launch_event.parent_construct == acc_construct_parallel);
   assert (event_info->launch_event.implicit == 1);
+  assert (event_info->launch_event.tool_info == tool_info->nested);
   assert (event_info->launch_event.kernel_name != NULL);
+  assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0);
+  assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs);
+  assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers);
+  assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length);
 
   if (acc_device_type == acc_device_host)
     assert (api_info->device_api == acc_device_api_none);
@@ -653,6 +689,10 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
+
+  free ((void *) tool_info->nested->event_info.launch_event.kernel_name);
+  free (tool_info->nested);
+  tool_info->nested = NULL;
 }
 
 
@@ -685,7 +725,7 @@ int main()
     }
     assert (state_init == 4);
   }
-  assert (state == 8);
+  assert (state == 10);
 
   STATE_OP (state, = 100);
 
@@ -702,7 +742,7 @@ int main()
 #pragma acc wait
     assert (state_init == 104);
   }
-  assert (state == 108);
+  assert (state == 110);
 
   return 0;
 }