From: Julian Brown Date: Wed, 11 Sep 2019 03:34:45 +0000 (-0700) Subject: OpenACC profiling-interface fixes for asynchronous operations X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=719f93c8618a134f90b5b661ab70c918d659ad05;p=thirdparty%2Fgcc.git OpenACC profiling-interface fixes for asynchronous operations libgomp/ * oacc-host.c (host_openacc_async_queue_callback): Invoke callback function immediately. * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch, queue_async_prof_dispatch): New. (GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous profile-event dispatches. (GOACC_update): Likewise. * oacc-mem.c (GOACC_enter_exit_data): Call queue_async_prof_dispatch for asynchronous profile-event dispatches. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c (cb_compute_construct_start): Remove/fix TODO. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c (cb_exit_data_start): Tweak expected state values. (cb_exit_data_end): Likewise. (cb_compute_construct_start): Remove/fix TODO. (cb_compute_construct_end): Don't do adjustments for acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks. (cb_compute_construct_end): Tweak expected state values. (cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect launch-enqueue operations to happen synchronously with respect to profiling events on async streams. (main): Tweak expected state values. * testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder operations for async-safety. --- diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index c25000dd33a6..fd89e921512f 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,30 @@ +2019-09-17 Julian Brown + + * oacc-host.c (host_openacc_async_queue_callback): Invoke callback + function immediately. + * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch, + queue_async_prof_dispatch): New. + (GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous + profile-event dispatches. + (GOACC_update): Likewise. + * oacc-mem.c (GOACC_enter_exit_data): Call queue_async_prof_dispatch + for asynchronous profile-event dispatches. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c + (cb_compute_construct_start): Remove/fix TODO. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c + (cb_exit_data_start): Tweak expected state values. + (cb_exit_data_end): Likewise. + (cb_compute_construct_start): Remove/fix TODO. + (cb_compute_construct_end): Don't do adjustments for + acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks. + (cb_compute_construct_end): Tweak expected state values. + (cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect + launch-enqueue operations to happen synchronously with respect to + profiling events on async streams. + (main): Tweak expected state values. + * testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder + operations for async-safety. + 2019-09-05 Julian Brown * testsuite/libgomp.oacc-fortran/lib-13.f90: End data region after diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 5bb889926d33..8c8c524599d5 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -204,10 +204,9 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)), static void host_openacc_async_queue_callback (struct goacc_asyncqueue *aq __attribute__ ((unused)), - void (*callback_fn)(void *) - __attribute__ ((unused)), - void *userptr __attribute__ ((unused))) + void (*callback_fn)(void *), void *userptr) { + callback_fn (userptr); } static struct goacc_asyncqueue * diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 73b2710c2b80..9ea625837c56 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -1318,6 +1318,12 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_mutex_unlock (&acc_dev->lock); } +struct async_prof_callback_info * +queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, + acc_prof_info *prof_info, acc_event_info *event_info, + acc_api_info *api_info, + struct async_prof_callback_info *prev_info); + static void goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, @@ -1328,6 +1334,7 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, struct goacc_thread *thr; struct gomp_device_descr *acc_dev; + struct async_prof_callback_info *data_start_info = NULL; goacc_lazy_initialize (); @@ -1383,9 +1390,19 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, api_info.async_handle = NULL; } + goacc_aq aq = get_goacc_asyncqueue (async); + if (profiling_p) - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + { + if (aq) + data_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + NULL); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + } if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -1399,8 +1416,6 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, if (num_waits) goacc_wait (async, num_waits, ap); - goacc_aq aq = get_goacc_asyncqueue (async); - if (data_enter) goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); else @@ -1412,8 +1427,13 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, prof_info.event_type = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + data_start_info); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index d66bc882a5f0..81e8eba42253 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -259,6 +259,62 @@ handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, } +struct async_prof_callback_info { + acc_prof_info prof_info; + acc_event_info event_info; + acc_api_info api_info; + struct async_prof_callback_info *start_info; +}; + +static void +async_prof_dispatch (void *ptr) +{ + struct async_prof_callback_info *info + = (struct async_prof_callback_info *) ptr; + + if (info->start_info) + { + /* The TOOL_INFO must be preserved from a start event to the + corresponding end event. Copy that here. */ + void *tool_info = info->start_info->event_info.other_event.tool_info; + info->event_info.other_event.tool_info = tool_info; + } + + goacc_profiling_dispatch (&info->prof_info, &info->event_info, + &info->api_info); + + /* The async_prof_dispatch function is (so far) always used for start/end + profiling event pairs: the start and end parts are queued, then each is + dispatched (or the dispatches might be interleaved before the end part is + queued). + In any case, it's not safe to delete either info structure before the + whole bracketed event is complete. */ + + if (info->start_info) + { + free (info->start_info); + free (info); + } +} + +struct async_prof_callback_info * +queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, + acc_prof_info *prof_info, acc_event_info *event_info, + acc_api_info *api_info, + struct async_prof_callback_info *prev_info) +{ + struct async_prof_callback_info *info = malloc (sizeof (*info)); + + info->prof_info = *prof_info; + info->event_info = *event_info; + info->api_info = *api_info; + info->start_info = prev_info; + + devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch, + (void *) info); + return info; +} + /* Launch a possibly offloaded function with FLAGS. FN is the host fn address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory blocks to be copied to/from the device. Varadic arguments are @@ -284,6 +340,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), unsigned dims[GOMP_DIM_MAX]; unsigned tag; struct goacc_ncarray_info *nca_info = NULL; + struct async_prof_callback_info *comp_start_info = NULL, + *data_start_info = NULL; #ifdef HAVE_INTTYPES_H gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", @@ -345,31 +403,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), api_info.async_handle = NULL; } - if (profiling_p) - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); - handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); - /* Host fallback if "if" clause is false or if the current device is set to - the host. */ - if (flags & GOACC_FLAG_HOST_FALLBACK) - { - prof_info.device_type = acc_device_host; - api_info.device_type = prof_info.device_type; - goacc_save_and_set_bind (acc_device_host); - fn (hostaddrs); - goacc_restore_bind (); - goto out_prof; - } - else if (acc_device_type (acc_dev->type) == acc_device_host) - { - fn (hostaddrs); - goto out_prof; - } - else if (profiling_p) - api_info.device_api = acc_device_api_cuda; - /* Default: let the runtime choose. */ for (i = 0; i != GOMP_DIM_MAX; i++) dims[i] = 0; @@ -402,11 +437,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), if (async == GOMP_LAUNCH_OP_MAX) async = va_arg (ap, unsigned); - if (profiling_p) - { - prof_info.async = async; - prof_info.async_queue = prof_info.async; - } + /* Set async number in profiling data, unless the device is the + host or we're doing host fallback. */ + if (profiling_p + && !(flags & GOACC_FLAG_HOST_FALLBACK) + && acc_device_type (acc_dev->type) != acc_device_host) + prof_info.async = prof_info.async_queue = async; break; } @@ -434,6 +470,39 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), va_end (ap); + goacc_aq aq = get_goacc_asyncqueue (async); + + if (profiling_p) + { + if (aq) + comp_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &compute_construct_event_info, + &api_info, NULL); + else + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); + } + + /* Host fallback if "if" clause is false or if the current device is set to + the host. */ + if (flags & GOACC_FLAG_HOST_FALLBACK) + { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; + goacc_save_and_set_bind (acc_device_host); + fn (hostaddrs); + goacc_restore_bind (); + goto out_prof; + } + else if (acc_device_type (acc_dev->type) == acc_device_host) + { + fn (hostaddrs); + goto out_prof; + } + else if (profiling_p) + api_info.device_api = acc_device_api_cuda; + if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) { k.host_start = (uintptr_t) fn; @@ -462,12 +531,16 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), = compute_construct_event_info.other_event.parent_construct; enter_exit_data_event_info.other_event.implicit = 1; enter_exit_data_event_info.other_event.tool_info = NULL; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + data_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + NULL); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } - goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars_openacc (acc_dev, aq, mapnum, hostaddrs, sizes, kinds, nca_info); free (nca_info); @@ -477,8 +550,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_enter_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + data_start_info); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } devaddrs = gomp_alloca (sizeof (void *) * mapnum); @@ -497,8 +575,14 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_exit_data_start; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; enter_exit_data_event_info.other_event.tool_info = NULL; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + data_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + NULL); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } /* If running synchronously (aq == NULL), this will unmap immediately. */ @@ -508,8 +592,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), { prof_info.event_type = acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + data_start_info); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } out_prof: @@ -518,8 +607,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_compute_construct_end; compute_construct_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &compute_construct_event_info, &api_info, + comp_start_info); + else + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; @@ -757,6 +851,8 @@ GOACC_update (int flags_m, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + goacc_aq aq = NULL; + struct async_prof_callback_info *update_start_info = NULL; bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); @@ -806,7 +902,15 @@ GOACC_update (int flags_m, size_t mapnum, } if (profiling_p) - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + { + aq = get_goacc_asyncqueue (async); + if (aq) + update_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &update_event_info, &api_info, NULL); + else + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + } if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -893,7 +997,11 @@ GOACC_update (int flags_m, size_t mapnum, { prof_info.event_type = acc_ev_update_end; update_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info, + &api_info, update_start_info); + else + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index 91b373216c93..a33fac7556cb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -172,7 +172,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - assert (prof_info->async == /* TODO acc_async */ acc_async_sync); + if (acc_device_type == acc_device_host) + assert (prof_info->async == acc_async_sync); + else + assert (prof_info->async == acc_async); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 28a47ccc27df..663f7f724d5b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -316,9 +316,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 7 + assert (state == 5 #if ASYNC_EXIT_DATA - || state == 107 + || state == 105 #endif ); STATE_OP (state, ++); @@ -372,9 +372,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 8 + assert (state == 6 #if ASYNC_EXIT_DATA - || state == 108 + || state == 106 #endif ); STATE_OP (state, ++); @@ -458,7 +458,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - assert (prof_info->async == /* TODO acc_async */ acc_async_sync); + if (acc_device_type == acc_device_host) + assert (prof_info->async == acc_async_sync); + else + assert (prof_info->async == acc_async); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); @@ -499,9 +502,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * { /* Compensate for the missing 'acc_ev_enter_data_end'. */ state += 1; - /* Compensate for the missing 'acc_ev_enqueue_launch_start' and - 'acc_ev_enqueue_launch_end'. */ - state += 2; /* Compensate for the missing 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'. */ state += 2; @@ -514,8 +514,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * state += 2; } #endif - assert (state == 9 - || state == 109); + assert (state == 7 + || state == 107); STATE_OP (state, ++); assert (tool_info != NULL); @@ -569,17 +569,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (acc_device_type != acc_device_host); - assert (state == 5 - || state == 105); - STATE_OP (state, ++); - - assert (tool_info != NULL); - assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); - assert (tool_info->nested == NULL); - tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info); - assert (tool_info->nested != NULL); - tool_info->nested->nested = NULL; - assert (prof_info->event_type == acc_ev_enqueue_launch_start); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -623,13 +612,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); - - tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type; - tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name); - tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs; - tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers; - tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length; - event_info->other_event.tool_info = tool_info->nested; } static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) @@ -638,19 +620,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (acc_device_type != acc_device_host); - assert (state == 6 - || state == 106); - STATE_OP (state, ++); - - assert (tool_info != NULL); - assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); - assert (tool_info->nested != NULL); - assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start); - assert (tool_info->nested->event_info.launch_event.kernel_name != NULL); - assert (tool_info->nested->event_info.launch_event.num_gangs >= 1); - assert (tool_info->nested->event_info.launch_event.num_workers >= 1); - assert (tool_info->nested->event_info.launch_event.vector_length >= 1); - assert (prof_info->event_type == acc_ev_enqueue_launch_end); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -670,12 +639,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES); assert (event_info->launch_event.parent_construct == acc_construct_parallel); assert (event_info->launch_event.implicit == 1); - assert (event_info->launch_event.tool_info == tool_info->nested); assert (event_info->launch_event.kernel_name != NULL); - assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0); - assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs); - assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers); - assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length); if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); @@ -689,10 +653,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); - - free ((void *) tool_info->nested->event_info.launch_event.kernel_name); - free (tool_info->nested); - tool_info->nested = NULL; } @@ -725,7 +685,7 @@ int main() } assert (state_init == 4); } - assert (state == 10); + assert (state == 8); STATE_OP (state, = 100); @@ -742,7 +702,7 @@ int main() #pragma acc wait assert (state_init == 104); } - assert (state == 110); + assert (state == 108); return 0; }