Use 'GOMP_MAP_VARS_TARGET' for OpenACC compute constructs [PR90596]

author Thomas Schwinge <thomas@codesourcery.com>

Fri, 10 Mar 2023 17:14:44 +0000 (18:14 +0100)

committer Thomas Schwinge <thomas@codesourcery.com>

Fri, 10 Mar 2023 17:15:19 +0000 (18:15 +0100)
author Thomas Schwinge <thomas@codesourcery.com>
Fri, 10 Mar 2023 17:14:44 +0000 (18:14 +0100)
committer Thomas Schwinge <thomas@codesourcery.com>
Fri, 10 Mar 2023 17:15:19 +0000 (18:15 +0100)
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp

index e3bab148f7790cdaf66187080918e69911781b8e..2b0b4c71e9d814b47a3e12847ef294e51f6ef0ca 100644 (file)
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -3,6 +3,24 @@
         Backported from master:
         2023-03-10  Thomas Schwinge  <thomas@codesourcery.com>
  
+       PR libgomp/90596
+       * target.c (gomp_map_vars_internal): Allow for
+       'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'.
+       * oacc-parallel.c (GOACC_parallel_keyed): Pass
+       'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'.
+       * plugin/plugin-gcn.c (alloc_by_agent, gcn_exec)
+       (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec):
+       Adjust, simplify.
+       (gomp_offload_free): Remove.
+       * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
+       (GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify.
+       (cuda_free_argmem): Remove.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
+       Adjust.
+
+       Backported from master:
+       2023-03-10  Thomas Schwinge  <thomas@codesourcery.com>
+
         * target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow
         libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral'
         data.
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c

index 9c1db402c82bc5ee3ce2cfbadba070f827a5bfee..9cd99b4a0b45318831e0fb114f319d54726ee4d4 100644 (file)
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -274,8 +274,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
    va_list ap;
    struct goacc_thread *thr;
    struct gomp_device_descr *acc_dev;
-  struct target_mem_desc *tgt;
-  void **devaddrs;
    unsigned int i;
    struct splay_tree_key_s k;
    splay_tree_key tgt_fn_key;
@@ -468,8 +466,10 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
  
    goacc_aq aq = get_goacc_asyncqueue (async);
  
-  tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
-                       nca_info, true, 0);
+  struct target_mem_desc *tgt
+    = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+                     nca_info, true, GOMP_MAP_VARS_TARGET);
+
    free (nca_info);
  
    if (profiling_p)
@@ -481,10 +481,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
                                 &api_info);
      }
  
-  devaddrs = gomp_alloca (sizeof (void *) * mapnum);
-  for (i = 0; i < mapnum; i++)
-    devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
-
+  void **devaddrs = (void **) tgt->tgt_start;
    if (aq == NULL)
      acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
                                 tgt);
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c

index 11b7f25d2234b45e3ae44c91e9487f2999c81f60..64694cdc118b4c7b8e3691e21c927d4c2dd3465e 100644 (file)
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1874,13 +1874,6 @@ alloc_by_agent (struct agent_info *agent, size_t size)
  {
    GCN_DEBUG ("Allocating %zu bytes on device %d\n", size, agent->device_id);
  
-  /* Zero-size allocations are invalid, so in order to return a valid pointer
-     we need to pass a valid size.  One source of zero-size allocations is
-     kernargs for kernels that have no inputs or outputs (the kernel may
-     only use console output, for example).  */
-  if (size == 0)
-    size = 4;
-
    void *ptr;
    hsa_status_t status = hsa_fns.hsa_memory_allocate_fn (agent->data_region,
                                                         size, &ptr);
@@ -3048,15 +3041,6 @@ copy_data (void *data_)
    free (data);
  }
  
-/* Free device data.  This is intended for use as an async callback event.  */
-
-static void
-gomp_offload_free (void *ptr)
-{
-  GCN_DEBUG ("Async thread ?:?: Freeing %p\n", ptr);
-  GOMP_OFFLOAD_free (0, ptr);
-}
-
  /* Request an asynchronous data copy, to or from a device, on a given queue.
     The event will be registered as a callback.  */
  
@@ -3219,7 +3203,7 @@ usm_heap_create (size_t size)
  /* Execute an OpenACC kernel, synchronously or asynchronously.  */
  
  static void
-gcn_exec (struct kernel_info *kernel, size_t mapnum,
+gcn_exec (struct kernel_info *kernel,
           void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
           struct goacc_asyncqueue *aq)
  {
@@ -3229,11 +3213,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
    /* If we get here then this must be an OpenACC kernel.  */
    kernel->kind = KIND_OPENACC;
  
-  /* devaddrs must be double-indirect on the target.  */
-  void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum);
-  for (size_t i = 0; i < mapnum; i++)
-    hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *));
-
    struct hsa_kernel_description *hsa_kernel_desc = NULL;
    for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
      {
@@ -3345,9 +3324,9 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
      }
  
    if (!async)
-    run_kernel (kernel, ind_da, &kla, NULL, false);
+    run_kernel (kernel, devaddrs, &kla, NULL, false);
    else
-    queue_push_launch (aq, kernel, ind_da, &kla);
+    queue_push_launch (aq, kernel, devaddrs, &kla);
  
    if (profiling_dispatch_p)
      {
@@ -3357,16 +3336,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
                                             &enqueue_launch_event_info,
                                             api_info);
      }
-
-  if (!async)
-    gomp_offload_free (ind_da);
-  else
-    {
-      if (DEBUG_QUEUES)
-       GCN_DEBUG ("queue_push_callback %d:%d gomp_offload_free, %p\n",
-                  aq->agent->device_id, aq->id, ind_da);
-      queue_push_callback (aq, gomp_offload_free, ind_da);
-    }
  }
  
  /* }}}  */
@@ -4095,14 +4064,15 @@ GOMP_OFFLOAD_is_usm_ptr (void *ptr)
     already-loaded KERNEL.  */
  
  void
-GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *),
+                          size_t mapnum __attribute__((unused)),
                            void **hostaddrs __attribute__((unused)),
                            void **devaddrs, unsigned *dims,
                            void *targ_mem_desc)
  {
    struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
  
-  gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL);
+  gcn_exec (kernel, devaddrs, dims, targ_mem_desc, false, NULL);
  }
  
  /* Run an asynchronous OpenACC kernel on the specified queue.  */
@@ -4116,7 +4086,8 @@ GOMP_OFFLOAD_openacc_exec_params (void (*fn_ptr) (void *), size_t mapnum,
  }
  
  void
-GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *),
+                                size_t mapnum __attribute__((unused)),
                                  void **hostaddrs __attribute__((unused)),
                                  void **devaddrs,
                                  unsigned *dims, void *targ_mem_desc,
@@ -4124,7 +4095,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
  {
    struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
  
-  gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq);
+  gcn_exec (kernel, devaddrs, dims, targ_mem_desc, true, aq);
  }
  
  /* Create a new asynchronous thread and queue for running future kernels.  */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c

index 1c056592643aa17024738b45c303982648f6e07b..6ade34beb67b33efaf7ee78181fa500dccacb924 100644 (file)
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -773,7 +773,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
  }
  
  static void
-nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc,
+nvptx_exec (void (*fn), unsigned *dims, void *targ_mem_desc,
             CUdeviceptr dp, CUstream stream)
  {
    struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
@@ -1740,70 +1740,16 @@ GOMP_OFFLOAD_page_locked_host_free (void *ptr)
  
  
  void
-GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_exec (void (*fn) (void *),
+                          size_t mapnum  __attribute__((unused)),
                            void **hostaddrs __attribute__((unused)),
                            void **devaddrs,
                            unsigned *dims, void *targ_mem_desc)
  {
-  GOMP_PLUGIN_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
+  GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__);
  
-  struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread ();
-  acc_prof_info *prof_info = thr->prof_info;
-  acc_event_info data_event_info;
-  acc_api_info *api_info = thr->api_info;
-  bool profiling_p = __builtin_expect (prof_info != NULL, false);
-
-  void **hp = NULL;
-  CUdeviceptr dp = 0;
-
-  if (mapnum > 0)
-    {
-      size_t s = mapnum * sizeof (void *);
-      hp = alloca (s);
-      for (int i = 0; i < mapnum; i++)
-       hp[i] = devaddrs[i];
-      CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
-      if (profiling_p)
-       goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
-    }
-
-  /* Copy the (device) pointers to arguments to the device (dp and hp might in
-     fact have the same value on a unified-memory system).  */
-  if (mapnum > 0)
-    {
-      if (profiling_p)
-       {
-         prof_info->event_type = acc_ev_enqueue_upload_start;
-
-         data_event_info.data_event.event_type = prof_info->event_type;
-         data_event_info.data_event.valid_bytes
-           = _ACC_DATA_EVENT_INFO_VALID_BYTES;
-         data_event_info.data_event.parent_construct
-           = acc_construct_parallel;
-         data_event_info.data_event.implicit = 1; /* Always implicit.  */
-         data_event_info.data_event.tool_info = NULL;
-         data_event_info.data_event.var_name = NULL;
-         data_event_info.data_event.bytes = mapnum * sizeof (void *);
-         data_event_info.data_event.host_ptr = hp;
-         data_event_info.data_event.device_ptr = (const void *) dp;
-
-         api_info->device_api = acc_device_api_cuda;
-
-         GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
-                                               api_info);
-       }
-      CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, (void *) hp,
-                       mapnum * sizeof (void *));
-      if (profiling_p)
-       {
-         prof_info->event_type = acc_ev_enqueue_upload_end;
-         data_event_info.data_event.event_type = prof_info->event_type;
-         GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
-                                               api_info);
-       }
-    }
-
-  nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL);
+  CUdeviceptr dp = (CUdeviceptr) devaddrs;
+  nvptx_exec (fn, dims, targ_mem_desc, dp, NULL);
  
    CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
    const char *maybe_abort_msg = "(perhaps abort was called)";
@@ -1812,98 +1758,20 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
                        maybe_abort_msg);
    else if (r != CUDA_SUCCESS)
      GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
-
-  CUDA_CALL_ASSERT (cuMemFree, dp);
-  if (profiling_p)
-    goacc_profiling_acc_ev_free (thr, (void *) dp);
-}
-
-static void
-cuda_free_argmem (void *ptr)
-{
-  void **block = (void **) ptr;
-  nvptx_free (block[0], (struct ptx_device *) block[1]);
-  free (block);
  }
  
  void
-GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *),
+                                size_t mapnum __attribute__((unused)),
                                  void **hostaddrs __attribute__((unused)),
                                  void **devaddrs,
                                  unsigned *dims, void *targ_mem_desc,
                                  struct goacc_asyncqueue *aq)
  {
-  GOMP_PLUGIN_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
-
-  struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread ();
-  acc_prof_info *prof_info = thr->prof_info;
-  acc_event_info data_event_info;
-  acc_api_info *api_info = thr->api_info;
-  bool profiling_p = __builtin_expect (prof_info != NULL, false);
-
-  void **hp = NULL;
-  CUdeviceptr dp = 0;
-  void **block = NULL;
-
-  if (mapnum > 0)
-    {
-      size_t s = mapnum * sizeof (void *);
-      block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s);
-      hp = block + 2;
-      for (int i = 0; i < mapnum; i++)
-       hp[i] = devaddrs[i];
-      CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
-      if (profiling_p)
-       goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
-    }
-
-  /* Copy the (device) pointers to arguments to the device (dp and hp might in
-     fact have the same value on a unified-memory system).  */
-  if (mapnum > 0)
-    {
-      if (profiling_p)
-       {
-         prof_info->event_type = acc_ev_enqueue_upload_start;
-
-         data_event_info.data_event.event_type = prof_info->event_type;
-         data_event_info.data_event.valid_bytes
-           = _ACC_DATA_EVENT_INFO_VALID_BYTES;
-         data_event_info.data_event.parent_construct
-           = acc_construct_parallel;
-         data_event_info.data_event.implicit = 1; /* Always implicit.  */
-         data_event_info.data_event.tool_info = NULL;
-         data_event_info.data_event.var_name = NULL;
-         data_event_info.data_event.bytes = mapnum * sizeof (void *);
-         data_event_info.data_event.host_ptr = hp;
-         data_event_info.data_event.device_ptr = (const void *) dp;
-
-         api_info->device_api = acc_device_api_cuda;
-
-         GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
-                                               api_info);
-       }
-
-      CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, dp, (void *) hp,
-                       mapnum * sizeof (void *), aq->cuda_stream);
-      block[0] = (void *) dp;
-
-      struct nvptx_thread *nvthd =
-       (struct nvptx_thread *) GOMP_PLUGIN_acc_thread ();
-      block[1] = (void *) nvthd->ptx_dev;
-
-      if (profiling_p)
-       {
-         prof_info->event_type = acc_ev_enqueue_upload_end;
-         data_event_info.data_event.event_type = prof_info->event_type;
-         GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
-                                               api_info);
-       }
-    }
-
-  nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream);
+  GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__);
  
-  if (mapnum > 0)
-    GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block);
+  CUdeviceptr dp = (CUdeviceptr) devaddrs;
+  nvptx_exec (fn, dims, targ_mem_desc, dp, aq->cuda_stream);
  }
  
  void *
diff --git a/libgomp/target.c b/libgomp/target.c

index 60f9b432dc4bc0c783f39fcc2fefb2d7a3e131f7..96ece0b31fd4433ac104b281c7c614ccfc41aba1 100644 (file)
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -996,13 +996,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
    cbuf.chunk_cnt = -1;
    cbuf.use_cnt = 0;
    cbuf.buf = NULL;
-  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
+  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
      {
        size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
        cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
        cbuf.chunk_cnt = 0;
      }
-  if (pragma_kind == GOMP_MAP_VARS_TARGET)
+  if (pragma_kind & GOMP_MAP_VARS_TARGET)
      {
        size_t align = 4 * sizeof (void *);
        tgt_align = align;
@@ -1346,7 +1346,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
        tgt->tgt_start = (uintptr_t) tgt->to_free;
        tgt->tgt_end = tgt->tgt_start + sizes[0];
      }
-  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
+  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
      {
        /* Allocate tgt_align aligned tgt_size block of memory.  */
        /* FIXME: Perhaps change interface to allocate properly aligned
@@ -1384,7 +1384,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
      }
  
    tgt_size = 0;
-  if (pragma_kind == GOMP_MAP_VARS_TARGET)
+  if (pragma_kind & GOMP_MAP_VARS_TARGET)
      tgt_size = mapnum * sizeof (void *);
  
    tgt->array = NULL;
@@ -1944,7 +1944,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
         }
      }
  
-  if (pragma_kind == GOMP_MAP_VARS_TARGET)
+  if (pragma_kind & GOMP_MAP_VARS_TARGET)
      {
        for (i = 0; i < mapnum; i++)
         {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c

index 711ead588c49c86b53ede3ee553dcd373615a503..27f86d3160f8093b2256a99e29cdf45020ba2a33 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -203,9 +203,7 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_
  # error TODO
  #else
    assert (state == 4
-         || state == 6
-         || state == 104
-         || state == 106);
+         || state == 104);
    STATE_OP (state, ++);
  
    if (state == 5
@@ -217,13 +215,6 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_
        assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start);
        assert (tool_info->nested->nested == NULL);
      }
-  else if (state == 7
-          || state == 107)
-    {
-      assert (tool_info != NULL);
-      assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-      assert (tool_info->nested == NULL);
-    }
    else
      abort ();
  #endif
@@ -268,17 +259,10 @@ static void cb_free (acc_prof_info *prof_info, acc_event_info *event_info, acc_a
  #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
  # error TODO
  #else
-  assert (state == 9
-         || state == 11);
+  assert (state == 9);
    STATE_OP (state, ++);
  
    if (state == 10)
-    {
-      assert (tool_info != NULL);
-      assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-      assert (tool_info->nested == NULL);
-    }
-  else if (state == 12)
      {
        assert (tool_info != NULL);
        assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
@@ -449,19 +433,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
  {
    DEBUG_printf ("%s\n", __FUNCTION__);
  
+  assert (state == 8
  #if ASYNC_EXIT_DATA
-  if (acc_async != acc_async_sync)
-    {
-      /* Compensate for the deferred 'acc_ev_free'.  */
-      state += 1;
-    }
-#else
-# error TODO
-#endif
-
-  assert (state == 10
-#if ASYNC_EXIT_DATA
-         || state == 110
+         || state == 108
  #endif
           );
    STATE_OP (state, ++);
@@ -525,9 +499,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
  {
    DEBUG_printf ("%s\n", __FUNCTION__);
  
-  assert (state == 12
+  assert (state == 10
  #if ASYNC_EXIT_DATA
-         || state == 112
+         || state == 110
  #endif
           );
    STATE_OP (state, ++);
@@ -654,13 +628,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
      {
        /* Compensate for the missing 'acc_ev_enter_data_end'.  */
        state += 1;
-      /* Compensate for the missing 'acc_ev_alloc'.  */
-      state += 1;
        /* Compensate for the missing 'acc_ev_enqueue_launch_start' and
          'acc_ev_enqueue_launch_end'.  */
        state += 2;
-      /* Compensate for the missing 'acc_ev_free'.  */
-      state += 1;
        /* Compensate for the missing 'acc_ev_exit_data_start'.  */
        state += 1;
        /* Compensate for the missing 'acc_ev_free'.  */
@@ -676,8 +646,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
        state += 2;
      }
  #endif
-  assert (state == 13
-         || state == 113);
+  assert (state == 11
+         || state == 111);
    STATE_OP (state, ++);
  
    assert (tool_info != NULL);
@@ -731,8 +701,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
  
    assert (acc_device_type != acc_device_host);
  
-  assert (state == 7
-         || state == 107);
+  assert (state == 6
+         || state == 106);
    STATE_OP (state, ++);
  
    assert (tool_info != NULL);
@@ -800,8 +770,8 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
  
    assert (acc_device_type != acc_device_host);
  
-  assert (state == 8
-         || state == 108);
+  assert (state == 7
+         || state == 107);
    STATE_OP (state, ++);
  
    assert (tool_info != NULL);
@@ -889,7 +859,7 @@ int main()
      }
      assert (state_init == 5);
    }
-  assert (state == 14);
+  assert (state == 12);
  
    STATE_OP (state, = 100);
  
@@ -906,7 +876,7 @@ int main()
  #pragma acc wait
      assert (state_init == 105);
    }
-  assert (state == 114);
+  assert (state == 112);
  
    return 0;
  }
author	Thomas Schwinge <thomas@codesourcery.com>
	Fri, 10 Mar 2023 17:14:44 +0000 (18:14 +0100)
committer	Thomas Schwinge <thomas@codesourcery.com>
	Fri, 10 Mar 2023 17:15:19 +0000 (18:15 +0100)
libgomp/ChangeLog.omp		patch \| blob \| blame \| history
libgomp/oacc-parallel.c		patch \| blob \| blame \| history
libgomp/plugin/plugin-gcn.c		patch \| blob \| blame \| history
libgomp/plugin/plugin-nvptx.c		patch \| blob \| blame \| history
libgomp/target.c		patch \| blob \| blame \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c		patch \| blob \| blame \| history