git.ipfire.org Git - thirdparty/gcc.git/commitdiff
[og9] Fix OpenACC "ephemeral" asynchronous host-to-device copies
author: Julian Brown <julian@codesourcery.com>
Wed, 11 Sep 2019 20:22:03 +0000 (13:22 -0700)
committer: Thomas Schwinge <thomas@codesourcery.com>
Tue, 3 Mar 2020 11:51:25 +0000 (12:51 +0100)
libgomp/
* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update
prototype.
* libgomp.h (gomp_copy_host2dev): Update prototype.
* oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter.
* oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev.
(update_dev_host): Likewise.
* oacc-parallel.c (GOACC_enter_exit_data): Call async versions of
acc_attach/acc_detach/acc_detach_finalize functions.
* plugin/plugin-gcn.c (wait_for_queue_nonfull): Don't lock/unlock
aq->mutex here.
(queue_push_launch): Lock aq->mutex before calling
wait_for_queue_nonfull.
(queue_push_callback): Likewise.
(queue_push_asyncwait): Likewise.
(queue_push_placeholder): Likewise.
(GOMP_OFFLOAD_openacc_async_host2dev): Add ephemeral parameter.  Copy
source data to temporary space immediately if true, and pass to
queue_push_copy.
* target.c (goacc_device_copy_async): Remove.
(gomp_copy_host2dev): Add ephemeral parameter. Update function comment.
Call async host2dev plugin hook directly.
(gomp_copy_dev2host): Call async dev2host plugin hook directly.
(gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer,
gomp_detach_pointer): Update calls to gomp_copy_host2dev.
(gomp_map_vars_internal): Don't use coalescing buffer for asynchronous
copies. Update calls to gomp_copy_host2dev.
(gomp_update): Update calls to gomp_copy_host2dev.
* testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix
async-safety issue. Increase number of iterations.
* testsuite/libgomp.oacc-fortran/lib-16-2.f90: Fix async-safety issue.

(cherry picked from openacc-gcc-9-branch commit
3a25e449d04d5768c3a799264ba0e5cab8ae181f)

libgomp/ChangeLog.omp
libgomp/libgomp-plugin.h
libgomp/libgomp.h
libgomp/oacc-host.c
libgomp/oacc-mem.c
libgomp/oacc-parallel.c
libgomp/plugin/plugin-gcn.c
libgomp/target.c
libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c
libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90

index 5f39fae6f5177f143b6a34fdb2a661aad0205373..1006b8149c8b9946645032611fdf64d1a0d813ad 100644 (file)
@@ -1,3 +1,36 @@
+2019-09-17  Julian Brown  <julian@codesourcery.com>
+
+       * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update
+       prototype.
+       * libgomp.h (gomp_copy_host2dev): Update prototype.
+       * oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter.
+       * oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev.
+       (update_dev_host): Likewise.
+       * oacc-parallel.c (GOACC_enter_exit_data): Call async versions of
+       acc_attach/acc_detach/acc_detach_finalize functions.
+       * plugin/plugin-gcn.c (wait_for_queue_nonfull): Don't lock/unlock
+       aq->mutex here.
+       (queue_push_launch): Lock aq->mutex before calling
+       wait_for_queue_nonfull.
+       (queue_push_callback): Likewise.
+       (queue_push_asyncwait): Likewise.
+       (queue_push_placeholder): Likewise.
+       (GOMP_OFFLOAD_openacc_async_host2dev): Add ephemeral parameter.  Copy
+       source data to temporary space immediately if true, and pass to
+       queue_push_copy.
+       (goacc_device_copy_async): Remove.
+       (gomp_copy_host2dev): Add ephemeral parameter. Update function comment.
+       Call async host2dev plugin hook directly.
+       (gomp_copy_dev2host): Call async dev2host plugin hook directly.
+       (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer,
+       gomp_detach_pointer): Update calls to gomp_copy_host2dev.
+       (gomp_map_vars_internal): Don't use coalescing buffer for asynchronous
+       copies. Update calls to gomp_copy_host2dev.
+       (gomp_update): Update calls to gomp_copy_host2dev.
+       * testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix
+       async-safety issue. Increase number of iterations.
+       * testsuite/libgomp.oacc-fortran/lib-16-2.f90: Fix async-safety issue.
+
 2019-09-17  Julian Brown  <julian@codesourcery.com>
 
        * oacc-host.c (host_openacc_async_queue_callback): Invoke callback
index bd63c422b0ce065d5b2c6260dcf03ae16d349318..fcd4727933259696343b4d7db158c92759f931ee 100644 (file)
@@ -141,7 +141,8 @@ extern void GOMP_OFFLOAD_openacc_async_exec_params (void (*) (void *), size_t,
                                                    struct goacc_asyncqueue *);
 extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t,
                                                 struct goacc_asyncqueue *);
-extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
+extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *,
+                                                size_t, bool,
                                                 struct goacc_asyncqueue *);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
 extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
index 803f72db9223fc5e72910ac762e18eadea2cc750..ab216a31206727c2813e734d8303bdd4151f8006 100644 (file)
@@ -1120,7 +1120,7 @@ extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
 struct gomp_coalesce_buf;
 extern void gomp_copy_host2dev (struct gomp_device_descr *,
                                struct goacc_asyncqueue *, void *, const void *,
-                               size_t, struct gomp_coalesce_buf *);
+                               size_t, bool, struct gomp_coalesce_buf *);
 extern void gomp_copy_dev2host (struct gomp_device_descr *,
                                struct goacc_asyncqueue *, void *, const void *,
                                size_t);
index 0231b597114d60c942eac84cd1f43efa56456ba6..4bc2eeb3c530143d4fb6966f8b429815947381a4 100644 (file)
@@ -230,6 +230,7 @@ host_openacc_async_host2dev (int ord __attribute__ ((unused)),
                             void *dst __attribute__ ((unused)),
                             const void *src __attribute__ ((unused)),
                             size_t n __attribute__ ((unused)),
+                            bool eph __attribute__ ((unused)),
                             struct goacc_asyncqueue *aq
                             __attribute__ ((unused)))
 {
index c07a5eb42a7048d72dee7169d931b1df0e6ba6ea..f8c71bf04c5c998f85881520d0f3b2ff388a4ea5 100644 (file)
@@ -203,7 +203,7 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
   if (from)
     gomp_copy_dev2host (thr->dev, aq, h, d, s);
   else
-    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
+    gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
 
   if (profiling_p)
     {
@@ -819,7 +819,7 @@ update_dev_host (int is_dev, void *h, size_t s, int async)
   goacc_aq aq = get_goacc_asyncqueue (async);
 
   if (is_dev)
-    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
+    gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
   else
     gomp_copy_dev2host (acc_dev, aq, h, d, s);
 
index 0c9cb3c461cf1754353d74d8b1be77ba52004916..a3ec0ed2adf479368b030c9db1877528ae1b3bf3 100644 (file)
@@ -1022,7 +1022,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
          if (!pointer)
            {
              if (kind == GOMP_MAP_ATTACH)
-               acc_attach (hostaddrs[i]);
+               acc_attach_async (hostaddrs[i], async);
              else if (kind == GOMP_MAP_STRUCT)
                i += sizes[i];
            }
@@ -1042,9 +1042,9 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
          if (!pointer)
            {
              if (kind == GOMP_MAP_DETACH)
-               acc_detach (hostaddrs[i]);
+               acc_detach_async (hostaddrs[i], async);
              else if (kind == GOMP_MAP_FORCE_DETACH)
-               acc_detach_finalize (hostaddrs[i]);
+               acc_detach_finalize_async (hostaddrs[i], async);
              else if (kind == GOMP_MAP_STRUCT)
                i += sizes[i];
            }
@@ -1053,9 +1053,9 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
              unsigned char kind2 = kinds[i + pointer - 1] & 0xff;
 
              if (kind2 == GOMP_MAP_DETACH)
-               acc_detach (hostaddrs[i + pointer - 1]);
+               acc_detach_async (hostaddrs[i + pointer - 1], async);
              else if (kind2 == GOMP_MAP_FORCE_DETACH)
-               acc_detach_finalize (hostaddrs[i + pointer - 1]);
+               acc_detach_finalize_async (hostaddrs[i + pointer - 1], async);
 
              i += pointer - 1;
            }
index b8ec96391f7f361333c1fe6e6948733ef3c8e1f8..b5995af0a06f9ea5e9f74f44a52b49713c12bd23 100644 (file)
@@ -1408,13 +1408,9 @@ wait_for_queue_nonfull (struct goacc_asyncqueue *aq)
 {
   if (aq->queue_n == ASYNC_QUEUE_SIZE)
     {
-      pthread_mutex_lock (&aq->mutex);
-
       /* Queue is full.  Wait for it to not be full.  */
       while (aq->queue_n == ASYNC_QUEUE_SIZE)
        pthread_cond_wait (&aq->queue_cond_out, &aq->mutex);
-
-      pthread_mutex_unlock (&aq->mutex);
     }
 }
 
@@ -1424,10 +1420,10 @@ queue_push_launch (struct goacc_asyncqueue *aq, struct kernel_info *kernel,
 {
   assert (aq->agent == kernel->agent);
 
-  wait_for_queue_nonfull (aq);
-
   pthread_mutex_lock (&aq->mutex);
 
+  wait_for_queue_nonfull (aq);
+
   int queue_last = ((aq->queue_first + aq->queue_n)
                    % ASYNC_QUEUE_SIZE);
   if (DEBUG_QUEUES)
@@ -1453,10 +1449,10 @@ static void
 queue_push_callback (struct goacc_asyncqueue *aq, void (*fn)(void *),
                     void *data)
 {
-  wait_for_queue_nonfull (aq);
-
   pthread_mutex_lock (&aq->mutex);
 
+  wait_for_queue_nonfull (aq);
+
   int queue_last = ((aq->queue_first + aq->queue_n)
                    % ASYNC_QUEUE_SIZE);
   if (DEBUG_QUEUES)
@@ -1484,10 +1480,10 @@ static void
 queue_push_asyncwait (struct goacc_asyncqueue *aq,
                      struct placeholder *placeholderp)
 {
-  wait_for_queue_nonfull (aq);
-
   pthread_mutex_lock (&aq->mutex);
 
+  wait_for_queue_nonfull (aq);
+
   int queue_last = ((aq->queue_first + aq->queue_n) % ASYNC_QUEUE_SIZE);
   if (DEBUG_QUEUES)
     HSA_DEBUG ("queue_push_asyncwait %d:%d: at %i\n", aq->agent->device_id,
@@ -1511,10 +1507,10 @@ queue_push_placeholder (struct goacc_asyncqueue *aq)
 {
   struct placeholder *placeholderp;
 
-  wait_for_queue_nonfull (aq);
-
   pthread_mutex_lock (&aq->mutex);
 
+  wait_for_queue_nonfull (aq);
+
   int queue_last = ((aq->queue_first + aq->queue_n) % ASYNC_QUEUE_SIZE);
   if (DEBUG_QUEUES)
     HSA_DEBUG ("queue_push_placeholder %d:%d: at %i\n", aq->agent->device_id,
@@ -3683,19 +3679,22 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
 
 bool
 GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
-                                    size_t n, struct goacc_asyncqueue *aq)
+                                    size_t n, bool ephemeral,
+                                    struct goacc_asyncqueue *aq)
 {
   struct agent_info *agent = get_agent_info (device);
   assert (agent == aq->agent);
-  /* The source data does not necessarily remain live until the deferred
-     copy happens.  Taking a snapshot of the data here avoids reading
-     uninitialised data later, but means that (a) data is copied twice and
-     (b) modifications to the copied data between the "spawning" point of
-     the asynchronous kernel and when it is executed will not be seen.
-     But, that is probably correct.  */
-  void *src_copy = GOMP_PLUGIN_malloc (n);
-  memcpy (src_copy, src, n);
-  queue_push_copy (aq, dst, src_copy, n, true);
+
+  if (ephemeral)
+    {
+      /* The source data is on the stack or otherwise may be deallocated
+        before the asynchronous copy takes place.  Take a copy of the source
+        data.  */
+      void *src_copy = GOMP_PLUGIN_malloc (n);
+      memcpy (src_copy, src, n);
+      src = src_copy;
+    }
+  queue_push_copy (aq, dst, src, n, ephemeral);
   return true;
 }
 
index 0656df1961304e86a32b8220fc420bf7e96b9f29..683a42b1164fc8e9f27ccb117b9735bac6602f64 100644 (file)
@@ -194,22 +194,6 @@ gomp_device_copy (struct gomp_device_descr *devicep,
     }
 }
 
-static inline void
-goacc_device_copy_async (struct gomp_device_descr *devicep,
-                        bool (*copy_func) (int, void *, const void *, size_t,
-                                           struct goacc_asyncqueue *),
-                        const char *dst, void *dstaddr,
-                        const char *src, const void *srcaddr,
-                        size_t size, struct goacc_asyncqueue *aq)
-{
-  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
-    {
-      gomp_mutex_unlock (&devicep->lock);
-      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
-                 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
-    }
-}
-
 /* Infrastructure for coalescing adjacent or nearly adjacent (in device
    addresses) host to device memory transfers.  */
 
@@ -303,15 +287,17 @@ gomp_to_device_kind_p (int kind)
 }
 
 /* Copy host memory to an offload device.  In asynchronous mode (if AQ is
-   non-NULL), H may point to a stack location.  It is up to the underlying
-   plugin to ensure that this data is read immediately, rather than at some
-   later point when the stack frame will likely have been destroyed.  */
+   non-NULL), when the source data is stack or may otherwise be deallocated
+   before the asynchronous copy takes place, EPHEMERAL must be passed as
+   TRUE.  The CBUF isn't used for non-ephemeral asynchronous copies, because
+   the host data might not be computed yet (by an earlier asynchronous compute
+   region).  */
 
 attribute_hidden void
 gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
-                   struct gomp_coalesce_buf *cbuf)
+                   bool ephemeral, struct gomp_coalesce_buf *cbuf)
 {
   if (cbuf)
     {
@@ -339,8 +325,15 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
        }
     }
   if (__builtin_expect (aq != NULL, 0))
-    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
-                            "dev", d, "host", h, sz, aq);
+    {
+      if (!devicep->openacc.async.host2dev_func (devicep->target_id, d, h, sz,
+                                                ephemeral, aq))
+       {
+         gomp_mutex_unlock (&devicep->lock);
+         gomp_fatal ("Copying of host object [%p..%p) to dev object [%p..%p) "
+                     "failed", h, h + sz, d, d + sz);
+       }
+    }
   else
     gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
 }
@@ -351,8 +344,15 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    void *h, const void *d, size_t sz)
 {
   if (__builtin_expect (aq != NULL, 0))
-    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
-                            "host", h, "dev", d, sz, aq);
+    {
+      if (!devicep->openacc.async.dev2host_func (devicep->target_id, h, d, sz,
+                                                aq))
+       {
+         gomp_mutex_unlock (&devicep->lock);
+         gomp_fatal ("Copying of dev object [%p..%p) to host object [%p..%p) "
+                     "failed", d, d + sz, h, h + sz);
+       }
+    }
   else
     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
 }
@@ -579,7 +579,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
-                       newn->host_end - newn->host_start, cbuf);
+                       newn->host_end - newn->host_start, false, cbuf);
 
   if (oldn->refcount != REFCOUNT_INFINITY)
     oldn->refcount++;
@@ -607,8 +607,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
       cur_node.tgt_offset = (uintptr_t) NULL;
       gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
-                         (void *) &cur_node.tgt_offset, sizeof (void *),
-                         cbuf);
+                         (void *) &cur_node.tgt_offset,
+                         sizeof (void *), true, cbuf);
       return;
     }
   /* Add bias to the pointer value.  */
@@ -628,7 +628,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
      to initialize the pointer with.  */
   cur_node.tgt_offset -= bias;
   gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
-                     (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
+                     (void *) &cur_node.tgt_offset, sizeof (void *), true,
+                     cbuf);
 }
 
 static void
@@ -760,7 +761,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
 
       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
-                         sizeof (void *), cbufp);
+                         sizeof (void *), true, cbufp);
     }
   else
     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
@@ -815,7 +816,7 @@ gomp_detach_pointer (struct gomp_device_descr *devicep,
                  (void *) target);
 
       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
-                         sizeof (void *), cbufp);
+                         sizeof (void *), true, cbufp);
     }
   else
     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
@@ -1147,8 +1148,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
-                 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
-                                            & typemask))
+                 if (!aq
+                     && gomp_to_device_kind_p (get_kind (short_mapkind, kinds,
+                                                         i) & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
@@ -1209,8 +1211,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
-         gomp_coalesce_buf_add (&cbuf, tgt_size,
-                                cur_node.host_end - cur_node.host_start);
+         if (!aq)
+           gomp_coalesce_buf_add (&cbuf, tgt_size,
+                                  cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
@@ -1240,7 +1243,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
-         if (gomp_to_device_kind_p (kind & typemask))
+         if (!aq && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
@@ -1395,7 +1398,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
-                                   (void *) hostaddrs[i], len, cbufp);
+                                   (void *) hostaddrs[i], len, false, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
@@ -1448,12 +1451,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
-                                   (void *) (n->tgt->tgt_start
-                                             + n->tgt_offset
+                                   (void *) (n->tgt->tgt_start + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
-                                   sizeof (void *), cbufp);
+                                   sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
@@ -1612,7 +1614,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
-                                       k->host_end - k->host_start, cbufp);
+                                       k->host_end - k->host_start, false,
+                                       cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
@@ -1624,7 +1627,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
-                                       k->host_end - k->host_start, cbufp);
+                                       k->host_end - k->host_start, false,
+                                       cbufp);
 
                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
@@ -1676,7 +1680,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
-                                       sizeof (void *), cbufp);
+                                       sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
@@ -1692,7 +1696,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
-                                       &tgt_addr, sizeof (void *), NULL);
+                                       &tgt_addr, sizeof (void *), true, NULL);
                  }
                array++;
              }
@@ -1779,7 +1783,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                                            (void *) tgt->tgt_start
                                                     + k->tgt_offset,
                                            (void *) k->host_start,
-                                           da->data_row_size, cbufp);
+                                           da->data_row_size, false, cbufp);
                      array++;
                    }
                  target_data_rows[row_start + j] = (void *) target_row_addr;
@@ -1793,7 +1797,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                  void *ptrblock = gomp_dynamic_array_create_ptrblock
                    (da, target_ptrblock, target_data_rows + row_start);
                  gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock,
-                                     da->ptrblock_size, cbufp);
+                                     da->ptrblock_size, true, cbufp);
                  free (ptrblock);
                }
 
@@ -1817,7 +1821,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
-                             cbufp);
+                             true, cbufp);
        }
     }
 
@@ -1829,7 +1833,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
-                           cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
+                           cbuf.chunks[c].end - cbuf.chunks[c].start, true,
+                           NULL);
       free (cbuf.buf);
       cbuf.buf = NULL;
       cbufp = NULL;
@@ -2099,7 +2104,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
 
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
-                                 NULL);
+                                 false, NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
index 37cde4ef059ee2f9d51763bac8c0a3c7ae46c7d3..2ccb5f5851bc80255581615a72321348ce025958 100644 (file)
@@ -1,6 +1,8 @@
 #include <stdlib.h>
 
-/* Test asyncronous attach and detach operation.  */
+#define ITERATIONS 1023
+
+/* Test asynchronous attach and detach operation.  */
 
 typedef struct {
   int *a;
@@ -25,13 +27,13 @@ main (int argc, char* argv[])
 
 #pragma acc enter data copyin(m)
 
-  for (int i = 0; i < 99; i++)
+  for (int i = 0; i < ITERATIONS; i++)
     {
       int j;
-#pragma acc parallel loop copy(m.a[0:N]) async(i % 2)
+#pragma acc parallel loop copy(m.a[0:N]) async(0)
       for (j = 0; j < N; j++)
         m.a[j]++;
-#pragma acc parallel loop copy(m.b[0:N]) async((i + 1) % 2)
+#pragma acc parallel loop copy(m.b[0:N]) async(1)
       for (j = 0; j < N; j++)
         m.b[j]++;
     }
@@ -40,10 +42,10 @@ main (int argc, char* argv[])
 
   for (i = 0; i < N; i++)
     {
-      if (m.a[i] != 99)
-        abort ();
-      if (m.b[i] != 99)
-        abort ();
+      if (m.a[i] != ITERATIONS)
+       abort ();
+      if (m.b[i] != ITERATIONS)
+       abort ();
     }
 
   free (m.a);
index fa76f65912fb2d74ca8a4d0104bb9c6dca27698f..94b80d07f4fc261f971c2e5b5976124bd95bb9b5 100644 (file)
@@ -27,6 +27,9 @@ program main
 
   if (acc_is_present (h) .neqv. .TRUE.) call abort
 
+  ! We must wait for the update to be done.
+  call acc_wait (async)
+
   h(:) = 0
 
   call acc_copyout_async (h, sizeof (h), async)
@@ -45,6 +48,8 @@ program main
   
   if (acc_is_present (h) .neqv. .TRUE.) call abort
 
+  call acc_wait (async)
+
   do i = 1, N
     if (h(i) /= i + i) call abort
   end do