From: Julian Brown Date: Wed, 11 Sep 2019 20:22:03 +0000 (-0700) Subject: Fix OpenACC "ephemeral" asynchronous host-to-device copies X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0e72dbd0ba257ff2c88ec50395c3008327d66cc5;p=thirdparty%2Fgcc.git Fix OpenACC "ephemeral" asynchronous host-to-device copies libgomp/ * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update prototype. * libgomp.h (gomp_copy_host2dev): Update prototype. * oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter. * oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev. (update_dev_host): Likewise. * oacc-parallel.c (GOACC_enter_exit_data): Call async versions of acc_attach/acc_detach/acc_detach_finalize functions. * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): Add ephemeral parameter. Copy source data to temporary space immediately if true, and pass to queue_push_copy. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_host2dev): Add EPHEMERAL parameter, and FIXME function comment. * target.c (goacc_device_copy_async): Remove. (gomp_copy_host2dev): Add ephemeral parameter. Update function comment. Call async host2dev plugin hook directly. (gomp_copy_dev2host): Call async dev2host plugin hook directly. (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer, gomp_detach_pointer): Update calls to gomp_copy_host2dev. (gomp_map_vars_internal): Don't use coalescing buffer for asynchronous copies. Update calls to gomp_copy_host2dev. (gomp_update): Update calls to gomp_copy_host2dev. * testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix async-safety issue. Increase number of iterations. * testsuite/libgomp.oacc-fortran/lib-16.f90: Fix async-safety issue. * testsuite/libgomp.oacc-fortran/lib-16-2.f90: Likewise. --- diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 8c5acc10f1c1..bcb1b5b15181 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,33 @@ +2019-09-17 Julian Brown + Kwok Cheung Yeung + + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update + prototype. + * libgomp.h (gomp_copy_host2dev): Update prototype. + * oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter. + * oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev. + (update_dev_host): Likewise. + * oacc-parallel.c (GOACC_enter_exit_data): Call async versions of + acc_attach/acc_detach/acc_detach_finalize functions. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): Add + ephemeral parameter. Copy source data to temporary space immediately + if true, and pass to queue_push_copy. + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_openacc_async_host2dev): + Add EPHEMERAL parameter, and FIXME function comment. + * target.c (goacc_device_copy_async): Remove. + (gomp_copy_host2dev): Add ephemeral parameter. Update function comment. + Call async host2dev plugin hook directly. + (gomp_copy_dev2host): Call async dev2host plugin hook directly. + (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer, + gomp_detach_pointer): Update calls to gomp_copy_host2dev. + (gomp_map_vars_internal): Don't use coalescing buffer for asynchronous + copies. Update calls to gomp_copy_host2dev. + (gomp_update): Update calls to gomp_copy_host2dev. + * testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix + async-safety issue. Increase number of iterations. + * testsuite/libgomp.oacc-fortran/lib-16.f90: Fix async-safety issue. + * testsuite/libgomp.oacc-fortran/lib-16-2.f90: Likewise. + 2019-05-20 Julian Brown * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Expect diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 62645ce99540..bff2193dd3a7 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -152,7 +152,8 @@ extern void GOMP_OFFLOAD_openacc_async_exec (void (*) (void *), size_t, void **, struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t, struct goacc_asyncqueue *); -extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, +extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, + size_t, bool, struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 38aa589c8c3c..d22210b4cbd0 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1198,7 +1198,7 @@ enum gomp_map_vars_kind struct gomp_coalesce_buf; extern void gomp_copy_host2dev (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, - size_t, struct gomp_coalesce_buf *); + size_t, bool, struct gomp_coalesce_buf *); extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 1cbff4caacec..369abfffac99 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -184,6 +184,7 @@ host_openacc_async_host2dev (int ord __attribute__ ((unused)), void *dst __attribute__ ((unused)), const void *src __attribute__ ((unused)), size_t n __attribute__ ((unused)), + bool eph __attribute__ ((unused)), struct goacc_asyncqueue *aq __attribute__ ((unused))) { diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 3b98b65a8595..685daab63418 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -202,7 +202,7 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, if (from) gomp_copy_dev2host (thr->dev, aq, h, d, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); if (profiling_p) { @@ -876,7 +876,7 @@ update_dev_host (int is_dev, void *h, size_t s, int async) goacc_aq aq = get_goacc_asyncqueue (async); if (is_dev) - gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); else gomp_copy_dev2host (acc_dev, aq, h, d, s); @@ -1435,7 +1435,8 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, || kind == GOMP_MAP_ATTACH || kind == GOMP_MAP_FORCE_TO || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) + || kind == GOMP_MAP_ALLOC + || kind == GOMP_MAP_DECLARE_ALLOCATE) { data_enter = true; break; @@ -1446,7 +1447,8 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, || kind == GOMP_MAP_DETACH || kind == GOMP_MAP_FORCE_DETACH || kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM) + || kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_DECLARE_DEALLOCATE) break; gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 86263a67d325..cde04c0fe765 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3928,19 +3928,22 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, - size_t n, struct goacc_asyncqueue *aq) + size_t n, bool ephemeral, + struct goacc_asyncqueue *aq) { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - /* The source data does not necessarily remain live until the deferred - copy happens. Taking a snapshot of the data here avoids reading - uninitialised data later, but means that (a) data is copied twice and - (b) modifications to the copied data between the "spawning" point of - the asynchronous kernel and when it is executed will not be seen. - But, that is probably correct. */ - void *src_copy = GOMP_PLUGIN_malloc (n); - memcpy (src_copy, src, n); - queue_push_copy (aq, dst, src_copy, n, true); + + if (ephemeral) + { + /* The source data is on the stack or otherwise may be deallocated + before the asynchronous copy takes place. Take a copy of the source + data. */ + void *src_copy = GOMP_PLUGIN_malloc (n); + memcpy (src_copy, src, n); + src = src_copy; + } + queue_push_copy (aq, dst, src, n, ephemeral); return true; } diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 1bea0bedccb6..82bf97948c6f 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1658,9 +1658,20 @@ GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) return true; } +/* FIXME: It is unknown whether the cuMemcpyHtoDAsync API call caches source + data before the asynchronous copy takes place. Either way there is a data + race associated with ignoring the EPHEMERAL parameter here -- either if it + is TRUE (because we are copying uncached data that may disappear before the + async copy takes place) or if it is FALSE (because the source data may be + cached/snapshotted here before it is modified by an earlier async operation, + so stale data gets copied to the target). + Neither problem has been observed in practice, so far. */ + bool GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src, - size_t n, struct goacc_asyncqueue *aq) + size_t n, + bool ephemeral __attribute__((unused)), + struct goacc_asyncqueue *aq) { if (!nvptx_attach_host_thread_to_device (ord) || !cuda_memcpy_sanity_check (src, dst, n)) diff --git a/libgomp/target.c b/libgomp/target.c index bf7c86a80090..851609586bd8 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -194,22 +194,6 @@ gomp_device_copy (struct gomp_device_descr *devicep, } } -static inline void -goacc_device_copy_async (struct gomp_device_descr *devicep, - bool (*copy_func) (int, void *, const void *, size_t, - struct goacc_asyncqueue *), - const char *dst, void *dstaddr, - const char *src, const void *srcaddr, - size_t size, struct goacc_asyncqueue *aq) -{ - if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) - { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", - src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); - } -} - /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) host to device memory transfers. */ @@ -303,11 +287,18 @@ gomp_to_device_kind_p (int kind) } } + /* Copy host memory to an offload device. In asynchronous mode (if AQ is + non-NULL), when the source data is stack or may otherwise be deallocated + before the asynchronous copy takes place, EPHEMERAL must be passed as + TRUE. The CBUF isn't used for non-ephemeral asynchronous copies, because + the host data might not be computed yet (by an earlier asynchronous compute + region). */ + attribute_hidden void gomp_copy_host2dev (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, void *d, const void *h, size_t sz, - struct gomp_coalesce_buf *cbuf) + bool ephemeral, struct gomp_coalesce_buf *cbuf) { if (cbuf) { @@ -335,8 +326,15 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, } } if (__builtin_expect (aq != NULL, 0)) - goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, - "dev", d, "host", h, sz, aq); + { + if (!devicep->openacc.async.host2dev_func (devicep->target_id, d, h, sz, + ephemeral, aq)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Copying of host object [%p..%p) to dev object [%p..%p) " + "failed", h, h + sz, d, d + sz); + } + } else gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); } @@ -347,8 +345,15 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, void *h, const void *d, size_t sz) { if (__builtin_expect (aq != NULL, 0)) - goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, - "host", h, "dev", d, sz, aq); + { + if (!devicep->openacc.async.dev2host_func (devicep->target_id, h, d, sz, + aq)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Copying of dev object [%p..%p) to host object [%p..%p) " + "failed", d, d + sz, h, h + sz); + } + } else gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } @@ -578,7 +583,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + newn->host_start - oldn->host_start), (void *) newn->host_start, - newn->host_end - newn->host_start, cbuf); + newn->host_end - newn->host_start, false, cbuf); if (oldn->refcount != REFCOUNT_INFINITY) oldn->refcount++; @@ -607,7 +612,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), (void *) &cur_node.tgt_offset, - sizeof (void *), cbuf); + sizeof (void *), true, cbuf); return; } /* Add bias to the pointer value. */ @@ -627,7 +632,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, to initialize the pointer with. */ cur_node.tgt_offset -= bias; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, sizeof (void *), cbuf); + (void *) &cur_node.tgt_offset, sizeof (void *), true, + cbuf); } static void @@ -760,7 +766,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, @@ -813,7 +819,7 @@ gomp_detach_pointer (struct gomp_device_descr *devicep, (void *) target); gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, @@ -985,8 +991,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, for (i = first; i <= last; i++) { tgt->list[i].key = NULL; - if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) - & typemask)) + if (!aq + && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, + i) & typemask)) gomp_coalesce_buf_add (&cbuf, tgt_size - cur_node.host_end + (uintptr_t) hostaddrs[i], @@ -1049,8 +1056,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - gomp_coalesce_buf_add (&cbuf, tgt_size, - cur_node.host_end - cur_node.host_start); + if (!aq) + gomp_coalesce_buf_add (&cbuf, tgt_size, + cur_node.host_end - cur_node.host_start); tgt_size += cur_node.host_end - cur_node.host_start; has_firstprivate = true; continue; @@ -1142,7 +1150,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - if (gomp_to_device_kind_p (kind & typemask)) + if (!aq && gomp_to_device_kind_p (kind & typemask)) gomp_coalesce_buf_add (&cbuf, tgt_size, cur_node.host_end - cur_node.host_start); tgt_size += cur_node.host_end - cur_node.host_start; @@ -1335,7 +1343,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, len = sizes[i]; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + tgt_size), - (void *) hostaddrs[i], len, cbufp); + (void *) hostaddrs[i], len, false, cbufp); tgt_size += len; continue; case GOMP_MAP_FIRSTPRIVATE_INT: @@ -1423,12 +1431,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (cur_node.tgt_offset) cur_node.tgt_offset -= sizes[i]; gomp_copy_host2dev (devicep, aq, - (void *) (n->tgt->tgt_start - + n->tgt_offset + (void *) (n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start), (void *) &cur_node.tgt_offset, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start; continue; @@ -1548,7 +1555,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - k->host_end - k->host_start, cbufp); + k->host_end - k->host_start, false, + cbufp); break; case GOMP_MAP_POINTER: gomp_map_pointer (tgt, aq, @@ -1560,7 +1568,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - k->host_end - k->host_start, cbufp); + k->host_end - k->host_start, false, + cbufp); tgt->list[i].has_null_ptr_assoc = false; for (j = i + 1; j < mapnum; j++) @@ -1617,7 +1626,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - sizeof (void *), cbufp); + sizeof (void *), false, cbufp); break; default: gomp_mutex_unlock (&devicep->lock); @@ -1633,7 +1642,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* We intentionally do not use coalescing here, as it's not data allocated by the current call to this function. */ gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, - &tgt_addr, sizeof (void *), NULL); + &tgt_addr, sizeof (void *), true, NULL); } array++; } @@ -1712,7 +1721,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) tgt->tgt_start + k->tgt_offset, (void *) k->host_start, nca->data_row_size, - cbufp); + true, cbufp); array++; } nca->tgt_data_rows[j] @@ -1727,7 +1736,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, void *ptrblock = goacc_noncontig_array_create_ptrblock (nca, target_ptrblock); gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock, - nca->ptrblock_size, cbufp); + nca->ptrblock_size, true, cbufp); free (ptrblock); } } @@ -1742,7 +1751,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + i * sizeof (void *)), (void *) &cur_node.tgt_offset, sizeof (void *), - cbufp); + true, cbufp); } } @@ -1754,7 +1763,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + cbuf.chunks[c].start), (char *) cbuf.buf + (cbuf.chunks[c].start - cbuf.chunks[0].start), - cbuf.chunks[c].end - cbuf.chunks[c].start, NULL); + cbuf.chunks[c].end - cbuf.chunks[c].start, true, + NULL); free (cbuf.buf); cbuf.buf = NULL; cbufp = NULL; @@ -2033,7 +2043,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, if (GOMP_MAP_COPY_TO_P (kind & typemask)) gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, - NULL); + false, NULL); if (GOMP_MAP_COPY_FROM_P (kind & typemask)) gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c index 573a8214bf01..dadb6d37942f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c @@ -1,6 +1,8 @@ #include -/* Test asyncronous attach and detach operation. */ +#define ITERATIONS 1023 + +/* Test asynchronous attach and detach operation. */ typedef struct { int *a; @@ -25,13 +27,13 @@ main (int argc, char* argv[]) #pragma acc enter data copyin(m) - for (int i = 0; i < 99; i++) + for (int i = 0; i < ITERATIONS; i++) { int j; -#pragma acc parallel loop copy(m.a[0:N]) async(i % 2) +#pragma acc parallel loop copy(m.a[0:N]) async(0) for (j = 0; j < N; j++) m.a[j]++; -#pragma acc parallel loop copy(m.b[0:N]) async((i + 1) % 2) +#pragma acc parallel loop copy(m.b[0:N]) async(1) for (j = 0; j < N; j++) m.b[j]++; } @@ -40,9 +42,9 @@ main (int argc, char* argv[]) for (i = 0; i < N; i++) { - if (m.a[i] != 99) + if (m.a[i] != ITERATIONS) abort (); - if (m.b[i] != 99) + if (m.b[i] != ITERATIONS) abort (); } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 index ddd557d3be08..e2e47c967fa4 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 @@ -27,6 +27,9 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 1 + ! We must wait for the update to be done. + call acc_wait (async) + h(:) = 0 call acc_copyout_async (h, sizeof (h), async) @@ -45,6 +48,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 3 + call acc_wait (async) + do i = 1, N if (h(i) /= i + i) stop 4 end do diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 index ccd1ce6ee18e..ef9a6f6626c0 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 @@ -27,6 +27,9 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 1 + ! We must wait for the update to be done. + call acc_wait (async) + h(:) = 0 call acc_copyout_async (h, sizeof (h), async) @@ -45,6 +48,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) stop 3 + call acc_wait (async) + do i = 1, N if (h(i) /= i + i) stop 4 end do