From: Julian Brown Date: Tue, 13 Aug 2019 16:05:38 +0000 (-0700) Subject: [og9] Use temporary buffers for async host2dev copies X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=addc8702db3df69ac119d6d99200c318897f38d3;p=thirdparty%2Fgcc.git [og9] Use temporary buffers for async host2dev copies libgomp/ * plugin/plugin-gcn.c (struct copy_data): Add using_src_copy field. (copy_data): Free temporary buffer if using. (queue_push_copy): Add using_src_copy parameter. (GOMP_OFFLOAD_dev2dev, GOMP_OFFLOAD_async_dev2host): Update calls to queue_push_copy. (GOMP_OFFLOAD_async_host2dev): Likewise. Allocate temporary buffer and copy source data to it immediately. * target.c (gomp_copy_host2dev): Update function comment. (copy_host2dev_immediate): Remove. (gomp_map_pointer, gomp_map_vars_internal): Replace calls to copy_host2dev_immediate with calls to gomp_copy_host2dev. (cherry picked from openacc-gcc-9-branch commit 6723cd26bad519660b91d8eb371d6c9d57876e72) --- diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 2279545f3618..2a9a7f18ca2e 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,17 @@ +2019-08-13 Julian Brown + + * plugin/plugin-gcn.c (struct copy_data): Add using_src_copy field. + (copy_data): Free temporary buffer if using. + (queue_push_copy): Add using_src_copy parameter. + (GOMP_OFFLOAD_dev2dev, GOMP_OFFLOAD_async_dev2host): Update calls to + queue_push_copy. + (GOMP_OFFLOAD_async_host2dev): Likewise. Allocate temporary buffer and + copy source data to it immediately. + * target.c (gomp_copy_host2dev): Update function comment. + (copy_host2dev_immediate): Remove. + (gomp_map_pointer, gomp_map_vars_internal): Replace calls to + copy_host2dev_immediate with calls to gomp_copy_host2dev. + 2019-08-08 Julian Brown * plugin/plugin-gcn.c (gcn_exec): Use 1 for the default number of diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index a41568b33067..65690e643ede 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3063,6 +3063,7 @@ struct copy_data const void *src; size_t len; bool use_hsa_memory_copy; + bool using_src_copy; struct goacc_asyncqueue *aq; }; @@ -3077,12 +3078,14 @@ copy_data (void *data_) hsa_fns.hsa_memory_copy_fn (data->dst, data->src, data->len); else memcpy (data->dst, data->src, data->len); + if (data->using_src_copy) + free ((void *) data->src); free (data); } static void queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src, - size_t len, bool use_hsa_memory_copy) + size_t len, bool use_hsa_memory_copy, bool using_src_copy) { if (DEBUG_QUEUES) HSA_DEBUG ("queue_push_copy %d:%d: %zu bytes from (%p) to (%p)\n", @@ -3093,6 +3096,7 @@ queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src, data->src = src; data->len = len; data->use_hsa_memory_copy = use_hsa_memory_copy; + data->using_src_copy = using_src_copy; data->aq = aq; queue_push_callback (aq, copy_data, data); } @@ -3137,7 +3141,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n) { struct agent_info *agent = get_agent_info (device); maybe_init_omp_async (agent); - queue_push_copy (agent->omp_async_queue, dst, src, n, false); + queue_push_copy (agent->omp_async_queue, dst, src, n, false, false); return true; } @@ -3469,7 +3473,15 @@ GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n, image_address_p (agent, dst)); + /* The source data does not necessarily remain live until the deferred + copy happens. Taking a snapshot of the data here avoids reading + uninitialised data later, but means that (a) data is copied twice and + (b) modifications to the copied data between the "spawning" point of + the asynchronous kernel and when it is executed will not be seen. + But, that is probably correct. */ + void *src_copy = GOMP_PLUGIN_malloc (n); + memcpy (src_copy, src, n); + queue_push_copy (aq, dst, src_copy, n, image_address_p (agent, dst), true); return true; } @@ -3479,7 +3491,7 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n, image_address_p (agent, src)); + queue_push_copy (aq, dst, src, n, image_address_p (agent, src), false); return true; } diff --git a/libgomp/target.c b/libgomp/target.c index 4645894f869c..5f7f946e2ba7 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -303,10 +303,9 @@ gomp_to_device_kind_p (int kind) } /* Copy host memory to an offload device. In asynchronous mode (if AQ is - non-NULL), this is only safe when the source memory is a global or heap - location (otherwise a copy may take place from a dangling pointer to an - expired stack frame). Use copy_host2dev_immediate for copies from stack - locations. */ + non-NULL), H may point to a stack location. It is up to the underlying + plugin to ensure that this data is read immediately, rather than at some + later point when the stack frame will likely have been destroyed. */ attribute_hidden void gomp_copy_host2dev (struct gomp_device_descr *devicep, @@ -346,17 +345,6 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); } -/* Use this variant for host-to-device copies from stack locations that may not - be live at the time an asynchronous copy operation takes place. */ - -static void -copy_host2dev_immediate (struct gomp_device_descr *devicep, void *d, - const void *h, size_t sz, - struct gomp_coalesce_buf *cbuf) -{ - gomp_copy_host2dev (devicep, NULL, d, h, sz, cbuf); -} - attribute_hidden void gomp_copy_dev2host (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, @@ -617,10 +605,10 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, if (cur_node.host_start == (uintptr_t) NULL) { cur_node.tgt_offset = (uintptr_t) NULL; - copy_host2dev_immediate (devicep, - (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, - sizeof (void *), cbuf); + gomp_copy_host2dev (devicep, aq, + (void *) (tgt->tgt_start + target_offset), + (void *) &cur_node.tgt_offset, sizeof (void *), + cbuf); return; } /* Add bias to the pointer value. */ @@ -639,9 +627,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, array section. Now subtract bias to get what we want to initialize the pointer with. */ cur_node.tgt_offset -= bias; - copy_host2dev_immediate (devicep, (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, sizeof (void *), - cbuf); + gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), + (void *) &cur_node.tgt_offset, sizeof (void *), cbuf); } static void @@ -1460,13 +1447,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); if (cur_node.tgt_offset) cur_node.tgt_offset -= sizes[i]; - copy_host2dev_immediate (devicep, - (void *) (n->tgt->tgt_start - + n->tgt_offset - + cur_node.host_start - - n->host_start), - (void *) &cur_node.tgt_offset, - sizeof (void *), cbufp); + gomp_copy_host2dev (devicep, aq, + (void *) (n->tgt->tgt_start + + n->tgt_offset + + cur_node.host_start + - n->host_start), + (void *) &cur_node.tgt_offset, + sizeof (void *), cbufp); cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start; continue; @@ -1705,8 +1692,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); /* We intentionally do not use coalescing here, as it's not data allocated by the current call to this function. */ - copy_host2dev_immediate (devicep, (void *) n->tgt_offset, - &tgt_addr, sizeof (void *), NULL); + gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, + &tgt_addr, sizeof (void *), NULL); } array++; } @@ -1828,9 +1815,10 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, for (i = 0; i < mapnum; i++) { cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); - copy_host2dev_immediate (devicep, - (void *) (tgt->tgt_start + i * sizeof (void *)), - (void *) &cur_node.tgt_offset, sizeof (void *), cbufp); + gomp_copy_host2dev (devicep, aq, + (void *) (tgt->tgt_start + i * sizeof (void *)), + (void *) &cur_node.tgt_offset, sizeof (void *), + cbufp); } }