const void *src;
size_t len;
bool use_hsa_memory_copy;
+ bool using_src_copy;
struct goacc_asyncqueue *aq;
};
hsa_fns.hsa_memory_copy_fn (data->dst, data->src, data->len);
else
memcpy (data->dst, data->src, data->len);
+ if (data->using_src_copy)
+ free ((void *) data->src);
free (data);
}
static void
queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src,
- size_t len, bool use_hsa_memory_copy)
+ size_t len, bool use_hsa_memory_copy, bool using_src_copy)
{
if (DEBUG_QUEUES)
HSA_DEBUG ("queue_push_copy %d:%d: %zu bytes from (%p) to (%p)\n",
data->src = src;
data->len = len;
data->use_hsa_memory_copy = use_hsa_memory_copy;
+ data->using_src_copy = using_src_copy;
data->aq = aq;
queue_push_callback (aq, copy_data, data);
}
{
struct agent_info *agent = get_agent_info (device);
maybe_init_omp_async (agent);
- queue_push_copy (agent->omp_async_queue, dst, src, n, false);
+ queue_push_copy (agent->omp_async_queue, dst, src, n, false, false);
return true;
}
{
struct agent_info *agent = get_agent_info (device);
assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n, image_address_p (agent, dst));
+ /* The source data does not necessarily remain live until the deferred
+ copy happens (e.g. it may be in a stack frame that has since expired).
+ Taking a snapshot of the data here avoids reading through a dangling
+ pointer later, but means that (a) the data is copied twice and
+ (b) modifications to the source data between the "spawning" point of
+ the asynchronous kernel and when it is executed will not be seen.
+ That is probably the desired behaviour anyway. */
+ void *src_copy = GOMP_PLUGIN_malloc (n);
+ memcpy (src_copy, src, n);
+ queue_push_copy (aq, dst, src_copy, n, image_address_p (agent, dst), true);
return true;
}
{
struct agent_info *agent = get_agent_info (device);
assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n, image_address_p (agent, src));
+ queue_push_copy (aq, dst, src, n, image_address_p (agent, src), false);
return true;
}
}
/* Copy host memory to an offload device. In asynchronous mode (if AQ is
- non-NULL), this is only safe when the source memory is a global or heap
- location (otherwise a copy may take place from a dangling pointer to an
- expired stack frame). Use copy_host2dev_immediate for copies from stack
- locations. */
+ non-NULL), H may point to a stack location. It is up to the underlying
+ plugin to ensure that this data is read immediately, rather than at some
+ later point when the stack frame will likely have been destroyed. */
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
-/* Use this variant for host-to-device copies from stack locations that may not
- be live at the time an asynchronous copy operation takes place. */
-
-static void
-copy_host2dev_immediate (struct gomp_device_descr *devicep, void *d,
- const void *h, size_t sz,
- struct gomp_coalesce_buf *cbuf)
-{
- gomp_copy_host2dev (devicep, NULL, d, h, sz, cbuf);
-}
-
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
- copy_host2dev_immediate (devicep,
- (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *), cbuf);
+ gomp_copy_host2dev (devicep, aq,
+ (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset, sizeof (void *),
+ cbuf);
return;
}
/* Add bias to the pointer value. */
array section. Now subtract bias to get what we want
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
- copy_host2dev_immediate (devicep, (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset, sizeof (void *),
- cbuf);
+ gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
if (cur_node.tgt_offset)
cur_node.tgt_offset -= sizes[i];
- copy_host2dev_immediate (devicep,
- (void *) (n->tgt->tgt_start
- + n->tgt_offset
- + cur_node.host_start
- - n->host_start),
- (void *) &cur_node.tgt_offset,
- sizeof (void *), cbufp);
+ gomp_copy_host2dev (devicep, aq,
+ (void *) (n->tgt->tgt_start
+ + n->tgt_offset
+ + cur_node.host_start
+ - n->host_start),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *), cbufp);
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start - n->host_start;
continue;
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
- copy_host2dev_immediate (devicep, (void *) n->tgt_offset,
- &tgt_addr, sizeof (void *), NULL);
+ gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
+ &tgt_addr, sizeof (void *), NULL);
}
array++;
}
for (i = 0; i < mapnum; i++)
{
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
- copy_host2dev_immediate (devicep,
- (void *) (tgt->tgt_start + i * sizeof (void *)),
- (void *) &cur_node.tgt_offset, sizeof (void *), cbufp);
+ gomp_copy_host2dev (devicep, aq,
+ (void *) (tgt->tgt_start + i * sizeof (void *)),
+ (void *) &cur_node.tgt_offset, sizeof (void *),
+ cbufp);
}
}