-/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
+/* Copyright (C) 2013-2024 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
#include "openacc.h"
#include "libgomp.h"
-#include "libgomp_g.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#ifdef HAVE_INTTYPES_H
#include <stdarg.h>
#include <assert.h>
-static int
-find_pset (int pos, size_t mapnum, unsigned short *kinds)
+
+/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
+ continue to support the following two legacy values. */
+_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
+ "legacy GOMP_DEVICE_ICV broken");
+_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
+ == GOACC_FLAG_HOST_FALLBACK,
+ "legacy GOMP_DEVICE_HOST_FALLBACK broken");
+
+
+/* Handle the mapping pairs that are presented when a
+ deviceptr clause is used with Fortran.
+
+ Walk all MAPNUM entries. Every GOMP_MAP_FORCE_DEVICEPTR entry whose
+ size differs from sizeof (void *) gets its host address cleared; if it
+ is immediately followed by a zero-sized GOMP_MAP_POINTER entry whose
+ pointed-to value equals this entry's host address, that following entry
+ first inherits this entry's kind and becomes pointer-sized.
+ HOSTADDRS, SIZES and KINDS are modified in place. */
+
+static void
+handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds)
{
- if (pos + 1 >= mapnum)
- return 0;
+ int i;
- unsigned char kind = kinds[pos+1] & 0xff;
+ for (i = 0; i < mapnum; i++)
+ {
+ unsigned short kind1 = kinds[i] & 0xff;
- return kind == GOMP_MAP_TO_PSET;
-}
+ /* Handle Fortran deviceptr clause. */
+ if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
+ {
+ unsigned short kind2;
-static void goacc_wait (int async, int num_waits, va_list *ap);
+ if (i < (signed)mapnum - 1)
+ kind2 = kinds[i + 1] & 0xff;
+ else
+ kind2 = 0xffff; /* No next entry: a value the 0xff mask can never produce. */
+
+ if (sizes[i] == sizeof (void *)) /* Pointer-sized entries are left untouched. */
+ continue;
+
+ /* At this point, we're dealing with a Fortran deviceptr.
+ If the next element is not what we're expecting, then
+ this is an instance of where the deviceptr variable was
+ not used within the region and the pointer was removed
+ by the gimplifier. Otherwise, hand this entry's kind to the
+ following pointer entry and make that entry pointer-sized. */
+ if (kind2 == GOMP_MAP_POINTER
+ && sizes[i + 1] == 0
+ && hostaddrs[i] == *(void **)hostaddrs[i + 1])
+ {
+ kinds[i+1] = kinds[i];
+ sizes[i+1] = sizeof (void *);
+ }
+ /* Invalidate this entry by clearing its host address. */
+ hostaddrs[i] = NULL;
+ }
+ }
+}
-/* Launch a possibly offloaded function on DEVICE. FN is the host fn
+
+/* Launch a possibly offloaded function with FLAGS. FN is the host fn
address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
blocks to be copied to/from the device. Variadic arguments are
keyed optional parameters terminated with a zero. */
void
-GOACC_parallel_keyed (int device, void (*fn) (void *),
+GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
size_t mapnum, void **hostaddrs, size_t *sizes,
unsigned short *kinds, ...)
{
- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
+
va_list ap;
struct goacc_thread *thr;
struct gomp_device_descr *acc_dev;
- struct target_mem_desc *tgt;
- void **devaddrs;
unsigned int i;
struct splay_tree_key_s k;
splay_tree_key tgt_fn_key;
thr = goacc_thread ();
acc_dev = thr->dev;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_compute_construct_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info compute_construct_event_info;
+ if (profiling_p)
+ {
+ compute_construct_event_info.other_event.event_type
+ = prof_info.event_type;
+ compute_construct_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ compute_construct_event_info.other_event.parent_construct
+ = acc_construct_parallel;
+ compute_construct_event_info.other_event.implicit = 0;
+ compute_construct_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+ &api_info);
+
+ handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
+
/* Host fallback if "if" clause is false or if the current device is set to
the host. */
- if (host_fallback)
+ if ((flags & GOACC_FLAG_HOST_FALLBACK)
+ /* TODO: a proper pthreads based "multi-core CPU" local device
+ implementation. Currently, this is still the same as host-fallback. */
+ || (flags & GOACC_FLAG_LOCAL_DEVICE))
{
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
goacc_save_and_set_bind (acc_device_host);
fn (hostaddrs);
goacc_restore_bind ();
- return;
+ goto out_prof;
}
else if (acc_device_type (acc_dev->type) == acc_device_host)
{
fn (hostaddrs);
- return;
+ goto out_prof;
}
/* Default: let the runtime choose. */
if (async == GOMP_LAUNCH_OP_MAX)
async = va_arg (ap, unsigned);
+
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
break;
}
case GOMP_LAUNCH_WAIT:
{
unsigned num_waits = GOMP_LAUNCH_OP (tag);
-
- if (num_waits)
- goacc_wait (async, num_waits, &ap);
+ goacc_wait (async, num_waits, &ap);
break;
}
}
va_end (ap);
- acc_dev->openacc.async_set_async_func (async);
-
if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
{
k.host_start = (uintptr_t) fn;
else
tgt_fn = (void (*)) fn;
- tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- GOMP_MAP_VARS_OPENACC);
+ acc_event_info enter_exit_data_event_info;
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_enter_data_start;
+ enter_exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ enter_exit_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ enter_exit_data_event_info.other_event.parent_construct
+ = compute_construct_event_info.other_event.parent_construct;
+ enter_exit_data_event_info.other_event.implicit = 1;
+ enter_exit_data_event_info.other_event.tool_info = NULL;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
+
+ goacc_aq aq = get_goacc_asyncqueue (async);
- devaddrs = gomp_alloca (sizeof (void *) * mapnum);
- for (i = 0; i < mapnum; i++)
- devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
- + tgt->list[i].key->tgt_offset);
+ struct target_mem_desc *tgt
+ = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ GOMP_MAP_VARS_TARGET);
- acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
- async, dims, tgt);
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_enter_data_end;
+ enter_exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
- /* If running synchronously, unmap immediately. */
- if (async < acc_async_noval)
- gomp_unmap_vars (tgt, true);
+ void **devaddrs = (void **) tgt->tgt_start;
+ if (aq == NULL)
+ acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
+ tgt);
else
- tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
+ acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ dims, tgt, aq);
+
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_exit_data_start;
+ enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+ enter_exit_data_event_info.other_event.tool_info = NULL;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
+
+ /* If running synchronously (aq == NULL), this will unmap immediately. */
+ goacc_unmap_vars (tgt, true, aq);
+
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_exit_data_end;
+ enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
- acc_dev->openacc.async_set_async_func (acc_async_sync);
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_compute_construct_end;
+ compute_construct_event_info.other_event.event_type
+ = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+ &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
-/* Legacy entry point, only provide host execution. */
+/* Legacy entry point (GCC 5). Only provide host fallback execution. */
void
-GOACC_parallel (int device, void (*fn) (void *),
+GOACC_parallel (int flags_m, void (*fn) (void *),
size_t mapnum, void **hostaddrs, size_t *sizes,
unsigned short *kinds,
int num_gangs, int num_workers, int vector_length,
}
void
-GOACC_data_start (int device, size_t mapnum,
+GOACC_data_start (int flags_m, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
+
struct target_mem_desc *tgt;
#ifdef HAVE_INTTYPES_H
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_enter_data_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = acc_async_sync; /* Always synchronous. */
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info enter_data_event_info;
+ if (profiling_p)
+ {
+ enter_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ enter_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ enter_data_event_info.other_event.parent_construct = acc_construct_data;
+ for (int i = 0; i < mapnum; ++i)
+ if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
+ || (kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
+ {
+ /* If there is one such data mapping kind, then this is actually an
+ OpenACC 'host_data' construct. (GCC maps the OpenACC
+ 'host_data' construct to the OpenACC 'data' construct.) Apart
+ from artificial test cases (such as an OpenACC 'host_data'
+ construct's (implicit) device initialization when there hasn't
+ been any device data set up before...), there can't really be
+ any meaningful events generated from OpenACC 'host_data'
+ constructs, though. */
+ enter_data_event_info.other_event.parent_construct
+ = acc_construct_host_data;
+ break;
+ }
+ enter_data_event_info.other_event.implicit = 0;
+ enter_data_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
+
/* Host fallback or 'do nothing'. */
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
- || host_fallback)
+ || (flags & GOACC_FLAG_HOST_FALLBACK)
+ || (flags & GOACC_FLAG_LOCAL_DEVICE))
{
- tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
- GOMP_MAP_VARS_OPENACC);
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
+ tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
- return;
+ goto out_prof;
}
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
- tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
- GOMP_MAP_VARS_OPENACC);
+ tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds,
+ true, 0);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
+
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_enter_data_end;
+ enter_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
GOACC_data_end (void)
{ /* Pop the head of the calling thread's mapped_data list and unmap it
 with a NULL asyncqueue. When PROFILING_P is set, this is bracketed
 by acc_ev_exit_data_start / acc_ev_exit_data_end dispatches.
 NOTE(review): TGT is dereferenced without a NULL check; presumably
 every call is paired with an earlier GOACC_data_start -- confirm. */
struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
struct target_mem_desc *tgt = thr->mapped_data; /* Head of the list linked through ->prev. */
- gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
- thr->mapped_data = tgt->prev;
- gomp_unmap_vars (tgt, true);
- gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
-}
-
-void
-GOACC_enter_exit_data (int device, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned short *kinds,
- int async, int num_waits, ...)
-{
- struct goacc_thread *thr;
- struct gomp_device_descr *acc_dev;
- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
- bool data_enter = false;
- size_t i;
-
- goacc_lazy_initialize ();
-
- thr = goacc_thread ();
- acc_dev = thr->dev;
-
- if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
- || host_fallback)
- return;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
- if (num_waits)
+ acc_prof_info prof_info;
+ if (profiling_p)
{
- va_list ap;
-
- va_start (ap, num_waits);
- goacc_wait (async, num_waits, &ap);
- va_end (ap);
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_exit_data_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = acc_async_sync; /* Always synchronous. */
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
}
-
- acc_dev->openacc.async_set_async_func (async);
-
- /* Determine if this is an "acc enter data". */
- for (i = 0; i < mapnum; ++i)
+ acc_event_info exit_data_event_info;
+ if (profiling_p)
{
- unsigned char kind = kinds[i] & 0xff;
-
- if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
- continue;
-
- if (kind == GOMP_MAP_FORCE_ALLOC
- || kind == GOMP_MAP_FORCE_PRESENT
- || kind == GOMP_MAP_FORCE_TO)
- {
- data_enter = true;
- break;
- }
-
- if (kind == GOMP_MAP_DELETE
- || kind == GOMP_MAP_FORCE_FROM)
- break;
-
- gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
- kind);
+ exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ exit_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ exit_data_event_info.other_event.parent_construct = acc_construct_data;
+ exit_data_event_info.other_event.implicit = 0;
+ exit_data_event_info.other_event.tool_info = NULL;
}
-
- if (data_enter)
+ acc_api_info api_info;
+ if (profiling_p)
{
- for (i = 0; i < mapnum; i++)
- {
- unsigned char kind = kinds[i] & 0xff;
-
- /* Scan for PSETs. */
- int psets = find_pset (i, mapnum, kinds);
-
- if (!psets)
- {
- switch (kind)
- {
- case GOMP_MAP_POINTER:
- gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
- &kinds[i]);
- break;
- case GOMP_MAP_FORCE_ALLOC:
- acc_create (hostaddrs[i], sizes[i]);
- break;
- case GOMP_MAP_FORCE_PRESENT:
- acc_present_or_copyin (hostaddrs[i], sizes[i]);
- break;
- case GOMP_MAP_FORCE_TO:
- acc_present_or_copyin (hostaddrs[i], sizes[i]);
- break;
- default:
- gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
- kind);
- break;
- }
- }
- else
- {
- gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
- /* Increment 'i' by two because OpenACC requires fortran
- arrays to be contiguous, so each PSET is associated with
- one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
- one MAP_POINTER. */
- i += 2;
- }
- }
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
}
- else
- for (i = 0; i < mapnum; ++i)
- {
- unsigned char kind = kinds[i] & 0xff;
-
- int psets = find_pset (i, mapnum, kinds);
-
- if (!psets)
- {
- switch (kind)
- {
- case GOMP_MAP_POINTER:
- gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
- == GOMP_MAP_FORCE_FROM,
- async, 1);
- break;
- case GOMP_MAP_DELETE:
- acc_delete (hostaddrs[i], sizes[i]);
- break;
- case GOMP_MAP_FORCE_FROM:
- acc_copyout (hostaddrs[i], sizes[i]);
- break;
- default:
- gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
- kind);
- break;
- }
- }
- else
- {
- gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
- == GOMP_MAP_FORCE_FROM, async, 3);
- /* See the above comment. */
- i += 2;
- }
- }
- acc_dev->openacc.async_set_async_func (acc_async_sync);
-}
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); /* event_type is acc_ev_exit_data_start here. */
-static void
-goacc_wait (int async, int num_waits, va_list *ap)
-{
- struct goacc_thread *thr = goacc_thread ();
- struct gomp_device_descr *acc_dev = thr->dev;
+ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
+ thr->mapped_data = tgt->prev; /* Unlink TGT from the list before unmapping it. */
+ goacc_unmap_vars (tgt, true, NULL); /* NULL asyncqueue, matching acc_async_sync above. */
+ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
- while (num_waits--)
+ if (profiling_p)
{
- int qid = va_arg (*ap, int);
-
- if (acc_async_test (qid))
- continue;
-
- if (async == acc_async_sync)
- acc_wait (qid);
- else if (qid == async)
- ;/* If we're waiting on the same asynchronous queue as we're
- launching on, the queue itself will order work as
- required, so there's no need to wait explicitly. */
- else
- acc_dev->openacc.async_wait_async_func (qid, async);
+ prof_info.event_type = acc_ev_exit_data_end;
+ exit_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
+
+ thr->prof_info = NULL; /* PROF_INFO and API_INFO are locals of this call; drop the pointers to them. */
+ thr->api_info = NULL;
}
}
void
-GOACC_update (int device, size_t mapnum,
+GOACC_update (int flags_m, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds,
int async, int num_waits, ...)
{
- bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
+
size_t i;
goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_update_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info update_event_info;
+ if (profiling_p)
+ {
+ update_event_info.other_event.event_type
+ = prof_info.event_type;
+ update_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ update_event_info.other_event.parent_construct = acc_construct_update;
+ update_event_info.other_event.implicit = 0;
+ update_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
- || host_fallback)
- return;
+ || (flags & GOACC_FLAG_HOST_FALLBACK))
+ {
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
+
+ goto out_prof;
+ }
if (num_waits)
{
va_end (ap);
}
- acc_dev->openacc.async_set_async_func (async);
-
+ bool update_device = false;
for (i = 0; i < mapnum; ++i)
{
unsigned char kind = kinds[i] & 0xff;
case GOMP_MAP_TO_PSET:
break;
+ case GOMP_MAP_ALWAYS_POINTER:
+ if (update_device)
+ {
+ /* Save the contents of the host pointer. */
+ void *dptr = acc_deviceptr (hostaddrs[i-1]);
+ uintptr_t t = *(uintptr_t *) hostaddrs[i];
+
+ /* Update the contents of the host pointer to reflect
+ the value of the allocated device memory in the
+ previous pointer. */
+ *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
+ /* TODO: verify that we really cannot use acc_update_device_async
+ here. */
+ acc_update_device (hostaddrs[i], sizeof (uintptr_t));
+
+ /* Restore the host pointer. */
+ *(uintptr_t *) hostaddrs[i] = t;
+ update_device = false;
+ }
+ break;
+
+ case GOMP_MAP_TO:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+ {
+ update_device = false;
+ break;
+ }
+ /* Fallthru */
case GOMP_MAP_FORCE_TO:
- acc_update_device (hostaddrs[i], sizes[i]);
+ update_device = true;
+ acc_update_device_async (hostaddrs[i], sizes[i], async);
break;
+ case GOMP_MAP_FROM:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+ {
+ update_device = false;
+ break;
+ }
+ /* Fallthru */
case GOMP_MAP_FORCE_FROM:
- acc_update_self (hostaddrs[i], sizes[i]);
+ update_device = false;
+ acc_update_self_async (hostaddrs[i], sizes[i], async);
break;
default:
}
}
- acc_dev->openacc.async_set_async_func (acc_async_sync);
-}
-
-void
-GOACC_wait (int async, int num_waits, ...)
-{
- if (num_waits)
+ out_prof:
+ if (profiling_p)
{
- va_list ap;
+ prof_info.event_type = acc_ev_update_end;
+ update_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
- va_start (ap, num_waits);
- goacc_wait (async, num_waits, &ap);
- va_end (ap);
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
}
- else if (async == acc_async_sync)
- acc_wait_all ();
- else if (async == acc_async_noval)
- goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
}
+
+/* Legacy entry point (GCC 5). Takes no arguments and unconditionally
+ returns 1. */
+
int
GOACC_get_num_threads (void)
{
return 1;
}
+/* Legacy entry point (GCC 5). Takes no arguments and unconditionally
+ returns 0. */
+
int
GOACC_get_thread_num (void)
{
return 0;
}
-
-void
-GOACC_declare (int device, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned short *kinds)
-{
- int i;
-
- for (i = 0; i < mapnum; i++)
- {
- unsigned char kind = kinds[i] & 0xff;
-
- if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
- continue;
-
- switch (kind)
- {
- case GOMP_MAP_FORCE_ALLOC:
- case GOMP_MAP_FORCE_FROM:
- case GOMP_MAP_FORCE_TO:
- case GOMP_MAP_POINTER:
- case GOMP_MAP_DELETE:
- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
- &kinds[i], 0, 0);
- break;
-
- case GOMP_MAP_FORCE_DEVICEPTR:
- break;
-
- case GOMP_MAP_ALLOC:
- if (!acc_is_present (hostaddrs[i], sizes[i]))
- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
- &kinds[i], 0, 0);
- break;
-
- case GOMP_MAP_TO:
- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
- &kinds[i], 0, 0);
-
- break;
-
- case GOMP_MAP_FROM:
- kinds[i] = GOMP_MAP_FORCE_FROM;
- GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
- &kinds[i], 0, 0);
- break;
-
- case GOMP_MAP_FORCE_PRESENT:
- if (!acc_is_present (hostaddrs[i], sizes[i]))
- gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
- (unsigned long) sizes[i]);
- break;
-
- default:
- assert (0);
- break;
- }
- }
-}