]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/oacc-parallel.c
OpenACC Profiling Interface (incomplete)
[thirdparty/gcc.git] / libgomp / oacc-parallel.c
CommitLineData
a5544970 1/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
41dbbb37
TS
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27/* This file handles OpenACC constructs. */
28
29#include "openacc.h"
30#include "libgomp.h"
31#include "libgomp_g.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
01c0b3b0
KT
34#ifdef HAVE_INTTYPES_H
35# include <inttypes.h> /* For PRIu64. */
36#endif
41dbbb37
TS
37#include <string.h>
38#include <stdarg.h>
39#include <assert.h>
41dbbb37 40
59d5960c
TS
41
42/* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
43 continue to support the following two legacy values. */
44_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
45 "legacy GOMP_DEVICE_ICV broken");
46_Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
47 == GOACC_FLAG_HOST_FALLBACK,
48 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
49
50
829c6349
CLT
51/* Returns the number of mappings associated with the pointer or pset. PSET
52 have three mappings, whereas pointer have two. */
53
41dbbb37 54static int
829c6349 55find_pointer (int pos, size_t mapnum, unsigned short *kinds)
41dbbb37
TS
56{
57 if (pos + 1 >= mapnum)
58 return 0;
59
60 unsigned char kind = kinds[pos+1] & 0xff;
61
829c6349
CLT
62 if (kind == GOMP_MAP_TO_PSET)
63 return 3;
64 else if (kind == GOMP_MAP_POINTER)
65 return 2;
66
67 return 0;
68}
69
70/* Handle the mapping pair that are presented when a
71 deviceptr clause is used with Fortran. */
72
73static void
74handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
75 unsigned short *kinds)
76{
77 int i;
78
79 for (i = 0; i < mapnum; i++)
80 {
81 unsigned short kind1 = kinds[i] & 0xff;
82
83 /* Handle Fortran deviceptr clause. */
84 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
85 {
86 unsigned short kind2;
87
88 if (i < (signed)mapnum - 1)
89 kind2 = kinds[i + 1] & 0xff;
90 else
91 kind2 = 0xffff;
92
93 if (sizes[i] == sizeof (void *))
94 continue;
95
96 /* At this point, we're dealing with a Fortran deviceptr.
97 If the next element is not what we're expecting, then
98 this is an instance of where the deviceptr variable was
99 not used within the region and the pointer was removed
100 by the gimplifier. */
101 if (kind2 == GOMP_MAP_POINTER
102 && sizes[i + 1] == 0
103 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
104 {
105 kinds[i+1] = kinds[i];
106 sizes[i+1] = sizeof (void *);
107 }
108
109 /* Invalidate the entry. */
110 hostaddrs[i] = NULL;
111 }
112 }
41dbbb37
TS
113}
114
3e32ee19
NS
115static void goacc_wait (int async, int num_waits, va_list *ap);
116
117
59d5960c 118/* Launch a possibly offloaded function with FLAGS. FN is the host fn
3e32ee19
NS
119 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
120 blocks to be copied to/from the device. Varadic arguments are
121 keyed optional parameters terminated with a zero. */
41dbbb37
TS
122
123void
59d5960c 124GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
3e32ee19
NS
125 size_t mapnum, void **hostaddrs, size_t *sizes,
126 unsigned short *kinds, ...)
41dbbb37 127{
59d5960c
TS
128 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
129
41dbbb37
TS
130 va_list ap;
131 struct goacc_thread *thr;
132 struct gomp_device_descr *acc_dev;
133 struct target_mem_desc *tgt;
134 void **devaddrs;
135 unsigned int i;
136 struct splay_tree_key_s k;
137 splay_tree_key tgt_fn_key;
138 void (*tgt_fn);
3e32ee19
NS
139 int async = GOMP_ASYNC_SYNC;
140 unsigned dims[GOMP_DIM_MAX];
141 unsigned tag;
41dbbb37 142
01c0b3b0 143#ifdef HAVE_INTTYPES_H
3e32ee19
NS
144 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
145 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
01c0b3b0 146#else
3e32ee19
NS
147 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
148 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
01c0b3b0 149#endif
d93bdab5 150 goacc_lazy_initialize ();
41dbbb37
TS
151
152 thr = goacc_thread ();
153 acc_dev = thr->dev;
154
5fae049d
TS
155 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
156
157 acc_prof_info prof_info;
158 if (profiling_p)
159 {
160 thr->prof_info = &prof_info;
161
162 prof_info.event_type = acc_ev_compute_construct_start;
163 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
164 prof_info.version = _ACC_PROF_INFO_VERSION;
165 prof_info.device_type = acc_device_type (acc_dev->type);
166 prof_info.device_number = acc_dev->target_id;
167 prof_info.thread_id = -1;
168 prof_info.async = async;
169 prof_info.async_queue = prof_info.async;
170 prof_info.src_file = NULL;
171 prof_info.func_name = NULL;
172 prof_info.line_no = -1;
173 prof_info.end_line_no = -1;
174 prof_info.func_line_no = -1;
175 prof_info.func_end_line_no = -1;
176 }
177 acc_event_info compute_construct_event_info;
178 if (profiling_p)
179 {
180 compute_construct_event_info.other_event.event_type
181 = prof_info.event_type;
182 compute_construct_event_info.other_event.valid_bytes
183 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
184 compute_construct_event_info.other_event.parent_construct
185 = acc_construct_parallel;
186 compute_construct_event_info.other_event.implicit = 0;
187 compute_construct_event_info.other_event.tool_info = NULL;
188 }
189 acc_api_info api_info;
190 if (profiling_p)
191 {
192 thr->api_info = &api_info;
193
194 api_info.device_api = acc_device_api_none;
195 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
196 api_info.device_type = prof_info.device_type;
197 api_info.vendor = -1;
198 api_info.device_handle = NULL;
199 api_info.context_handle = NULL;
200 api_info.async_handle = NULL;
201 }
202
203 if (profiling_p)
204 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
205 &api_info);
206
829c6349
CLT
207 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
208
41dbbb37
TS
209 /* Host fallback if "if" clause is false or if the current device is set to
210 the host. */
59d5960c 211 if (flags & GOACC_FLAG_HOST_FALLBACK)
41dbbb37 212 {
5fae049d
TS
213 prof_info.device_type = acc_device_host;
214 api_info.device_type = prof_info.device_type;
41dbbb37
TS
215 goacc_save_and_set_bind (acc_device_host);
216 fn (hostaddrs);
217 goacc_restore_bind ();
5fae049d 218 goto out_prof;
41dbbb37
TS
219 }
220 else if (acc_device_type (acc_dev->type) == acc_device_host)
221 {
222 fn (hostaddrs);
5fae049d 223 goto out_prof;
41dbbb37
TS
224 }
225
f99c3557
TS
226 /* Default: let the runtime choose. */
227 for (i = 0; i != GOMP_DIM_MAX; i++)
228 dims[i] = 0;
229
3e32ee19
NS
230 va_start (ap, kinds);
231 /* TODO: This will need amending when device_type is implemented. */
232 while ((tag = va_arg (ap, unsigned)) != 0)
a091118d 233 {
3e32ee19
NS
234 if (GOMP_LAUNCH_DEVICE (tag))
235 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
236 GOMP_LAUNCH_DEVICE (tag));
237
238 switch (GOMP_LAUNCH_CODE (tag))
239 {
240 case GOMP_LAUNCH_DIM:
241 {
242 unsigned mask = GOMP_LAUNCH_OP (tag);
243
244 for (i = 0; i != GOMP_DIM_MAX; i++)
245 if (mask & GOMP_DIM_MASK (i))
246 dims[i] = va_arg (ap, unsigned);
247 }
248 break;
249
250 case GOMP_LAUNCH_ASYNC:
251 {
252 /* Small constant values are encoded in the operand. */
253 async = GOMP_LAUNCH_OP (tag);
254
255 if (async == GOMP_LAUNCH_OP_MAX)
256 async = va_arg (ap, unsigned);
5fae049d
TS
257
258 if (profiling_p)
259 {
260 prof_info.async = async;
261 prof_info.async_queue = prof_info.async;
262 }
263
3e32ee19
NS
264 break;
265 }
266
267 case GOMP_LAUNCH_WAIT:
268 {
269 unsigned num_waits = GOMP_LAUNCH_OP (tag);
19695f4d 270 goacc_wait (async, num_waits, &ap);
3e32ee19
NS
271 break;
272 }
273
274 default:
275 gomp_fatal ("unrecognized offload code '%d',"
276 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
277 }
a091118d 278 }
3e32ee19 279 va_end (ap);
41dbbb37 280
41dbbb37
TS
281 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
282 {
283 k.host_start = (uintptr_t) fn;
284 k.host_end = k.host_start + 1;
a51df54e
IV
285 gomp_mutex_lock (&acc_dev->lock);
286 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
287 gomp_mutex_unlock (&acc_dev->lock);
41dbbb37
TS
288
289 if (tgt_fn_key == NULL)
290 gomp_fatal ("target function wasn't mapped");
291
d93bdab5 292 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
41dbbb37
TS
293 }
294 else
295 tgt_fn = (void (*)) fn;
296
5fae049d
TS
297 acc_event_info enter_exit_data_event_info;
298 if (profiling_p)
299 {
300 prof_info.event_type = acc_ev_enter_data_start;
301 enter_exit_data_event_info.other_event.event_type
302 = prof_info.event_type;
303 enter_exit_data_event_info.other_event.valid_bytes
304 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
305 enter_exit_data_event_info.other_event.parent_construct
306 = compute_construct_event_info.other_event.parent_construct;
307 enter_exit_data_event_info.other_event.implicit = 1;
308 enter_exit_data_event_info.other_event.tool_info = NULL;
309 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
310 &api_info);
311 }
312
1f4c5b9b 313 goacc_aq aq = get_goacc_asyncqueue (async);
41dbbb37 314
1f4c5b9b
CLT
315 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
316 true, GOMP_MAP_VARS_OPENACC);
5fae049d
TS
317 if (profiling_p)
318 {
319 prof_info.event_type = acc_ev_enter_data_end;
320 enter_exit_data_event_info.other_event.event_type
321 = prof_info.event_type;
322 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
323 &api_info);
324 }
1f4c5b9b 325
6e36114c 326 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
41dbbb37 327 for (i = 0; i < mapnum; i++)
d9a6bd32 328 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
fe570ff8
CP
329 + tgt->list[i].key->tgt_offset
330 + tgt->list[i].offset);
1f4c5b9b 331 if (aq == NULL)
5fae049d
TS
332 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
333 tgt);
334 else
335 acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
336 dims, tgt, aq);
337
338 if (profiling_p)
1f4c5b9b 339 {
5fae049d
TS
340 prof_info.event_type = acc_ev_exit_data_start;
341 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
342 enter_exit_data_event_info.other_event.tool_info = NULL;
343 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
344 &api_info);
1f4c5b9b 345 }
5fae049d
TS
346
347 /* If running synchronously, unmap immediately. */
348 if (aq == NULL)
349 gomp_unmap_vars (tgt, true);
41dbbb37 350 else
5fae049d
TS
351 gomp_unmap_vars_async (tgt, true, aq);
352
353 if (profiling_p)
829c6349 354 {
5fae049d
TS
355 prof_info.event_type = acc_ev_exit_data_end;
356 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
357 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
358 &api_info);
359 }
360
361 out_prof:
362 if (profiling_p)
363 {
364 prof_info.event_type = acc_ev_compute_construct_end;
365 compute_construct_event_info.other_event.event_type
366 = prof_info.event_type;
367 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
368 &api_info);
369
370 thr->prof_info = NULL;
371 thr->api_info = NULL;
829c6349 372 }
41dbbb37
TS
373}
374
2bbbfa4e 375/* Legacy entry point (GCC 5). Only provide host fallback execution. */
3e32ee19
NS
376
377void
59d5960c 378GOACC_parallel (int flags_m, void (*fn) (void *),
3e32ee19
NS
379 size_t mapnum, void **hostaddrs, size_t *sizes,
380 unsigned short *kinds,
381 int num_gangs, int num_workers, int vector_length,
382 int async, int num_waits, ...)
383{
384 goacc_save_and_set_bind (acc_device_host);
385 fn (hostaddrs);
386 goacc_restore_bind ();
387}
388
41dbbb37 389void
59d5960c 390GOACC_data_start (int flags_m, size_t mapnum,
41dbbb37
TS
391 void **hostaddrs, size_t *sizes, unsigned short *kinds)
392{
59d5960c
TS
393 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
394
41dbbb37
TS
395 struct target_mem_desc *tgt;
396
01c0b3b0
KT
397#ifdef HAVE_INTTYPES_H
398 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
399 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
400#else
401 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
402 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
403#endif
41dbbb37 404
d93bdab5 405 goacc_lazy_initialize ();
41dbbb37
TS
406
407 struct goacc_thread *thr = goacc_thread ();
408 struct gomp_device_descr *acc_dev = thr->dev;
409
5fae049d
TS
410 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
411
412 acc_prof_info prof_info;
413 if (profiling_p)
414 {
415 thr->prof_info = &prof_info;
416
417 prof_info.event_type = acc_ev_enter_data_start;
418 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
419 prof_info.version = _ACC_PROF_INFO_VERSION;
420 prof_info.device_type = acc_device_type (acc_dev->type);
421 prof_info.device_number = acc_dev->target_id;
422 prof_info.thread_id = -1;
423 prof_info.async = acc_async_sync; /* Always synchronous. */
424 prof_info.async_queue = prof_info.async;
425 prof_info.src_file = NULL;
426 prof_info.func_name = NULL;
427 prof_info.line_no = -1;
428 prof_info.end_line_no = -1;
429 prof_info.func_line_no = -1;
430 prof_info.func_end_line_no = -1;
431 }
432 acc_event_info enter_data_event_info;
433 if (profiling_p)
434 {
435 enter_data_event_info.other_event.event_type
436 = prof_info.event_type;
437 enter_data_event_info.other_event.valid_bytes
438 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
439 enter_data_event_info.other_event.parent_construct = acc_construct_data;
440 for (int i = 0; i < mapnum; ++i)
441 if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR)
442 {
443 /* If there is one such data mapping kind, then this is actually an
444 OpenACC 'host_data' construct. (GCC maps the OpenACC
445 'host_data' construct to the OpenACC 'data' construct.) Apart
446 from artificial test cases (such as an OpenACC 'host_data'
447 construct's (implicit) device initialization when there hasn't
448 been any device data be set up before...), there can't really
449 any meaningful events be generated from OpenACC 'host_data'
450 constructs, though. */
451 enter_data_event_info.other_event.parent_construct
452 = acc_construct_host_data;
453 break;
454 }
455 enter_data_event_info.other_event.implicit = 0;
456 enter_data_event_info.other_event.tool_info = NULL;
457 }
458 acc_api_info api_info;
459 if (profiling_p)
460 {
461 thr->api_info = &api_info;
462
463 api_info.device_api = acc_device_api_none;
464 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
465 api_info.device_type = prof_info.device_type;
466 api_info.vendor = -1;
467 api_info.device_handle = NULL;
468 api_info.context_handle = NULL;
469 api_info.async_handle = NULL;
470 }
471
472 if (profiling_p)
473 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
474
41dbbb37
TS
475 /* Host fallback or 'do nothing'. */
476 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
59d5960c 477 || (flags & GOACC_FLAG_HOST_FALLBACK))
41dbbb37 478 {
5fae049d
TS
479 prof_info.device_type = acc_device_host;
480 api_info.device_type = prof_info.device_type;
d9a6bd32
JJ
481 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
482 GOMP_MAP_VARS_OPENACC);
41dbbb37
TS
483 tgt->prev = thr->mapped_data;
484 thr->mapped_data = tgt;
485
5fae049d 486 goto out_prof;
41dbbb37
TS
487 }
488
489 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
490 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
d9a6bd32 491 GOMP_MAP_VARS_OPENACC);
41dbbb37
TS
492 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
493 tgt->prev = thr->mapped_data;
494 thr->mapped_data = tgt;
5fae049d
TS
495
496 out_prof:
497 if (profiling_p)
498 {
499 prof_info.event_type = acc_ev_enter_data_end;
500 enter_data_event_info.other_event.event_type = prof_info.event_type;
501 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
502
503 thr->prof_info = NULL;
504 thr->api_info = NULL;
505 }
41dbbb37
TS
506}
507
508void
509GOACC_data_end (void)
510{
511 struct goacc_thread *thr = goacc_thread ();
5fae049d 512 struct gomp_device_descr *acc_dev = thr->dev;
41dbbb37
TS
513 struct target_mem_desc *tgt = thr->mapped_data;
514
5fae049d
TS
515 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
516
517 acc_prof_info prof_info;
518 if (profiling_p)
519 {
520 thr->prof_info = &prof_info;
521
522 prof_info.event_type = acc_ev_exit_data_start;
523 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
524 prof_info.version = _ACC_PROF_INFO_VERSION;
525 prof_info.device_type = acc_device_type (acc_dev->type);
526 prof_info.device_number = acc_dev->target_id;
527 prof_info.thread_id = -1;
528 prof_info.async = acc_async_sync; /* Always synchronous. */
529 prof_info.async_queue = prof_info.async;
530 prof_info.src_file = NULL;
531 prof_info.func_name = NULL;
532 prof_info.line_no = -1;
533 prof_info.end_line_no = -1;
534 prof_info.func_line_no = -1;
535 prof_info.func_end_line_no = -1;
536 }
537 acc_event_info exit_data_event_info;
538 if (profiling_p)
539 {
540 exit_data_event_info.other_event.event_type
541 = prof_info.event_type;
542 exit_data_event_info.other_event.valid_bytes
543 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
544 exit_data_event_info.other_event.parent_construct = acc_construct_data;
545 exit_data_event_info.other_event.implicit = 0;
546 exit_data_event_info.other_event.tool_info = NULL;
547 }
548 acc_api_info api_info;
549 if (profiling_p)
550 {
551 thr->api_info = &api_info;
552
553 api_info.device_api = acc_device_api_none;
554 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
555 api_info.device_type = prof_info.device_type;
556 api_info.vendor = -1;
557 api_info.device_handle = NULL;
558 api_info.context_handle = NULL;
559 api_info.async_handle = NULL;
560 }
561
562 if (profiling_p)
563 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
564
41dbbb37
TS
565 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
566 thr->mapped_data = tgt->prev;
567 gomp_unmap_vars (tgt, true);
568 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
5fae049d
TS
569
570 if (profiling_p)
571 {
572 prof_info.event_type = acc_ev_exit_data_end;
573 exit_data_event_info.other_event.event_type = prof_info.event_type;
574 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
575
576 thr->prof_info = NULL;
577 thr->api_info = NULL;
578 }
41dbbb37
TS
579}
580
581void
59d5960c 582GOACC_enter_exit_data (int flags_m, size_t mapnum,
41dbbb37
TS
583 void **hostaddrs, size_t *sizes, unsigned short *kinds,
584 int async, int num_waits, ...)
585{
59d5960c
TS
586 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
587
41dbbb37
TS
588 struct goacc_thread *thr;
589 struct gomp_device_descr *acc_dev;
41dbbb37
TS
590 bool data_enter = false;
591 size_t i;
592
d93bdab5 593 goacc_lazy_initialize ();
41dbbb37
TS
594
595 thr = goacc_thread ();
596 acc_dev = thr->dev;
597
829c6349
CLT
598 /* Determine whether "finalize" semantics apply to all mappings of this
599 OpenACC directive. */
600 bool finalize = false;
601 if (mapnum > 0)
602 {
603 unsigned char kind = kinds[0] & 0xff;
604 if (kind == GOMP_MAP_DELETE
605 || kind == GOMP_MAP_FORCE_FROM)
606 finalize = true;
607 }
608
41dbbb37
TS
609 /* Determine if this is an "acc enter data". */
610 for (i = 0; i < mapnum; ++i)
611 {
612 unsigned char kind = kinds[i] & 0xff;
613
614 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
615 continue;
616
617 if (kind == GOMP_MAP_FORCE_ALLOC
618 || kind == GOMP_MAP_FORCE_PRESENT
829c6349
CLT
619 || kind == GOMP_MAP_FORCE_TO
620 || kind == GOMP_MAP_TO
621 || kind == GOMP_MAP_ALLOC)
41dbbb37
TS
622 {
623 data_enter = true;
624 break;
625 }
626
829c6349
CLT
627 if (kind == GOMP_MAP_RELEASE
628 || kind == GOMP_MAP_DELETE
629 || kind == GOMP_MAP_FROM
41dbbb37
TS
630 || kind == GOMP_MAP_FORCE_FROM)
631 break;
632
633 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
634 kind);
635 }
636
5fae049d
TS
637 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
638
639 acc_prof_info prof_info;
640 if (profiling_p)
641 {
642 thr->prof_info = &prof_info;
643
644 prof_info.event_type
645 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
646 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
647 prof_info.version = _ACC_PROF_INFO_VERSION;
648 prof_info.device_type = acc_device_type (acc_dev->type);
649 prof_info.device_number = acc_dev->target_id;
650 prof_info.thread_id = -1;
651 prof_info.async = async;
652 prof_info.async_queue = prof_info.async;
653 prof_info.src_file = NULL;
654 prof_info.func_name = NULL;
655 prof_info.line_no = -1;
656 prof_info.end_line_no = -1;
657 prof_info.func_line_no = -1;
658 prof_info.func_end_line_no = -1;
659 }
660 acc_event_info enter_exit_data_event_info;
661 if (profiling_p)
662 {
663 enter_exit_data_event_info.other_event.event_type
664 = prof_info.event_type;
665 enter_exit_data_event_info.other_event.valid_bytes
666 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
667 enter_exit_data_event_info.other_event.parent_construct
668 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
669 enter_exit_data_event_info.other_event.implicit = 0;
670 enter_exit_data_event_info.other_event.tool_info = NULL;
671 }
672 acc_api_info api_info;
673 if (profiling_p)
674 {
675 thr->api_info = &api_info;
676
677 api_info.device_api = acc_device_api_none;
678 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
679 api_info.device_type = prof_info.device_type;
680 api_info.vendor = -1;
681 api_info.device_handle = NULL;
682 api_info.context_handle = NULL;
683 api_info.async_handle = NULL;
684 }
685
686 if (profiling_p)
687 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
688 &api_info);
689
690 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
691 || (flags & GOACC_FLAG_HOST_FALLBACK))
692 {
693 prof_info.device_type = acc_device_host;
694 api_info.device_type = prof_info.device_type;
695
696 goto out_prof;
697 }
698
699 if (num_waits)
700 {
701 va_list ap;
702
703 va_start (ap, num_waits);
704 goacc_wait (async, num_waits, &ap);
705 va_end (ap);
706 }
707
829c6349
CLT
708 /* In c, non-pointers and arrays are represented by a single data clause.
709 Dynamically allocated arrays and subarrays are represented by a data
710 clause followed by an internal GOMP_MAP_POINTER.
711
712 In fortran, scalars and not allocated arrays are represented by a
713 single data clause. Allocated arrays and subarrays have three mappings:
714 1) the original data clause, 2) a PSET 3) a pointer to the array data.
715 */
716
41dbbb37
TS
717 if (data_enter)
718 {
719 for (i = 0; i < mapnum; i++)
720 {
721 unsigned char kind = kinds[i] & 0xff;
722
829c6349
CLT
723 /* Scan for pointers and PSETs. */
724 int pointer = find_pointer (i, mapnum, kinds);
41dbbb37 725
829c6349 726 if (!pointer)
41dbbb37
TS
727 {
728 switch (kind)
729 {
829c6349 730 case GOMP_MAP_ALLOC:
41dbbb37 731 case GOMP_MAP_FORCE_ALLOC:
1f4c5b9b 732 acc_create_async (hostaddrs[i], sizes[i], async);
41dbbb37 733 break;
829c6349 734 case GOMP_MAP_TO:
41dbbb37 735 case GOMP_MAP_FORCE_TO:
1f4c5b9b 736 acc_copyin_async (hostaddrs[i], sizes[i], async);
41dbbb37
TS
737 break;
738 default:
739 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
740 kind);
741 break;
742 }
743 }
744 else
745 {
829c6349 746 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
1f4c5b9b 747 &sizes[i], &kinds[i], async);
41dbbb37
TS
748 /* Increment 'i' by two because OpenACC requires fortran
749 arrays to be contiguous, so each PSET is associated with
750 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
751 one MAP_POINTER. */
829c6349 752 i += pointer - 1;
41dbbb37
TS
753 }
754 }
755 }
756 else
757 for (i = 0; i < mapnum; ++i)
758 {
759 unsigned char kind = kinds[i] & 0xff;
760
829c6349 761 int pointer = find_pointer (i, mapnum, kinds);
41dbbb37 762
829c6349 763 if (!pointer)
41dbbb37
TS
764 {
765 switch (kind)
766 {
829c6349 767 case GOMP_MAP_RELEASE:
91106e84 768 case GOMP_MAP_DELETE:
829c6349
CLT
769 if (acc_is_present (hostaddrs[i], sizes[i]))
770 {
771 if (finalize)
1f4c5b9b 772 acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
829c6349 773 else
1f4c5b9b 774 acc_delete_async (hostaddrs[i], sizes[i], async);
829c6349 775 }
41dbbb37 776 break;
829c6349 777 case GOMP_MAP_FROM:
41dbbb37 778 case GOMP_MAP_FORCE_FROM:
829c6349 779 if (finalize)
1f4c5b9b 780 acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
829c6349 781 else
1f4c5b9b 782 acc_copyout_async (hostaddrs[i], sizes[i], async);
41dbbb37
TS
783 break;
784 default:
785 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
786 kind);
787 break;
788 }
789 }
790 else
791 {
829c6349
CLT
792 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
793 || kind == GOMP_MAP_FROM);
794 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
795 finalize, pointer);
41dbbb37 796 /* See the above comment. */
829c6349 797 i += pointer - 1;
41dbbb37
TS
798 }
799 }
5fae049d
TS
800
801 out_prof:
802 if (profiling_p)
803 {
804 prof_info.event_type
805 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
806 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
807 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
808 &api_info);
809
810 thr->prof_info = NULL;
811 thr->api_info = NULL;
812 }
41dbbb37
TS
813}
814
815static void
3e32ee19 816goacc_wait (int async, int num_waits, va_list *ap)
41dbbb37 817{
a091118d 818 while (num_waits--)
41dbbb37 819 {
3e32ee19 820 int qid = va_arg (*ap, int);
19695f4d
CLT
821
822 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
823 if (qid == acc_async_noval)
824 {
825 if (async == acc_async_sync)
826 acc_wait_all ();
827 else
828 acc_wait_all_async (async);
829 break;
830 }
831
41dbbb37
TS
832 if (acc_async_test (qid))
833 continue;
834
a091118d
NS
835 if (async == acc_async_sync)
836 acc_wait (qid);
837 else if (qid == async)
1f4c5b9b
CLT
838 /* If we're waiting on the same asynchronous queue as we're
839 launching on, the queue itself will order work as
840 required, so there's no need to wait explicitly. */
841 ;
a091118d 842 else
19695f4d 843 acc_wait_async (qid, async);
41dbbb37
TS
844 }
845}
846
847void
59d5960c 848GOACC_update (int flags_m, size_t mapnum,
41dbbb37
TS
849 void **hostaddrs, size_t *sizes, unsigned short *kinds,
850 int async, int num_waits, ...)
851{
59d5960c
TS
852 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
853
41dbbb37
TS
854 size_t i;
855
d93bdab5 856 goacc_lazy_initialize ();
41dbbb37
TS
857
858 struct goacc_thread *thr = goacc_thread ();
859 struct gomp_device_descr *acc_dev = thr->dev;
860
5fae049d
TS
861 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
862
863 acc_prof_info prof_info;
864 if (profiling_p)
865 {
866 thr->prof_info = &prof_info;
867
868 prof_info.event_type = acc_ev_update_start;
869 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
870 prof_info.version = _ACC_PROF_INFO_VERSION;
871 prof_info.device_type = acc_device_type (acc_dev->type);
872 prof_info.device_number = acc_dev->target_id;
873 prof_info.thread_id = -1;
874 prof_info.async = async;
875 prof_info.async_queue = prof_info.async;
876 prof_info.src_file = NULL;
877 prof_info.func_name = NULL;
878 prof_info.line_no = -1;
879 prof_info.end_line_no = -1;
880 prof_info.func_line_no = -1;
881 prof_info.func_end_line_no = -1;
882 }
883 acc_event_info update_event_info;
884 if (profiling_p)
885 {
886 update_event_info.other_event.event_type
887 = prof_info.event_type;
888 update_event_info.other_event.valid_bytes
889 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
890 update_event_info.other_event.parent_construct = acc_construct_update;
891 update_event_info.other_event.implicit = 0;
892 update_event_info.other_event.tool_info = NULL;
893 }
894 acc_api_info api_info;
895 if (profiling_p)
896 {
897 thr->api_info = &api_info;
898
899 api_info.device_api = acc_device_api_none;
900 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
901 api_info.device_type = prof_info.device_type;
902 api_info.vendor = -1;
903 api_info.device_handle = NULL;
904 api_info.context_handle = NULL;
905 api_info.async_handle = NULL;
906 }
907
908 if (profiling_p)
909 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
910
41dbbb37 911 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
59d5960c 912 || (flags & GOACC_FLAG_HOST_FALLBACK))
5fae049d
TS
913 {
914 prof_info.device_type = acc_device_host;
915 api_info.device_type = prof_info.device_type;
916
917 goto out_prof;
918 }
41dbbb37 919
a091118d 920 if (num_waits)
41dbbb37
TS
921 {
922 va_list ap;
923
924 va_start (ap, num_waits);
3e32ee19 925 goacc_wait (async, num_waits, &ap);
41dbbb37
TS
926 va_end (ap);
927 }
928
829c6349 929 bool update_device = false;
41dbbb37
TS
930 for (i = 0; i < mapnum; ++i)
931 {
932 unsigned char kind = kinds[i] & 0xff;
933
934 switch (kind)
935 {
936 case GOMP_MAP_POINTER:
937 case GOMP_MAP_TO_PSET:
938 break;
939
829c6349
CLT
940 case GOMP_MAP_ALWAYS_POINTER:
941 if (update_device)
942 {
943 /* Save the contents of the host pointer. */
944 void *dptr = acc_deviceptr (hostaddrs[i-1]);
945 uintptr_t t = *(uintptr_t *) hostaddrs[i];
946
947 /* Update the contents of the host pointer to reflect
948 the value of the allocated device memory in the
949 previous pointer. */
950 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
1f4c5b9b
CLT
951 /* TODO: verify that we really cannot use acc_update_device_async
952 here. */
829c6349
CLT
953 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
954
955 /* Restore the host pointer. */
956 *(uintptr_t *) hostaddrs[i] = t;
957 update_device = false;
958 }
959 break;
960
961 case GOMP_MAP_TO:
962 if (!acc_is_present (hostaddrs[i], sizes[i]))
963 {
964 update_device = false;
965 break;
966 }
967 /* Fallthru */
41dbbb37 968 case GOMP_MAP_FORCE_TO:
829c6349 969 update_device = true;
1f4c5b9b 970 acc_update_device_async (hostaddrs[i], sizes[i], async);
41dbbb37
TS
971 break;
972
829c6349
CLT
973 case GOMP_MAP_FROM:
974 if (!acc_is_present (hostaddrs[i], sizes[i]))
975 {
976 update_device = false;
977 break;
978 }
979 /* Fallthru */
41dbbb37 980 case GOMP_MAP_FORCE_FROM:
829c6349 981 update_device = false;
1f4c5b9b 982 acc_update_self_async (hostaddrs[i], sizes[i], async);
41dbbb37
TS
983 break;
984
985 default:
986 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
987 break;
988 }
989 }
5fae049d
TS
990
991 out_prof:
992 if (profiling_p)
993 {
994 prof_info.event_type = acc_ev_update_end;
995 update_event_info.other_event.event_type = prof_info.event_type;
996 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
997
998 thr->prof_info = NULL;
999 thr->api_info = NULL;
1000 }
41dbbb37
TS
1001}
1002
1003void
1004GOACC_wait (int async, int num_waits, ...)
1005{
5fae049d
TS
1006 goacc_lazy_initialize ();
1007
1008 struct goacc_thread *thr = goacc_thread ();
1009
1010 /* No nesting. */
1011 assert (thr->prof_info == NULL);
1012 assert (thr->api_info == NULL);
1013 acc_prof_info prof_info;
1014 acc_api_info api_info;
1015 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
1016 if (profiling_p)
1017 {
1018 prof_info.async = async;
1019 prof_info.async_queue = prof_info.async;
1020 }
1021
a091118d
NS
1022 if (num_waits)
1023 {
1024 va_list ap;
41dbbb37 1025
a091118d 1026 va_start (ap, num_waits);
3e32ee19 1027 goacc_wait (async, num_waits, &ap);
a091118d
NS
1028 va_end (ap);
1029 }
1030 else if (async == acc_async_sync)
1031 acc_wait_all ();
c8ab8aab
TS
1032 else
1033 acc_wait_all_async (async);
5fae049d
TS
1034
1035 if (profiling_p)
1036 {
1037 thr->prof_info = NULL;
1038 thr->api_info = NULL;
1039 }
41dbbb37
TS
1040}
1041
2bbbfa4e
TS
/* Legacy entry point (GCC 5).  Takes no arguments and always reports a
   thread count of one.  */

int
GOACC_get_num_threads (void)
{
  return 1;
}
1049
2bbbfa4e
TS
/* Legacy entry point (GCC 5).  Takes no arguments and always reports
   thread number zero.  */

int
GOACC_get_thread_num (void)
{
  return 0;
}
6e232ba4
JN
1057
1058void
59d5960c 1059GOACC_declare (int flags_m, size_t mapnum,
6e232ba4
JN
1060 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1061{
1062 int i;
1063
1064 for (i = 0; i < mapnum; i++)
1065 {
1066 unsigned char kind = kinds[i] & 0xff;
1067
1068 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1069 continue;
1070
1071 switch (kind)
1072 {
1073 case GOMP_MAP_FORCE_ALLOC:
6e232ba4
JN
1074 case GOMP_MAP_FORCE_FROM:
1075 case GOMP_MAP_FORCE_TO:
1076 case GOMP_MAP_POINTER:
829c6349 1077 case GOMP_MAP_RELEASE:
91106e84 1078 case GOMP_MAP_DELETE:
59d5960c 1079 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
f5ad16f1 1080 &kinds[i], GOMP_ASYNC_SYNC, 0);
6e232ba4
JN
1081 break;
1082
1083 case GOMP_MAP_FORCE_DEVICEPTR:
1084 break;
1085
1086 case GOMP_MAP_ALLOC:
1087 if (!acc_is_present (hostaddrs[i], sizes[i]))
59d5960c 1088 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
f5ad16f1 1089 &kinds[i], GOMP_ASYNC_SYNC, 0);
6e232ba4
JN
1090 break;
1091
1092 case GOMP_MAP_TO:
59d5960c 1093 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
f5ad16f1 1094 &kinds[i], GOMP_ASYNC_SYNC, 0);
6e232ba4
JN
1095
1096 break;
1097
1098 case GOMP_MAP_FROM:
59d5960c 1099 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
f5ad16f1 1100 &kinds[i], GOMP_ASYNC_SYNC, 0);
6e232ba4
JN
1101 break;
1102
1103 case GOMP_MAP_FORCE_PRESENT:
1104 if (!acc_is_present (hostaddrs[i], sizes[i]))
1105 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
1106 (unsigned long) sizes[i]);
1107 break;
1108
1109 default:
1110 assert (0);
1111 break;
1112 }
1113 }
1114}