]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgomp/oacc-parallel.c
e56330f6226b35391898a7f10cfe2fd6ff1240d4
[thirdparty/gcc.git] / libgomp / oacc-parallel.c
1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27 /* This file handles OpenACC constructs. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40
41
42 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
43 continue to support the following two legacy values.  The two compile-time
   assertions below check exactly that: unmarshalling the legacy value
   GOMP_DEVICE_ICV must yield no flags at all, and unmarshalling the legacy
   value GOMP_DEVICE_HOST_FALLBACK must yield exactly
   GOACC_FLAG_HOST_FALLBACK.  */
44 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
45 "legacy GOMP_DEVICE_ICV broken");
46 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
47 == GOACC_FLAG_HOST_FALLBACK,
48 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
49
50
51 /* Returns the number of mappings associated with the pointer or pset. PSET
52 have three mappings, whereas pointer have two. */
53
54 static int
55 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
56 {
57 if (pos + 1 >= mapnum)
58 return 0;
59
60 unsigned char kind = kinds[pos+1] & 0xff;
61
62 if (kind == GOMP_MAP_TO_PSET)
63 return 3;
64 else if (kind == GOMP_MAP_POINTER)
65 return 2;
66
67 return 0;
68 }
69
70 /* Handle the mapping pair that are presented when a
71 deviceptr clause is used with Fortran. */
72
73 static void
74 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
75 unsigned short *kinds)
76 {
77 int i;
78
79 for (i = 0; i < mapnum; i++)
80 {
81 unsigned short kind1 = kinds[i] & 0xff;
82
83 /* Handle Fortran deviceptr clause. */
84 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
85 {
86 unsigned short kind2;
87
88 if (i < (signed)mapnum - 1)
89 kind2 = kinds[i + 1] & 0xff;
90 else
91 kind2 = 0xffff;
92
93 if (sizes[i] == sizeof (void *))
94 continue;
95
96 /* At this point, we're dealing with a Fortran deviceptr.
97 If the next element is not what we're expecting, then
98 this is an instance of where the deviceptr variable was
99 not used within the region and the pointer was removed
100 by the gimplifier. */
101 if (kind2 == GOMP_MAP_POINTER
102 && sizes[i + 1] == 0
103 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
104 {
105 kinds[i+1] = kinds[i];
106 sizes[i+1] = sizeof (void *);
107 }
108
109 /* Invalidate the entry. */
110 hostaddrs[i] = NULL;
111 }
112 }
113 }
114
/* Forward declaration: goacc_wait is defined further down in this file but
   is already called from GOACC_parallel_keyed.  */
115 static void goacc_wait (int async, int num_waits, va_list *ap);
116
117
118 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
119 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
120 blocks to be copied to/from the device. Varadic arguments are
121 keyed optional parameters terminated with a zero. */
122
123 void
124 GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
125 size_t mapnum, void **hostaddrs, size_t *sizes,
126 unsigned short *kinds, ...)
127 {
128 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
129
130 va_list ap;
131 struct goacc_thread *thr;
132 struct gomp_device_descr *acc_dev;
133 struct target_mem_desc *tgt;
134 void **devaddrs;
135 unsigned int i;
136 struct splay_tree_key_s k;
137 splay_tree_key tgt_fn_key;
138 void (*tgt_fn);
139 int async = GOMP_ASYNC_SYNC;
140 unsigned dims[GOMP_DIM_MAX];
141 unsigned tag;
142
143 #ifdef HAVE_INTTYPES_H
144 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
145 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
146 #else
147 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
148 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
149 #endif
150 goacc_lazy_initialize ();
151
152 thr = goacc_thread ();
153 acc_dev = thr->dev;
154
155 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
156
157 acc_prof_info prof_info;
158 if (profiling_p)
159 {
160 thr->prof_info = &prof_info;
161
162 prof_info.event_type = acc_ev_compute_construct_start;
163 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
164 prof_info.version = _ACC_PROF_INFO_VERSION;
165 prof_info.device_type = acc_device_type (acc_dev->type);
166 prof_info.device_number = acc_dev->target_id;
167 prof_info.thread_id = -1;
168 prof_info.async = async;
169 prof_info.async_queue = prof_info.async;
170 prof_info.src_file = NULL;
171 prof_info.func_name = NULL;
172 prof_info.line_no = -1;
173 prof_info.end_line_no = -1;
174 prof_info.func_line_no = -1;
175 prof_info.func_end_line_no = -1;
176 }
177 acc_event_info compute_construct_event_info;
178 if (profiling_p)
179 {
180 compute_construct_event_info.other_event.event_type
181 = prof_info.event_type;
182 compute_construct_event_info.other_event.valid_bytes
183 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
184 compute_construct_event_info.other_event.parent_construct
185 = acc_construct_parallel;
186 compute_construct_event_info.other_event.implicit = 0;
187 compute_construct_event_info.other_event.tool_info = NULL;
188 }
189 acc_api_info api_info;
190 if (profiling_p)
191 {
192 thr->api_info = &api_info;
193
194 api_info.device_api = acc_device_api_none;
195 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
196 api_info.device_type = prof_info.device_type;
197 api_info.vendor = -1;
198 api_info.device_handle = NULL;
199 api_info.context_handle = NULL;
200 api_info.async_handle = NULL;
201 }
202
203 if (profiling_p)
204 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
205 &api_info);
206
207 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
208
209 /* Host fallback if "if" clause is false or if the current device is set to
210 the host. */
211 if (flags & GOACC_FLAG_HOST_FALLBACK)
212 {
213 prof_info.device_type = acc_device_host;
214 api_info.device_type = prof_info.device_type;
215 goacc_save_and_set_bind (acc_device_host);
216 fn (hostaddrs);
217 goacc_restore_bind ();
218 goto out_prof;
219 }
220 else if (acc_device_type (acc_dev->type) == acc_device_host)
221 {
222 fn (hostaddrs);
223 goto out_prof;
224 }
225
226 /* Default: let the runtime choose. */
227 for (i = 0; i != GOMP_DIM_MAX; i++)
228 dims[i] = 0;
229
230 va_start (ap, kinds);
231 /* TODO: This will need amending when device_type is implemented. */
232 while ((tag = va_arg (ap, unsigned)) != 0)
233 {
234 if (GOMP_LAUNCH_DEVICE (tag))
235 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
236 GOMP_LAUNCH_DEVICE (tag));
237
238 switch (GOMP_LAUNCH_CODE (tag))
239 {
240 case GOMP_LAUNCH_DIM:
241 {
242 unsigned mask = GOMP_LAUNCH_OP (tag);
243
244 for (i = 0; i != GOMP_DIM_MAX; i++)
245 if (mask & GOMP_DIM_MASK (i))
246 dims[i] = va_arg (ap, unsigned);
247 }
248 break;
249
250 case GOMP_LAUNCH_ASYNC:
251 {
252 /* Small constant values are encoded in the operand. */
253 async = GOMP_LAUNCH_OP (tag);
254
255 if (async == GOMP_LAUNCH_OP_MAX)
256 async = va_arg (ap, unsigned);
257
258 if (profiling_p)
259 {
260 prof_info.async = async;
261 prof_info.async_queue = prof_info.async;
262 }
263
264 break;
265 }
266
267 case GOMP_LAUNCH_WAIT:
268 {
269 unsigned num_waits = GOMP_LAUNCH_OP (tag);
270 goacc_wait (async, num_waits, &ap);
271 break;
272 }
273
274 default:
275 gomp_fatal ("unrecognized offload code '%d',"
276 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
277 }
278 }
279 va_end (ap);
280
281 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
282 {
283 k.host_start = (uintptr_t) fn;
284 k.host_end = k.host_start + 1;
285 gomp_mutex_lock (&acc_dev->lock);
286 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
287 gomp_mutex_unlock (&acc_dev->lock);
288
289 if (tgt_fn_key == NULL)
290 gomp_fatal ("target function wasn't mapped");
291
292 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
293 }
294 else
295 tgt_fn = (void (*)) fn;
296
297 acc_event_info enter_exit_data_event_info;
298 if (profiling_p)
299 {
300 prof_info.event_type = acc_ev_enter_data_start;
301 enter_exit_data_event_info.other_event.event_type
302 = prof_info.event_type;
303 enter_exit_data_event_info.other_event.valid_bytes
304 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
305 enter_exit_data_event_info.other_event.parent_construct
306 = compute_construct_event_info.other_event.parent_construct;
307 enter_exit_data_event_info.other_event.implicit = 1;
308 enter_exit_data_event_info.other_event.tool_info = NULL;
309 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
310 &api_info);
311 }
312
313 goacc_aq aq = get_goacc_asyncqueue (async);
314
315 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
316 true, GOMP_MAP_VARS_OPENACC);
317 if (profiling_p)
318 {
319 prof_info.event_type = acc_ev_enter_data_end;
320 enter_exit_data_event_info.other_event.event_type
321 = prof_info.event_type;
322 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
323 &api_info);
324 }
325
326 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
327 for (i = 0; i < mapnum; i++)
328 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
329 + tgt->list[i].key->tgt_offset
330 + tgt->list[i].offset);
331 if (aq == NULL)
332 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
333 tgt);
334 else
335 acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
336 dims, tgt, aq);
337
338 if (profiling_p)
339 {
340 prof_info.event_type = acc_ev_exit_data_start;
341 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
342 enter_exit_data_event_info.other_event.tool_info = NULL;
343 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
344 &api_info);
345 }
346
347 /* If running synchronously, unmap immediately. */
348 if (aq == NULL)
349 gomp_unmap_vars (tgt, true);
350 else
351 gomp_unmap_vars_async (tgt, true, aq);
352
353 if (profiling_p)
354 {
355 prof_info.event_type = acc_ev_exit_data_end;
356 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
357 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
358 &api_info);
359 }
360
361 out_prof:
362 if (profiling_p)
363 {
364 prof_info.event_type = acc_ev_compute_construct_end;
365 compute_construct_event_info.other_event.event_type
366 = prof_info.event_type;
367 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
368 &api_info);
369
370 thr->prof_info = NULL;
371 thr->api_info = NULL;
372 }
373 }
374
375 /* Legacy entry point (GCC 5). Only provide host fallback execution. */
376
377 void
378 GOACC_parallel (int flags_m, void (*fn) (void *),
379 size_t mapnum, void **hostaddrs, size_t *sizes,
380 unsigned short *kinds,
381 int num_gangs, int num_workers, int vector_length,
382 int async, int num_waits, ...)
383 {
384 goacc_save_and_set_bind (acc_device_host);
385 fn (hostaddrs);
386 goacc_restore_bind ();
387 }
388
389 void
390 GOACC_data_start (int flags_m, size_t mapnum,
391 void **hostaddrs, size_t *sizes, unsigned short *kinds)
392 {
393 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
394
395 struct target_mem_desc *tgt;
396
397 #ifdef HAVE_INTTYPES_H
398 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
399 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
400 #else
401 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
402 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
403 #endif
404
405 goacc_lazy_initialize ();
406
407 struct goacc_thread *thr = goacc_thread ();
408 struct gomp_device_descr *acc_dev = thr->dev;
409
410 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
411
412 acc_prof_info prof_info;
413 if (profiling_p)
414 {
415 thr->prof_info = &prof_info;
416
417 prof_info.event_type = acc_ev_enter_data_start;
418 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
419 prof_info.version = _ACC_PROF_INFO_VERSION;
420 prof_info.device_type = acc_device_type (acc_dev->type);
421 prof_info.device_number = acc_dev->target_id;
422 prof_info.thread_id = -1;
423 prof_info.async = acc_async_sync; /* Always synchronous. */
424 prof_info.async_queue = prof_info.async;
425 prof_info.src_file = NULL;
426 prof_info.func_name = NULL;
427 prof_info.line_no = -1;
428 prof_info.end_line_no = -1;
429 prof_info.func_line_no = -1;
430 prof_info.func_end_line_no = -1;
431 }
432 acc_event_info enter_data_event_info;
433 if (profiling_p)
434 {
435 enter_data_event_info.other_event.event_type
436 = prof_info.event_type;
437 enter_data_event_info.other_event.valid_bytes
438 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
439 enter_data_event_info.other_event.parent_construct = acc_construct_data;
440 for (int i = 0; i < mapnum; ++i)
441 if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR)
442 {
443 /* If there is one such data mapping kind, then this is actually an
444 OpenACC 'host_data' construct. (GCC maps the OpenACC
445 'host_data' construct to the OpenACC 'data' construct.) Apart
446 from artificial test cases (such as an OpenACC 'host_data'
447 construct's (implicit) device initialization when there hasn't
448 been any device data be set up before...), there can't really
449 any meaningful events be generated from OpenACC 'host_data'
450 constructs, though. */
451 enter_data_event_info.other_event.parent_construct
452 = acc_construct_host_data;
453 break;
454 }
455 enter_data_event_info.other_event.implicit = 0;
456 enter_data_event_info.other_event.tool_info = NULL;
457 }
458 acc_api_info api_info;
459 if (profiling_p)
460 {
461 thr->api_info = &api_info;
462
463 api_info.device_api = acc_device_api_none;
464 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
465 api_info.device_type = prof_info.device_type;
466 api_info.vendor = -1;
467 api_info.device_handle = NULL;
468 api_info.context_handle = NULL;
469 api_info.async_handle = NULL;
470 }
471
472 if (profiling_p)
473 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
474
475 /* Host fallback or 'do nothing'. */
476 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
477 || (flags & GOACC_FLAG_HOST_FALLBACK))
478 {
479 prof_info.device_type = acc_device_host;
480 api_info.device_type = prof_info.device_type;
481 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
482 GOMP_MAP_VARS_OPENACC);
483 tgt->prev = thr->mapped_data;
484 thr->mapped_data = tgt;
485
486 goto out_prof;
487 }
488
489 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
490 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
491 GOMP_MAP_VARS_OPENACC);
492 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
493 tgt->prev = thr->mapped_data;
494 thr->mapped_data = tgt;
495
496 out_prof:
497 if (profiling_p)
498 {
499 prof_info.event_type = acc_ev_enter_data_end;
500 enter_data_event_info.other_event.event_type = prof_info.event_type;
501 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
502
503 thr->prof_info = NULL;
504 thr->api_info = NULL;
505 }
506 }
507
508 void
509 GOACC_data_end (void)
510 {
511 struct goacc_thread *thr = goacc_thread ();
512 struct gomp_device_descr *acc_dev = thr->dev;
513 struct target_mem_desc *tgt = thr->mapped_data;
514
515 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
516
517 acc_prof_info prof_info;
518 if (profiling_p)
519 {
520 thr->prof_info = &prof_info;
521
522 prof_info.event_type = acc_ev_exit_data_start;
523 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
524 prof_info.version = _ACC_PROF_INFO_VERSION;
525 prof_info.device_type = acc_device_type (acc_dev->type);
526 prof_info.device_number = acc_dev->target_id;
527 prof_info.thread_id = -1;
528 prof_info.async = acc_async_sync; /* Always synchronous. */
529 prof_info.async_queue = prof_info.async;
530 prof_info.src_file = NULL;
531 prof_info.func_name = NULL;
532 prof_info.line_no = -1;
533 prof_info.end_line_no = -1;
534 prof_info.func_line_no = -1;
535 prof_info.func_end_line_no = -1;
536 }
537 acc_event_info exit_data_event_info;
538 if (profiling_p)
539 {
540 exit_data_event_info.other_event.event_type
541 = prof_info.event_type;
542 exit_data_event_info.other_event.valid_bytes
543 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
544 exit_data_event_info.other_event.parent_construct = acc_construct_data;
545 exit_data_event_info.other_event.implicit = 0;
546 exit_data_event_info.other_event.tool_info = NULL;
547 }
548 acc_api_info api_info;
549 if (profiling_p)
550 {
551 thr->api_info = &api_info;
552
553 api_info.device_api = acc_device_api_none;
554 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
555 api_info.device_type = prof_info.device_type;
556 api_info.vendor = -1;
557 api_info.device_handle = NULL;
558 api_info.context_handle = NULL;
559 api_info.async_handle = NULL;
560 }
561
562 if (profiling_p)
563 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
564
565 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
566 thr->mapped_data = tgt->prev;
567 gomp_unmap_vars (tgt, true);
568 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
569
570 if (profiling_p)
571 {
572 prof_info.event_type = acc_ev_exit_data_end;
573 exit_data_event_info.other_event.event_type = prof_info.event_type;
574 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
575
576 thr->prof_info = NULL;
577 thr->api_info = NULL;
578 }
579 }
580
581 void
582 GOACC_enter_exit_data (int flags_m, size_t mapnum,
583 void **hostaddrs, size_t *sizes, unsigned short *kinds,
584 int async, int num_waits, ...)
585 {
586 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
587
588 struct goacc_thread *thr;
589 struct gomp_device_descr *acc_dev;
590 bool data_enter = false;
591 size_t i;
592
593 goacc_lazy_initialize ();
594
595 thr = goacc_thread ();
596 acc_dev = thr->dev;
597
598 /* Determine whether "finalize" semantics apply to all mappings of this
599 OpenACC directive. */
600 bool finalize = false;
601 if (mapnum > 0)
602 {
603 unsigned char kind = kinds[0] & 0xff;
604 if (kind == GOMP_MAP_DELETE
605 || kind == GOMP_MAP_FORCE_FROM)
606 finalize = true;
607 }
608
609 /* Determine if this is an "acc enter data". */
610 for (i = 0; i < mapnum; ++i)
611 {
612 unsigned char kind = kinds[i] & 0xff;
613
614 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
615 continue;
616
617 if (kind == GOMP_MAP_FORCE_ALLOC
618 || kind == GOMP_MAP_FORCE_PRESENT
619 || kind == GOMP_MAP_FORCE_TO
620 || kind == GOMP_MAP_TO
621 || kind == GOMP_MAP_ALLOC)
622 {
623 data_enter = true;
624 break;
625 }
626
627 if (kind == GOMP_MAP_RELEASE
628 || kind == GOMP_MAP_DELETE
629 || kind == GOMP_MAP_FROM
630 || kind == GOMP_MAP_FORCE_FROM)
631 break;
632
633 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
634 kind);
635 }
636
637 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
638
639 acc_prof_info prof_info;
640 if (profiling_p)
641 {
642 thr->prof_info = &prof_info;
643
644 prof_info.event_type
645 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
646 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
647 prof_info.version = _ACC_PROF_INFO_VERSION;
648 prof_info.device_type = acc_device_type (acc_dev->type);
649 prof_info.device_number = acc_dev->target_id;
650 prof_info.thread_id = -1;
651 prof_info.async = async;
652 prof_info.async_queue = prof_info.async;
653 prof_info.src_file = NULL;
654 prof_info.func_name = NULL;
655 prof_info.line_no = -1;
656 prof_info.end_line_no = -1;
657 prof_info.func_line_no = -1;
658 prof_info.func_end_line_no = -1;
659 }
660 acc_event_info enter_exit_data_event_info;
661 if (profiling_p)
662 {
663 enter_exit_data_event_info.other_event.event_type
664 = prof_info.event_type;
665 enter_exit_data_event_info.other_event.valid_bytes
666 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
667 enter_exit_data_event_info.other_event.parent_construct
668 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
669 enter_exit_data_event_info.other_event.implicit = 0;
670 enter_exit_data_event_info.other_event.tool_info = NULL;
671 }
672 acc_api_info api_info;
673 if (profiling_p)
674 {
675 thr->api_info = &api_info;
676
677 api_info.device_api = acc_device_api_none;
678 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
679 api_info.device_type = prof_info.device_type;
680 api_info.vendor = -1;
681 api_info.device_handle = NULL;
682 api_info.context_handle = NULL;
683 api_info.async_handle = NULL;
684 }
685
686 if (profiling_p)
687 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
688 &api_info);
689
690 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
691 || (flags & GOACC_FLAG_HOST_FALLBACK))
692 {
693 prof_info.device_type = acc_device_host;
694 api_info.device_type = prof_info.device_type;
695
696 goto out_prof;
697 }
698
699 if (num_waits)
700 {
701 va_list ap;
702
703 va_start (ap, num_waits);
704 goacc_wait (async, num_waits, &ap);
705 va_end (ap);
706 }
707
708 /* In c, non-pointers and arrays are represented by a single data clause.
709 Dynamically allocated arrays and subarrays are represented by a data
710 clause followed by an internal GOMP_MAP_POINTER.
711
712 In fortran, scalars and not allocated arrays are represented by a
713 single data clause. Allocated arrays and subarrays have three mappings:
714 1) the original data clause, 2) a PSET 3) a pointer to the array data.
715 */
716
717 if (data_enter)
718 {
719 for (i = 0; i < mapnum; i++)
720 {
721 unsigned char kind = kinds[i] & 0xff;
722
723 /* Scan for pointers and PSETs. */
724 int pointer = find_pointer (i, mapnum, kinds);
725
726 if (!pointer)
727 {
728 switch (kind)
729 {
730 case GOMP_MAP_ALLOC:
731 case GOMP_MAP_FORCE_ALLOC:
732 acc_create_async (hostaddrs[i], sizes[i], async);
733 break;
734 case GOMP_MAP_TO:
735 case GOMP_MAP_FORCE_TO:
736 acc_copyin_async (hostaddrs[i], sizes[i], async);
737 break;
738 default:
739 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
740 kind);
741 break;
742 }
743 }
744 else
745 {
746 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
747 &sizes[i], &kinds[i], async);
748 /* Increment 'i' by two because OpenACC requires fortran
749 arrays to be contiguous, so each PSET is associated with
750 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
751 one MAP_POINTER. */
752 i += pointer - 1;
753 }
754 }
755 }
756 else
757 for (i = 0; i < mapnum; ++i)
758 {
759 unsigned char kind = kinds[i] & 0xff;
760
761 int pointer = find_pointer (i, mapnum, kinds);
762
763 if (!pointer)
764 {
765 switch (kind)
766 {
767 case GOMP_MAP_RELEASE:
768 case GOMP_MAP_DELETE:
769 if (acc_is_present (hostaddrs[i], sizes[i]))
770 {
771 if (finalize)
772 acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
773 else
774 acc_delete_async (hostaddrs[i], sizes[i], async);
775 }
776 break;
777 case GOMP_MAP_FROM:
778 case GOMP_MAP_FORCE_FROM:
779 if (finalize)
780 acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
781 else
782 acc_copyout_async (hostaddrs[i], sizes[i], async);
783 break;
784 default:
785 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
786 kind);
787 break;
788 }
789 }
790 else
791 {
792 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
793 || kind == GOMP_MAP_FROM);
794 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
795 finalize, pointer);
796 /* See the above comment. */
797 i += pointer - 1;
798 }
799 }
800
801 out_prof:
802 if (profiling_p)
803 {
804 prof_info.event_type
805 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
806 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
807 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
808 &api_info);
809
810 thr->prof_info = NULL;
811 thr->api_info = NULL;
812 }
813 }
814
815 static void
816 goacc_wait (int async, int num_waits, va_list *ap)
817 {
818 while (num_waits--)
819 {
820 int qid = va_arg (*ap, int);
821
822 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
823 if (qid == acc_async_noval)
824 {
825 if (async == acc_async_sync)
826 acc_wait_all ();
827 else
828 acc_wait_all_async (async);
829 break;
830 }
831
832 if (acc_async_test (qid))
833 continue;
834
835 if (async == acc_async_sync)
836 acc_wait (qid);
837 else if (qid == async)
838 /* If we're waiting on the same asynchronous queue as we're
839 launching on, the queue itself will order work as
840 required, so there's no need to wait explicitly. */
841 ;
842 else
843 acc_wait_async (qid, async);
844 }
845 }
846
847 void
848 GOACC_update (int flags_m, size_t mapnum,
849 void **hostaddrs, size_t *sizes, unsigned short *kinds,
850 int async, int num_waits, ...)
851 {
852 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
853
854 size_t i;
855
856 goacc_lazy_initialize ();
857
858 struct goacc_thread *thr = goacc_thread ();
859 struct gomp_device_descr *acc_dev = thr->dev;
860
861 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
862
863 acc_prof_info prof_info;
864 if (profiling_p)
865 {
866 thr->prof_info = &prof_info;
867
868 prof_info.event_type = acc_ev_update_start;
869 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
870 prof_info.version = _ACC_PROF_INFO_VERSION;
871 prof_info.device_type = acc_device_type (acc_dev->type);
872 prof_info.device_number = acc_dev->target_id;
873 prof_info.thread_id = -1;
874 prof_info.async = async;
875 prof_info.async_queue = prof_info.async;
876 prof_info.src_file = NULL;
877 prof_info.func_name = NULL;
878 prof_info.line_no = -1;
879 prof_info.end_line_no = -1;
880 prof_info.func_line_no = -1;
881 prof_info.func_end_line_no = -1;
882 }
883 acc_event_info update_event_info;
884 if (profiling_p)
885 {
886 update_event_info.other_event.event_type
887 = prof_info.event_type;
888 update_event_info.other_event.valid_bytes
889 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
890 update_event_info.other_event.parent_construct = acc_construct_update;
891 update_event_info.other_event.implicit = 0;
892 update_event_info.other_event.tool_info = NULL;
893 }
894 acc_api_info api_info;
895 if (profiling_p)
896 {
897 thr->api_info = &api_info;
898
899 api_info.device_api = acc_device_api_none;
900 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
901 api_info.device_type = prof_info.device_type;
902 api_info.vendor = -1;
903 api_info.device_handle = NULL;
904 api_info.context_handle = NULL;
905 api_info.async_handle = NULL;
906 }
907
908 if (profiling_p)
909 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
910
911 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
912 || (flags & GOACC_FLAG_HOST_FALLBACK))
913 {
914 prof_info.device_type = acc_device_host;
915 api_info.device_type = prof_info.device_type;
916
917 goto out_prof;
918 }
919
920 if (num_waits)
921 {
922 va_list ap;
923
924 va_start (ap, num_waits);
925 goacc_wait (async, num_waits, &ap);
926 va_end (ap);
927 }
928
929 bool update_device = false;
930 for (i = 0; i < mapnum; ++i)
931 {
932 unsigned char kind = kinds[i] & 0xff;
933
934 switch (kind)
935 {
936 case GOMP_MAP_POINTER:
937 case GOMP_MAP_TO_PSET:
938 break;
939
940 case GOMP_MAP_ALWAYS_POINTER:
941 if (update_device)
942 {
943 /* Save the contents of the host pointer. */
944 void *dptr = acc_deviceptr (hostaddrs[i-1]);
945 uintptr_t t = *(uintptr_t *) hostaddrs[i];
946
947 /* Update the contents of the host pointer to reflect
948 the value of the allocated device memory in the
949 previous pointer. */
950 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
951 /* TODO: verify that we really cannot use acc_update_device_async
952 here. */
953 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
954
955 /* Restore the host pointer. */
956 *(uintptr_t *) hostaddrs[i] = t;
957 update_device = false;
958 }
959 break;
960
961 case GOMP_MAP_TO:
962 if (!acc_is_present (hostaddrs[i], sizes[i]))
963 {
964 update_device = false;
965 break;
966 }
967 /* Fallthru */
968 case GOMP_MAP_FORCE_TO:
969 update_device = true;
970 acc_update_device_async (hostaddrs[i], sizes[i], async);
971 break;
972
973 case GOMP_MAP_FROM:
974 if (!acc_is_present (hostaddrs[i], sizes[i]))
975 {
976 update_device = false;
977 break;
978 }
979 /* Fallthru */
980 case GOMP_MAP_FORCE_FROM:
981 update_device = false;
982 acc_update_self_async (hostaddrs[i], sizes[i], async);
983 break;
984
985 default:
986 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
987 break;
988 }
989 }
990
991 out_prof:
992 if (profiling_p)
993 {
994 prof_info.event_type = acc_ev_update_end;
995 update_event_info.other_event.event_type = prof_info.event_type;
996 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
997
998 thr->prof_info = NULL;
999 thr->api_info = NULL;
1000 }
1001 }
1002
1003 void
1004 GOACC_wait (int async, int num_waits, ...)
1005 {
1006 goacc_lazy_initialize ();
1007
1008 struct goacc_thread *thr = goacc_thread ();
1009
1010 /* No nesting. */
1011 assert (thr->prof_info == NULL);
1012 assert (thr->api_info == NULL);
1013 acc_prof_info prof_info;
1014 acc_api_info api_info;
1015 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
1016 if (profiling_p)
1017 {
1018 prof_info.async = async;
1019 prof_info.async_queue = prof_info.async;
1020 }
1021
1022 if (num_waits)
1023 {
1024 va_list ap;
1025
1026 va_start (ap, num_waits);
1027 goacc_wait (async, num_waits, &ap);
1028 va_end (ap);
1029 }
1030 else if (async == acc_async_sync)
1031 acc_wait_all ();
1032 else
1033 acc_wait_all_async (async);
1034
1035 if (profiling_p)
1036 {
1037 thr->prof_info = NULL;
1038 thr->api_info = NULL;
1039 }
1040 }
1041
/* Legacy entry point (GCC 5).  Always reports a single thread.  */

int
GOACC_get_num_threads (void)
{
  const int legacy_num_threads = 1;

  return legacy_num_threads;
}
1049
/* Legacy entry point (GCC 5).  Always reports thread number zero.  */

int
GOACC_get_thread_num (void)
{
  const int legacy_thread_num = 0;

  return legacy_thread_num;
}
1057
1058 void
1059 GOACC_declare (int flags_m, size_t mapnum,
1060 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1061 {
1062 int i;
1063
1064 for (i = 0; i < mapnum; i++)
1065 {
1066 unsigned char kind = kinds[i] & 0xff;
1067
1068 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1069 continue;
1070
1071 switch (kind)
1072 {
1073 case GOMP_MAP_FORCE_ALLOC:
1074 case GOMP_MAP_FORCE_FROM:
1075 case GOMP_MAP_FORCE_TO:
1076 case GOMP_MAP_POINTER:
1077 case GOMP_MAP_RELEASE:
1078 case GOMP_MAP_DELETE:
1079 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1080 &kinds[i], GOMP_ASYNC_SYNC, 0);
1081 break;
1082
1083 case GOMP_MAP_FORCE_DEVICEPTR:
1084 break;
1085
1086 case GOMP_MAP_ALLOC:
1087 if (!acc_is_present (hostaddrs[i], sizes[i]))
1088 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1089 &kinds[i], GOMP_ASYNC_SYNC, 0);
1090 break;
1091
1092 case GOMP_MAP_TO:
1093 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1094 &kinds[i], GOMP_ASYNC_SYNC, 0);
1095
1096 break;
1097
1098 case GOMP_MAP_FROM:
1099 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1100 &kinds[i], GOMP_ASYNC_SYNC, 0);
1101 break;
1102
1103 case GOMP_MAP_FORCE_PRESENT:
1104 if (!acc_is_present (hostaddrs[i], sizes[i]))
1105 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
1106 (unsigned long) sizes[i]);
1107 break;
1108
1109 default:
1110 assert (0);
1111 break;
1112 }
1113 }
1114 }