]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgomp/oacc-parallel.c
[PR90743] Fortran 'allocatable' with OpenACC data/OpenMP 'target' 'map' clauses
[thirdparty/gcc.git] / libgomp / oacc-parallel.c
1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27 /* This file handles OpenACC constructs. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40
41
42 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
43 continue to support the following two legacy values. */
44 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
45 "legacy GOMP_DEVICE_ICV broken");
46 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
47 == GOACC_FLAG_HOST_FALLBACK,
48 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
49
50
51 /* Returns the number of mappings associated with the pointer or pset. PSET
52 have three mappings, whereas pointer have two. */
53
54 static int
55 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
56 {
57 if (pos + 1 >= mapnum)
58 return 0;
59
60 unsigned char kind = kinds[pos+1] & 0xff;
61
62 if (kind == GOMP_MAP_TO_PSET)
63 return 3;
64 else if (kind == GOMP_MAP_POINTER)
65 return 2;
66
67 return 0;
68 }
69
70 /* Handle the mapping pair that are presented when a
71 deviceptr clause is used with Fortran. */
72
73 static void
74 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
75 unsigned short *kinds)
76 {
77 int i;
78
79 for (i = 0; i < mapnum; i++)
80 {
81 unsigned short kind1 = kinds[i] & 0xff;
82
83 /* Handle Fortran deviceptr clause. */
84 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
85 {
86 unsigned short kind2;
87
88 if (i < (signed)mapnum - 1)
89 kind2 = kinds[i + 1] & 0xff;
90 else
91 kind2 = 0xffff;
92
93 if (sizes[i] == sizeof (void *))
94 continue;
95
96 /* At this point, we're dealing with a Fortran deviceptr.
97 If the next element is not what we're expecting, then
98 this is an instance of where the deviceptr variable was
99 not used within the region and the pointer was removed
100 by the gimplifier. */
101 if (kind2 == GOMP_MAP_POINTER
102 && sizes[i + 1] == 0
103 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
104 {
105 kinds[i+1] = kinds[i];
106 sizes[i+1] = sizeof (void *);
107 }
108
109 /* Invalidate the entry. */
110 hostaddrs[i] = NULL;
111 }
112 }
113 }
114
115 static void goacc_wait (int async, int num_waits, va_list *ap);
116
117
118 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
119 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
120 blocks to be copied to/from the device. Varadic arguments are
121 keyed optional parameters terminated with a zero. */
122
123 void
124 GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
125 size_t mapnum, void **hostaddrs, size_t *sizes,
126 unsigned short *kinds, ...)
127 {
128 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
129
130 va_list ap;
131 struct goacc_thread *thr;
132 struct gomp_device_descr *acc_dev;
133 struct target_mem_desc *tgt;
134 void **devaddrs;
135 unsigned int i;
136 struct splay_tree_key_s k;
137 splay_tree_key tgt_fn_key;
138 void (*tgt_fn);
139 int async = GOMP_ASYNC_SYNC;
140 unsigned dims[GOMP_DIM_MAX];
141 unsigned tag;
142
143 #ifdef HAVE_INTTYPES_H
144 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
145 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
146 #else
147 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
148 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
149 #endif
150 goacc_lazy_initialize ();
151
152 thr = goacc_thread ();
153 acc_dev = thr->dev;
154
155 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
156
157 acc_prof_info prof_info;
158 if (profiling_p)
159 {
160 thr->prof_info = &prof_info;
161
162 prof_info.event_type = acc_ev_compute_construct_start;
163 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
164 prof_info.version = _ACC_PROF_INFO_VERSION;
165 prof_info.device_type = acc_device_type (acc_dev->type);
166 prof_info.device_number = acc_dev->target_id;
167 prof_info.thread_id = -1;
168 prof_info.async = async;
169 prof_info.async_queue = prof_info.async;
170 prof_info.src_file = NULL;
171 prof_info.func_name = NULL;
172 prof_info.line_no = -1;
173 prof_info.end_line_no = -1;
174 prof_info.func_line_no = -1;
175 prof_info.func_end_line_no = -1;
176 }
177 acc_event_info compute_construct_event_info;
178 if (profiling_p)
179 {
180 compute_construct_event_info.other_event.event_type
181 = prof_info.event_type;
182 compute_construct_event_info.other_event.valid_bytes
183 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
184 compute_construct_event_info.other_event.parent_construct
185 = acc_construct_parallel;
186 compute_construct_event_info.other_event.implicit = 0;
187 compute_construct_event_info.other_event.tool_info = NULL;
188 }
189 acc_api_info api_info;
190 if (profiling_p)
191 {
192 thr->api_info = &api_info;
193
194 api_info.device_api = acc_device_api_none;
195 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
196 api_info.device_type = prof_info.device_type;
197 api_info.vendor = -1;
198 api_info.device_handle = NULL;
199 api_info.context_handle = NULL;
200 api_info.async_handle = NULL;
201 }
202
203 if (profiling_p)
204 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
205 &api_info);
206
207 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
208
209 /* Host fallback if "if" clause is false or if the current device is set to
210 the host. */
211 if (flags & GOACC_FLAG_HOST_FALLBACK)
212 {
213 prof_info.device_type = acc_device_host;
214 api_info.device_type = prof_info.device_type;
215 goacc_save_and_set_bind (acc_device_host);
216 fn (hostaddrs);
217 goacc_restore_bind ();
218 goto out_prof;
219 }
220 else if (acc_device_type (acc_dev->type) == acc_device_host)
221 {
222 fn (hostaddrs);
223 goto out_prof;
224 }
225
226 /* Default: let the runtime choose. */
227 for (i = 0; i != GOMP_DIM_MAX; i++)
228 dims[i] = 0;
229
230 va_start (ap, kinds);
231 /* TODO: This will need amending when device_type is implemented. */
232 while ((tag = va_arg (ap, unsigned)) != 0)
233 {
234 if (GOMP_LAUNCH_DEVICE (tag))
235 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
236 GOMP_LAUNCH_DEVICE (tag));
237
238 switch (GOMP_LAUNCH_CODE (tag))
239 {
240 case GOMP_LAUNCH_DIM:
241 {
242 unsigned mask = GOMP_LAUNCH_OP (tag);
243
244 for (i = 0; i != GOMP_DIM_MAX; i++)
245 if (mask & GOMP_DIM_MASK (i))
246 dims[i] = va_arg (ap, unsigned);
247 }
248 break;
249
250 case GOMP_LAUNCH_ASYNC:
251 {
252 /* Small constant values are encoded in the operand. */
253 async = GOMP_LAUNCH_OP (tag);
254
255 if (async == GOMP_LAUNCH_OP_MAX)
256 async = va_arg (ap, unsigned);
257
258 if (profiling_p)
259 {
260 prof_info.async = async;
261 prof_info.async_queue = prof_info.async;
262 }
263
264 break;
265 }
266
267 case GOMP_LAUNCH_WAIT:
268 {
269 unsigned num_waits = GOMP_LAUNCH_OP (tag);
270 goacc_wait (async, num_waits, &ap);
271 break;
272 }
273
274 default:
275 gomp_fatal ("unrecognized offload code '%d',"
276 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
277 }
278 }
279 va_end (ap);
280
281 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
282 {
283 k.host_start = (uintptr_t) fn;
284 k.host_end = k.host_start + 1;
285 gomp_mutex_lock (&acc_dev->lock);
286 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
287 gomp_mutex_unlock (&acc_dev->lock);
288
289 if (tgt_fn_key == NULL)
290 gomp_fatal ("target function wasn't mapped");
291
292 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
293 }
294 else
295 tgt_fn = (void (*)) fn;
296
297 acc_event_info enter_exit_data_event_info;
298 if (profiling_p)
299 {
300 prof_info.event_type = acc_ev_enter_data_start;
301 enter_exit_data_event_info.other_event.event_type
302 = prof_info.event_type;
303 enter_exit_data_event_info.other_event.valid_bytes
304 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
305 enter_exit_data_event_info.other_event.parent_construct
306 = compute_construct_event_info.other_event.parent_construct;
307 enter_exit_data_event_info.other_event.implicit = 1;
308 enter_exit_data_event_info.other_event.tool_info = NULL;
309 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
310 &api_info);
311 }
312
313 goacc_aq aq = get_goacc_asyncqueue (async);
314
315 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
316 true, GOMP_MAP_VARS_OPENACC);
317 if (profiling_p)
318 {
319 prof_info.event_type = acc_ev_enter_data_end;
320 enter_exit_data_event_info.other_event.event_type
321 = prof_info.event_type;
322 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
323 &api_info);
324 }
325
326 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
327 for (i = 0; i < mapnum; i++)
328 if (tgt->list[i].key != NULL)
329 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
330 + tgt->list[i].key->tgt_offset
331 + tgt->list[i].offset);
332 else
333 devaddrs[i] = NULL;
334 if (aq == NULL)
335 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
336 tgt);
337 else
338 acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
339 dims, tgt, aq);
340
341 if (profiling_p)
342 {
343 prof_info.event_type = acc_ev_exit_data_start;
344 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
345 enter_exit_data_event_info.other_event.tool_info = NULL;
346 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
347 &api_info);
348 }
349
350 /* If running synchronously, unmap immediately. */
351 if (aq == NULL)
352 gomp_unmap_vars (tgt, true);
353 else
354 gomp_unmap_vars_async (tgt, true, aq);
355
356 if (profiling_p)
357 {
358 prof_info.event_type = acc_ev_exit_data_end;
359 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
360 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
361 &api_info);
362 }
363
364 out_prof:
365 if (profiling_p)
366 {
367 prof_info.event_type = acc_ev_compute_construct_end;
368 compute_construct_event_info.other_event.event_type
369 = prof_info.event_type;
370 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
371 &api_info);
372
373 thr->prof_info = NULL;
374 thr->api_info = NULL;
375 }
376 }
377
378 /* Legacy entry point (GCC 5). Only provide host fallback execution. */
379
380 void
381 GOACC_parallel (int flags_m, void (*fn) (void *),
382 size_t mapnum, void **hostaddrs, size_t *sizes,
383 unsigned short *kinds,
384 int num_gangs, int num_workers, int vector_length,
385 int async, int num_waits, ...)
386 {
387 goacc_save_and_set_bind (acc_device_host);
388 fn (hostaddrs);
389 goacc_restore_bind ();
390 }
391
392 void
393 GOACC_data_start (int flags_m, size_t mapnum,
394 void **hostaddrs, size_t *sizes, unsigned short *kinds)
395 {
396 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
397
398 struct target_mem_desc *tgt;
399
400 #ifdef HAVE_INTTYPES_H
401 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
402 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
403 #else
404 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
405 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
406 #endif
407
408 goacc_lazy_initialize ();
409
410 struct goacc_thread *thr = goacc_thread ();
411 struct gomp_device_descr *acc_dev = thr->dev;
412
413 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
414
415 acc_prof_info prof_info;
416 if (profiling_p)
417 {
418 thr->prof_info = &prof_info;
419
420 prof_info.event_type = acc_ev_enter_data_start;
421 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
422 prof_info.version = _ACC_PROF_INFO_VERSION;
423 prof_info.device_type = acc_device_type (acc_dev->type);
424 prof_info.device_number = acc_dev->target_id;
425 prof_info.thread_id = -1;
426 prof_info.async = acc_async_sync; /* Always synchronous. */
427 prof_info.async_queue = prof_info.async;
428 prof_info.src_file = NULL;
429 prof_info.func_name = NULL;
430 prof_info.line_no = -1;
431 prof_info.end_line_no = -1;
432 prof_info.func_line_no = -1;
433 prof_info.func_end_line_no = -1;
434 }
435 acc_event_info enter_data_event_info;
436 if (profiling_p)
437 {
438 enter_data_event_info.other_event.event_type
439 = prof_info.event_type;
440 enter_data_event_info.other_event.valid_bytes
441 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
442 enter_data_event_info.other_event.parent_construct = acc_construct_data;
443 for (int i = 0; i < mapnum; ++i)
444 if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR)
445 {
446 /* If there is one such data mapping kind, then this is actually an
447 OpenACC 'host_data' construct. (GCC maps the OpenACC
448 'host_data' construct to the OpenACC 'data' construct.) Apart
449 from artificial test cases (such as an OpenACC 'host_data'
450 construct's (implicit) device initialization when there hasn't
451 been any device data be set up before...), there can't really
452 any meaningful events be generated from OpenACC 'host_data'
453 constructs, though. */
454 enter_data_event_info.other_event.parent_construct
455 = acc_construct_host_data;
456 break;
457 }
458 enter_data_event_info.other_event.implicit = 0;
459 enter_data_event_info.other_event.tool_info = NULL;
460 }
461 acc_api_info api_info;
462 if (profiling_p)
463 {
464 thr->api_info = &api_info;
465
466 api_info.device_api = acc_device_api_none;
467 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
468 api_info.device_type = prof_info.device_type;
469 api_info.vendor = -1;
470 api_info.device_handle = NULL;
471 api_info.context_handle = NULL;
472 api_info.async_handle = NULL;
473 }
474
475 if (profiling_p)
476 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
477
478 /* Host fallback or 'do nothing'. */
479 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
480 || (flags & GOACC_FLAG_HOST_FALLBACK))
481 {
482 prof_info.device_type = acc_device_host;
483 api_info.device_type = prof_info.device_type;
484 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
485 GOMP_MAP_VARS_OPENACC);
486 tgt->prev = thr->mapped_data;
487 thr->mapped_data = tgt;
488
489 goto out_prof;
490 }
491
492 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
493 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
494 GOMP_MAP_VARS_OPENACC);
495 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
496 tgt->prev = thr->mapped_data;
497 thr->mapped_data = tgt;
498
499 out_prof:
500 if (profiling_p)
501 {
502 prof_info.event_type = acc_ev_enter_data_end;
503 enter_data_event_info.other_event.event_type = prof_info.event_type;
504 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
505
506 thr->prof_info = NULL;
507 thr->api_info = NULL;
508 }
509 }
510
511 void
512 GOACC_data_end (void)
513 {
514 struct goacc_thread *thr = goacc_thread ();
515 struct gomp_device_descr *acc_dev = thr->dev;
516 struct target_mem_desc *tgt = thr->mapped_data;
517
518 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
519
520 acc_prof_info prof_info;
521 if (profiling_p)
522 {
523 thr->prof_info = &prof_info;
524
525 prof_info.event_type = acc_ev_exit_data_start;
526 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
527 prof_info.version = _ACC_PROF_INFO_VERSION;
528 prof_info.device_type = acc_device_type (acc_dev->type);
529 prof_info.device_number = acc_dev->target_id;
530 prof_info.thread_id = -1;
531 prof_info.async = acc_async_sync; /* Always synchronous. */
532 prof_info.async_queue = prof_info.async;
533 prof_info.src_file = NULL;
534 prof_info.func_name = NULL;
535 prof_info.line_no = -1;
536 prof_info.end_line_no = -1;
537 prof_info.func_line_no = -1;
538 prof_info.func_end_line_no = -1;
539 }
540 acc_event_info exit_data_event_info;
541 if (profiling_p)
542 {
543 exit_data_event_info.other_event.event_type
544 = prof_info.event_type;
545 exit_data_event_info.other_event.valid_bytes
546 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
547 exit_data_event_info.other_event.parent_construct = acc_construct_data;
548 exit_data_event_info.other_event.implicit = 0;
549 exit_data_event_info.other_event.tool_info = NULL;
550 }
551 acc_api_info api_info;
552 if (profiling_p)
553 {
554 thr->api_info = &api_info;
555
556 api_info.device_api = acc_device_api_none;
557 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
558 api_info.device_type = prof_info.device_type;
559 api_info.vendor = -1;
560 api_info.device_handle = NULL;
561 api_info.context_handle = NULL;
562 api_info.async_handle = NULL;
563 }
564
565 if (profiling_p)
566 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
567
568 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
569 thr->mapped_data = tgt->prev;
570 gomp_unmap_vars (tgt, true);
571 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
572
573 if (profiling_p)
574 {
575 prof_info.event_type = acc_ev_exit_data_end;
576 exit_data_event_info.other_event.event_type = prof_info.event_type;
577 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
578
579 thr->prof_info = NULL;
580 thr->api_info = NULL;
581 }
582 }
583
584 void
585 GOACC_enter_exit_data (int flags_m, size_t mapnum,
586 void **hostaddrs, size_t *sizes, unsigned short *kinds,
587 int async, int num_waits, ...)
588 {
589 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
590
591 struct goacc_thread *thr;
592 struct gomp_device_descr *acc_dev;
593 bool data_enter = false;
594 size_t i;
595
596 goacc_lazy_initialize ();
597
598 thr = goacc_thread ();
599 acc_dev = thr->dev;
600
601 /* Determine whether "finalize" semantics apply to all mappings of this
602 OpenACC directive. */
603 bool finalize = false;
604 if (mapnum > 0)
605 {
606 unsigned char kind = kinds[0] & 0xff;
607 if (kind == GOMP_MAP_DELETE
608 || kind == GOMP_MAP_FORCE_FROM)
609 finalize = true;
610 }
611
612 /* Determine if this is an "acc enter data". */
613 for (i = 0; i < mapnum; ++i)
614 {
615 unsigned char kind = kinds[i] & 0xff;
616
617 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
618 continue;
619
620 if (kind == GOMP_MAP_FORCE_ALLOC
621 || kind == GOMP_MAP_FORCE_PRESENT
622 || kind == GOMP_MAP_FORCE_TO
623 || kind == GOMP_MAP_TO
624 || kind == GOMP_MAP_ALLOC)
625 {
626 data_enter = true;
627 break;
628 }
629
630 if (kind == GOMP_MAP_RELEASE
631 || kind == GOMP_MAP_DELETE
632 || kind == GOMP_MAP_FROM
633 || kind == GOMP_MAP_FORCE_FROM)
634 break;
635
636 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
637 kind);
638 }
639
640 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
641
642 acc_prof_info prof_info;
643 if (profiling_p)
644 {
645 thr->prof_info = &prof_info;
646
647 prof_info.event_type
648 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
649 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
650 prof_info.version = _ACC_PROF_INFO_VERSION;
651 prof_info.device_type = acc_device_type (acc_dev->type);
652 prof_info.device_number = acc_dev->target_id;
653 prof_info.thread_id = -1;
654 prof_info.async = async;
655 prof_info.async_queue = prof_info.async;
656 prof_info.src_file = NULL;
657 prof_info.func_name = NULL;
658 prof_info.line_no = -1;
659 prof_info.end_line_no = -1;
660 prof_info.func_line_no = -1;
661 prof_info.func_end_line_no = -1;
662 }
663 acc_event_info enter_exit_data_event_info;
664 if (profiling_p)
665 {
666 enter_exit_data_event_info.other_event.event_type
667 = prof_info.event_type;
668 enter_exit_data_event_info.other_event.valid_bytes
669 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
670 enter_exit_data_event_info.other_event.parent_construct
671 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
672 enter_exit_data_event_info.other_event.implicit = 0;
673 enter_exit_data_event_info.other_event.tool_info = NULL;
674 }
675 acc_api_info api_info;
676 if (profiling_p)
677 {
678 thr->api_info = &api_info;
679
680 api_info.device_api = acc_device_api_none;
681 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
682 api_info.device_type = prof_info.device_type;
683 api_info.vendor = -1;
684 api_info.device_handle = NULL;
685 api_info.context_handle = NULL;
686 api_info.async_handle = NULL;
687 }
688
689 if (profiling_p)
690 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
691 &api_info);
692
693 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
694 || (flags & GOACC_FLAG_HOST_FALLBACK))
695 {
696 prof_info.device_type = acc_device_host;
697 api_info.device_type = prof_info.device_type;
698
699 goto out_prof;
700 }
701
702 if (num_waits)
703 {
704 va_list ap;
705
706 va_start (ap, num_waits);
707 goacc_wait (async, num_waits, &ap);
708 va_end (ap);
709 }
710
711 /* In c, non-pointers and arrays are represented by a single data clause.
712 Dynamically allocated arrays and subarrays are represented by a data
713 clause followed by an internal GOMP_MAP_POINTER.
714
715 In fortran, scalars and not allocated arrays are represented by a
716 single data clause. Allocated arrays and subarrays have three mappings:
717 1) the original data clause, 2) a PSET 3) a pointer to the array data.
718 */
719
720 if (data_enter)
721 {
722 for (i = 0; i < mapnum; i++)
723 {
724 unsigned char kind = kinds[i] & 0xff;
725
726 /* Scan for pointers and PSETs. */
727 int pointer = find_pointer (i, mapnum, kinds);
728
729 if (!pointer)
730 {
731 switch (kind)
732 {
733 case GOMP_MAP_ALLOC:
734 case GOMP_MAP_FORCE_ALLOC:
735 acc_create_async (hostaddrs[i], sizes[i], async);
736 break;
737 case GOMP_MAP_TO:
738 case GOMP_MAP_FORCE_TO:
739 acc_copyin_async (hostaddrs[i], sizes[i], async);
740 break;
741 default:
742 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
743 kind);
744 break;
745 }
746 }
747 else
748 {
749 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
750 &sizes[i], &kinds[i], async);
751 /* Increment 'i' by two because OpenACC requires fortran
752 arrays to be contiguous, so each PSET is associated with
753 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
754 one MAP_POINTER. */
755 i += pointer - 1;
756 }
757 }
758 }
759 else
760 for (i = 0; i < mapnum; ++i)
761 {
762 unsigned char kind = kinds[i] & 0xff;
763
764 int pointer = find_pointer (i, mapnum, kinds);
765
766 if (!pointer)
767 {
768 switch (kind)
769 {
770 case GOMP_MAP_RELEASE:
771 case GOMP_MAP_DELETE:
772 if (acc_is_present (hostaddrs[i], sizes[i]))
773 {
774 if (finalize)
775 acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
776 else
777 acc_delete_async (hostaddrs[i], sizes[i], async);
778 }
779 break;
780 case GOMP_MAP_FROM:
781 case GOMP_MAP_FORCE_FROM:
782 if (finalize)
783 acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
784 else
785 acc_copyout_async (hostaddrs[i], sizes[i], async);
786 break;
787 default:
788 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
789 kind);
790 break;
791 }
792 }
793 else
794 {
795 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
796 || kind == GOMP_MAP_FROM);
797 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
798 finalize, pointer);
799 /* See the above comment. */
800 i += pointer - 1;
801 }
802 }
803
804 out_prof:
805 if (profiling_p)
806 {
807 prof_info.event_type
808 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
809 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
810 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
811 &api_info);
812
813 thr->prof_info = NULL;
814 thr->api_info = NULL;
815 }
816 }
817
818 static void
819 goacc_wait (int async, int num_waits, va_list *ap)
820 {
821 while (num_waits--)
822 {
823 int qid = va_arg (*ap, int);
824
825 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
826 if (qid == acc_async_noval)
827 {
828 if (async == acc_async_sync)
829 acc_wait_all ();
830 else
831 acc_wait_all_async (async);
832 break;
833 }
834
835 if (acc_async_test (qid))
836 continue;
837
838 if (async == acc_async_sync)
839 acc_wait (qid);
840 else if (qid == async)
841 /* If we're waiting on the same asynchronous queue as we're
842 launching on, the queue itself will order work as
843 required, so there's no need to wait explicitly. */
844 ;
845 else
846 acc_wait_async (qid, async);
847 }
848 }
849
850 void
851 GOACC_update (int flags_m, size_t mapnum,
852 void **hostaddrs, size_t *sizes, unsigned short *kinds,
853 int async, int num_waits, ...)
854 {
855 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
856
857 size_t i;
858
859 goacc_lazy_initialize ();
860
861 struct goacc_thread *thr = goacc_thread ();
862 struct gomp_device_descr *acc_dev = thr->dev;
863
864 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
865
866 acc_prof_info prof_info;
867 if (profiling_p)
868 {
869 thr->prof_info = &prof_info;
870
871 prof_info.event_type = acc_ev_update_start;
872 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
873 prof_info.version = _ACC_PROF_INFO_VERSION;
874 prof_info.device_type = acc_device_type (acc_dev->type);
875 prof_info.device_number = acc_dev->target_id;
876 prof_info.thread_id = -1;
877 prof_info.async = async;
878 prof_info.async_queue = prof_info.async;
879 prof_info.src_file = NULL;
880 prof_info.func_name = NULL;
881 prof_info.line_no = -1;
882 prof_info.end_line_no = -1;
883 prof_info.func_line_no = -1;
884 prof_info.func_end_line_no = -1;
885 }
886 acc_event_info update_event_info;
887 if (profiling_p)
888 {
889 update_event_info.other_event.event_type
890 = prof_info.event_type;
891 update_event_info.other_event.valid_bytes
892 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
893 update_event_info.other_event.parent_construct = acc_construct_update;
894 update_event_info.other_event.implicit = 0;
895 update_event_info.other_event.tool_info = NULL;
896 }
897 acc_api_info api_info;
898 if (profiling_p)
899 {
900 thr->api_info = &api_info;
901
902 api_info.device_api = acc_device_api_none;
903 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
904 api_info.device_type = prof_info.device_type;
905 api_info.vendor = -1;
906 api_info.device_handle = NULL;
907 api_info.context_handle = NULL;
908 api_info.async_handle = NULL;
909 }
910
911 if (profiling_p)
912 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
913
914 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
915 || (flags & GOACC_FLAG_HOST_FALLBACK))
916 {
917 prof_info.device_type = acc_device_host;
918 api_info.device_type = prof_info.device_type;
919
920 goto out_prof;
921 }
922
923 if (num_waits)
924 {
925 va_list ap;
926
927 va_start (ap, num_waits);
928 goacc_wait (async, num_waits, &ap);
929 va_end (ap);
930 }
931
932 bool update_device = false;
933 for (i = 0; i < mapnum; ++i)
934 {
935 unsigned char kind = kinds[i] & 0xff;
936
937 switch (kind)
938 {
939 case GOMP_MAP_POINTER:
940 case GOMP_MAP_TO_PSET:
941 break;
942
943 case GOMP_MAP_ALWAYS_POINTER:
944 if (update_device)
945 {
946 /* Save the contents of the host pointer. */
947 void *dptr = acc_deviceptr (hostaddrs[i-1]);
948 uintptr_t t = *(uintptr_t *) hostaddrs[i];
949
950 /* Update the contents of the host pointer to reflect
951 the value of the allocated device memory in the
952 previous pointer. */
953 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
954 /* TODO: verify that we really cannot use acc_update_device_async
955 here. */
956 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
957
958 /* Restore the host pointer. */
959 *(uintptr_t *) hostaddrs[i] = t;
960 update_device = false;
961 }
962 break;
963
964 case GOMP_MAP_TO:
965 if (!acc_is_present (hostaddrs[i], sizes[i]))
966 {
967 update_device = false;
968 break;
969 }
970 /* Fallthru */
971 case GOMP_MAP_FORCE_TO:
972 update_device = true;
973 acc_update_device_async (hostaddrs[i], sizes[i], async);
974 break;
975
976 case GOMP_MAP_FROM:
977 if (!acc_is_present (hostaddrs[i], sizes[i]))
978 {
979 update_device = false;
980 break;
981 }
982 /* Fallthru */
983 case GOMP_MAP_FORCE_FROM:
984 update_device = false;
985 acc_update_self_async (hostaddrs[i], sizes[i], async);
986 break;
987
988 default:
989 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
990 break;
991 }
992 }
993
994 out_prof:
995 if (profiling_p)
996 {
997 prof_info.event_type = acc_ev_update_end;
998 update_event_info.other_event.event_type = prof_info.event_type;
999 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
1000
1001 thr->prof_info = NULL;
1002 thr->api_info = NULL;
1003 }
1004 }
1005
1006 void
1007 GOACC_wait (int async, int num_waits, ...)
1008 {
1009 goacc_lazy_initialize ();
1010
1011 struct goacc_thread *thr = goacc_thread ();
1012
1013 /* No nesting. */
1014 assert (thr->prof_info == NULL);
1015 assert (thr->api_info == NULL);
1016 acc_prof_info prof_info;
1017 acc_api_info api_info;
1018 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
1019 if (profiling_p)
1020 {
1021 prof_info.async = async;
1022 prof_info.async_queue = prof_info.async;
1023 }
1024
1025 if (num_waits)
1026 {
1027 va_list ap;
1028
1029 va_start (ap, num_waits);
1030 goacc_wait (async, num_waits, &ap);
1031 va_end (ap);
1032 }
1033 else if (async == acc_async_sync)
1034 acc_wait_all ();
1035 else
1036 acc_wait_all_async (async);
1037
1038 if (profiling_p)
1039 {
1040 thr->prof_info = NULL;
1041 thr->api_info = NULL;
1042 }
1043 }
1044
/* Legacy entry point (GCC 5).  Always reports a thread count of one.  */

int
GOACC_get_num_threads (void)
{
  enum { legacy_num_threads = 1 };

  return legacy_num_threads;
}
1052
/* Legacy entry point (GCC 5).  Always reports thread number zero.  */

int
GOACC_get_thread_num (void)
{
  enum { legacy_thread_num = 0 };

  return legacy_thread_num;
}
1060
1061 void
1062 GOACC_declare (int flags_m, size_t mapnum,
1063 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1064 {
1065 int i;
1066
1067 for (i = 0; i < mapnum; i++)
1068 {
1069 unsigned char kind = kinds[i] & 0xff;
1070
1071 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1072 continue;
1073
1074 switch (kind)
1075 {
1076 case GOMP_MAP_FORCE_ALLOC:
1077 case GOMP_MAP_FORCE_FROM:
1078 case GOMP_MAP_FORCE_TO:
1079 case GOMP_MAP_POINTER:
1080 case GOMP_MAP_RELEASE:
1081 case GOMP_MAP_DELETE:
1082 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1083 &kinds[i], GOMP_ASYNC_SYNC, 0);
1084 break;
1085
1086 case GOMP_MAP_FORCE_DEVICEPTR:
1087 break;
1088
1089 case GOMP_MAP_ALLOC:
1090 if (!acc_is_present (hostaddrs[i], sizes[i]))
1091 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1092 &kinds[i], GOMP_ASYNC_SYNC, 0);
1093 break;
1094
1095 case GOMP_MAP_TO:
1096 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1097 &kinds[i], GOMP_ASYNC_SYNC, 0);
1098
1099 break;
1100
1101 case GOMP_MAP_FROM:
1102 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1103 &kinds[i], GOMP_ASYNC_SYNC, 0);
1104 break;
1105
1106 case GOMP_MAP_FORCE_PRESENT:
1107 if (!acc_is_present (hostaddrs[i], sizes[i]))
1108 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
1109 (unsigned long) sizes[i]);
1110 break;
1111
1112 default:
1113 assert (0);
1114 break;
1115 }
1116 }
1117 }