1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* For PRIu64. */
42 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
43 continue to support the following two legacy values. */
/* Compile-time check: the legacy GOMP_DEVICE_ICV value must unmarshal to an
   empty flag set (0). */
44 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV
) == 0,
45 "legacy GOMP_DEVICE_ICV broken");
/* Compile-time check: the legacy GOMP_DEVICE_HOST_FALLBACK value must
   unmarshal to exactly GOACC_FLAG_HOST_FALLBACK. */
46 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK
)
47 == GOACC_FLAG_HOST_FALLBACK
,
48 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
51 /* Return the number of mappings associated with the pointer or PSET that
52 follows entry POS: a PSET has three mappings, whereas a pointer has two. */
55 find_pointer (int pos
, size_t mapnum
, unsigned short *kinds
)
/* POS is the last entry (or beyond), so there is no following entry to
   classify. */
57 if (pos
+ 1 >= mapnum
)
/* Only the low byte of the kind of the entry after POS is examined. */
60 unsigned char kind
= kinds
[pos
+1] & 0xff;
/* Classify the following entry as a PSET or as a plain pointer.
   NOTE(review): the values returned by each branch are not visible in this
   listing; per the header comment they should be three and two. */
62 if (kind
== GOMP_MAP_TO_PSET
)
64 else if (kind
== GOMP_MAP_POINTER
)
70 /* Handle the mapping pairs that are presented when a
71 deviceptr clause is used with Fortran. */
74 handle_ftn_pointers (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
75 unsigned short *kinds
)
/* Walk every mapping; only the low byte of each kind is significant here. */
79 for (i
= 0; i
< mapnum
; i
++)
81 unsigned short kind1
= kinds
[i
] & 0xff;
83 /* Handle Fortran deviceptr clause. */
84 if (kind1
== GOMP_MAP_FORCE_DEVICEPTR
)
/* Read the kind of the following entry only when such an entry exists. */
88 if (i
< (signed)mapnum
- 1)
89 kind2
= kinds
[i
+ 1] & 0xff;
/* Compare the recorded size with the size of a pointer.
   NOTE(review): the statement executed on a match is not visible in this
   listing. */
93 if (sizes
[i
] == sizeof (void *))
96 /* At this point, we're dealing with a Fortran deviceptr.
97 If the next element is not what we're expecting, then
98 this is an instance of where the deviceptr variable was
99 not used within the region and the pointer was removed
100 by the gimplifier. */
/* The expected companion is a GOMP_MAP_POINTER entry whose host location
   holds the same pointer value as entry I. */
101 if (kind2
== GOMP_MAP_POINTER
103 && hostaddrs
[i
] == *(void **)hostaddrs
[i
+ 1])
/* Give the companion entry the deviceptr kind and make it pointer-sized. */
105 kinds
[i
+1] = kinds
[i
];
106 sizes
[i
+1] = sizeof (void *);
109 /* Invalidate the entry. */
/* Forward declaration: goacc_wait is defined further down in this file but
   is called by entry points that precede its definition. */
115 static void goacc_wait (int async
, int num_waits
, va_list *ap
);
118 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
119 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
120 blocks to be copied to/from the device. Variadic arguments are
121 keyed optional parameters terminated with a zero. */
124 GOACC_parallel_keyed (int flags_m
, void (*fn
) (void *),
125 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
126 unsigned short *kinds
, ...)
/* FLAGS_M arrives in marshalled (ABI) form; decode it before testing any
   GOACC_FLAG_* bit. */
128 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
131 struct goacc_thread
*thr
;
132 struct gomp_device_descr
*acc_dev
;
133 struct target_mem_desc
*tgt
;
136 struct splay_tree_key_s k
;
137 splay_tree_key tgt_fn_key
;
/* The launch is synchronous unless a GOMP_LAUNCH_ASYNC tag among the
   variadic arguments overrides it below. */
139 int async
= GOMP_ASYNC_SYNC
;
140 unsigned dims
[GOMP_DIM_MAX
];
/* Debug trace of the arguments: one variant prints MAPNUM through PRIu64
   (under HAVE_INTTYPES_H), the other through %lu. */
143 #ifdef HAVE_INTTYPES_H
144 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
145 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
147 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
148 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
150 goacc_lazy_initialize ();
152 thr
= goacc_thread ();
/* Profiling bookkeeping: PROF_INFO, the event record and API_INFO live in
   this stack frame and are published through THR; both THR pointers are
   reset to NULL at the end of the function. */
155 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
157 acc_prof_info prof_info
;
160 thr
->prof_info
= &prof_info
;
162 prof_info
.event_type
= acc_ev_compute_construct_start
;
163 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
164 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
165 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
166 prof_info
.device_number
= acc_dev
->target_id
;
167 prof_info
.thread_id
= -1;
168 prof_info
.async
= async
;
169 prof_info
.async_queue
= prof_info
.async
;
170 prof_info
.src_file
= NULL
;
171 prof_info
.func_name
= NULL
;
172 prof_info
.line_no
= -1;
173 prof_info
.end_line_no
= -1;
174 prof_info
.func_line_no
= -1;
175 prof_info
.func_end_line_no
= -1;
177 acc_event_info compute_construct_event_info
;
180 compute_construct_event_info
.other_event
.event_type
181 = prof_info
.event_type
;
182 compute_construct_event_info
.other_event
.valid_bytes
183 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
184 compute_construct_event_info
.other_event
.parent_construct
185 = acc_construct_parallel
;
186 compute_construct_event_info
.other_event
.implicit
= 0;
187 compute_construct_event_info
.other_event
.tool_info
= NULL
;
189 acc_api_info api_info
;
192 thr
->api_info
= &api_info
;
194 api_info
.device_api
= acc_device_api_none
;
195 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
196 api_info
.device_type
= prof_info
.device_type
;
197 api_info
.vendor
= -1;
198 api_info
.device_handle
= NULL
;
199 api_info
.context_handle
= NULL
;
200 api_info
.async_handle
= NULL
;
204 goacc_profiling_dispatch (&prof_info
, &compute_construct_event_info
,
/* Rewrite Fortran deviceptr mapping pairs before the mappings are used. */
207 handle_ftn_pointers (mapnum
, hostaddrs
, sizes
, kinds
);
209 /* Host fallback if "if" clause is false or if the current device is set to
211 if (flags
& GOACC_FLAG_HOST_FALLBACK
)
213 prof_info
.device_type
= acc_device_host
;
214 api_info
.device_type
= prof_info
.device_type
;
215 goacc_save_and_set_bind (acc_device_host
);
217 goacc_restore_bind ();
220 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
226 /* Default: let the runtime choose. */
227 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
/* Decode the keyed variadic arguments; a zero tag terminates the list. */
230 va_start (ap
, kinds
);
231 /* TODO: This will need amending when device_type is implemented. */
232 while ((tag
= va_arg (ap
, unsigned)) != 0)
234 if (GOMP_LAUNCH_DEVICE (tag
))
235 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
236 GOMP_LAUNCH_DEVICE (tag
));
238 switch (GOMP_LAUNCH_CODE (tag
))
/* The operand is a mask of dimensions; one unsigned argument follows for
   each dimension whose bit is set. */
240 case GOMP_LAUNCH_DIM
:
242 unsigned mask
= GOMP_LAUNCH_OP (tag
);
244 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
245 if (mask
& GOMP_DIM_MASK (i
))
246 dims
[i
] = va_arg (ap
, unsigned);
250 case GOMP_LAUNCH_ASYNC
:
252 /* Small constant values are encoded in the operand. */
253 async
= GOMP_LAUNCH_OP (tag
);
/* GOMP_LAUNCH_OP_MAX is the escape value: the real queue number follows as
   a separate argument. */
255 if (async
== GOMP_LAUNCH_OP_MAX
)
256 async
= va_arg (ap
, unsigned);
260 prof_info
.async
= async
;
261 prof_info
.async_queue
= prof_info
.async
;
/* The operand carries the number of wait arguments, which goacc_wait
   consumes from AP. */
267 case GOMP_LAUNCH_WAIT
:
269 unsigned num_waits
= GOMP_LAUNCH_OP (tag
);
270 goacc_wait (async
, num_waits
, &ap
);
275 gomp_fatal ("unrecognized offload code '%d',"
276 " libgomp is too old", GOMP_LAUNCH_CODE (tag
));
/* Devices without GOMP_OFFLOAD_CAP_NATIVE_EXEC need FN translated to a
   target function: FN's address is looked up in the device's mem_map while
   holding the device lock, and a missing entry is fatal.  The other visible
   assignment uses FN directly. */
281 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
283 k
.host_start
= (uintptr_t) fn
;
284 k
.host_end
= k
.host_start
+ 1;
285 gomp_mutex_lock (&acc_dev
->lock
);
286 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
, &k
);
287 gomp_mutex_unlock (&acc_dev
->lock
);
289 if (tgt_fn_key
== NULL
)
290 gomp_fatal ("target function wasn't mapped");
292 tgt_fn
= (void (*)) tgt_fn_key
->tgt_offset
;
295 tgt_fn
= (void (*)) fn
;
/* The data movement done for the construct is reported as implicit
   enter-data events that inherit the compute construct's parent. */
297 acc_event_info enter_exit_data_event_info
;
300 prof_info
.event_type
= acc_ev_enter_data_start
;
301 enter_exit_data_event_info
.other_event
.event_type
302 = prof_info
.event_type
;
303 enter_exit_data_event_info
.other_event
.valid_bytes
304 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
305 enter_exit_data_event_info
.other_event
.parent_construct
306 = compute_construct_event_info
.other_event
.parent_construct
;
307 enter_exit_data_event_info
.other_event
.implicit
= 1;
308 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
309 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* Map the variables using the queue selected by ASYNC. */
313 goacc_aq aq
= get_goacc_asyncqueue (async
);
315 tgt
= gomp_map_vars_async (acc_dev
, aq
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
316 true, GOMP_MAP_VARS_OPENACC
);
319 prof_info
.event_type
= acc_ev_enter_data_end
;
320 enter_exit_data_event_info
.other_event
.event_type
321 = prof_info
.event_type
;
322 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* Compute the device address of every mapping: start of the target block
   plus the key's offset plus the per-entry offset. */
326 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
327 for (i
= 0; i
< mapnum
; i
++)
328 devaddrs
[i
] = (void *) (tgt
->list
[i
].key
->tgt
->tgt_start
329 + tgt
->list
[i
].key
->tgt_offset
330 + tgt
->list
[i
].offset
);
/* Two launch hooks are visible: openacc.exec_func and
   openacc.async.exec_func.
   NOTE(review): the condition selecting between them is not visible in
   this listing. */
332 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, dims
,
335 acc_dev
->openacc
.async
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
,
340 prof_info
.event_type
= acc_ev_exit_data_start
;
341 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
342 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
343 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
347 /* If running synchronously, unmap immediately. */
349 gomp_unmap_vars (tgt
, true);
351 gomp_unmap_vars_async (tgt
, true, aq
);
355 prof_info
.event_type
= acc_ev_exit_data_end
;
356 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
357 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* Report the end of the compute construct, then withdraw the profiling
   pointers that were published through THR. */
364 prof_info
.event_type
= acc_ev_compute_construct_end
;
365 compute_construct_event_info
.other_event
.event_type
366 = prof_info
.event_type
;
367 goacc_profiling_dispatch (&prof_info
, &compute_construct_event_info
,
370 thr
->prof_info
= NULL
;
371 thr
->api_info
= NULL
;
375 /* Legacy entry point (GCC 5). Only provide host fallback execution. */
378 GOACC_parallel (int flags_m
, void (*fn
) (void *),
379 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
380 unsigned short *kinds
,
381 int num_gangs
, int num_workers
, int vector_length
,
382 int async
, int num_waits
, ...)
/* Bind to the host device, then restore the previous binding.
   NOTE(review): the statement executed between the two calls is not
   visible in this listing. */
384 goacc_save_and_set_bind (acc_device_host
);
386 goacc_restore_bind ();
/* Entry point for the start of an OpenACC 'data' construct (also reached
   for 'host_data', see below).  FLAGS_M carries the marshalled GOACC_FLAGs;
   MAPNUM, HOSTADDRS, SIZES and KINDS describe the mappings.  The resulting
   mapping descriptor is pushed on the calling thread's mapped_data list. */
390 GOACC_data_start (int flags_m
, size_t mapnum
,
391 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
/* Decode the marshalled flags before testing GOACC_FLAG_* bits. */
393 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
395 struct target_mem_desc
*tgt
;
/* Debug trace of the arguments: one variant prints MAPNUM through PRIu64
   (under HAVE_INTTYPES_H), the other through %lu. */
397 #ifdef HAVE_INTTYPES_H
398 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
399 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
401 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
402 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
405 goacc_lazy_initialize ();
407 struct goacc_thread
*thr
= goacc_thread ();
408 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Profiling bookkeeping: the records live in this stack frame and are
   published through THR until the end of the function. */
410 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
412 acc_prof_info prof_info
;
415 thr
->prof_info
= &prof_info
;
417 prof_info
.event_type
= acc_ev_enter_data_start
;
418 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
419 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
420 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
421 prof_info
.device_number
= acc_dev
->target_id
;
422 prof_info
.thread_id
= -1;
423 prof_info
.async
= acc_async_sync
; /* Always synchronous. */
424 prof_info
.async_queue
= prof_info
.async
;
425 prof_info
.src_file
= NULL
;
426 prof_info
.func_name
= NULL
;
427 prof_info
.line_no
= -1;
428 prof_info
.end_line_no
= -1;
429 prof_info
.func_line_no
= -1;
430 prof_info
.func_end_line_no
= -1;
432 acc_event_info enter_data_event_info
;
435 enter_data_event_info
.other_event
.event_type
436 = prof_info
.event_type
;
437 enter_data_event_info
.other_event
.valid_bytes
438 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
439 enter_data_event_info
.other_event
.parent_construct
= acc_construct_data
;
/* Scan the mapping kinds (low byte only) for GOMP_MAP_USE_DEVICE_PTR. */
440 for (int i
= 0; i
< mapnum
; ++i
)
441 if ((kinds
[i
] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
)
443 /* If there is one such data mapping kind, then this is actually an
444 OpenACC 'host_data' construct. (GCC maps the OpenACC
445 'host_data' construct to the OpenACC 'data' construct.) Apart
446 from artificial test cases (such as an OpenACC 'host_data'
447 construct's (implicit) device initialization when there hasn't
448 been any device data be set up before...), there can't really
449 any meaningful events be generated from OpenACC 'host_data'
450 constructs, though. */
451 enter_data_event_info
.other_event
.parent_construct
452 = acc_construct_host_data
;
455 enter_data_event_info
.other_event
.implicit
= 0;
456 enter_data_event_info
.other_event
.tool_info
= NULL
;
458 acc_api_info api_info
;
461 thr
->api_info
= &api_info
;
463 api_info
.device_api
= acc_device_api_none
;
464 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
465 api_info
.device_type
= prof_info
.device_type
;
466 api_info
.vendor
= -1;
467 api_info
.device_handle
= NULL
;
468 api_info
.context_handle
= NULL
;
469 api_info
.async_handle
= NULL
;
473 goacc_profiling_dispatch (&prof_info
, &enter_data_event_info
, &api_info
);
475 /* Host fallback or 'do nothing'. */
/* With shared memory or a forced host fallback nothing is mapped: an empty
   descriptor is created and pushed on the thread's mapped_data list, and
   the profiling records are retargeted to the host device. */
476 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
477 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
479 prof_info
.device_type
= acc_device_host
;
480 api_info
.device_type
= prof_info
.device_type
;
481 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true,
482 GOMP_MAP_VARS_OPENACC
);
483 tgt
->prev
= thr
->mapped_data
;
484 thr
->mapped_data
= tgt
;
/* Map the variables on the device and push the descriptor on the same
   list. */
489 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
490 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
491 GOMP_MAP_VARS_OPENACC
);
492 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
493 tgt
->prev
= thr
->mapped_data
;
494 thr
->mapped_data
= tgt
;
/* Report the matching end event, then withdraw the profiling pointers. */
499 prof_info
.event_type
= acc_ev_enter_data_end
;
500 enter_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
501 goacc_profiling_dispatch (&prof_info
, &enter_data_event_info
, &api_info
);
503 thr
->prof_info
= NULL
;
504 thr
->api_info
= NULL
;
/* Entry point for the end of an OpenACC 'data' construct: removes the head
   descriptor from the calling thread's mapped_data list and unmaps it,
   reporting exit-data start/end profiling events around the operation. */
509 GOACC_data_end (void)
511 struct goacc_thread
*thr
= goacc_thread ();
512 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* TGT is the descriptor currently at the head of the thread's list. */
513 struct target_mem_desc
*tgt
= thr
->mapped_data
;
/* Profiling bookkeeping: the records live in this stack frame and are
   published through THR until the end of the function. */
515 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
517 acc_prof_info prof_info
;
520 thr
->prof_info
= &prof_info
;
522 prof_info
.event_type
= acc_ev_exit_data_start
;
523 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
524 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
525 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
526 prof_info
.device_number
= acc_dev
->target_id
;
527 prof_info
.thread_id
= -1;
528 prof_info
.async
= acc_async_sync
; /* Always synchronous. */
529 prof_info
.async_queue
= prof_info
.async
;
530 prof_info
.src_file
= NULL
;
531 prof_info
.func_name
= NULL
;
532 prof_info
.line_no
= -1;
533 prof_info
.end_line_no
= -1;
534 prof_info
.func_line_no
= -1;
535 prof_info
.func_end_line_no
= -1;
537 acc_event_info exit_data_event_info
;
540 exit_data_event_info
.other_event
.event_type
541 = prof_info
.event_type
;
542 exit_data_event_info
.other_event
.valid_bytes
543 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
544 exit_data_event_info
.other_event
.parent_construct
= acc_construct_data
;
545 exit_data_event_info
.other_event
.implicit
= 0;
546 exit_data_event_info
.other_event
.tool_info
= NULL
;
548 acc_api_info api_info
;
551 thr
->api_info
= &api_info
;
553 api_info
.device_api
= acc_device_api_none
;
554 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
555 api_info
.device_type
= prof_info
.device_type
;
556 api_info
.vendor
= -1;
557 api_info
.device_handle
= NULL
;
558 api_info
.context_handle
= NULL
;
559 api_info
.async_handle
= NULL
;
563 goacc_profiling_dispatch (&prof_info
, &exit_data_event_info
, &api_info
);
/* Unlink TGT from the list first, then release its mappings. */
565 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
566 thr
->mapped_data
= tgt
->prev
;
567 gomp_unmap_vars (tgt
, true);
568 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
/* Report the matching end event, then withdraw the profiling pointers. */
572 prof_info
.event_type
= acc_ev_exit_data_end
;
573 exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
574 goacc_profiling_dispatch (&prof_info
, &exit_data_event_info
, &api_info
);
576 thr
->prof_info
= NULL
;
577 thr
->api_info
= NULL
;
/* Entry point shared by the OpenACC 'enter data' and 'exit data'
   directives.  FLAGS_M carries the marshalled GOACC_FLAGs; MAPNUM,
   HOSTADDRS, SIZES and KINDS describe the mappings; ASYNC selects the queue
   and NUM_WAITS tells how many wait arguments follow in the variadic part.
   Whether the call is an 'enter' or an 'exit' is derived from the mapping
   kinds. */
582 GOACC_enter_exit_data (int flags_m
, size_t mapnum
,
583 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
584 int async
, int num_waits
, ...)
/* Decode the marshalled flags before testing GOACC_FLAG_* bits. */
586 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
588 struct goacc_thread
*thr
;
589 struct gomp_device_descr
*acc_dev
;
590 bool data_enter
= false;
593 goacc_lazy_initialize ();
595 thr
= goacc_thread ();
598 /* Determine whether "finalize" semantics apply to all mappings of this
599 OpenACC directive. */
600 bool finalize
= false;
/* The decision is taken from the kind of the first mapping only. */
603 unsigned char kind
= kinds
[0] & 0xff;
604 if (kind
== GOMP_MAP_DELETE
605 || kind
== GOMP_MAP_FORCE_FROM
)
609 /* Determine if this is an "acc enter data". */
610 for (i
= 0; i
< mapnum
; ++i
)
/* Each kind is classified as pointer/PSET, as one of the 'enter' kinds, or
   as one of the 'exit' kinds; the gomp_fatal below reports anything else.
   NOTE(review): the statements executed for each class are not visible in
   this listing. */
612 unsigned char kind
= kinds
[i
] & 0xff;
614 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
617 if (kind
== GOMP_MAP_FORCE_ALLOC
618 || kind
== GOMP_MAP_FORCE_PRESENT
619 || kind
== GOMP_MAP_FORCE_TO
620 || kind
== GOMP_MAP_TO
621 || kind
== GOMP_MAP_ALLOC
)
627 if (kind
== GOMP_MAP_RELEASE
628 || kind
== GOMP_MAP_DELETE
629 || kind
== GOMP_MAP_FROM
630 || kind
== GOMP_MAP_FORCE_FROM
)
633 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* Profiling bookkeeping: the records live in this stack frame and are
   published through THR until the end of the function.  The event type and
   parent construct depend on DATA_ENTER. */
637 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
639 acc_prof_info prof_info
;
642 thr
->prof_info
= &prof_info
;
645 = data_enter
? acc_ev_enter_data_start
: acc_ev_exit_data_start
;
646 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
647 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
648 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
649 prof_info
.device_number
= acc_dev
->target_id
;
650 prof_info
.thread_id
= -1;
651 prof_info
.async
= async
;
652 prof_info
.async_queue
= prof_info
.async
;
653 prof_info
.src_file
= NULL
;
654 prof_info
.func_name
= NULL
;
655 prof_info
.line_no
= -1;
656 prof_info
.end_line_no
= -1;
657 prof_info
.func_line_no
= -1;
658 prof_info
.func_end_line_no
= -1;
660 acc_event_info enter_exit_data_event_info
;
663 enter_exit_data_event_info
.other_event
.event_type
664 = prof_info
.event_type
;
665 enter_exit_data_event_info
.other_event
.valid_bytes
666 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
667 enter_exit_data_event_info
.other_event
.parent_construct
668 = data_enter
? acc_construct_enter_data
: acc_construct_exit_data
;
669 enter_exit_data_event_info
.other_event
.implicit
= 0;
670 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
672 acc_api_info api_info
;
675 thr
->api_info
= &api_info
;
677 api_info
.device_api
= acc_device_api_none
;
678 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
679 api_info
.device_type
= prof_info
.device_type
;
680 api_info
.vendor
= -1;
681 api_info
.device_handle
= NULL
;
682 api_info
.context_handle
= NULL
;
683 api_info
.async_handle
= NULL
;
687 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* With shared memory or a forced host fallback the profiling records are
   retargeted to the host device. */
690 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
691 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
693 prof_info
.device_type
= acc_device_host
;
694 api_info
.device_type
= prof_info
.device_type
;
/* The wait arguments follow NUM_WAITS in the variadic part and are handed
   to goacc_wait. */
703 va_start (ap
, num_waits
);
704 goacc_wait (async
, num_waits
, &ap
);
708 /* In C, non-pointers and arrays are represented by a single data clause.
709 Dynamically allocated arrays and subarrays are represented by a data
710 clause followed by an internal GOMP_MAP_POINTER.
712 In Fortran, scalars and not allocated arrays are represented by a
713 single data clause. Allocated arrays and subarrays have three mappings:
714 1) the original data clause, 2) a PSET 3) a pointer to the array data.
719 for (i
= 0; i
< mapnum
; i
++)
721 unsigned char kind
= kinds
[i
] & 0xff;
723 /* Scan for pointers and PSETs. */
724 int pointer
= find_pointer (i
, mapnum
, kinds
);
/* Visible cases: GOMP_MAP_FORCE_ALLOC creates the data and
   GOMP_MAP_FORCE_TO copies it in, both on queue ASYNC. */
731 case GOMP_MAP_FORCE_ALLOC
:
732 acc_create_async (hostaddrs
[i
], sizes
[i
], async
);
735 case GOMP_MAP_FORCE_TO
:
736 acc_copyin_async (hostaddrs
[i
], sizes
[i
], async
);
739 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
/* A pointer/PSET group is inserted as a unit; POINTER is the group length
   reported by find_pointer. */
746 gomp_acc_insert_pointer (pointer
, &hostaddrs
[i
],
747 &sizes
[i
], &kinds
[i
], async
);
748 /* Increment 'i' by two because OpenACC requires fortran
749 arrays to be contiguous, so each PSET is associated with
750 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
757 for (i
= 0; i
< mapnum
; ++i
)
759 unsigned char kind
= kinds
[i
] & 0xff;
761 int pointer
= find_pointer (i
, mapnum
, kinds
);
767 case GOMP_MAP_RELEASE
:
768 case GOMP_MAP_DELETE
:
769 if (acc_is_present (hostaddrs
[i
], sizes
[i
]))
772 acc_delete_finalize_async (hostaddrs
[i
], sizes
[i
], async
);
774 acc_delete_async (hostaddrs
[i
], sizes
[i
], async
);
778 case GOMP_MAP_FORCE_FROM
:
780 acc_copyout_finalize_async (hostaddrs
[i
], sizes
[i
], async
);
782 acc_copyout_async (hostaddrs
[i
], sizes
[i
], async
);
785 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
792 bool copyfrom
= (kind
== GOMP_MAP_FORCE_FROM
793 || kind
== GOMP_MAP_FROM
);
794 gomp_acc_remove_pointer (hostaddrs
[i
], sizes
[i
], copyfrom
, async
,
796 /* See the above comment. */
805 = data_enter
? acc_ev_enter_data_end
: acc_ev_exit_data_end
;
806 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
807 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
/* Withdraw the profiling pointers that were published through THR. */
810 thr
->prof_info
= NULL
;
811 thr
->api_info
= NULL
;
/* Consume wait arguments from *AP, each read as an 'int' queue id, and wait
   for the named queues relative to ASYNC.  NUM_WAITS is the number of wait
   arguments supplied by the caller.
   NOTE(review): the loop header and the statements of several branches are
   not visible in this listing. */
816 goacc_wait (int async
, int num_waits
, va_list *ap
)
820 int qid
= va_arg (*ap
, int);
822 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
823 if (qid
== acc_async_noval
)
825 if (async
== acc_async_sync
)
828 acc_wait_all_async (async
);
/* Query QID with acc_async_test before deciding how to wait.
   NOTE(review): the statement executed when the test succeeds is not
   visible in this listing. */
832 if (acc_async_test (qid
))
835 if (async
== acc_async_sync
)
837 else if (qid
== async
)
838 /* If we're waiting on the same asynchronous queue as we're
839 launching on, the queue itself will order work as
840 required, so there's no need to wait explicitly. */
/* Make queue ASYNC wait for queue QID. */
843 acc_wait_async (qid
, async
);
/* Entry point for the OpenACC 'update' directive.  FLAGS_M carries the
   marshalled GOACC_FLAGs; MAPNUM, HOSTADDRS, SIZES and KINDS describe the
   data to refresh; ASYNC selects the queue and NUM_WAITS tells how many
   wait arguments follow in the variadic part. */
848 GOACC_update (int flags_m
, size_t mapnum
,
849 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
850 int async
, int num_waits
, ...)
/* Decode the marshalled flags before testing GOACC_FLAG_* bits. */
852 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
856 goacc_lazy_initialize ();
858 struct goacc_thread
*thr
= goacc_thread ();
859 struct gomp_device_descr
*acc_dev
= thr
->dev
;
/* Profiling bookkeeping: the records live in this stack frame and are
   published through THR until the end of the function. */
861 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
863 acc_prof_info prof_info
;
866 thr
->prof_info
= &prof_info
;
868 prof_info
.event_type
= acc_ev_update_start
;
869 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
870 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
871 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
872 prof_info
.device_number
= acc_dev
->target_id
;
873 prof_info
.thread_id
= -1;
874 prof_info
.async
= async
;
875 prof_info
.async_queue
= prof_info
.async
;
876 prof_info
.src_file
= NULL
;
877 prof_info
.func_name
= NULL
;
878 prof_info
.line_no
= -1;
879 prof_info
.end_line_no
= -1;
880 prof_info
.func_line_no
= -1;
881 prof_info
.func_end_line_no
= -1;
883 acc_event_info update_event_info
;
886 update_event_info
.other_event
.event_type
887 = prof_info
.event_type
;
888 update_event_info
.other_event
.valid_bytes
889 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
890 update_event_info
.other_event
.parent_construct
= acc_construct_update
;
891 update_event_info
.other_event
.implicit
= 0;
892 update_event_info
.other_event
.tool_info
= NULL
;
894 acc_api_info api_info
;
897 thr
->api_info
= &api_info
;
899 api_info
.device_api
= acc_device_api_none
;
900 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
901 api_info
.device_type
= prof_info
.device_type
;
902 api_info
.vendor
= -1;
903 api_info
.device_handle
= NULL
;
904 api_info
.context_handle
= NULL
;
905 api_info
.async_handle
= NULL
;
909 goacc_profiling_dispatch (&prof_info
, &update_event_info
, &api_info
);
/* With shared memory or a forced host fallback the profiling records are
   retargeted to the host device. */
911 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
912 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
914 prof_info
.device_type
= acc_device_host
;
915 api_info
.device_type
= prof_info
.device_type
;
/* The wait arguments follow NUM_WAITS in the variadic part and are handed
   to goacc_wait. */
924 va_start (ap
, num_waits
);
925 goacc_wait (async
, num_waits
, &ap
);
/* UPDATE_DEVICE is set by the host-to-device case and cleared by the other
   visible cases.
   NOTE(review): presumably it is consulted by the GOMP_MAP_ALWAYS_POINTER
   case; the test itself is not visible in this listing. */
929 bool update_device
= false;
930 for (i
= 0; i
< mapnum
; ++i
)
932 unsigned char kind
= kinds
[i
] & 0xff;
936 case GOMP_MAP_POINTER
:
937 case GOMP_MAP_TO_PSET
:
/* The pointer entry refers to the data of the preceding entry (I - 1): the
   host pointer is temporarily overwritten with that data's device address,
   copied to the device, and then restored. */
940 case GOMP_MAP_ALWAYS_POINTER
:
943 /* Save the contents of the host pointer. */
944 void *dptr
= acc_deviceptr (hostaddrs
[i
-1]);
945 uintptr_t t
= *(uintptr_t *) hostaddrs
[i
];
947 /* Update the contents of the host pointer to reflect
948 the value of the allocated device memory in the
950 *(uintptr_t *) hostaddrs
[i
] = (uintptr_t)dptr
;
951 /* TODO: verify that we really cannot use acc_update_device_async
953 acc_update_device (hostaddrs
[i
], sizeof (uintptr_t));
955 /* Restore the host pointer. */
956 *(uintptr_t *) hostaddrs
[i
] = t
;
957 update_device
= false;
962 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
964 update_device
= false;
/* Host-to-device refresh on queue ASYNC. */
968 case GOMP_MAP_FORCE_TO
:
969 update_device
= true;
970 acc_update_device_async (hostaddrs
[i
], sizes
[i
], async
);
974 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
976 update_device
= false;
/* Device-to-host refresh on queue ASYNC. */
980 case GOMP_MAP_FORCE_FROM
:
981 update_device
= false;
982 acc_update_self_async (hostaddrs
[i
], sizes
[i
], async
);
986 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
/* Report the matching end event, then withdraw the profiling pointers. */
994 prof_info
.event_type
= acc_ev_update_end
;
995 update_event_info
.other_event
.event_type
= prof_info
.event_type
;
996 goacc_profiling_dispatch (&prof_info
, &update_event_info
, &api_info
);
998 thr
->prof_info
= NULL
;
999 thr
->api_info
= NULL
;
/* Entry point for the OpenACC 'wait' directive.  ASYNC is the queue the
   wait is issued on and NUM_WAITS the number of queue arguments that follow
   in the variadic part. */
1004 GOACC_wait (int async
, int num_waits
, ...)
1006 goacc_lazy_initialize ();
1008 struct goacc_thread
*thr
= goacc_thread ();
/* No profiling records may be published through THR when this function is
   entered. */
1011 assert (thr
->prof_info
== NULL
);
1012 assert (thr
->api_info
== NULL
);
1013 acc_prof_info prof_info
;
1014 acc_api_info api_info
;
1015 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
1018 prof_info
.async
= async
;
1019 prof_info
.async_queue
= prof_info
.async
;
/* Hand the explicit wait arguments to goacc_wait.  The remaining visible
   branches handle a synchronous ASYNC separately and otherwise call
   acc_wait_all_async.
   NOTE(review): the condition guarding this call is not visible in this
   listing. */
1026 va_start (ap
, num_waits
);
1027 goacc_wait (async
, num_waits
, &ap
);
1030 else if (async
== acc_async_sync
)
1033 acc_wait_all_async (async
);
/* Withdraw the profiling pointers from THR. */
1037 thr
->prof_info
= NULL
;
1038 thr
->api_info
= NULL
;
1042 /* Legacy entry point (GCC 5). */
1045 GOACC_get_num_threads (void)
1050 /* Legacy entry point (GCC 5). */
1053 GOACC_get_thread_num (void)
1059 GOACC_declare (int flags_m
, size_t mapnum
,
1060 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1064 for (i
= 0; i
< mapnum
; i
++)
1066 unsigned char kind
= kinds
[i
] & 0xff;
1068 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
1073 case GOMP_MAP_FORCE_ALLOC
:
1074 case GOMP_MAP_FORCE_FROM
:
1075 case GOMP_MAP_FORCE_TO
:
1076 case GOMP_MAP_POINTER
:
1077 case GOMP_MAP_RELEASE
:
1078 case GOMP_MAP_DELETE
:
1079 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1080 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1083 case GOMP_MAP_FORCE_DEVICEPTR
:
1086 case GOMP_MAP_ALLOC
:
1087 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
1088 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1089 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1093 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1094 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1099 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1100 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1103 case GOMP_MAP_FORCE_PRESENT
:
1104 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
1105 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs
[i
],
1106 (unsigned long) sizes
[i
]);