1 /* Copyright (C) 2013-2021 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
44 #include "plugin-suffix.h"
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
49 static void gomp_target_init (void);
51 /* The whole initialization code for offloading plugins is only run one. */
52 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock
;
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr
{
62 enum offload_target_type type
;
63 const void *host_table
;
64 const void *target_data
;
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr
*offload_images
;
70 /* Total number of offload images. */
71 static int num_offload_images
;
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr
*devices
;
76 /* Total number of available devices. */
77 static int num_devices
;
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp
;
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
85 gomp_realloc_unlock (void *old
, size_t size
)
87 void *ret
= realloc (old
, size
);
90 gomp_mutex_unlock (®ister_lock
);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
97 gomp_init_targets_once (void)
99 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
103 gomp_get_num_devices (void)
105 gomp_init_targets_once ();
106 return num_devices_openmp
;
109 static struct gomp_device_descr
*
110 resolve_device (int device_id
)
112 if (device_id
== GOMP_DEVICE_ICV
)
114 struct gomp_task_icv
*icv
= gomp_icv (false);
115 device_id
= icv
->default_device_var
;
118 if (device_id
< 0 || device_id
>= gomp_get_num_devices ())
120 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
121 && device_id
!= GOMP_DEVICE_HOST_FALLBACK
122 && device_id
!= num_devices_openmp
)
123 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
124 "but device not found");
129 gomp_mutex_lock (&devices
[device_id
].lock
);
130 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
131 gomp_init_device (&devices
[device_id
]);
132 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
134 gomp_mutex_unlock (&devices
[device_id
].lock
);
136 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
137 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
138 "but device is finalized");
142 gomp_mutex_unlock (&devices
[device_id
].lock
);
144 return &devices
[device_id
];
148 static inline splay_tree_key
149 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
151 if (key
->host_start
!= key
->host_end
)
152 return splay_tree_lookup (mem_map
, key
);
155 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
160 n
= splay_tree_lookup (mem_map
, key
);
164 return splay_tree_lookup (mem_map
, key
);
167 static inline splay_tree_key
168 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
170 if (key
->host_start
!= key
->host_end
)
171 return splay_tree_lookup (mem_map
, key
);
174 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
180 gomp_device_copy (struct gomp_device_descr
*devicep
,
181 bool (*copy_func
) (int, void *, const void *, size_t),
182 const char *dst
, void *dstaddr
,
183 const char *src
, const void *srcaddr
,
186 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
188 gomp_mutex_unlock (&devicep
->lock
);
189 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
190 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
195 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
196 bool (*copy_func
) (int, void *, const void *, size_t,
197 struct goacc_asyncqueue
*),
198 const char *dst
, void *dstaddr
,
199 const char *src
, const void *srcaddr
,
200 size_t size
, struct goacc_asyncqueue
*aq
)
202 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
204 gomp_mutex_unlock (&devicep
->lock
);
205 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
206 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
210 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
211 host to device memory transfers. */
213 struct gomp_coalesce_chunk
215 /* The starting and ending point of a coalesced chunk of memory. */
219 struct gomp_coalesce_buf
221 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
222 it will be copied to the device. */
224 struct target_mem_desc
*tgt
;
225 /* Array with offsets, chunks[i].start is the starting offset and
226 chunks[i].end ending offset relative to tgt->tgt_start device address
227 of chunks which are to be copied to buf and later copied to device. */
228 struct gomp_coalesce_chunk
*chunks
;
229 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
232 /* During construction of chunks array, how many memory regions are within
233 the last chunk. If there is just one memory region for a chunk, we copy
234 it directly to device rather than going through buf. */
238 /* Maximum size of memory region considered for coalescing. Larger copies
239 are performed directly. */
240 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
242 /* Maximum size of a gap in between regions to consider them being copied
243 within the same chunk. All the device offsets considered are within
244 newly allocated device memory, so it isn't fatal if we copy some padding
245 in between from host to device. The gaps come either from alignment
246 padding or from memory regions which are not supposed to be copied from
247 host to device (e.g. map(alloc:), map(from:) etc.). */
248 #define MAX_COALESCE_BUF_GAP (4 * 1024)
250 /* Add region with device tgt_start relative offset and length to CBUF. */
253 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
255 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
259 if (cbuf
->chunk_cnt
< 0)
261 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
263 cbuf
->chunk_cnt
= -1;
266 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
268 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
272 /* If the last chunk is only used by one mapping, discard it,
273 as it will be one host to device copy anyway and
274 memcpying it around will only waste cycles. */
275 if (cbuf
->use_cnt
== 1)
278 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
279 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
284 /* Return true for mapping kinds which need to copy data from the
285 host to device for regions that weren't previously mapped. */
288 gomp_to_device_kind_p (int kind
)
294 case GOMP_MAP_FORCE_ALLOC
:
295 case GOMP_MAP_FORCE_FROM
:
296 case GOMP_MAP_ALWAYS_FROM
:
303 attribute_hidden
void
304 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
305 struct goacc_asyncqueue
*aq
,
306 void *d
, const void *h
, size_t sz
,
307 struct gomp_coalesce_buf
*cbuf
)
311 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
312 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
315 long last
= cbuf
->chunk_cnt
- 1;
316 while (first
<= last
)
318 long middle
= (first
+ last
) >> 1;
319 if (cbuf
->chunks
[middle
].end
<= doff
)
321 else if (cbuf
->chunks
[middle
].start
<= doff
)
323 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
324 gomp_fatal ("internal libgomp cbuf error");
325 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
334 if (__builtin_expect (aq
!= NULL
, 0))
335 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
336 "dev", d
, "host", h
, sz
, aq
);
338 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
341 attribute_hidden
void
342 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
343 struct goacc_asyncqueue
*aq
,
344 void *h
, const void *d
, size_t sz
)
346 if (__builtin_expect (aq
!= NULL
, 0))
347 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
348 "host", h
, "dev", d
, sz
, aq
);
350 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
354 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
356 if (!devicep
->free_func (devicep
->target_id
, devptr
))
358 gomp_mutex_unlock (&devicep
->lock
);
359 gomp_fatal ("error in freeing device memory block at %p", devptr
);
363 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
364 gomp_map_0len_lookup found oldn for newn.
365 Helper function of gomp_map_vars. */
368 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
369 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
370 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
371 unsigned char kind
, bool always_to_flag
,
372 struct gomp_coalesce_buf
*cbuf
)
374 assert (kind
!= GOMP_MAP_ATTACH
);
377 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
378 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
379 tgt_var
->is_attach
= false;
380 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
381 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
383 if ((kind
& GOMP_MAP_FLAG_FORCE
)
384 || oldn
->host_start
> newn
->host_start
385 || oldn
->host_end
< newn
->host_end
)
387 gomp_mutex_unlock (&devicep
->lock
);
388 gomp_fatal ("Trying to map into device [%p..%p) object when "
389 "[%p..%p) is already mapped",
390 (void *) newn
->host_start
, (void *) newn
->host_end
,
391 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
394 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
395 gomp_copy_host2dev (devicep
, aq
,
396 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
397 + newn
->host_start
- oldn
->host_start
),
398 (void *) newn
->host_start
,
399 newn
->host_end
- newn
->host_start
, cbuf
);
401 if (oldn
->refcount
!= REFCOUNT_INFINITY
)
/* Return the mapping kind for entry IDX of the KINDS array, which holds
   unsigned shorts when SHORT_MAPKIND (OpenMP 4.5+ encoding, kind in the
   low byte) and unsigned chars otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
413 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
414 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
415 struct gomp_coalesce_buf
*cbuf
)
417 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
418 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
419 struct splay_tree_key_s cur_node
;
421 cur_node
.host_start
= host_ptr
;
422 if (cur_node
.host_start
== (uintptr_t) NULL
)
424 cur_node
.tgt_offset
= (uintptr_t) NULL
;
425 gomp_copy_host2dev (devicep
, aq
,
426 (void *) (tgt
->tgt_start
+ target_offset
),
427 (void *) &cur_node
.tgt_offset
,
428 sizeof (void *), cbuf
);
431 /* Add bias to the pointer value. */
432 cur_node
.host_start
+= bias
;
433 cur_node
.host_end
= cur_node
.host_start
;
434 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
437 gomp_mutex_unlock (&devicep
->lock
);
438 gomp_fatal ("Pointer target of array section wasn't mapped");
440 cur_node
.host_start
-= n
->host_start
;
442 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
443 /* At this point tgt_offset is target address of the
444 array section. Now subtract bias to get what we want
445 to initialize the pointer with. */
446 cur_node
.tgt_offset
-= bias
;
447 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
448 (void *) &cur_node
.tgt_offset
, sizeof (void *), cbuf
);
452 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
453 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
454 size_t first
, size_t i
, void **hostaddrs
,
455 size_t *sizes
, void *kinds
,
456 struct gomp_coalesce_buf
*cbuf
)
458 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
459 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
460 struct splay_tree_key_s cur_node
;
462 const bool short_mapkind
= true;
463 const int typemask
= short_mapkind
? 0xff : 0x7;
465 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
466 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
467 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
468 kind
= get_kind (short_mapkind
, kinds
, i
);
471 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
473 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
474 kind
& typemask
, false, cbuf
);
479 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
481 cur_node
.host_start
--;
482 n2
= splay_tree_lookup (mem_map
, &cur_node
);
483 cur_node
.host_start
++;
486 && n2
->host_start
- n
->host_start
487 == n2
->tgt_offset
- n
->tgt_offset
)
489 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
490 kind
& typemask
, false, cbuf
);
495 n2
= splay_tree_lookup (mem_map
, &cur_node
);
499 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
501 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
502 kind
& typemask
, false, cbuf
);
506 gomp_mutex_unlock (&devicep
->lock
);
507 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
508 "other mapped elements from the same structure weren't mapped "
509 "together with it", (void *) cur_node
.host_start
,
510 (void *) cur_node
.host_end
);
513 attribute_hidden
void
514 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
515 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
516 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
517 struct gomp_coalesce_buf
*cbufp
)
519 struct splay_tree_key_s s
;
524 gomp_mutex_unlock (&devicep
->lock
);
525 gomp_fatal ("enclosing struct not mapped for attach");
528 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
529 /* We might have a pointer in a packed struct: however we cannot have more
530 than one such pointer in each pointer-sized portion of the struct, so
532 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
535 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
537 if (!n
->aux
->attach_count
)
539 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
541 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
542 n
->aux
->attach_count
[idx
]++;
545 gomp_mutex_unlock (&devicep
->lock
);
546 gomp_fatal ("attach count overflow");
549 if (n
->aux
->attach_count
[idx
] == 1)
551 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
553 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
557 if ((void *) target
== NULL
)
559 gomp_mutex_unlock (&devicep
->lock
);
560 gomp_fatal ("attempt to attach null pointer");
563 s
.host_start
= target
+ bias
;
564 s
.host_end
= s
.host_start
+ 1;
565 tn
= splay_tree_lookup (mem_map
, &s
);
569 gomp_mutex_unlock (&devicep
->lock
);
570 gomp_fatal ("pointer target not mapped for attach");
573 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
576 "%s: attaching host %p, target %p (struct base %p) to %p\n",
577 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
578 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
580 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
581 sizeof (void *), cbufp
);
584 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
585 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
588 attribute_hidden
void
589 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
590 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
591 uintptr_t detach_from
, bool finalize
,
592 struct gomp_coalesce_buf
*cbufp
)
598 gomp_mutex_unlock (&devicep
->lock
);
599 gomp_fatal ("enclosing struct not mapped for detach");
602 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
604 if (!n
->aux
|| !n
->aux
->attach_count
)
606 gomp_mutex_unlock (&devicep
->lock
);
607 gomp_fatal ("no attachment counters for struct");
611 n
->aux
->attach_count
[idx
] = 1;
613 if (n
->aux
->attach_count
[idx
] == 0)
615 gomp_mutex_unlock (&devicep
->lock
);
616 gomp_fatal ("attach count underflow");
619 n
->aux
->attach_count
[idx
]--;
621 if (n
->aux
->attach_count
[idx
] == 0)
623 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
625 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
628 "%s: detaching host %p, target %p (struct base %p) to %p\n",
629 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
630 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
633 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
634 sizeof (void *), cbufp
);
637 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
638 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
641 attribute_hidden
uintptr_t
642 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
644 if (tgt
->list
[i
].key
!= NULL
)
645 return tgt
->list
[i
].key
->tgt
->tgt_start
646 + tgt
->list
[i
].key
->tgt_offset
647 + tgt
->list
[i
].offset
;
649 switch (tgt
->list
[i
].offset
)
652 return (uintptr_t) hostaddrs
[i
];
658 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
659 + tgt
->list
[i
+ 1].key
->tgt_offset
660 + tgt
->list
[i
+ 1].offset
661 + (uintptr_t) hostaddrs
[i
]
662 - (uintptr_t) hostaddrs
[i
+ 1];
665 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
669 static inline __attribute__((always_inline
)) struct target_mem_desc
*
670 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
671 struct goacc_asyncqueue
*aq
, size_t mapnum
,
672 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
673 void *kinds
, bool short_mapkind
,
674 enum gomp_map_vars_kind pragma_kind
)
676 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
677 bool has_firstprivate
= false;
678 bool has_always_ptrset
= false;
679 const int rshift
= short_mapkind
? 8 : 3;
680 const int typemask
= short_mapkind
? 0xff : 0x7;
681 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
682 struct splay_tree_key_s cur_node
;
683 struct target_mem_desc
*tgt
684 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
685 tgt
->list_count
= mapnum
;
686 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
687 tgt
->device_descr
= devicep
;
689 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
698 tgt_align
= sizeof (void *);
704 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
706 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
707 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
710 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
712 size_t align
= 4 * sizeof (void *);
714 tgt_size
= mapnum
* sizeof (void *);
716 cbuf
.use_cnt
= 1 + (mapnum
> 1);
717 cbuf
.chunks
[0].start
= 0;
718 cbuf
.chunks
[0].end
= tgt_size
;
721 gomp_mutex_lock (&devicep
->lock
);
722 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
724 gomp_mutex_unlock (&devicep
->lock
);
729 for (i
= 0; i
< mapnum
; i
++)
731 int kind
= get_kind (short_mapkind
, kinds
, i
);
732 if (hostaddrs
[i
] == NULL
733 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
735 tgt
->list
[i
].key
= NULL
;
736 tgt
->list
[i
].offset
= OFFSET_INLINED
;
739 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
740 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
742 tgt
->list
[i
].key
= NULL
;
745 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
746 on a separate construct prior to using use_device_{addr,ptr}.
747 In OpenMP 5.0, map directives need to be ordered by the
748 middle-end before the use_device_* clauses. If
749 !not_found_cnt, all mappings requested (if any) are already
750 mapped, so use_device_{addr,ptr} can be resolved right away.
751 Otherwise, if not_found_cnt, gomp_map_lookup might fail
752 now but would succeed after performing the mappings in the
753 following loop. We can't defer this always to the second
754 loop, because it is not even invoked when !not_found_cnt
755 after the first loop. */
756 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
757 cur_node
.host_end
= cur_node
.host_start
;
758 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
761 cur_node
.host_start
-= n
->host_start
;
763 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
764 + cur_node
.host_start
);
766 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
768 gomp_mutex_unlock (&devicep
->lock
);
769 gomp_fatal ("use_device_ptr pointer wasn't mapped");
771 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
772 /* If not present, continue using the host address. */
775 __builtin_unreachable ();
776 tgt
->list
[i
].offset
= OFFSET_INLINED
;
779 tgt
->list
[i
].offset
= 0;
782 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
784 size_t first
= i
+ 1;
785 size_t last
= i
+ sizes
[i
];
786 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
787 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
789 tgt
->list
[i
].key
= NULL
;
790 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
791 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
794 size_t align
= (size_t) 1 << (kind
>> rshift
);
795 if (tgt_align
< align
)
797 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
798 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
799 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
800 not_found_cnt
+= last
- i
;
801 for (i
= first
; i
<= last
; i
++)
803 tgt
->list
[i
].key
= NULL
;
804 if (gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
806 gomp_coalesce_buf_add (&cbuf
,
807 tgt_size
- cur_node
.host_end
808 + (uintptr_t) hostaddrs
[i
],
814 for (i
= first
; i
<= last
; i
++)
815 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
820 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
822 tgt
->list
[i
].key
= NULL
;
823 tgt
->list
[i
].offset
= OFFSET_POINTER
;
824 has_firstprivate
= true;
827 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
)
829 tgt
->list
[i
].key
= NULL
;
830 has_firstprivate
= true;
833 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
834 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
835 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
837 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
838 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
840 tgt
->list
[i
].key
= NULL
;
842 size_t align
= (size_t) 1 << (kind
>> rshift
);
843 if (tgt_align
< align
)
845 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
846 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
847 cur_node
.host_end
- cur_node
.host_start
);
848 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
849 has_firstprivate
= true;
853 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
855 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
858 tgt
->list
[i
].key
= NULL
;
859 tgt
->list
[i
].offset
= OFFSET_POINTER
;
864 n
= splay_tree_lookup (mem_map
, &cur_node
);
865 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
867 int always_to_cnt
= 0;
868 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
870 bool has_nullptr
= false;
872 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
873 if (n
->tgt
->list
[j
].key
== n
)
875 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
878 if (n
->tgt
->list_count
== 0)
880 /* 'declare target'; assume has_nullptr; it could also be
881 statically assigned pointer, but that it should be to
882 the equivalent variable on the host. */
883 assert (n
->refcount
== REFCOUNT_INFINITY
);
887 assert (j
< n
->tgt
->list_count
);
888 /* Re-map the data if there is an 'always' modifier or if it a
889 null pointer was there and non a nonnull has been found; that
890 permits transparent re-mapping for Fortran array descriptors
891 which were previously mapped unallocated. */
892 for (j
= i
+ 1; j
< mapnum
; j
++)
894 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
895 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
897 || !GOMP_MAP_POINTER_P (ptr_kind
)
898 || *(void **) hostaddrs
[j
] == NULL
))
900 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
901 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
902 > cur_node
.host_end
))
906 has_always_ptrset
= true;
911 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
912 kind
& typemask
, always_to_cnt
> 0, NULL
);
917 tgt
->list
[i
].key
= NULL
;
919 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
921 /* Not present, hence, skip entry - including its MAP_POINTER,
923 tgt
->list
[i
].offset
= OFFSET_POINTER
;
925 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
926 == GOMP_MAP_POINTER
))
929 tgt
->list
[i
].key
= NULL
;
930 tgt
->list
[i
].offset
= 0;
934 size_t align
= (size_t) 1 << (kind
>> rshift
);
936 if (tgt_align
< align
)
938 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
939 if (gomp_to_device_kind_p (kind
& typemask
))
940 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
941 cur_node
.host_end
- cur_node
.host_start
);
942 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
943 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
947 for (j
= i
+ 1; j
< mapnum
; j
++)
948 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
949 kinds
, j
)) & typemask
))
950 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
952 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
953 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
954 > cur_node
.host_end
))
958 tgt
->list
[j
].key
= NULL
;
969 gomp_mutex_unlock (&devicep
->lock
);
970 gomp_fatal ("unexpected aggregation");
972 tgt
->to_free
= devaddrs
[0];
973 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
974 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
976 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
978 /* Allocate tgt_align aligned tgt_size block of memory. */
979 /* FIXME: Perhaps change interface to allocate properly aligned
981 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
982 tgt_size
+ tgt_align
- 1);
985 gomp_mutex_unlock (&devicep
->lock
);
986 gomp_fatal ("device memory allocation fail");
989 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
990 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
991 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
993 if (cbuf
.use_cnt
== 1)
995 if (cbuf
.chunk_cnt
> 0)
998 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1008 tgt
->to_free
= NULL
;
1014 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1015 tgt_size
= mapnum
* sizeof (void *);
1018 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1021 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1022 splay_tree_node array
= tgt
->array
;
1023 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1024 uintptr_t field_tgt_base
= 0;
1026 for (i
= 0; i
< mapnum
; i
++)
1027 if (has_always_ptrset
1029 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1030 == GOMP_MAP_TO_PSET
)
1032 splay_tree_key k
= tgt
->list
[i
].key
;
1033 bool has_nullptr
= false;
1035 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1036 if (k
->tgt
->list
[j
].key
== k
)
1038 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1041 if (k
->tgt
->list_count
== 0)
1044 assert (j
< k
->tgt
->list_count
);
1046 tgt
->list
[i
].has_null_ptr_assoc
= false;
1047 for (j
= i
+ 1; j
< mapnum
; j
++)
1049 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1050 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1052 || !GOMP_MAP_POINTER_P (ptr_kind
)
1053 || *(void **) hostaddrs
[j
] == NULL
))
1055 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1056 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1061 if (*(void **) hostaddrs
[j
] == NULL
)
1062 tgt
->list
[i
].has_null_ptr_assoc
= true;
1063 tgt
->list
[j
].key
= k
;
1064 tgt
->list
[j
].copy_from
= false;
1065 tgt
->list
[j
].always_copy_from
= false;
1066 tgt
->list
[j
].is_attach
= false;
1067 if (k
->refcount
!= REFCOUNT_INFINITY
)
1069 gomp_map_pointer (k
->tgt
, aq
,
1070 (uintptr_t) *(void **) hostaddrs
[j
],
1071 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1078 else if (tgt
->list
[i
].key
== NULL
)
1080 int kind
= get_kind (short_mapkind
, kinds
, i
);
1081 if (hostaddrs
[i
] == NULL
)
1083 switch (kind
& typemask
)
1085 size_t align
, len
, first
, last
;
1087 case GOMP_MAP_FIRSTPRIVATE
:
1088 align
= (size_t) 1 << (kind
>> rshift
);
1089 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1090 tgt
->list
[i
].offset
= tgt_size
;
1092 gomp_copy_host2dev (devicep
, aq
,
1093 (void *) (tgt
->tgt_start
+ tgt_size
),
1094 (void *) hostaddrs
[i
], len
, cbufp
);
1097 case GOMP_MAP_FIRSTPRIVATE_INT
:
1098 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1100 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1101 /* The OpenACC 'host_data' construct only allows 'use_device'
1102 "mapping" clauses, so in the first loop, 'not_found_cnt'
1103 must always have been zero, so all OpenACC 'use_device'
1104 clauses have already been handled. (We can only easily test
1105 'use_device' with 'if_present' clause here.) */
1106 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1107 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1108 code conceptually simple, similar to the first loop. */
1109 case GOMP_MAP_USE_DEVICE_PTR
:
1110 if (tgt
->list
[i
].offset
== 0)
1112 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1113 cur_node
.host_end
= cur_node
.host_start
;
1114 n
= gomp_map_lookup (mem_map
, &cur_node
);
1117 cur_node
.host_start
-= n
->host_start
;
1119 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1120 + cur_node
.host_start
);
1122 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1124 gomp_mutex_unlock (&devicep
->lock
);
1125 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1127 else if ((kind
& typemask
)
1128 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1129 /* If not present, continue using the host address. */
1132 __builtin_unreachable ();
1133 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1136 case GOMP_MAP_STRUCT
:
1138 last
= i
+ sizes
[i
];
1139 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1140 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1142 if (tgt
->list
[first
].key
!= NULL
)
1144 n
= splay_tree_lookup (mem_map
, &cur_node
);
1147 size_t align
= (size_t) 1 << (kind
>> rshift
);
1148 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1149 - (uintptr_t) hostaddrs
[i
];
1150 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1151 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1152 - (uintptr_t) hostaddrs
[i
];
1153 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1154 field_tgt_offset
= tgt_size
;
1155 field_tgt_clear
= last
;
1156 tgt_size
+= cur_node
.host_end
1157 - (uintptr_t) hostaddrs
[first
];
1160 for (i
= first
; i
<= last
; i
++)
1161 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1162 sizes
, kinds
, cbufp
);
1165 case GOMP_MAP_ALWAYS_POINTER
:
1166 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1167 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1168 n
= splay_tree_lookup (mem_map
, &cur_node
);
1170 || n
->host_start
> cur_node
.host_start
1171 || n
->host_end
< cur_node
.host_end
)
1173 gomp_mutex_unlock (&devicep
->lock
);
1174 gomp_fatal ("always pointer not mapped");
1176 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1177 != GOMP_MAP_ALWAYS_POINTER
)
1178 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1179 if (cur_node
.tgt_offset
)
1180 cur_node
.tgt_offset
-= sizes
[i
];
1181 gomp_copy_host2dev (devicep
, aq
,
1182 (void *) (n
->tgt
->tgt_start
1184 + cur_node
.host_start
1186 (void *) &cur_node
.tgt_offset
,
1187 sizeof (void *), cbufp
);
1188 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1189 + cur_node
.host_start
- n
->host_start
;
1191 case GOMP_MAP_IF_PRESENT
:
1192 /* Not present - otherwise handled above. Skip over its
1193 MAP_POINTER as well. */
1195 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1196 == GOMP_MAP_POINTER
))
1199 case GOMP_MAP_ATTACH
:
1201 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1202 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1203 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1206 tgt
->list
[i
].key
= n
;
1207 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1208 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1209 tgt
->list
[i
].copy_from
= false;
1210 tgt
->list
[i
].always_copy_from
= false;
1211 tgt
->list
[i
].is_attach
= true;
1212 /* OpenACC 'attach'/'detach' doesn't affect
1213 structured/dynamic reference counts ('n->refcount',
1214 'n->dynamic_refcount'). */
1216 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1217 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1220 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1222 gomp_mutex_unlock (&devicep
->lock
);
1223 gomp_fatal ("outer struct not mapped for attach");
1230 splay_tree_key k
= &array
->key
;
1231 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1232 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1233 k
->host_end
= k
->host_start
+ sizes
[i
];
1235 k
->host_end
= k
->host_start
+ sizeof (void *);
1236 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1237 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1238 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1239 kind
& typemask
, false, cbufp
);
1243 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1245 /* Replace target address of the pointer with target address
1246 of mapped object in the splay tree. */
1247 splay_tree_remove (mem_map
, n
);
1249 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1250 k
->aux
->link_key
= n
;
1252 size_t align
= (size_t) 1 << (kind
>> rshift
);
1253 tgt
->list
[i
].key
= k
;
1255 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1257 k
->tgt_offset
= k
->host_start
- field_tgt_base
1259 if (i
== field_tgt_clear
)
1260 field_tgt_clear
= FIELD_TGT_EMPTY
;
1264 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1265 k
->tgt_offset
= tgt_size
;
1266 tgt_size
+= k
->host_end
- k
->host_start
;
1268 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1269 tgt
->list
[i
].always_copy_from
1270 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1271 tgt
->list
[i
].is_attach
= false;
1272 tgt
->list
[i
].offset
= 0;
1273 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1275 k
->dynamic_refcount
= 0;
1278 array
->right
= NULL
;
1279 splay_tree_insert (mem_map
, array
);
1280 switch (kind
& typemask
)
1282 case GOMP_MAP_ALLOC
:
1284 case GOMP_MAP_FORCE_ALLOC
:
1285 case GOMP_MAP_FORCE_FROM
:
1286 case GOMP_MAP_ALWAYS_FROM
:
1289 case GOMP_MAP_TOFROM
:
1290 case GOMP_MAP_FORCE_TO
:
1291 case GOMP_MAP_FORCE_TOFROM
:
1292 case GOMP_MAP_ALWAYS_TO
:
1293 case GOMP_MAP_ALWAYS_TOFROM
:
1294 gomp_copy_host2dev (devicep
, aq
,
1295 (void *) (tgt
->tgt_start
1297 (void *) k
->host_start
,
1298 k
->host_end
- k
->host_start
, cbufp
);
1300 case GOMP_MAP_POINTER
:
1301 gomp_map_pointer (tgt
, aq
,
1302 (uintptr_t) *(void **) k
->host_start
,
1303 k
->tgt_offset
, sizes
[i
], cbufp
);
1305 case GOMP_MAP_TO_PSET
:
1306 gomp_copy_host2dev (devicep
, aq
,
1307 (void *) (tgt
->tgt_start
1309 (void *) k
->host_start
,
1310 k
->host_end
- k
->host_start
, cbufp
);
1311 tgt
->list
[i
].has_null_ptr_assoc
= false;
1313 for (j
= i
+ 1; j
< mapnum
; j
++)
1315 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1317 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1318 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1320 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1321 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1326 tgt
->list
[j
].key
= k
;
1327 tgt
->list
[j
].copy_from
= false;
1328 tgt
->list
[j
].always_copy_from
= false;
1329 tgt
->list
[j
].is_attach
= false;
1330 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1331 if (k
->refcount
!= REFCOUNT_INFINITY
)
1333 gomp_map_pointer (tgt
, aq
,
1334 (uintptr_t) *(void **) hostaddrs
[j
],
1336 + ((uintptr_t) hostaddrs
[j
]
1343 case GOMP_MAP_FORCE_PRESENT
:
1345 /* We already looked up the memory region above and it
1347 size_t size
= k
->host_end
- k
->host_start
;
1348 gomp_mutex_unlock (&devicep
->lock
);
1349 #ifdef HAVE_INTTYPES_H
1350 gomp_fatal ("present clause: !acc_is_present (%p, "
1351 "%"PRIu64
" (0x%"PRIx64
"))",
1352 (void *) k
->host_start
,
1353 (uint64_t) size
, (uint64_t) size
);
1355 gomp_fatal ("present clause: !acc_is_present (%p, "
1356 "%lu (0x%lx))", (void *) k
->host_start
,
1357 (unsigned long) size
, (unsigned long) size
);
1361 case GOMP_MAP_FORCE_DEVICEPTR
:
1362 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1363 gomp_copy_host2dev (devicep
, aq
,
1364 (void *) (tgt
->tgt_start
1366 (void *) k
->host_start
,
1367 sizeof (void *), cbufp
);
1370 gomp_mutex_unlock (&devicep
->lock
);
1371 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1375 if (k
->aux
&& k
->aux
->link_key
)
1377 /* Set link pointer on target to the device address of the
1379 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1380 /* We intentionally do not use coalescing here, as it's not
1381 data allocated by the current call to this function. */
1382 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1383 &tgt_addr
, sizeof (void *), NULL
);
1390 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1392 for (i
= 0; i
< mapnum
; i
++)
1394 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1395 gomp_copy_host2dev (devicep
, aq
,
1396 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1397 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1405 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1406 gomp_copy_host2dev (devicep
, aq
,
1407 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1408 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1409 - cbuf
.chunks
[0].start
),
1410 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
, NULL
);
1416 /* If the variable from "omp target enter data" map-list was already mapped,
1417 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1419 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1425 gomp_mutex_unlock (&devicep
->lock
);
1429 attribute_hidden
struct target_mem_desc
*
1430 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1431 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1432 bool short_mapkind
, enum gomp_map_vars_kind pragma_kind
)
1434 return gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1435 sizes
, kinds
, short_mapkind
, pragma_kind
);
1438 attribute_hidden
struct target_mem_desc
*
1439 gomp_map_vars_async (struct gomp_device_descr
*devicep
,
1440 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1441 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1442 void *kinds
, bool short_mapkind
,
1443 enum gomp_map_vars_kind pragma_kind
)
1445 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1446 sizes
, kinds
, short_mapkind
, pragma_kind
);
1450 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1452 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1454 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1461 gomp_unref_tgt (void *ptr
)
1463 bool is_tgt_unmapped
= false;
1465 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1467 if (tgt
->refcount
> 1)
1471 gomp_unmap_tgt (tgt
);
1472 is_tgt_unmapped
= true;
1475 return is_tgt_unmapped
;
/* void-returning adapter around gomp_unref_tgt, suitable for use as an
   async-queue callback where the "was it unmapped" result is irrelevant.  */
static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1484 static inline __attribute__((always_inline
)) bool
1485 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1486 struct goacc_asyncqueue
*aq
)
1488 bool is_tgt_unmapped
= false;
1489 splay_tree_remove (&devicep
->mem_map
, k
);
1492 if (k
->aux
->link_key
)
1493 splay_tree_insert (&devicep
->mem_map
,
1494 (splay_tree_node
) k
->aux
->link_key
);
1495 if (k
->aux
->attach_count
)
1496 free (k
->aux
->attach_count
);
1501 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1504 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1505 return is_tgt_unmapped
;
1508 attribute_hidden
bool
1509 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1511 return gomp_remove_var_internal (devicep
, k
, NULL
);
1514 /* Remove a variable asynchronously. This actually removes the variable
1515 mapping immediately, but retains the linked target_mem_desc until the
1516 asynchronous operation has completed (as it may still refer to target
1517 memory). The device lock must be held before entry, and remains locked on
1520 attribute_hidden
void
1521 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1522 struct goacc_asyncqueue
*aq
)
1524 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1527 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1528 variables back from device to host: if it is false, it is assumed that this
1529 has been done already. */
1531 static inline __attribute__((always_inline
)) void
1532 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1533 struct goacc_asyncqueue
*aq
)
1535 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1537 if (tgt
->list_count
== 0)
1543 gomp_mutex_lock (&devicep
->lock
);
1544 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1546 gomp_mutex_unlock (&devicep
->lock
);
1554 /* We must perform detachments before any copies back to the host. */
1555 for (i
= 0; i
< tgt
->list_count
; i
++)
1557 splay_tree_key k
= tgt
->list
[i
].key
;
1559 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1560 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1561 + tgt
->list
[i
].offset
,
1565 for (i
= 0; i
< tgt
->list_count
; i
++)
1567 splay_tree_key k
= tgt
->list
[i
].key
;
1571 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1572 counts ('n->refcount', 'n->dynamic_refcount'). */
1573 if (tgt
->list
[i
].is_attach
)
1576 bool do_unmap
= false;
1577 if (k
->refcount
> 1 && k
->refcount
!= REFCOUNT_INFINITY
)
1579 else if (k
->refcount
== 1)
1585 if ((do_unmap
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1586 || tgt
->list
[i
].always_copy_from
)
1587 gomp_copy_dev2host (devicep
, aq
,
1588 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1589 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1590 + tgt
->list
[i
].offset
),
1591 tgt
->list
[i
].length
);
1594 struct target_mem_desc
*k_tgt
= k
->tgt
;
1595 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1596 /* It would be bad if TGT got unmapped while we're still iterating
1597 over its LIST_COUNT, and also expect to use it in the following
1599 assert (!is_tgt_unmapped
1605 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1608 gomp_unref_tgt ((void *) tgt
);
1610 gomp_mutex_unlock (&devicep
->lock
);
1613 attribute_hidden
void
1614 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
)
1616 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
);
1619 attribute_hidden
void
1620 gomp_unmap_vars_async (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1621 struct goacc_asyncqueue
*aq
)
1623 gomp_unmap_vars_internal (tgt
, do_copyfrom
, aq
);
1627 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
1628 size_t *sizes
, void *kinds
, bool short_mapkind
)
1631 struct splay_tree_key_s cur_node
;
1632 const int typemask
= short_mapkind
? 0xff : 0x7;
1640 gomp_mutex_lock (&devicep
->lock
);
1641 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1643 gomp_mutex_unlock (&devicep
->lock
);
1647 for (i
= 0; i
< mapnum
; i
++)
1650 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1651 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1652 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
1655 int kind
= get_kind (short_mapkind
, kinds
, i
);
1656 if (n
->host_start
> cur_node
.host_start
1657 || n
->host_end
< cur_node
.host_end
)
1659 gomp_mutex_unlock (&devicep
->lock
);
1660 gomp_fatal ("Trying to update [%p..%p) object when "
1661 "only [%p..%p) is mapped",
1662 (void *) cur_node
.host_start
,
1663 (void *) cur_node
.host_end
,
1664 (void *) n
->host_start
,
1665 (void *) n
->host_end
);
1669 void *hostaddr
= (void *) cur_node
.host_start
;
1670 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1671 + cur_node
.host_start
- n
->host_start
);
1672 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
1674 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
1675 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
1677 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
1678 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
1681 gomp_mutex_unlock (&devicep
->lock
);
1684 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1685 And insert to splay tree the mapping between addresses from HOST_TABLE and
1686 from loaded target image. We rely in the host and device compiler
1687 emitting variable and functions in the same order. */
1690 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
1691 const void *host_table
, const void *target_data
,
1692 bool is_register_lock
)
1694 void **host_func_table
= ((void ***) host_table
)[0];
1695 void **host_funcs_end
= ((void ***) host_table
)[1];
1696 void **host_var_table
= ((void ***) host_table
)[2];
1697 void **host_vars_end
= ((void ***) host_table
)[3];
1699 /* The func table contains only addresses, the var table contains addresses
1700 and corresponding sizes. */
1701 int num_funcs
= host_funcs_end
- host_func_table
;
1702 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1704 /* Load image to device and get target addresses for the image. */
1705 struct addr_pair
*target_table
= NULL
;
1706 int i
, num_target_entries
;
1709 = devicep
->load_image_func (devicep
->target_id
, version
,
1710 target_data
, &target_table
);
1712 if (num_target_entries
!= num_funcs
+ num_vars
)
1714 gomp_mutex_unlock (&devicep
->lock
);
1715 if (is_register_lock
)
1716 gomp_mutex_unlock (®ister_lock
);
1717 gomp_fatal ("Cannot map target functions or variables"
1718 " (expected %u, have %u)", num_funcs
+ num_vars
,
1719 num_target_entries
);
1722 /* Insert host-target address mapping into splay tree. */
1723 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
1724 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
1725 tgt
->refcount
= REFCOUNT_INFINITY
;
1728 tgt
->to_free
= NULL
;
1730 tgt
->list_count
= 0;
1731 tgt
->device_descr
= devicep
;
1732 splay_tree_node array
= tgt
->array
;
1734 for (i
= 0; i
< num_funcs
; i
++)
1736 splay_tree_key k
= &array
->key
;
1737 k
->host_start
= (uintptr_t) host_func_table
[i
];
1738 k
->host_end
= k
->host_start
+ 1;
1740 k
->tgt_offset
= target_table
[i
].start
;
1741 k
->refcount
= REFCOUNT_INFINITY
;
1742 k
->dynamic_refcount
= 0;
1745 array
->right
= NULL
;
1746 splay_tree_insert (&devicep
->mem_map
, array
);
1750 /* Most significant bit of the size in host and target tables marks
1751 "omp declare target link" variables. */
1752 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1753 const uintptr_t size_mask
= ~link_bit
;
1755 for (i
= 0; i
< num_vars
; i
++)
1757 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
1758 uintptr_t target_size
= target_var
->end
- target_var
->start
;
1759 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
1761 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
1763 gomp_mutex_unlock (&devicep
->lock
);
1764 if (is_register_lock
)
1765 gomp_mutex_unlock (®ister_lock
);
1766 gomp_fatal ("Cannot map target variables (size mismatch)");
1769 splay_tree_key k
= &array
->key
;
1770 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
1772 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1774 k
->tgt_offset
= target_var
->start
;
1775 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
1776 k
->dynamic_refcount
= 0;
1779 array
->right
= NULL
;
1780 splay_tree_insert (&devicep
->mem_map
, array
);
1784 free (target_table
);
1787 /* Unload the mappings described by target_data from device DEVICE_P.
1788 The device must be locked. */
1791 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
1793 const void *host_table
, const void *target_data
)
1795 void **host_func_table
= ((void ***) host_table
)[0];
1796 void **host_funcs_end
= ((void ***) host_table
)[1];
1797 void **host_var_table
= ((void ***) host_table
)[2];
1798 void **host_vars_end
= ((void ***) host_table
)[3];
1800 /* The func table contains only addresses, the var table contains addresses
1801 and corresponding sizes. */
1802 int num_funcs
= host_funcs_end
- host_func_table
;
1803 int num_vars
= (host_vars_end
- host_var_table
) / 2;
1805 struct splay_tree_key_s k
;
1806 splay_tree_key node
= NULL
;
1808 /* Find mapping at start of node array */
1809 if (num_funcs
|| num_vars
)
1811 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
1812 : (uintptr_t) host_var_table
[0]);
1813 k
.host_end
= k
.host_start
+ 1;
1814 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1817 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
1819 gomp_mutex_unlock (&devicep
->lock
);
1820 gomp_fatal ("image unload fail");
1823 /* Remove mappings from splay tree. */
1825 for (i
= 0; i
< num_funcs
; i
++)
1827 k
.host_start
= (uintptr_t) host_func_table
[i
];
1828 k
.host_end
= k
.host_start
+ 1;
1829 splay_tree_remove (&devicep
->mem_map
, &k
);
1832 /* Most significant bit of the size in host and target tables marks
1833 "omp declare target link" variables. */
1834 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
1835 const uintptr_t size_mask
= ~link_bit
;
1836 bool is_tgt_unmapped
= false;
1838 for (i
= 0; i
< num_vars
; i
++)
1840 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
1842 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
1844 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
1845 splay_tree_remove (&devicep
->mem_map
, &k
);
1848 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
1849 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
1853 if (node
&& !is_tgt_unmapped
)
1860 /* This function should be called from every offload image while loading.
1861 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1862 the target, and TARGET_DATA needed by target plugin. */
1865 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
1866 int target_type
, const void *target_data
)
1870 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
1871 gomp_fatal ("Library too old for offload (version %u < %u)",
1872 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
1874 gomp_mutex_lock (®ister_lock
);
1876 /* Load image to all initialized devices. */
1877 for (i
= 0; i
< num_devices
; i
++)
1879 struct gomp_device_descr
*devicep
= &devices
[i
];
1880 gomp_mutex_lock (&devicep
->lock
);
1881 if (devicep
->type
== target_type
1882 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1883 gomp_load_image_to_device (devicep
, version
,
1884 host_table
, target_data
, true);
1885 gomp_mutex_unlock (&devicep
->lock
);
1888 /* Insert image to array of pending images. */
1890 = gomp_realloc_unlock (offload_images
,
1891 (num_offload_images
+ 1)
1892 * sizeof (struct offload_image_descr
));
1893 offload_images
[num_offload_images
].version
= version
;
1894 offload_images
[num_offload_images
].type
= target_type
;
1895 offload_images
[num_offload_images
].host_table
= host_table
;
1896 offload_images
[num_offload_images
].target_data
= target_data
;
1898 num_offload_images
++;
1899 gomp_mutex_unlock (®ister_lock
);
/* Legacy (version-0) entry point kept for ABI compatibility; forwards to
   the versioned registration routine.  */
void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1909 /* This function should be called from every offload image while unloading.
1910 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1911 the target, and TARGET_DATA needed by target plugin. */
1914 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
1915 int target_type
, const void *target_data
)
1919 gomp_mutex_lock (®ister_lock
);
1921 /* Unload image from all initialized devices. */
1922 for (i
= 0; i
< num_devices
; i
++)
1924 struct gomp_device_descr
*devicep
= &devices
[i
];
1925 gomp_mutex_lock (&devicep
->lock
);
1926 if (devicep
->type
== target_type
1927 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1928 gomp_unload_image_from_device (devicep
, version
,
1929 host_table
, target_data
);
1930 gomp_mutex_unlock (&devicep
->lock
);
1933 /* Remove image from array of pending images. */
1934 for (i
= 0; i
< num_offload_images
; i
++)
1935 if (offload_images
[i
].target_data
== target_data
)
1937 offload_images
[i
] = offload_images
[--num_offload_images
];
1941 gomp_mutex_unlock (®ister_lock
);
/* Legacy (version-0) entry point kept for ABI compatibility; forwards to
   the versioned unregistration routine.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1951 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1952 must be locked on entry, and remains locked on return. */
1954 attribute_hidden
void
1955 gomp_init_device (struct gomp_device_descr
*devicep
)
1958 if (!devicep
->init_device_func (devicep
->target_id
))
1960 gomp_mutex_unlock (&devicep
->lock
);
1961 gomp_fatal ("device initialization failed");
1964 /* Load to device all images registered by the moment. */
1965 for (i
= 0; i
< num_offload_images
; i
++)
1967 struct offload_image_descr
*image
= &offload_images
[i
];
1968 if (image
->type
== devicep
->type
)
1969 gomp_load_image_to_device (devicep
, image
->version
,
1970 image
->host_table
, image
->target_data
,
1974 /* Initialize OpenACC asynchronous queues. */
1975 goacc_init_asyncqueues (devicep
);
1977 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
1980 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1981 must be locked on entry, and remains locked on return. */
1983 attribute_hidden
bool
1984 gomp_fini_device (struct gomp_device_descr
*devicep
)
1986 bool ret
= goacc_fini_asyncqueues (devicep
);
1987 ret
&= devicep
->fini_device_func (devicep
->target_id
);
1988 devicep
->state
= GOMP_DEVICE_FINALIZED
;
1992 attribute_hidden
void
1993 gomp_unload_device (struct gomp_device_descr
*devicep
)
1995 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
1999 /* Unload from device all images registered at the moment. */
2000 for (i
= 0; i
< num_offload_images
; i
++)
2002 struct offload_image_descr
*image
= &offload_images
[i
];
2003 if (image
->type
== devicep
->type
)
2004 gomp_unload_image_from_device (devicep
, image
->version
,
2006 image
->target_data
);
2011 /* Host fallback for GOMP_target{,_ext} routines. */
2014 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2015 struct gomp_device_descr
*devicep
)
2017 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2019 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2021 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2022 "be used for offloading");
2025 memset (thr
, '\0', sizeof (*thr
));
2026 if (gomp_places_list
)
2028 thr
->place
= old_thr
.place
;
2029 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2032 gomp_free_thread (thr
);
2036 /* Calculate alignment and size requirements of a private copy of data shared
2037 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2040 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2041 unsigned short *kinds
, size_t *tgt_align
,
2045 for (i
= 0; i
< mapnum
; i
++)
2046 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2048 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2049 if (*tgt_align
< align
)
2051 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2052 *tgt_size
+= sizes
[i
];
2056 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2059 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2060 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2063 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2065 tgt
+= tgt_align
- al
;
2068 for (i
= 0; i
< mapnum
; i
++)
2069 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2071 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2072 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2073 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2074 hostaddrs
[i
] = tgt
+ tgt_size
;
2075 tgt_size
= tgt_size
+ sizes
[i
];
2079 /* Helper function of GOMP_target{,_ext} routines. */
2082 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2083 void (*host_fn
) (void *))
2085 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2086 return (void *) host_fn
;
2089 gomp_mutex_lock (&devicep
->lock
);
2090 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2092 gomp_mutex_unlock (&devicep
->lock
);
2096 struct splay_tree_key_s k
;
2097 k
.host_start
= (uintptr_t) host_fn
;
2098 k
.host_end
= k
.host_start
+ 1;
2099 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2100 gomp_mutex_unlock (&devicep
->lock
);
2104 return (void *) tgt_fn
->tgt_offset
;
2108 /* Called when encountering a target directive. If DEVICE
2109 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2110 GOMP_DEVICE_HOST_FALLBACK (or any value
2111 larger than last available hw device), use host fallback.
2112 FN is address of host code, UNUSED is part of the current ABI, but
2113 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2114 with MAPNUM entries, with addresses of the host objects,
2115 sizes of the host objects (resp. for pointer kind pointer bias
2116 and assumed sizeof (void *) size) and kinds. */
2119 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2120 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2121 unsigned char *kinds
)
2123 struct gomp_device_descr
*devicep
= resolve_device (device
);
2127 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2128 /* All shared memory devices should use the GOMP_target_ext function. */
2129 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2130 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2131 return gomp_target_fallback (fn
, hostaddrs
, devicep
);
2133 struct target_mem_desc
*tgt_vars
2134 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2135 GOMP_MAP_VARS_TARGET
);
2136 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2138 gomp_unmap_vars (tgt_vars
, true);
2141 static inline unsigned int
2142 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2144 /* If we cannot run asynchronously, simply ignore nowait. */
2145 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2146 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2151 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2152 and several arguments have been added:
2153 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2154 DEPEND is array of dependencies, see GOMP_task for details.
2156 ARGS is a pointer to an array consisting of a variable number of both
2157 device-independent and device-specific arguments, which can take one two
2158 elements where the first specifies for which device it is intended, the type
2159 and optionally also the value. If the value is not present in the first
2160 one, the whole second element the actual value. The last element of the
2161 array is a single NULL. Among the device independent can be for example
2162 NUM_TEAMS and THREAD_LIMIT.
2164 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2165 that value, or 1 if teams construct is not present, or 0, if
2166 teams construct does not have num_teams clause and so the choice is
2167 implementation defined, and -1 if it can't be determined on the host
2168 what value will GOMP_teams have on the device.
2169 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2170 body with that value, or 0, if teams construct does not have thread_limit
2171 clause or the teams construct is not present, or -1 if it can't be
2172 determined on the host what value will GOMP_teams have on the device. */
2175 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2176 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2177 unsigned int flags
, void **depend
, void **args
)
2179 struct gomp_device_descr
*devicep
= resolve_device (device
);
2180 size_t tgt_align
= 0, tgt_size
= 0;
2181 bool fpc_done
= false;
2183 flags
= clear_unsupported_flags (devicep
, flags
);
2185 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2187 struct gomp_thread
*thr
= gomp_thread ();
2188 /* Create a team if we don't have any around, as nowait
2189 target tasks make sense to run asynchronously even when
2190 outside of any parallel. */
2191 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2193 struct gomp_team
*team
= gomp_new_team (1);
2194 struct gomp_task
*task
= thr
->task
;
2195 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2196 team
->prev_ts
= thr
->ts
;
2197 thr
->ts
.team
= team
;
2198 thr
->ts
.team_id
= 0;
2199 thr
->ts
.work_share
= &team
->work_shares
[0];
2200 thr
->ts
.last_work_share
= NULL
;
2201 #ifdef HAVE_SYNC_BUILTINS
2202 thr
->ts
.single_count
= 0;
2204 thr
->ts
.static_trip
= 0;
2205 thr
->task
= &team
->implicit_task
[0];
2206 gomp_init_task (thr
->task
, NULL
, icv
);
2212 thr
->task
= &team
->implicit_task
[0];
2215 pthread_setspecific (gomp_thread_destructor
, thr
);
2218 && !thr
->task
->final_task
)
2220 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2221 sizes
, kinds
, flags
, depend
, args
,
2222 GOMP_TARGET_TASK_BEFORE_MAP
);
2227 /* If there are depend clauses, but nowait is not present
2228 (or we are in a final task), block the parent task until the
2229 dependencies are resolved and then just continue with the rest
2230 of the function as if it is a merged task. */
2233 struct gomp_thread
*thr
= gomp_thread ();
2234 if (thr
->task
&& thr
->task
->depend_hash
)
2236 /* If we might need to wait, copy firstprivate now. */
2237 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2238 &tgt_align
, &tgt_size
);
2241 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2242 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2243 tgt_align
, tgt_size
);
2246 gomp_task_maybe_wait_for_dependencies (depend
);
2252 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2253 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2254 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2258 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2259 &tgt_align
, &tgt_size
);
2262 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2263 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2264 tgt_align
, tgt_size
);
2267 gomp_target_fallback (fn
, hostaddrs
, devicep
);
2271 struct target_mem_desc
*tgt_vars
;
2272 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2276 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2277 &tgt_align
, &tgt_size
);
2280 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2281 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2282 tgt_align
, tgt_size
);
2288 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2289 true, GOMP_MAP_VARS_TARGET
);
2290 devicep
->run_func (devicep
->target_id
, fn_addr
,
2291 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2294 gomp_unmap_vars (tgt_vars
, true);
2297 /* Host fallback for GOMP_target_data{,_ext} routines. */
2300 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
2302 struct gomp_task_icv
*icv
= gomp_icv (false);
2304 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2306 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2307 "be used for offloading");
2309 if (icv
->target_data
)
2311 /* Even when doing a host fallback, if there are any active
2312 #pragma omp target data constructs, need to remember the
2313 new #pragma omp target data, otherwise GOMP_target_end_data
2314 would get out of sync. */
2315 struct target_mem_desc
*tgt
2316 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2317 GOMP_MAP_VARS_DATA
);
2318 tgt
->prev
= icv
->target_data
;
2319 icv
->target_data
= tgt
;
2324 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2325 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2327 struct gomp_device_descr
*devicep
= resolve_device (device
);
2330 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2331 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2332 return gomp_target_data_fallback (devicep
);
2334 struct target_mem_desc
*tgt
2335 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2336 GOMP_MAP_VARS_DATA
);
2337 struct gomp_task_icv
*icv
= gomp_icv (true);
2338 tgt
->prev
= icv
->target_data
;
2339 icv
->target_data
= tgt
;
2343 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2344 size_t *sizes
, unsigned short *kinds
)
2346 struct gomp_device_descr
*devicep
= resolve_device (device
);
2349 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2350 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2351 return gomp_target_data_fallback (devicep
);
2353 struct target_mem_desc
*tgt
2354 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2355 GOMP_MAP_VARS_DATA
);
2356 struct gomp_task_icv
*icv
= gomp_icv (true);
2357 tgt
->prev
= icv
->target_data
;
2358 icv
->target_data
= tgt
;
2362 GOMP_target_end_data (void)
2364 struct gomp_task_icv
*icv
= gomp_icv (false);
2365 if (icv
->target_data
)
2367 struct target_mem_desc
*tgt
= icv
->target_data
;
2368 icv
->target_data
= tgt
->prev
;
2369 gomp_unmap_vars (tgt
, true);
2374 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2375 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2377 struct gomp_device_descr
*devicep
= resolve_device (device
);
2380 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2381 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2384 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2388 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2389 size_t *sizes
, unsigned short *kinds
,
2390 unsigned int flags
, void **depend
)
2392 struct gomp_device_descr
*devicep
= resolve_device (device
);
2394 /* If there are depend clauses, but nowait is not present,
2395 block the parent task until the dependencies are resolved
2396 and then just continue with the rest of the function as if it
2397 is a merged task. Until we are able to schedule task during
2398 variable mapping or unmapping, ignore nowait if depend clauses
2402 struct gomp_thread
*thr
= gomp_thread ();
2403 if (thr
->task
&& thr
->task
->depend_hash
)
2405 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2407 && !thr
->task
->final_task
)
2409 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2410 mapnum
, hostaddrs
, sizes
, kinds
,
2411 flags
| GOMP_TARGET_FLAG_UPDATE
,
2412 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2417 struct gomp_team
*team
= thr
->ts
.team
;
2418 /* If parallel or taskgroup has been cancelled, don't start new
2420 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2422 if (gomp_team_barrier_cancelled (&team
->barrier
))
2424 if (thr
->task
->taskgroup
)
2426 if (thr
->task
->taskgroup
->cancelled
)
2428 if (thr
->task
->taskgroup
->workshare
2429 && thr
->task
->taskgroup
->prev
2430 && thr
->task
->taskgroup
->prev
->cancelled
)
2435 gomp_task_maybe_wait_for_dependencies (depend
);
2441 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2442 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2445 struct gomp_thread
*thr
= gomp_thread ();
2446 struct gomp_team
*team
= thr
->ts
.team
;
2447 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2448 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2450 if (gomp_team_barrier_cancelled (&team
->barrier
))
2452 if (thr
->task
->taskgroup
)
2454 if (thr
->task
->taskgroup
->cancelled
)
2456 if (thr
->task
->taskgroup
->workshare
2457 && thr
->task
->taskgroup
->prev
2458 && thr
->task
->taskgroup
->prev
->cancelled
)
2463 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
2467 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
2468 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
2470 const int typemask
= 0xff;
2472 gomp_mutex_lock (&devicep
->lock
);
2473 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2475 gomp_mutex_unlock (&devicep
->lock
);
2479 for (i
= 0; i
< mapnum
; i
++)
2480 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
2482 struct splay_tree_key_s cur_node
;
2483 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2484 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
2485 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2488 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
2492 for (i
= 0; i
< mapnum
; i
++)
2494 struct splay_tree_key_s cur_node
;
2495 unsigned char kind
= kinds
[i
] & typemask
;
2499 case GOMP_MAP_ALWAYS_FROM
:
2500 case GOMP_MAP_DELETE
:
2501 case GOMP_MAP_RELEASE
:
2502 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
2503 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
2504 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2505 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2506 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2507 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
2508 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
2509 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2513 if (k
->refcount
> 0 && k
->refcount
!= REFCOUNT_INFINITY
)
2515 if ((kind
== GOMP_MAP_DELETE
2516 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
2517 && k
->refcount
!= REFCOUNT_INFINITY
)
2520 if ((kind
== GOMP_MAP_FROM
&& k
->refcount
== 0)
2521 || kind
== GOMP_MAP_ALWAYS_FROM
)
2522 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
2523 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2524 + cur_node
.host_start
2526 cur_node
.host_end
- cur_node
.host_start
);
2527 if (k
->refcount
== 0)
2528 gomp_remove_var (devicep
, k
);
2531 case GOMP_MAP_DETACH
:
2534 gomp_mutex_unlock (&devicep
->lock
);
2535 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2540 gomp_mutex_unlock (&devicep
->lock
);
2544 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
2545 size_t *sizes
, unsigned short *kinds
,
2546 unsigned int flags
, void **depend
)
2548 struct gomp_device_descr
*devicep
= resolve_device (device
);
2550 /* If there are depend clauses, but nowait is not present,
2551 block the parent task until the dependencies are resolved
2552 and then just continue with the rest of the function as if it
2553 is a merged task. Until we are able to schedule task during
2554 variable mapping or unmapping, ignore nowait if depend clauses
2558 struct gomp_thread
*thr
= gomp_thread ();
2559 if (thr
->task
&& thr
->task
->depend_hash
)
2561 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2563 && !thr
->task
->final_task
)
2565 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2566 mapnum
, hostaddrs
, sizes
, kinds
,
2567 flags
, depend
, NULL
,
2568 GOMP_TARGET_TASK_DATA
))
2573 struct gomp_team
*team
= thr
->ts
.team
;
2574 /* If parallel or taskgroup has been cancelled, don't start new
2576 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2578 if (gomp_team_barrier_cancelled (&team
->barrier
))
2580 if (thr
->task
->taskgroup
)
2582 if (thr
->task
->taskgroup
->cancelled
)
2584 if (thr
->task
->taskgroup
->workshare
2585 && thr
->task
->taskgroup
->prev
2586 && thr
->task
->taskgroup
->prev
->cancelled
)
2591 gomp_task_maybe_wait_for_dependencies (depend
);
2597 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2598 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2601 struct gomp_thread
*thr
= gomp_thread ();
2602 struct gomp_team
*team
= thr
->ts
.team
;
2603 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2604 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2606 if (gomp_team_barrier_cancelled (&team
->barrier
))
2608 if (thr
->task
->taskgroup
)
2610 if (thr
->task
->taskgroup
->cancelled
)
2612 if (thr
->task
->taskgroup
->workshare
2613 && thr
->task
->taskgroup
->prev
2614 && thr
->task
->taskgroup
->prev
->cancelled
)
2619 /* The variables are mapped separately such that they can be released
2622 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2623 for (i
= 0; i
< mapnum
; i
++)
2624 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2626 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
2627 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2630 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
2632 for (j
= i
+ 1; j
< mapnum
; j
++)
2633 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
2634 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
2636 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
2637 &kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2640 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
2642 /* An attach operation must be processed together with the mapped
2643 base-pointer list item. */
2644 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2645 true, GOMP_MAP_VARS_ENTER_DATA
);
2649 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
2650 true, GOMP_MAP_VARS_ENTER_DATA
);
2652 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
);
2656 gomp_target_task_fn (void *data
)
2658 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
2659 struct gomp_device_descr
*devicep
= ttask
->devicep
;
2661 if (ttask
->fn
!= NULL
)
2665 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2666 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
2667 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2669 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
2670 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
);
2674 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
2677 gomp_unmap_vars (ttask
->tgt
, true);
2681 void *actual_arguments
;
2682 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2685 actual_arguments
= ttask
->hostaddrs
;
2689 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
2690 NULL
, ttask
->sizes
, ttask
->kinds
, true,
2691 GOMP_MAP_VARS_TARGET
);
2692 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
2694 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
2696 assert (devicep
->async_run_func
);
2697 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
2698 ttask
->args
, (void *) ttask
);
2701 else if (devicep
== NULL
2702 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2703 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2707 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
2708 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2709 ttask
->kinds
, true);
2710 else if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
2711 for (i
= 0; i
< ttask
->mapnum
; i
++)
2712 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
2714 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
2715 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
2716 GOMP_MAP_VARS_ENTER_DATA
);
2717 i
+= ttask
->sizes
[i
];
2720 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
2721 &ttask
->kinds
[i
], true, GOMP_MAP_VARS_ENTER_DATA
);
2723 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
2729 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
2733 struct gomp_task_icv
*icv
= gomp_icv (true);
2734 icv
->thread_limit_var
2735 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
2741 omp_target_alloc (size_t size
, int device_num
)
2743 if (device_num
== gomp_get_num_devices ())
2744 return malloc (size
);
2749 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2750 if (devicep
== NULL
)
2753 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2754 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2755 return malloc (size
);
2757 gomp_mutex_lock (&devicep
->lock
);
2758 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
2759 gomp_mutex_unlock (&devicep
->lock
);
2764 omp_target_free (void *device_ptr
, int device_num
)
2766 if (device_ptr
== NULL
)
2769 if (device_num
== gomp_get_num_devices ())
2778 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2779 if (devicep
== NULL
)
2782 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2783 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2789 gomp_mutex_lock (&devicep
->lock
);
2790 gomp_free_device_memory (devicep
, device_ptr
);
2791 gomp_mutex_unlock (&devicep
->lock
);
2795 omp_target_is_present (const void *ptr
, int device_num
)
2800 if (device_num
== gomp_get_num_devices ())
2806 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
2807 if (devicep
== NULL
)
2810 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2811 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2814 gomp_mutex_lock (&devicep
->lock
);
2815 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
2816 struct splay_tree_key_s cur_node
;
2818 cur_node
.host_start
= (uintptr_t) ptr
;
2819 cur_node
.host_end
= cur_node
.host_start
;
2820 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
2821 int ret
= n
!= NULL
;
2822 gomp_mutex_unlock (&devicep
->lock
);
2827 omp_target_memcpy (void *dst
, const void *src
, size_t length
,
2828 size_t dst_offset
, size_t src_offset
, int dst_device_num
,
2831 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2834 if (dst_device_num
!= gomp_get_num_devices ())
2836 if (dst_device_num
< 0)
2839 dst_devicep
= resolve_device (dst_device_num
);
2840 if (dst_devicep
== NULL
)
2843 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2844 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2847 if (src_device_num
!= num_devices_openmp
)
2849 if (src_device_num
< 0)
2852 src_devicep
= resolve_device (src_device_num
);
2853 if (src_devicep
== NULL
)
2856 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2857 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2860 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
2862 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
2865 if (src_devicep
== NULL
)
2867 gomp_mutex_lock (&dst_devicep
->lock
);
2868 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2869 (char *) dst
+ dst_offset
,
2870 (char *) src
+ src_offset
, length
);
2871 gomp_mutex_unlock (&dst_devicep
->lock
);
2872 return (ret
? 0 : EINVAL
);
2874 if (dst_devicep
== NULL
)
2876 gomp_mutex_lock (&src_devicep
->lock
);
2877 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2878 (char *) dst
+ dst_offset
,
2879 (char *) src
+ src_offset
, length
);
2880 gomp_mutex_unlock (&src_devicep
->lock
);
2881 return (ret
? 0 : EINVAL
);
2883 if (src_devicep
== dst_devicep
)
2885 gomp_mutex_lock (&src_devicep
->lock
);
2886 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2887 (char *) dst
+ dst_offset
,
2888 (char *) src
+ src_offset
, length
);
2889 gomp_mutex_unlock (&src_devicep
->lock
);
2890 return (ret
? 0 : EINVAL
);
2896 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
2897 int num_dims
, const size_t *volume
,
2898 const size_t *dst_offsets
,
2899 const size_t *src_offsets
,
2900 const size_t *dst_dimensions
,
2901 const size_t *src_dimensions
,
2902 struct gomp_device_descr
*dst_devicep
,
2903 struct gomp_device_descr
*src_devicep
)
2905 size_t dst_slice
= element_size
;
2906 size_t src_slice
= element_size
;
2907 size_t j
, dst_off
, src_off
, length
;
2912 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
2913 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
2914 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
2916 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
2918 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
2922 else if (src_devicep
== NULL
)
2923 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
2924 (char *) dst
+ dst_off
,
2925 (const char *) src
+ src_off
,
2927 else if (dst_devicep
== NULL
)
2928 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
2929 (char *) dst
+ dst_off
,
2930 (const char *) src
+ src_off
,
2932 else if (src_devicep
== dst_devicep
)
2933 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
2934 (char *) dst
+ dst_off
,
2935 (const char *) src
+ src_off
,
2939 return ret
? 0 : EINVAL
;
2942 /* FIXME: it would be nice to have some plugin function to handle
2943 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2944 be handled in the generic recursion below, and for host-host it
2945 should be used even for any num_dims >= 2. */
2947 for (i
= 1; i
< num_dims
; i
++)
2948 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
2949 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
2951 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
2952 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
2954 for (j
= 0; j
< volume
[0]; j
++)
2956 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
2957 (const char *) src
+ src_off
,
2958 element_size
, num_dims
- 1,
2959 volume
+ 1, dst_offsets
+ 1,
2960 src_offsets
+ 1, dst_dimensions
+ 1,
2961 src_dimensions
+ 1, dst_devicep
,
2965 dst_off
+= dst_slice
;
2966 src_off
+= src_slice
;
2972 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
2973 int num_dims
, const size_t *volume
,
2974 const size_t *dst_offsets
,
2975 const size_t *src_offsets
,
2976 const size_t *dst_dimensions
,
2977 const size_t *src_dimensions
,
2978 int dst_device_num
, int src_device_num
)
2980 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
2985 if (dst_device_num
!= gomp_get_num_devices ())
2987 if (dst_device_num
< 0)
2990 dst_devicep
= resolve_device (dst_device_num
);
2991 if (dst_devicep
== NULL
)
2994 if (!(dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2995 || dst_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2998 if (src_device_num
!= num_devices_openmp
)
3000 if (src_device_num
< 0)
3003 src_devicep
= resolve_device (src_device_num
);
3004 if (src_devicep
== NULL
)
3007 if (!(src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3008 || src_devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3012 if (src_devicep
!= NULL
&& dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
)
3016 gomp_mutex_lock (&src_devicep
->lock
);
3017 else if (dst_devicep
)
3018 gomp_mutex_lock (&dst_devicep
->lock
);
3019 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
3020 volume
, dst_offsets
, src_offsets
,
3021 dst_dimensions
, src_dimensions
,
3022 dst_devicep
, src_devicep
);
3024 gomp_mutex_unlock (&src_devicep
->lock
);
3025 else if (dst_devicep
)
3026 gomp_mutex_unlock (&dst_devicep
->lock
);
3031 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
3032 size_t size
, size_t device_offset
, int device_num
)
3034 if (device_num
== gomp_get_num_devices ())
3040 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3041 if (devicep
== NULL
)
3044 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3045 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3048 gomp_mutex_lock (&devicep
->lock
);
3050 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3051 struct splay_tree_key_s cur_node
;
3054 cur_node
.host_start
= (uintptr_t) host_ptr
;
3055 cur_node
.host_end
= cur_node
.host_start
+ size
;
3056 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3059 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3060 == (uintptr_t) device_ptr
+ device_offset
3061 && n
->host_start
<= cur_node
.host_start
3062 && n
->host_end
>= cur_node
.host_end
)
3067 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3068 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3072 tgt
->to_free
= NULL
;
3074 tgt
->list_count
= 0;
3075 tgt
->device_descr
= devicep
;
3076 splay_tree_node array
= tgt
->array
;
3077 splay_tree_key k
= &array
->key
;
3078 k
->host_start
= cur_node
.host_start
;
3079 k
->host_end
= cur_node
.host_end
;
3081 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3082 k
->refcount
= REFCOUNT_INFINITY
;
3083 k
->dynamic_refcount
= 0;
3086 array
->right
= NULL
;
3087 splay_tree_insert (&devicep
->mem_map
, array
);
3090 gomp_mutex_unlock (&devicep
->lock
);
3095 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3097 if (device_num
== gomp_get_num_devices ())
3103 struct gomp_device_descr
*devicep
= resolve_device (device_num
);
3104 if (devicep
== NULL
)
3107 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3110 gomp_mutex_lock (&devicep
->lock
);
3112 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3113 struct splay_tree_key_s cur_node
;
3116 cur_node
.host_start
= (uintptr_t) ptr
;
3117 cur_node
.host_end
= cur_node
.host_start
;
3118 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3120 && n
->host_start
== cur_node
.host_start
3121 && n
->refcount
== REFCOUNT_INFINITY
3122 && n
->tgt
->tgt_start
== 0
3123 && n
->tgt
->to_free
== NULL
3124 && n
->tgt
->refcount
== 1
3125 && n
->tgt
->list_count
== 0)
3127 splay_tree_remove (&devicep
->mem_map
, n
);
3128 gomp_unmap_tgt (n
->tgt
);
3132 gomp_mutex_unlock (&devicep
->lock
);
3137 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
3140 if (device_num
== gomp_get_num_devices ())
3141 return gomp_pause_host ();
3142 if (device_num
< 0 || device_num
>= num_devices_openmp
)
3144 /* Do nothing for target devices for now. */
3149 omp_pause_resource_all (omp_pause_resource_t kind
)
3152 if (gomp_pause_host ())
3154 /* Do nothing for target devices for now. */
3158 ialias (omp_pause_resource
)
3159 ialias (omp_pause_resource_all
)
3161 #ifdef PLUGIN_SUPPORT
3163 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
3165 The handles of the found functions are stored in the corresponding fields
3166 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3169 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
3170 const char *plugin_name
)
3172 const char *err
= NULL
, *last_missing
= NULL
;
3174 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
3176 #if OFFLOAD_DEFAULTED
3182 /* Check if all required functions are available in the plugin and store
3183 their handlers. None of the symbols can legitimately be NULL,
3184 so we don't need to check dlerror all the time. */
3186 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3188 /* Similar, but missing functions are not an error. Return false if
3189 failed, true otherwise. */
3190 #define DLSYM_OPT(f, n) \
3191 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3192 || (last_missing = #n, 0))
3195 if (device
->version_func () != GOMP_VERSION
)
3197 err
= "plugin version mismatch";
3204 DLSYM (get_num_devices
);
3205 DLSYM (init_device
);
3206 DLSYM (fini_device
);
3208 DLSYM (unload_image
);
3213 device
->capabilities
= device
->get_caps_func ();
3214 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3217 DLSYM_OPT (async_run
, async_run
);
3218 DLSYM_OPT (can_run
, can_run
);
3221 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3223 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
3224 || !DLSYM_OPT (openacc
.create_thread_data
,
3225 openacc_create_thread_data
)
3226 || !DLSYM_OPT (openacc
.destroy_thread_data
,
3227 openacc_destroy_thread_data
)
3228 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
3229 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
3230 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
3231 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
3232 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
3233 || !DLSYM_OPT (openacc
.async
.queue_callback
,
3234 openacc_async_queue_callback
)
3235 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
3236 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
3237 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
3238 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
3240 /* Require all the OpenACC handlers if we have
3241 GOMP_OFFLOAD_CAP_OPENACC_200. */
3242 err
= "plugin missing OpenACC handler function";
3247 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
3248 openacc_cuda_get_current_device
);
3249 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
3250 openacc_cuda_get_current_context
);
3251 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
3252 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
3253 if (cuda
&& cuda
!= 4)
3255 /* Make sure all the CUDA functions are there if any of them are. */
3256 err
= "plugin missing OpenACC CUDA handler function";
3268 gomp_error ("while loading %s: %s", plugin_name
, err
);
3270 gomp_error ("missing function was %s", last_missing
);
3272 dlclose (plugin_handle
);
3277 /* This function finalizes all initialized devices. */
3280 gomp_target_fini (void)
3283 for (i
= 0; i
< num_devices
; i
++)
3286 struct gomp_device_descr
*devicep
= &devices
[i
];
3287 gomp_mutex_lock (&devicep
->lock
);
3288 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3289 ret
= gomp_fini_device (devicep
);
3290 gomp_mutex_unlock (&devicep
->lock
);
3292 gomp_fatal ("device finalization failed");
3296 /* This function initializes the runtime for offloading.
3297 It parses the list of offload plugins, and tries to load these.
3298 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3299 will be set, and the array DEVICES initialized, containing descriptors for
3300 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3304 gomp_target_init (void)
3306 const char *prefix
="libgomp-plugin-";
3307 const char *suffix
= SONAME_SUFFIX (1);
3308 const char *cur
, *next
;
3310 int i
, new_num_devs
;
3311 int num_devs
= 0, num_devs_openmp
;
3312 struct gomp_device_descr
*devs
= NULL
;
3314 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
3317 cur
= OFFLOAD_PLUGINS
;
3321 struct gomp_device_descr current_device
;
3322 size_t prefix_len
, suffix_len
, cur_len
;
3324 next
= strchr (cur
, ',');
3326 prefix_len
= strlen (prefix
);
3327 cur_len
= next
? next
- cur
: strlen (cur
);
3328 suffix_len
= strlen (suffix
);
3330 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
3337 memcpy (plugin_name
, prefix
, prefix_len
);
3338 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
3339 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
3341 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
3343 new_num_devs
= current_device
.get_num_devices_func ();
3344 if (new_num_devs
>= 1)
3346 /* Augment DEVICES and NUM_DEVICES. */
3348 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
3349 * sizeof (struct gomp_device_descr
));
3357 current_device
.name
= current_device
.get_name_func ();
3358 /* current_device.capabilities has already been set. */
3359 current_device
.type
= current_device
.get_type_func ();
3360 current_device
.mem_map
.root
= NULL
;
3361 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
3362 for (i
= 0; i
< new_num_devs
; i
++)
3364 current_device
.target_id
= i
;
3365 devs
[num_devs
] = current_device
;
3366 gomp_mutex_init (&devs
[num_devs
].lock
);
3377 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3378 NUM_DEVICES_OPENMP. */
3379 struct gomp_device_descr
*devs_s
3380 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
3387 num_devs_openmp
= 0;
3388 for (i
= 0; i
< num_devs
; i
++)
3389 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3390 devs_s
[num_devs_openmp
++] = devs
[i
];
3391 int num_devs_after_openmp
= num_devs_openmp
;
3392 for (i
= 0; i
< num_devs
; i
++)
3393 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3394 devs_s
[num_devs_after_openmp
++] = devs
[i
];
3398 for (i
= 0; i
< num_devs
; i
++)
3400 /* The 'devices' array can be moved (by the realloc call) until we have
3401 found all the plugins, so registering with the OpenACC runtime (which
3402 takes a copy of the pointer argument) must be delayed until now. */
3403 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
3404 goacc_register (&devs
[i
]);
3407 num_devices
= num_devs
;
3408 num_devices_openmp
= num_devs_openmp
;
3410 if (atexit (gomp_target_fini
) != 0)
3411 gomp_fatal ("atexit failed");
3414 #else /* PLUGIN_SUPPORT */
3415 /* If dlfcn.h is unavailable we always fallback to host execution.
3416 GOMP_target* routines are just stubs for this case. */
3418 gomp_target_init (void)
3421 #endif /* PLUGIN_SUPPORT */