]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/target.c
offload-defaulted: Config option to silently ignore uninstalled offload compilers
[thirdparty/gcc.git] / libgomp / target.c
CommitLineData
99dee823 1/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
acf0174b
JJ
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
f1f3453e
TS
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
acf0174b
JJ
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
1df3f842 26/* This file contains the support of offloading. */
acf0174b
JJ
27
28#include "libgomp.h"
41dbbb37
TS
29#include "oacc-plugin.h"
30#include "oacc-int.h"
31#include "gomp-constants.h"
acf0174b
JJ
32#include <limits.h>
33#include <stdbool.h>
34#include <stdlib.h>
01c0b3b0
KT
35#ifdef HAVE_INTTYPES_H
36# include <inttypes.h> /* For PRIu64. */
37#endif
acf0174b 38#include <string.h>
41dbbb37 39#include <assert.h>
d9a6bd32 40#include <errno.h>
acf0174b 41
1df3f842
JJ
42#ifdef PLUGIN_SUPPORT
43#include <dlfcn.h>
b5f7a6ca 44#include "plugin-suffix.h"
1df3f842
JJ
45#endif
46
6c7e076b
JB
47#define FIELD_TGT_EMPTY (~(size_t) 0)
48
1df3f842
JJ
49static void gomp_target_init (void);
50
41dbbb37 51/* The whole initialization code for offloading plugins is only run one. */
1df3f842
JJ
52static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53
a51df54e
IV
54/* Mutex for offload image registration. */
55static gomp_mutex_t register_lock;
56
1df3f842
JJ
57/* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60struct offload_image_descr {
2a21ff19 61 unsigned version;
1df3f842 62 enum offload_target_type type;
ebe4a560 63 const void *host_table;
afb2d80b 64 const void *target_data;
1df3f842
JJ
65};
66
67/* Array of descriptors of offload images. */
68static struct offload_image_descr *offload_images;
69
70/* Total number of offload images. */
71static int num_offload_images;
72
73/* Array of descriptors for all available devices. */
74static struct gomp_device_descr *devices;
75
76/* Total number of available devices. */
77static int num_devices;
78
41dbbb37
TS
79/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80static int num_devices_openmp;
81
a51df54e
IV
82/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83
84static void *
85gomp_realloc_unlock (void *old, size_t size)
86{
87 void *ret = realloc (old, size);
88 if (ret == NULL)
89 {
90 gomp_mutex_unlock (&register_lock);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92 }
93 return ret;
94}
95
/* Run the offloading-plugin initialization (gomp_target_init) exactly once,
   even if multiple threads race to get here; pthread_once serializes them.
   The return value of pthread_once is deliberately ignored.  */

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
1df3f842 101
/* Return the number of OpenMP-capable (GOMP_OFFLOAD_CAP_OPENMP_400) offload
   devices, making sure the plugins have been initialized first.  */

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}
108
/* Translate DEVICE_ID into a device descriptor, initializing the device
   on first use.  Returns NULL for host fallback (invalid/absent device or
   a finalized device); calls gomp_fatal instead when OMP_TARGET_OFFLOAD
   is set to MANDATORY and fallback would otherwise occur.  */

static struct gomp_device_descr *
resolve_device (int device_id)
{
  /* GOMP_DEVICE_ICV means "use the default-device-var ICV".  */
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      /* Out-of-range id: host fallback, unless offload is mandatory.
	 GOMP_DEVICE_HOST_FALLBACK and the id equal to the device count
	 (the "host device" number) are legitimate host requests.  */
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  /* Lazily initialize the device under its lock; a finalized device
     (e.g. after shutdown) can no longer be used.  */
  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
146
147
/* Look up KEY in MEM_MAP.  For a zero-length key (host_start == host_end,
   e.g. zero-length array sections) the key is temporarily widened in each
   direction so that a mapping adjacent on either side is still found; KEY
   is restored before returning.  The final lookup retries the original
   zero-length key itself.  */

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  /* Try one byte past the end...  */
  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  /* ...then one byte before the start...  */
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  /* ...finally the exact (empty) range.  */
  return splay_tree_lookup (mem_map, key);
}
166
e01d41e5
JJ
167static inline splay_tree_key
168gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
169{
170 if (key->host_start != key->host_end)
171 return splay_tree_lookup (mem_map, key);
172
173 key->host_end++;
174 splay_tree_key n = splay_tree_lookup (mem_map, key);
175 key->host_end--;
176 return n;
177}
178
6ce13072
CLT
179static inline void
180gomp_device_copy (struct gomp_device_descr *devicep,
181 bool (*copy_func) (int, void *, const void *, size_t),
182 const char *dst, void *dstaddr,
183 const char *src, const void *srcaddr,
184 size_t size)
185{
186 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
187 {
188 gomp_mutex_unlock (&devicep->lock);
189 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
190 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
191 }
192}
193
1f4c5b9b
CLT
194static inline void
195goacc_device_copy_async (struct gomp_device_descr *devicep,
196 bool (*copy_func) (int, void *, const void *, size_t,
197 struct goacc_asyncqueue *),
198 const char *dst, void *dstaddr,
199 const char *src, const void *srcaddr,
200 size_t size, struct goacc_asyncqueue *aq)
201{
202 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
203 {
204 gomp_mutex_unlock (&devicep->lock);
205 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
206 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
207 }
208}
209
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
   host to device memory transfers.  */

/* One contiguous device-address range to be filled via the staging buffer.  */
struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.
     Both are offsets relative to the target memory descriptor's
     tgt_start device address.  */
  size_t start, end;
};

/* State for batching many small host-to-device copies into fewer, larger
   ones: data is first memcpy'd into BUF and later transferred in one go
   per chunk.  */
struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* The target memory descriptor whose tgt_start the chunk offsets are
     relative to.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
249
/* Add region with device tgt_start relative offset and length to CBUF.
   Regions must be added in increasing offset order; an out-of-order
   addition disables coalescing for the whole buffer (chunk_cnt = -1).
   A region within MAX_COALESCE_BUF_GAP of the previous chunk is merged
   into it; otherwise a new chunk is started.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  /* Oversized or empty regions are never coalesced.  */
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      /* chunk_cnt < 0 means coalescing was already abandoned.  */
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Overlap / out-of-order region: give up on coalescing.  */
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  /* Close enough to the previous chunk: extend it.  */
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  /* Start a fresh chunk covering just this region.  */
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
283
284/* Return true for mapping kinds which need to copy data from the
285 host to device for regions that weren't previously mapped. */
286
287static inline bool
288gomp_to_device_kind_p (int kind)
289{
290 switch (kind)
291 {
292 case GOMP_MAP_ALLOC:
293 case GOMP_MAP_FROM:
294 case GOMP_MAP_FORCE_ALLOC:
cc3f11f5 295 case GOMP_MAP_FORCE_FROM:
7324369a
JJ
296 case GOMP_MAP_ALWAYS_FROM:
297 return false;
298 default:
299 return true;
300 }
301}
302
/* Copy SZ bytes from host address H to device address D on DEVICEP.
   If CBUF is non-NULL and [D, D+SZ) falls entirely inside one of its
   chunks, the data is merely staged into the coalesce buffer (the real
   transfer happens later in bulk).  Otherwise the copy is performed
   immediately, asynchronously via AQ when non-NULL.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Device offset relative to the descriptor's base address; the
	 chunks array stores offsets in the same coordinate space.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF (chunks are
	     sorted and non-overlapping by construction).  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* A region must never straddle a chunk boundary.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  /* Stage into BUF; chunks[0].start is the buffer's
		     base offset.  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  /* No coalescing possible: copy right away.  AQ is only set on the
     OpenACC async path, hence the "unlikely" hint.  */
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
340
1f4c5b9b 341attribute_hidden void
6ce13072 342gomp_copy_dev2host (struct gomp_device_descr *devicep,
1f4c5b9b 343 struct goacc_asyncqueue *aq,
6ce13072
CLT
344 void *h, const void *d, size_t sz)
345{
1f4c5b9b
CLT
346 if (__builtin_expect (aq != NULL, 0))
347 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
348 "host", h, "dev", d, sz, aq);
349 else
350 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
6ce13072
CLT
351}
352
353static void
354gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
355{
356 if (!devicep->free_func (devicep->target_id, devptr))
357 {
358 gomp_mutex_unlock (&devicep->lock);
359 gomp_fatal ("error in freeing device memory block at %p", devptr);
360 }
361}
362
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.
   Fills in TGT_VAR from the existing mapping OLDN, re-copies the data for
   "always to" kinds, and bumps OLDN's reference count.  Fatal if NEWN is
   not fully contained in OLDN or a FORCE kind requests a fresh mapping.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag,
			struct gomp_coalesce_buf *cbuf)
{
  /* ATTACH is handled elsewhere (gomp_attach_pointer), never here.  */
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  /* Offset of the new range inside the old mapping.  */
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  /* A forced (re-)mapping or a range exceeding the old mapping cannot be
     satisfied by the existing entry.  */
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  /* "always to" (explicit or via ALWAYS_TO_FLAG) re-copies host data into
     the already-mapped device range.  */
  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  /* REFCOUNT_INFINITY marks permanently-mapped entries; never counted.  */
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
404
/* Fetch the mapping kind at index IDX from the KINDS array: an array of
   unsigned shorts when SHORT_MAPKIND is true, of unsigned chars
   otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
411
/* Initialize the device pointer cell at TGT->tgt_start + TARGET_OFFSET
   from the host pointer value HOST_PTR.  BIAS adjusts for array-section
   bases that do not start at the pointer target.  A NULL host pointer is
   translated to a NULL device pointer; otherwise the pointed-to object
   must already be mapped.  Caller holds the device lock.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* NULL host pointer maps to NULL on the device as well.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Offset of the biased address within the found mapping.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
450
/* Map structure element I (host address HOSTADDRS[I], length SIZES[I]) by
   reusing the existing mapping N of the enclosing structure; FIRST is the
   index of the structure's first element.  The element must lie within
   N's target block at the matching offset.  Zero-sized elements are also
   probed one byte before/after the range to catch adjacent mappings.
   Fatal if no compatible mapping is found.  Caller holds the device
   lock.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* GOMP_MAP_STRUCT is only generated with 16-bit map kinds.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  /* Accept N2 only if it belongs to the same target block as N and sits
     at the same host/device offset delta.  */
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-length element: look for a mapping ending right at its
	 address (probe one byte before)...  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, cbuf);
	      return;
	    }
	}
      /* ...or one starting right at it (probe one byte after).  */
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
512
/* OpenACC "attach": make the device copy of the pointer stored at host
   address ATTACH_TO (inside the mapped structure N) point at the device
   copy of its target.  BIAS adjusts the target lookup address.  A
   per-pointer attach counter is kept in N->aux; the device pointer is
   only written on the 0 -> 1 transition.  Caller holds the device lock;
   it is released before any gomp_fatal.  */

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  /* Number of pointer-sized slots covering the mapped region.  */
  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  /* Lazily allocate the aux block and the per-slot attach counters.  */
  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      /* First attach for this slot: compute the device address of the
	 pointer cell and the device address its target maps to.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("pointer target not mapped for attach");
	}

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      /* Write the translated pointer into the device-side cell.  */
      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}
587
/* OpenACC "detach": undo gomp_attach_pointer for the pointer at host
   address DETACH_FROM within the mapped structure N.  Decrements the
   slot's attach counter (FINALIZE forces it straight to zero) and, on the
   1 -> 0 transition, restores the device-side pointer cell to the raw
   host pointer value.  Caller holds the device lock; it is released
   before any gomp_fatal.  */

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  /* "finalize" semantics: drop all outstanding attachments at once.  */
  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      /* Last detach: write the host pointer value back into the
	 device-side pointer cell.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}
640
/* Return the device address corresponding to list entry I of TGT.
   A non-NULL key means a real mapping: base of its target block plus
   offsets.  Otherwise the offset field is a sentinel: OFFSET_INLINED
   passes the host value through unchanged, OFFSET_POINTER yields NULL,
   and OFFSET_STRUCT derives the address from the following entry (the
   struct's first mapped field) plus the host-address delta.  Any other
   value is a plain offset into TGT's own block.  */

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      /* The host value itself is the "device" value (firstprivate ints,
	 use_device_ptr results, etc.).  */
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      /* Entry I+1 is the struct's first mapped field; translate the
	 struct base by the same host-to-device displacement.  */
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
668
1f4c5b9b
CLT
669static inline __attribute__((always_inline)) struct target_mem_desc *
670gomp_map_vars_internal (struct gomp_device_descr *devicep,
671 struct goacc_asyncqueue *aq, size_t mapnum,
672 void **hostaddrs, void **devaddrs, size_t *sizes,
673 void *kinds, bool short_mapkind,
674 enum gomp_map_vars_kind pragma_kind)
1df3f842
JJ
675{
676 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
d9a6bd32 677 bool has_firstprivate = false;
972da557 678 bool has_always_ptrset = false;
d9a6bd32
JJ
679 const int rshift = short_mapkind ? 8 : 3;
680 const int typemask = short_mapkind ? 0xff : 0x7;
a51df54e 681 struct splay_tree_s *mem_map = &devicep->mem_map;
1df3f842
JJ
682 struct splay_tree_key_s cur_node;
683 struct target_mem_desc *tgt
684 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
685 tgt->list_count = mapnum;
9e628024 686 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
1df3f842 687 tgt->device_descr = devicep;
378da98f 688 tgt->prev = NULL;
7324369a 689 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
1df3f842
JJ
690
691 if (mapnum == 0)
3837c6d6
IV
692 {
693 tgt->tgt_start = 0;
694 tgt->tgt_end = 0;
695 return tgt;
696 }
1df3f842
JJ
697
698 tgt_align = sizeof (void *);
699 tgt_size = 0;
7324369a
JJ
700 cbuf.chunks = NULL;
701 cbuf.chunk_cnt = -1;
702 cbuf.use_cnt = 0;
703 cbuf.buf = NULL;
704 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
705 {
a44c1790
TS
706 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
707 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
7324369a
JJ
708 cbuf.chunk_cnt = 0;
709 }
d9a6bd32 710 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1df3f842
JJ
711 {
712 size_t align = 4 * sizeof (void *);
713 tgt_align = align;
714 tgt_size = mapnum * sizeof (void *);
7324369a
JJ
715 cbuf.chunk_cnt = 1;
716 cbuf.use_cnt = 1 + (mapnum > 1);
a44c1790
TS
717 cbuf.chunks[0].start = 0;
718 cbuf.chunks[0].end = tgt_size;
1df3f842
JJ
719 }
720
a51df54e 721 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
722 if (devicep->state == GOMP_DEVICE_FINALIZED)
723 {
724 gomp_mutex_unlock (&devicep->lock);
725 free (tgt);
726 return NULL;
727 }
41dbbb37 728
1df3f842
JJ
729 for (i = 0; i < mapnum; i++)
730 {
d9a6bd32
JJ
731 int kind = get_kind (short_mapkind, kinds, i);
732 if (hostaddrs[i] == NULL
733 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1df3f842 734 {
d9a6bd32 735 tgt->list[i].key = NULL;
6c7e076b 736 tgt->list[i].offset = OFFSET_INLINED;
d9a6bd32
JJ
737 continue;
738 }
d5c23c6c
TB
739 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
740 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
d9a6bd32 741 {
8860d270
JJ
742 tgt->list[i].key = NULL;
743 if (!not_found_cnt)
d9a6bd32 744 {
7d48e14f
JJ
745 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
746 on a separate construct prior to using use_device_{addr,ptr}.
747 In OpenMP 5.0, map directives need to be ordered by the
748 middle-end before the use_device_* clauses. If
749 !not_found_cnt, all mappings requested (if any) are already
750 mapped, so use_device_{addr,ptr} can be resolved right away.
751 Otherwise, if not_found_cnt, gomp_map_lookup might fail
752 now but would succeed after performing the mappings in the
753 following loop. We can't defer this always to the second
754 loop, because it is not even invoked when !not_found_cnt
755 after the first loop. */
756 cur_node.host_start = (uintptr_t) hostaddrs[i];
757 cur_node.host_end = cur_node.host_start;
758 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
b3b75e66
TS
759 if (n != NULL)
760 {
761 cur_node.host_start -= n->host_start;
762 hostaddrs[i]
763 = (void *) (n->tgt->tgt_start + n->tgt_offset
764 + cur_node.host_start);
765 }
766 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
7d48e14f
JJ
767 {
768 gomp_mutex_unlock (&devicep->lock);
769 gomp_fatal ("use_device_ptr pointer wasn't mapped");
770 }
b3b75e66
TS
771 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
772 /* If not present, continue using the host address. */
773 ;
774 else
775 __builtin_unreachable ();
776 tgt->list[i].offset = OFFSET_INLINED;
d9a6bd32 777 }
8860d270
JJ
778 else
779 tgt->list[i].offset = 0;
d9a6bd32
JJ
780 continue;
781 }
782 else if ((kind & typemask) == GOMP_MAP_STRUCT)
783 {
784 size_t first = i + 1;
785 size_t last = i + sizes[i];
786 cur_node.host_start = (uintptr_t) hostaddrs[i];
787 cur_node.host_end = (uintptr_t) hostaddrs[last]
788 + sizes[last];
789 tgt->list[i].key = NULL;
6c7e076b 790 tgt->list[i].offset = OFFSET_STRUCT;
d9a6bd32
JJ
791 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
792 if (n == NULL)
793 {
794 size_t align = (size_t) 1 << (kind >> rshift);
795 if (tgt_align < align)
796 tgt_align = align;
7324369a 797 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
d9a6bd32 798 tgt_size = (tgt_size + align - 1) & ~(align - 1);
7324369a 799 tgt_size += cur_node.host_end - cur_node.host_start;
d9a6bd32
JJ
800 not_found_cnt += last - i;
801 for (i = first; i <= last; i++)
7324369a
JJ
802 {
803 tgt->list[i].key = NULL;
804 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
805 & typemask))
806 gomp_coalesce_buf_add (&cbuf,
807 tgt_size - cur_node.host_end
808 + (uintptr_t) hostaddrs[i],
809 sizes[i]);
810 }
d9a6bd32
JJ
811 i--;
812 continue;
813 }
814 for (i = first; i <= last; i++)
1f4c5b9b 815 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
7324369a 816 sizes, kinds, NULL);
d9a6bd32 817 i--;
1df3f842
JJ
818 continue;
819 }
e01d41e5
JJ
820 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
821 {
822 tgt->list[i].key = NULL;
6c7e076b 823 tgt->list[i].offset = OFFSET_POINTER;
e01d41e5
JJ
824 has_firstprivate = true;
825 continue;
826 }
8e7e71ff
JB
827 else if ((kind & typemask) == GOMP_MAP_ATTACH)
828 {
829 tgt->list[i].key = NULL;
830 has_firstprivate = true;
831 continue;
832 }
1df3f842 833 cur_node.host_start = (uintptr_t) hostaddrs[i];
aff43ac0 834 if (!GOMP_MAP_POINTER_P (kind & typemask))
1df3f842
JJ
835 cur_node.host_end = cur_node.host_start + sizes[i];
836 else
837 cur_node.host_end = cur_node.host_start + sizeof (void *);
d9a6bd32
JJ
838 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
839 {
840 tgt->list[i].key = NULL;
841
842 size_t align = (size_t) 1 << (kind >> rshift);
843 if (tgt_align < align)
844 tgt_align = align;
845 tgt_size = (tgt_size + align - 1) & ~(align - 1);
7324369a
JJ
846 gomp_coalesce_buf_add (&cbuf, tgt_size,
847 cur_node.host_end - cur_node.host_start);
d9a6bd32
JJ
848 tgt_size += cur_node.host_end - cur_node.host_start;
849 has_firstprivate = true;
850 continue;
851 }
852 splay_tree_key n;
853 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1df3f842 854 {
e01d41e5 855 n = gomp_map_0len_lookup (mem_map, &cur_node);
d9a6bd32
JJ
856 if (!n)
857 {
858 tgt->list[i].key = NULL;
6c7e076b 859 tgt->list[i].offset = OFFSET_POINTER;
d9a6bd32
JJ
860 continue;
861 }
1df3f842 862 }
d9a6bd32
JJ
863 else
864 n = splay_tree_lookup (mem_map, &cur_node);
4a38b02b 865 if (n && n->refcount != REFCOUNT_LINK)
972da557
TB
866 {
867 int always_to_cnt = 0;
868 if ((kind & typemask) == GOMP_MAP_TO_PSET)
869 {
1b9bdd52 870 bool has_nullptr = false;
972da557
TB
871 size_t j;
872 for (j = 0; j < n->tgt->list_count; j++)
873 if (n->tgt->list[j].key == n)
874 {
875 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
876 break;
877 }
878 if (n->tgt->list_count == 0)
879 {
880 /* 'declare target'; assume has_nullptr; it could also be
881 statically assigned pointer, but that it should be to
882 the equivalent variable on the host. */
883 assert (n->refcount == REFCOUNT_INFINITY);
884 has_nullptr = true;
885 }
886 else
887 assert (j < n->tgt->list_count);
888 /* Re-map the data if there is an 'always' modifier or if it a
889 null pointer was there and non a nonnull has been found; that
890 permits transparent re-mapping for Fortran array descriptors
891 which were previously mapped unallocated. */
892 for (j = i + 1; j < mapnum; j++)
893 {
894 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
895 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
896 && (!has_nullptr
897 || !GOMP_MAP_POINTER_P (ptr_kind)
898 || *(void **) hostaddrs[j] == NULL))
899 break;
900 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
901 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
902 > cur_node.host_end))
903 break;
904 else
905 {
906 has_always_ptrset = true;
907 ++always_to_cnt;
908 }
909 }
910 }
911 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
912 kind & typemask, always_to_cnt > 0, NULL);
913 i += always_to_cnt;
914 }
1df3f842
JJ
915 else
916 {
d9a6bd32 917 tgt->list[i].key = NULL;
41dbbb37 918
a6163563
JB
919 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
920 {
921 /* Not present, hence, skip entry - including its MAP_POINTER,
922 when existing. */
5bcd470b 923 tgt->list[i].offset = OFFSET_POINTER;
a6163563
JB
924 if (i + 1 < mapnum
925 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
926 == GOMP_MAP_POINTER))
927 {
928 ++i;
929 tgt->list[i].key = NULL;
930 tgt->list[i].offset = 0;
931 }
932 continue;
933 }
41dbbb37 934 size_t align = (size_t) 1 << (kind >> rshift);
1df3f842
JJ
935 not_found_cnt++;
936 if (tgt_align < align)
937 tgt_align = align;
938 tgt_size = (tgt_size + align - 1) & ~(align - 1);
7324369a
JJ
939 if (gomp_to_device_kind_p (kind & typemask))
940 gomp_coalesce_buf_add (&cbuf, tgt_size,
941 cur_node.host_end - cur_node.host_start);
1df3f842 942 tgt_size += cur_node.host_end - cur_node.host_start;
41dbbb37 943 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1df3f842
JJ
944 {
945 size_t j;
972da557 946 int kind;
1df3f842 947 for (j = i + 1; j < mapnum; j++)
972da557
TB
948 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
949 kinds, j)) & typemask))
950 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1df3f842
JJ
951 break;
952 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
953 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
954 > cur_node.host_end))
955 break;
956 else
957 {
d9a6bd32 958 tgt->list[j].key = NULL;
1df3f842
JJ
959 i++;
960 }
961 }
962 }
963 }
964
41dbbb37
TS
965 if (devaddrs)
966 {
967 if (mapnum != 1)
a51df54e
IV
968 {
969 gomp_mutex_unlock (&devicep->lock);
970 gomp_fatal ("unexpected aggregation");
971 }
41dbbb37
TS
972 tgt->to_free = devaddrs[0];
973 tgt->tgt_start = (uintptr_t) tgt->to_free;
974 tgt->tgt_end = tgt->tgt_start + sizes[0];
975 }
d9a6bd32 976 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1df3f842
JJ
977 {
978 /* Allocate tgt_align aligned tgt_size block of memory. */
979 /* FIXME: Perhaps change interface to allocate properly aligned
980 memory. */
981 tgt->to_free = devicep->alloc_func (devicep->target_id,
982 tgt_size + tgt_align - 1);
6ce13072
CLT
983 if (!tgt->to_free)
984 {
985 gomp_mutex_unlock (&devicep->lock);
986 gomp_fatal ("device memory allocation fail");
987 }
988
1df3f842
JJ
989 tgt->tgt_start = (uintptr_t) tgt->to_free;
990 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
991 tgt->tgt_end = tgt->tgt_start + tgt_size;
7324369a
JJ
992
993 if (cbuf.use_cnt == 1)
994 cbuf.chunk_cnt--;
995 if (cbuf.chunk_cnt > 0)
996 {
997 cbuf.buf
a44c1790 998 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
7324369a
JJ
999 if (cbuf.buf)
1000 {
1001 cbuf.tgt = tgt;
1002 cbufp = &cbuf;
1003 }
1004 }
1df3f842
JJ
1005 }
1006 else
1007 {
1008 tgt->to_free = NULL;
1009 tgt->tgt_start = 0;
1010 tgt->tgt_end = 0;
1011 }
1012
1013 tgt_size = 0;
d9a6bd32 1014 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1df3f842
JJ
1015 tgt_size = mapnum * sizeof (void *);
1016
1017 tgt->array = NULL;
972da557 1018 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1df3f842 1019 {
d9a6bd32
JJ
1020 if (not_found_cnt)
1021 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1df3f842 1022 splay_tree_node array = tgt->array;
5cafae2c 1023 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
d9a6bd32 1024 uintptr_t field_tgt_base = 0;
1df3f842
JJ
1025
1026 for (i = 0; i < mapnum; i++)
972da557
TB
1027 if (has_always_ptrset
1028 && tgt->list[i].key
1029 && (get_kind (short_mapkind, kinds, i) & typemask)
1030 == GOMP_MAP_TO_PSET)
1031 {
1032 splay_tree_key k = tgt->list[i].key;
1b9bdd52 1033 bool has_nullptr = false;
972da557
TB
1034 size_t j;
1035 for (j = 0; j < k->tgt->list_count; j++)
1036 if (k->tgt->list[j].key == k)
1037 {
1038 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1039 break;
1040 }
1041 if (k->tgt->list_count == 0)
1042 has_nullptr = true;
1043 else
1044 assert (j < k->tgt->list_count);
1045
1046 tgt->list[i].has_null_ptr_assoc = false;
1047 for (j = i + 1; j < mapnum; j++)
1048 {
1049 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1050 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1051 && (!has_nullptr
1052 || !GOMP_MAP_POINTER_P (ptr_kind)
1053 || *(void **) hostaddrs[j] == NULL))
1054 break;
1055 else if ((uintptr_t) hostaddrs[j] < k->host_start
1056 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1057 > k->host_end))
1058 break;
1059 else
1060 {
1061 if (*(void **) hostaddrs[j] == NULL)
1062 tgt->list[i].has_null_ptr_assoc = true;
1063 tgt->list[j].key = k;
1064 tgt->list[j].copy_from = false;
1065 tgt->list[j].always_copy_from = false;
1066 tgt->list[j].is_attach = false;
1067 if (k->refcount != REFCOUNT_INFINITY)
1068 k->refcount++;
1069 gomp_map_pointer (k->tgt, aq,
1070 (uintptr_t) *(void **) hostaddrs[j],
1071 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1072 - k->host_start),
1073 sizes[j], cbufp);
1074 }
1075 }
1076 i = j - 1;
1077 }
1078 else if (tgt->list[i].key == NULL)
1df3f842 1079 {
d9a6bd32 1080 int kind = get_kind (short_mapkind, kinds, i);
1df3f842
JJ
1081 if (hostaddrs[i] == NULL)
1082 continue;
d9a6bd32
JJ
1083 switch (kind & typemask)
1084 {
1085 size_t align, len, first, last;
1086 splay_tree_key n;
1087 case GOMP_MAP_FIRSTPRIVATE:
1088 align = (size_t) 1 << (kind >> rshift);
1089 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1090 tgt->list[i].offset = tgt_size;
1091 len = sizes[i];
1f4c5b9b 1092 gomp_copy_host2dev (devicep, aq,
6ce13072 1093 (void *) (tgt->tgt_start + tgt_size),
7324369a 1094 (void *) hostaddrs[i], len, cbufp);
d9a6bd32
JJ
1095 tgt_size += len;
1096 continue;
1097 case GOMP_MAP_FIRSTPRIVATE_INT:
d9a6bd32
JJ
1098 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1099 continue;
d5c23c6c 1100 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
b3b75e66
TS
1101 /* The OpenACC 'host_data' construct only allows 'use_device'
1102 "mapping" clauses, so in the first loop, 'not_found_cnt'
1103 must always have been zero, so all OpenACC 'use_device'
1104 clauses have already been handled. (We can only easily test
1105 'use_device' with 'if_present' clause here.) */
1106 assert (tgt->list[i].offset == OFFSET_INLINED);
1107 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1108 code conceptually simple, similar to the first loop. */
1109 case GOMP_MAP_USE_DEVICE_PTR:
8860d270
JJ
1110 if (tgt->list[i].offset == 0)
1111 {
1112 cur_node.host_start = (uintptr_t) hostaddrs[i];
1113 cur_node.host_end = cur_node.host_start;
1114 n = gomp_map_lookup (mem_map, &cur_node);
b3b75e66
TS
1115 if (n != NULL)
1116 {
1117 cur_node.host_start -= n->host_start;
1118 hostaddrs[i]
1119 = (void *) (n->tgt->tgt_start + n->tgt_offset
1120 + cur_node.host_start);
1121 }
1122 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
8860d270
JJ
1123 {
1124 gomp_mutex_unlock (&devicep->lock);
1125 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1126 }
b3b75e66
TS
1127 else if ((kind & typemask)
1128 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1129 /* If not present, continue using the host address. */
1130 ;
1131 else
1132 __builtin_unreachable ();
1133 tgt->list[i].offset = OFFSET_INLINED;
8860d270
JJ
1134 }
1135 continue;
d9a6bd32
JJ
1136 case GOMP_MAP_STRUCT:
1137 first = i + 1;
1138 last = i + sizes[i];
1139 cur_node.host_start = (uintptr_t) hostaddrs[i];
1140 cur_node.host_end = (uintptr_t) hostaddrs[last]
1141 + sizes[last];
1142 if (tgt->list[first].key != NULL)
1143 continue;
1144 n = splay_tree_lookup (mem_map, &cur_node);
1145 if (n == NULL)
1146 {
1147 size_t align = (size_t) 1 << (kind >> rshift);
1148 tgt_size -= (uintptr_t) hostaddrs[first]
1149 - (uintptr_t) hostaddrs[i];
1150 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1151 tgt_size += (uintptr_t) hostaddrs[first]
1152 - (uintptr_t) hostaddrs[i];
1153 field_tgt_base = (uintptr_t) hostaddrs[first];
1154 field_tgt_offset = tgt_size;
1155 field_tgt_clear = last;
1156 tgt_size += cur_node.host_end
1157 - (uintptr_t) hostaddrs[first];
1158 continue;
1159 }
1160 for (i = first; i <= last; i++)
1f4c5b9b 1161 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
7324369a 1162 sizes, kinds, cbufp);
d9a6bd32
JJ
1163 i--;
1164 continue;
e01d41e5
JJ
1165 case GOMP_MAP_ALWAYS_POINTER:
1166 cur_node.host_start = (uintptr_t) hostaddrs[i];
1167 cur_node.host_end = cur_node.host_start + sizeof (void *);
1168 n = splay_tree_lookup (mem_map, &cur_node);
1169 if (n == NULL
1170 || n->host_start > cur_node.host_start
1171 || n->host_end < cur_node.host_end)
1172 {
1173 gomp_mutex_unlock (&devicep->lock);
1174 gomp_fatal ("always pointer not mapped");
1175 }
1176 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1177 != GOMP_MAP_ALWAYS_POINTER)
1178 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1179 if (cur_node.tgt_offset)
1180 cur_node.tgt_offset -= sizes[i];
1f4c5b9b 1181 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1182 (void *) (n->tgt->tgt_start
1183 + n->tgt_offset
1184 + cur_node.host_start
1185 - n->host_start),
1186 (void *) &cur_node.tgt_offset,
7324369a 1187 sizeof (void *), cbufp);
e01d41e5
JJ
1188 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1189 + cur_node.host_start - n->host_start;
1190 continue;
a6163563
JB
1191 case GOMP_MAP_IF_PRESENT:
1192 /* Not present - otherwise handled above. Skip over its
1193 MAP_POINTER as well. */
1194 if (i + 1 < mapnum
1195 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1196 == GOMP_MAP_POINTER))
1197 ++i;
1198 continue;
8e7e71ff
JB
1199 case GOMP_MAP_ATTACH:
1200 {
1201 cur_node.host_start = (uintptr_t) hostaddrs[i];
1202 cur_node.host_end = cur_node.host_start + sizeof (void *);
1203 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1204 if (n != NULL)
1205 {
1206 tgt->list[i].key = n;
1207 tgt->list[i].offset = cur_node.host_start - n->host_start;
1208 tgt->list[i].length = n->host_end - n->host_start;
1209 tgt->list[i].copy_from = false;
1210 tgt->list[i].always_copy_from = false;
bc4ed079
JB
1211 tgt->list[i].is_attach = true;
1212 /* OpenACC 'attach'/'detach' doesn't affect
1213 structured/dynamic reference counts ('n->refcount',
1214 'n->dynamic_refcount'). */
9e628024
CLT
1215
1216 gomp_attach_pointer (devicep, aq, mem_map, n,
1217 (uintptr_t) hostaddrs[i], sizes[i],
1218 cbufp);
8e7e71ff 1219 }
9e628024 1220 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
8e7e71ff
JB
1221 {
1222 gomp_mutex_unlock (&devicep->lock);
1223 gomp_fatal ("outer struct not mapped for attach");
1224 }
8e7e71ff
JB
1225 continue;
1226 }
d9a6bd32
JJ
1227 default:
1228 break;
1229 }
1df3f842
JJ
1230 splay_tree_key k = &array->key;
1231 k->host_start = (uintptr_t) hostaddrs[i];
41dbbb37 1232 if (!GOMP_MAP_POINTER_P (kind & typemask))
1df3f842
JJ
1233 k->host_end = k->host_start + sizes[i];
1234 else
1235 k->host_end = k->host_start + sizeof (void *);
a51df54e 1236 splay_tree_key n = splay_tree_lookup (mem_map, k);
4a38b02b 1237 if (n && n->refcount != REFCOUNT_LINK)
1f4c5b9b 1238 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
972da557 1239 kind & typemask, false, cbufp);
1df3f842
JJ
1240 else
1241 {
2a656a93 1242 k->aux = NULL;
4a38b02b
IV
1243 if (n && n->refcount == REFCOUNT_LINK)
1244 {
1245 /* Replace target address of the pointer with target address
1246 of mapped object in the splay tree. */
1247 splay_tree_remove (mem_map, n);
2a656a93
JB
1248 k->aux
1249 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1250 k->aux->link_key = n;
4a38b02b 1251 }
41dbbb37 1252 size_t align = (size_t) 1 << (kind >> rshift);
d9a6bd32 1253 tgt->list[i].key = k;
1df3f842 1254 k->tgt = tgt;
6c7e076b 1255 if (field_tgt_clear != FIELD_TGT_EMPTY)
d9a6bd32
JJ
1256 {
1257 k->tgt_offset = k->host_start - field_tgt_base
1258 + field_tgt_offset;
1259 if (i == field_tgt_clear)
6c7e076b 1260 field_tgt_clear = FIELD_TGT_EMPTY;
d9a6bd32
JJ
1261 }
1262 else
1263 {
1264 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1265 k->tgt_offset = tgt_size;
1266 tgt_size += k->host_end - k->host_start;
1267 }
1268 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1269 tgt->list[i].always_copy_from
1270 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
bc4ed079 1271 tgt->list[i].is_attach = false;
d9a6bd32
JJ
1272 tgt->list[i].offset = 0;
1273 tgt->list[i].length = k->host_end - k->host_start;
1df3f842 1274 k->refcount = 1;
6f5b4b64 1275 k->dynamic_refcount = 0;
1df3f842
JJ
1276 tgt->refcount++;
1277 array->left = NULL;
1278 array->right = NULL;
a51df54e 1279 splay_tree_insert (mem_map, array);
41dbbb37 1280 switch (kind & typemask)
1df3f842 1281 {
41dbbb37
TS
1282 case GOMP_MAP_ALLOC:
1283 case GOMP_MAP_FROM:
1284 case GOMP_MAP_FORCE_ALLOC:
1285 case GOMP_MAP_FORCE_FROM:
d9a6bd32 1286 case GOMP_MAP_ALWAYS_FROM:
1df3f842 1287 break;
41dbbb37
TS
1288 case GOMP_MAP_TO:
1289 case GOMP_MAP_TOFROM:
1290 case GOMP_MAP_FORCE_TO:
1291 case GOMP_MAP_FORCE_TOFROM:
d9a6bd32
JJ
1292 case GOMP_MAP_ALWAYS_TO:
1293 case GOMP_MAP_ALWAYS_TOFROM:
1f4c5b9b 1294 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1295 (void *) (tgt->tgt_start
1296 + k->tgt_offset),
1297 (void *) k->host_start,
7324369a 1298 k->host_end - k->host_start, cbufp);
1df3f842 1299 break;
41dbbb37 1300 case GOMP_MAP_POINTER:
1f4c5b9b
CLT
1301 gomp_map_pointer (tgt, aq,
1302 (uintptr_t) *(void **) k->host_start,
7324369a 1303 k->tgt_offset, sizes[i], cbufp);
1df3f842 1304 break;
41dbbb37 1305 case GOMP_MAP_TO_PSET:
1f4c5b9b 1306 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1307 (void *) (tgt->tgt_start
1308 + k->tgt_offset),
1309 (void *) k->host_start,
7324369a 1310 k->host_end - k->host_start, cbufp);
972da557 1311 tgt->list[i].has_null_ptr_assoc = false;
41dbbb37 1312
1df3f842 1313 for (j = i + 1; j < mapnum; j++)
972da557
TB
1314 {
1315 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1316 & typemask);
1317 if (!GOMP_MAP_POINTER_P (ptr_kind)
1318 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1319 break;
1320 else if ((uintptr_t) hostaddrs[j] < k->host_start
1321 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1322 > k->host_end))
1323 break;
1324 else
1325 {
1326 tgt->list[j].key = k;
1327 tgt->list[j].copy_from = false;
1328 tgt->list[j].always_copy_from = false;
1329 tgt->list[j].is_attach = false;
1330 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1331 if (k->refcount != REFCOUNT_INFINITY)
1332 k->refcount++;
1333 gomp_map_pointer (tgt, aq,
1334 (uintptr_t) *(void **) hostaddrs[j],
1335 k->tgt_offset
1336 + ((uintptr_t) hostaddrs[j]
1337 - k->host_start),
1338 sizes[j], cbufp);
1339 }
1df3f842 1340 }
972da557 1341 i = j - 1;
41dbbb37
TS
1342 break;
1343 case GOMP_MAP_FORCE_PRESENT:
1344 {
1345 /* We already looked up the memory region above and it
1346 was missing. */
1347 size_t size = k->host_end - k->host_start;
a51df54e 1348 gomp_mutex_unlock (&devicep->lock);
01c0b3b0
KT
1349#ifdef HAVE_INTTYPES_H
1350 gomp_fatal ("present clause: !acc_is_present (%p, "
1351 "%"PRIu64" (0x%"PRIx64"))",
1352 (void *) k->host_start,
1353 (uint64_t) size, (uint64_t) size);
1354#else
41dbbb37 1355 gomp_fatal ("present clause: !acc_is_present (%p, "
01c0b3b0
KT
1356 "%lu (0x%lx))", (void *) k->host_start,
1357 (unsigned long) size, (unsigned long) size);
1358#endif
41dbbb37
TS
1359 }
1360 break;
1361 case GOMP_MAP_FORCE_DEVICEPTR:
1362 assert (k->host_end - k->host_start == sizeof (void *));
1f4c5b9b 1363 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1364 (void *) (tgt->tgt_start
1365 + k->tgt_offset),
1366 (void *) k->host_start,
7324369a 1367 sizeof (void *), cbufp);
41dbbb37
TS
1368 break;
1369 default:
a51df54e 1370 gomp_mutex_unlock (&devicep->lock);
41dbbb37
TS
1371 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1372 kind);
1df3f842 1373 }
4a38b02b 1374
2a656a93 1375 if (k->aux && k->aux->link_key)
4a38b02b
IV
1376 {
1377 /* Set link pointer on target to the device address of the
1378 mapped object. */
1379 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
84ca08d2
TS
1380 /* We intentionally do not use coalescing here, as it's not
1381 data allocated by the current call to this function. */
1f4c5b9b 1382 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
84ca08d2 1383 &tgt_addr, sizeof (void *), NULL);
4a38b02b 1384 }
1df3f842
JJ
1385 array++;
1386 }
1387 }
1388 }
41dbbb37 1389
d9a6bd32 1390 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1df3f842
JJ
1391 {
1392 for (i = 0; i < mapnum; i++)
1393 {
e01d41e5 1394 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1f4c5b9b 1395 gomp_copy_host2dev (devicep, aq,
6ce13072 1396 (void *) (tgt->tgt_start + i * sizeof (void *)),
7324369a
JJ
1397 (void *) &cur_node.tgt_offset, sizeof (void *),
1398 cbufp);
1df3f842
JJ
1399 }
1400 }
1401
7324369a
JJ
1402 if (cbufp)
1403 {
1404 long c = 0;
1405 for (c = 0; c < cbuf.chunk_cnt; ++c)
1f4c5b9b 1406 gomp_copy_host2dev (devicep, aq,
a44c1790
TS
1407 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1408 (char *) cbuf.buf + (cbuf.chunks[c].start
1409 - cbuf.chunks[0].start),
1410 cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
7324369a 1411 free (cbuf.buf);
a44c1790
TS
1412 cbuf.buf = NULL;
1413 cbufp = NULL;
7324369a
JJ
1414 }
1415
d9a6bd32
JJ
1416 /* If the variable from "omp target enter data" map-list was already mapped,
1417 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1418 gomp_exit_data. */
9e628024 1419 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
6f5b4b64 1420 {
d9a6bd32
JJ
1421 free (tgt);
1422 tgt = NULL;
1423 }
1424
a51df54e 1425 gomp_mutex_unlock (&devicep->lock);
1df3f842
JJ
1426 return tgt;
1427}
1428
1f4c5b9b
CLT
1429attribute_hidden struct target_mem_desc *
1430gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1431 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1432 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
1433{
1434 return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1435 sizes, kinds, short_mapkind, pragma_kind);
1436}
1437
1438attribute_hidden struct target_mem_desc *
1439gomp_map_vars_async (struct gomp_device_descr *devicep,
1440 struct goacc_asyncqueue *aq, size_t mapnum,
1441 void **hostaddrs, void **devaddrs, size_t *sizes,
1442 void *kinds, bool short_mapkind,
1443 enum gomp_map_vars_kind pragma_kind)
1444{
1445 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1446 sizes, kinds, short_mapkind, pragma_kind);
1447}
1448
6278b549 1449static void
1df3f842
JJ
1450gomp_unmap_tgt (struct target_mem_desc *tgt)
1451{
1452 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1453 if (tgt->tgt_end)
6ce13072 1454 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1df3f842
JJ
1455
1456 free (tgt->array);
1457 free (tgt);
1458}
1459
1cbd94e8
JB
1460static bool
1461gomp_unref_tgt (void *ptr)
829c6349
CLT
1462{
1463 bool is_tgt_unmapped = false;
1cbd94e8
JB
1464
1465 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1466
1467 if (tgt->refcount > 1)
1468 tgt->refcount--;
829c6349
CLT
1469 else
1470 {
1cbd94e8 1471 gomp_unmap_tgt (tgt);
829c6349 1472 is_tgt_unmapped = true;
829c6349 1473 }
1cbd94e8 1474
829c6349
CLT
1475 return is_tgt_unmapped;
1476}
1477
/* Wrapper around gomp_unref_tgt with a 'void (*)(void *)' signature so it
   can be registered as an asynchronous-queue completion callback; the
   "was the descriptor unmapped" result is deliberately discarded.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1f4c5b9b 1483
1cbd94e8
JB
/* Remove splay-tree key K from DEVICEP's address-translation map and drop
   one reference from the target_mem_desc backing it.  If AQ is non-NULL,
   the descriptor is released asynchronously via a queue callback (it may
   still be referenced by in-flight operations); otherwise it is
   unreferenced immediately.  Returns true iff the target_mem_desc was
   unmapped synchronously.  The caller must hold the device lock.  */

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      /* A REFCOUNT_LINK mapping displaced the original "declare target
	 link" key from the tree; reinstate it now that the override is
	 being removed.  */
      if (k->aux->link_key)
	splay_tree_insert (&devicep->mem_map,
			   (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    /* Defer the unref until queued operations have drained.  */
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}
1507
1508attribute_hidden bool
1509gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1510{
1511 return gomp_remove_var_internal (devicep, k, NULL);
1512}
1513
1514/* Remove a variable asynchronously. This actually removes the variable
1515 mapping immediately, but retains the linked target_mem_desc until the
1516 asynchronous operation has completed (as it may still refer to target
1517 memory). The device lock must be held before entry, and remains locked on
1518 exit. */
1519
1520attribute_hidden void
1521gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1522 struct goacc_asyncqueue *aq)
1523{
1524 (void) gomp_remove_var_internal (devicep, k, aq);
1f4c5b9b
CLT
1525}
1526
41dbbb37
TS
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  If AQ is non-NULL, the final release of TGT is
   queued on it instead of happening synchronously.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* A descriptor with no list entries owns nothing on the device;
     just free it.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down: nothing to copy back or unmap.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      /* Drop one structured reference; only when it reaches zero is the
	 mapping actually removed.  */
      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  /* Finally drop the reference this function holds on TGT itself, either
     deferred via AQ or immediately.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
1612
1f4c5b9b
CLT
1613attribute_hidden void
1614gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1615{
1616 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
1617}
1618
1619attribute_hidden void
1620gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
1621 struct goacc_asyncqueue *aq)
1622{
1623 gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
1624}
1625
/* Implement 'target update': for each of the MAPNUM host address ranges
   (HOSTADDRS[i], length SIZES[i]) that is currently mapped on DEVICEP,
   copy data to and/or from the device as directed by the corresponding
   entry of KINDS.  Ranges that are not mapped at all are silently
   skipped; a range only partially covered by an existing mapping is a
   fatal error.  SHORT_MAPKIND selects the 16-bit kind encoding.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    /* The existing mapping must fully cover the requested
	       range.  */
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
1683
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely in the host and device compiler
   emitting variable and functions in the same order.  IS_REGISTER_LOCK
   says whether the global register_lock is held and must be released on
   the fatal-error paths.  The device lock must be held by the caller.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  /* Image mappings live for the whole program run.  */
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* Functions are keyed by a 1-byte host range starting at their host
     address.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      /* Host and device copies of a non-link variable must agree in
	 size.  */
      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
1786
22be2349
NS
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.   */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      /* Link variables go through gomp_remove_var so their aux data and
	 target_mem_desc reference are released too.  */
      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  /* If the shared target_mem_desc wasn't already torn down above, free
     the node found at the start of the array together with it.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
1859
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  VERSION encodes
   the GOMP and device interface versions of the image.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images, so devices initialized
     later also pick it up.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
1901
/* Legacy entry point for images emitted before versioned registration was
   introduced; forwards with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1908
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  Order is not preserved:
     the last entry is moved into the vacated slot.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
1df3f842 1943
/* Legacy entry point matching GOMP_offload_register; forwards with
   version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1950
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      /* Drop the lock before aborting so gomp_fatal does not leave it
	 held.  */
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
1979
1f4c5b9b
CLT
1980/* This function finalizes the target device, specified by DEVICEP. DEVICEP
1981 must be locked on entry, and remains locked on return. */
1982
1983attribute_hidden bool
1984gomp_fini_device (struct gomp_device_descr *devicep)
1985{
1986 bool ret = goacc_fini_asyncqueues (devicep);
1987 ret &= devicep->fini_device_func (devicep->target_id);
1988 devicep->state = GOMP_DEVICE_FINALIZED;
1989 return ret;
1990}
1991
22be2349
NS
1992attribute_hidden void
1993gomp_unload_device (struct gomp_device_descr *devicep)
1994{
d84ffc0a 1995 if (devicep->state == GOMP_DEVICE_INITIALIZED)
22be2349
NS
1996 {
1997 unsigned i;
1998
1999 /* Unload from device all images registered at the moment. */
2000 for (i = 0; i < num_offload_images; i++)
2001 {
2002 struct offload_image_descr *image = &offload_images[i];
2003 if (image->type == devicep->type)
2a21ff19
NS
2004 gomp_unload_image_from_device (devicep, image->version,
2005 image->host_table,
22be2349
NS
2006 image->target_data);
2007 }
2008 }
2009}
2010
/* Host fallback for GOMP_target{,_ext} routines.  Runs FN (HOSTADDRS) on
   the host in a pristine thread context.  DEVICEP is only consulted to
   diagnose a mandatory-offload violation; NULL means no device was even
   requested.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  /* OMP_TARGET_OFFLOAD=mandatory forbids silently running on the host
     when a real device was requested but unusable.  */
  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  /* Save the current thread state and clear it so the target region
     starts from a clean slate, then restore afterwards.  */
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}
2035
b2b40051
MJ
2036/* Calculate alignment and size requirements of a private copy of data shared
2037 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
e01d41e5 2038
b2b40051
MJ
2039static inline void
2040calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2041 unsigned short *kinds, size_t *tgt_align,
2042 size_t *tgt_size)
e01d41e5 2043{
b2b40051
MJ
2044 size_t i;
2045 for (i = 0; i < mapnum; i++)
2046 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2047 {
2048 size_t align = (size_t) 1 << (kinds[i] >> 8);
2049 if (*tgt_align < align)
2050 *tgt_align = align;
2051 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2052 *tgt_size += sizes[i];
2053 }
2054}
2055
2056/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2057
2058static inline void
2059copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2060 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2061 size_t tgt_size)
2062{
2063 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2064 if (al)
2065 tgt += tgt_align - al;
2066 tgt_size = 0;
2067 size_t i;
e01d41e5
JJ
2068 for (i = 0; i < mapnum; i++)
2069 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2070 {
2071 size_t align = (size_t) 1 << (kinds[i] >> 8);
e01d41e5 2072 tgt_size = (tgt_size + align - 1) & ~(align - 1);
b2b40051
MJ
2073 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2074 hostaddrs[i] = tgt + tgt_size;
2075 tgt_size = tgt_size + sizes[i];
e01d41e5 2076 }
b2b40051
MJ
2077}
2078
/* Helper function of GOMP_target{,_ext} routines.  Translate host function
   HOST_FN to its device address on DEVICEP, or return NULL when the lookup
   fails (device finalized, or function not present in the image).  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  /* Devices that execute host code natively use the host address as-is.  */
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      /* Functions are registered as 1-byte ranges keyed by host address.  */
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}
2107
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep);

  /* Map all variables to the device, run the region there, then copy
     back / unmap.  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
1df3f842 2140
001ab12e
FH
2141static inline unsigned int
2142clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2143{
2144 /* If we cannot run asynchronously, simply ignore nowait. */
2145 if (devicep != NULL && devicep->async_run_func == NULL)
2146 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2147
2148 return flags;
2149}
2150
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one of
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device independent can
   be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* Fold the previous implicit task into the new team's
		 implicit task slot.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  /* Defer the whole region as a target task and return
	     immediately.  */
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      /* Host fallback; make the firstprivate copies first unless that
	 already happened above.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory devices need no mapping, only the firstprivate
	 copies.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
2296
/* Host fallback for GOMP_target_data{,_ext} routines.  DEVICEP is only
   consulted for the mandatory-offload diagnostic; NULL means no device
   was requested.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}
2322
/* Entry point for #pragma omp target data (pre-4.5 ABI; KINDS is 8-bit
   and UNUSED is a vestigial ABI slot).  */

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* Fall back to the host when no usable device exists or the device
     shares memory with the host.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  /* Push onto the per-task stack of active target data regions;
     GOMP_target_end_data pops it.  */
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
2341
/* Entry point for #pragma omp target data, OpenMP 4.5+ ABI: KINDS is
   16-bit and short-mapkind processing is enabled in gomp_map_vars.  */

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  /* Push onto the per-task stack of active target data regions;
     GOMP_target_end_data pops it.  */
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}
2360
/* End of a #pragma omp target data region: pop the innermost active
   region off the per-task stack and unmap its variables (copying back
   any from/tofrom data).  */

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
2372
/* Entry point for #pragma omp target update (pre-4.5 ABI; KINDS is
   8-bit and UNUSED is a vestigial ABI slot).  A no-op on host fallback
   and shared-memory devices, where host and device data coincide.  */

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
2386
/* Entry point for #pragma omp target update, OpenMP 4.5+ ABI: 16-bit
   KINDS plus FLAGS (GOMP_TARGET_FLAG_*) and a DEPEND vector.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer the update as a target task; fall through to the
		 synchronous path only if that fails.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to transfer on host fallback or shared-memory devices.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
2465
/* Implement the "exit data" half of GOMP_target_enter_exit_data: process
   detach operations first, then decrement refcounts, copy back from/
   always-from data, and remove mappings whose refcount dropped to zero.
   Takes and releases DEVICEP->lock.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  /* Detach pointers before any mapping might be removed below.  */
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* delete forces the mapping away regardless of refcount (but
	     never for "declare target" infinite-refcount entries).  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* always-from copies back unconditionally; plain from only on
	     the final reference.  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    gomp_remove_var (devicep, k);
	  break;

	case GOMP_MAP_DETACH:
	  /* Already handled in the first pass above.  */
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
41dbbb37 2542
/* Entry point for #pragma omp target enter data / exit data.  FLAGS
   selects the direction (GOMP_TARGET_FLAG_EXIT_DATA) and nowait; DEPEND
   is the dependence vector, handled as in GOMP_target_update_ext.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer the whole operation as a target task.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to do on host fallback or shared-memory devices.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  /* A struct entry covers sizes[i] following component maps.  */
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  /* Keep a pointer set together with the trailing pointer maps
	     that belong to it.  */
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
2654
/* Body of a deferred target task.  Returns true when the task has been
   queued asynchronously on the device (the plugin will complete it),
   false when it finished synchronously here.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      /* A target region (not enter/exit data or update).  */
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation after the async run completed: just
	     unmap.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* clear_unsupported_flags guarantees nowait only reaches here when
	 the plugin provides async_run.  */
      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  /* Deferred target update or enter/exit data.  */
  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
2727
2728void
2729GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2730{
2731 if (thread_limit)
2732 {
2733 struct gomp_task_icv *icv = gomp_icv (true);
2734 icv->thread_limit_var
2735 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2736 }
2737 (void) num_teams;
2738}
1df3f842 2739
/* omp_target_alloc: allocate SIZE bytes of device memory on DEVICE_NUM.
   The host device number (== gomp_get_num_devices ()) and shared-memory
   devices use plain malloc; returns NULL on invalid device numbers.  */

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2762
2763void
2764omp_target_free (void *device_ptr, int device_num)
2765{
2766 if (device_ptr == NULL)
2767 return;
2768
74c9882b 2769 if (device_num == gomp_get_num_devices ())
d9a6bd32
JJ
2770 {
2771 free (device_ptr);
2772 return;
2773 }
2774
2775 if (device_num < 0)
2776 return;
2777
2778 struct gomp_device_descr *devicep = resolve_device (device_num);
2779 if (devicep == NULL)
2780 return;
2781
b2b40051
MJ
2782 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2783 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
2784 {
2785 free (device_ptr);
2786 return;
2787 }
2788
2789 gomp_mutex_lock (&devicep->lock);
6ce13072 2790 gomp_free_device_memory (devicep, device_ptr);
d9a6bd32
JJ
2791 gomp_mutex_unlock (&devicep->lock);
2792}
2793
2794int
28567c40 2795omp_target_is_present (const void *ptr, int device_num)
d9a6bd32
JJ
2796{
2797 if (ptr == NULL)
2798 return 1;
2799
74c9882b 2800 if (device_num == gomp_get_num_devices ())
d9a6bd32
JJ
2801 return 1;
2802
2803 if (device_num < 0)
2804 return 0;
2805
2806 struct gomp_device_descr *devicep = resolve_device (device_num);
2807 if (devicep == NULL)
2808 return 0;
2809
b2b40051
MJ
2810 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2811 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
2812 return 1;
2813
2814 gomp_mutex_lock (&devicep->lock);
2815 struct splay_tree_s *mem_map = &devicep->mem_map;
2816 struct splay_tree_key_s cur_node;
2817
2818 cur_node.host_start = (uintptr_t) ptr;
2819 cur_node.host_end = cur_node.host_start;
e01d41e5 2820 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
d9a6bd32
JJ
2821 int ret = n != NULL;
2822 gomp_mutex_unlock (&devicep->lock);
2823 return ret;
2824}
2825
/* OpenMP omp_target_memcpy: copy LENGTH bytes from SRC + SRC_OFFSET on
   device SRC_DEVICE_NUM to DST + DST_OFFSET on device DST_DEVICE_NUM.
   Returns 0 on success, EINVAL on any failure.  Copies between two
   distinct non-host devices are not supported.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  /* A NULL descriptor below means "host memory".  */
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  /* A device number equal to omp_get_num_devices () denotes the host;
     otherwise resolve and validate the destination device.  */
  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory and non-OpenMP devices are treated as host.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  /* NOTE(review): num_devices_openmp is presumably the value
     gomp_get_num_devices () returns, making this symmetric with the
     destination check above — confirm against gomp_get_num_devices.  */
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  /* Host -> host.  */
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  /* Host -> device.  */
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Device -> host.  */
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Device -> same device.  */
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Two distinct devices: unsupported.  */
  return EINVAL;
}
2894
/* Recursive worker for omp_target_memcpy_rect.  Copies a NUM_DIMS-
   dimensional rectangular subvolume (VOLUME elements per dimension,
   starting at DST_OFFSETS/SRC_OFFSETS within arrays whose per-dimension
   extents are DST_DIMENSIONS/SRC_DIMENSIONS) of ELEMENT_SIZE-byte
   elements.  A NULL device descriptor means host memory.  Returns 0 on
   success, EINVAL on failure (including size_t overflow of any computed
   byte count).  Caller holds the relevant device lock.  */

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  /* Base case: a 1-D run is one contiguous copy.  */
  if (num_dims == 1)
    {
      /* Guard every byte-count computation against size_t overflow.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	/* Distinct devices: unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Byte size of one slice (all dimensions after the first).  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over the outermost dimension, advancing one slice at a time.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2970
/* OpenMP omp_target_memcpy_rect: copy a NUM_DIMS-dimensional rectangular
   subvolume between host/device memory.  Returns 0 on success, EINVAL on
   failure; when both DST and SRC are NULL, returns INT_MAX — the
   API's way of reporting the maximum number of dimensions supported.  */

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  /* NULL descriptor means host memory.  */
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  /* Query form: both pointers NULL -> report supported dimensions.  */
  if (!dst && !src)
    return INT_MAX;

  /* A device number equal to omp_get_num_devices () denotes the host.  */
  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory and non-OpenMP devices are treated as host.  */
      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  /* NOTE(review): num_devices_openmp presumably equals
     gomp_get_num_devices () here, mirroring the check above — confirm.  */
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  /* Copies between two distinct devices are not supported.  */
  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  /* At most one device is involved; lock it around the whole copy.  */
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
3029
/* OpenMP omp_target_associate_ptr: associate [HOST_PTR, HOST_PTR+SIZE)
   with device storage DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM by
   inserting a pinned (REFCOUNT_INFINITY) entry into the device's mapping
   splay tree.  Returns 0 on success, EINVAL otherwise.  Associating to
   the host or a shared-memory device is rejected.  */

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* The host device cannot take an association.  */
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An existing mapping is OK only if it covers the requested host
	 range and already points at exactly the requested device
	 address; otherwise the association conflicts.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Build a minimal single-entry target_mem_desc.  tgt_start == 0 and
	 to_free == NULL later identify it as an association (see
	 omp_target_disassociate_ptr).  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* Pin the entry: user associations are never implicitly unmapped.  */
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
3093
/* OpenMP omp_target_disassociate_ptr: remove an association previously
   created by omp_target_associate_ptr for PTR on device DEVICE_NUM.
   Returns 0 on success, EINVAL if no matching association exists or the
   entry is an ordinary mapping rather than a user association.  */

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  /* The host device never has associations.  */
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  /* Only remove entries that look exactly like the ones
     omp_target_associate_ptr creates: pinned refcount, zero tgt_start,
     nothing to free, sole reference, empty list.  Anything else is a
     real mapping and must not be torn down here.  */
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
3135
28567c40
JJ
3136int
3137omp_pause_resource (omp_pause_resource_t kind, int device_num)
3138{
3139 (void) kind;
74c9882b 3140 if (device_num == gomp_get_num_devices ())
28567c40 3141 return gomp_pause_host ();
74c9882b 3142 if (device_num < 0 || device_num >= num_devices_openmp)
28567c40
JJ
3143 return -1;
3144 /* Do nothing for target devices for now. */
3145 return 0;
3146}
3147
3148int
3149omp_pause_resource_all (omp_pause_resource_t kind)
3150{
3151 (void) kind;
3152 if (gomp_pause_host ())
3153 return -1;
3154 /* Do nothing for target devices for now. */
3155 return 0;
3156}
3157
/* Internal aliases so libgomp can call these entry points without
   going through the PLT (see libgomp's ialias mechanism).  */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
3160
1df3f842
JJ
3161#ifdef PLUGIN_SUPPORT
3162
3163/* This function tries to load a plugin for DEVICE. Name of plugin is passed
3164 in PLUGIN_NAME.
3165 The handles of the found functions are stored in the corresponding fields
3166 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3167
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    /* With --enable-offload-defaulted, an uninstalled plugin is silently
       ignored rather than reported as an error.  */
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* Reject plugins built against a different libgomp ABI version.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Mandatory handlers for every plugin.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      /* OpenMP-capable plugins must provide run/dev2dev; async_run and
	 can_run are optional.  */
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA interop handlers are all-or-nothing: either the plugin
	 provides the full set of four, or none at all.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
3276
d84ffc0a
IV
3277/* This function finalizes all initialized devices. */
3278
3279static void
3280gomp_target_fini (void)
3281{
3282 int i;
3283 for (i = 0; i < num_devices; i++)
3284 {
6ce13072 3285 bool ret = true;
d84ffc0a
IV
3286 struct gomp_device_descr *devicep = &devices[i];
3287 gomp_mutex_lock (&devicep->lock);
3288 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1f4c5b9b 3289 ret = gomp_fini_device (devicep);
d84ffc0a 3290 gomp_mutex_unlock (&devicep->lock);
6ce13072
CLT
3291 if (!ret)
3292 gomp_fatal ("device finalization failed");
d84ffc0a
IV
3293 }
3294}
3295
ee332b4a
TS
3296/* This function initializes the runtime for offloading.
3297 It parses the list of offload plugins, and tries to load these.
3298 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
41dbbb37
TS
3299 will be set, and the array DEVICES initialized, containing descriptors for
3300 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3301 by the others. */
1df3f842
JJ
3302
3303static void
3304gomp_target_init (void)
3305{
3306 const char *prefix ="libgomp-plugin-";
b5f7a6ca 3307 const char *suffix = SONAME_SUFFIX (1);
1df3f842
JJ
3308 const char *cur, *next;
3309 char *plugin_name;
35f258f4
JJ
3310 int i, new_num_devs;
3311 int num_devs = 0, num_devs_openmp;
3312 struct gomp_device_descr *devs = NULL;
1df3f842 3313
1bfc07d1
KCY
3314 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3315 return;
3316
ee332b4a 3317 cur = OFFLOAD_PLUGINS;
1df3f842
JJ
3318 if (*cur)
3319 do
3320 {
3321 struct gomp_device_descr current_device;
b13547d8 3322 size_t prefix_len, suffix_len, cur_len;
1df3f842
JJ
3323
3324 next = strchr (cur, ',');
3325
b13547d8
JJ
3326 prefix_len = strlen (prefix);
3327 cur_len = next ? next - cur : strlen (cur);
3328 suffix_len = strlen (suffix);
3329
3330 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
1df3f842
JJ
3331 if (!plugin_name)
3332 {
35f258f4 3333 num_devs = 0;
1df3f842
JJ
3334 break;
3335 }
3336
b13547d8
JJ
3337 memcpy (plugin_name, prefix, prefix_len);
3338 memcpy (plugin_name + prefix_len, cur, cur_len);
3339 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
1df3f842
JJ
3340
3341 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3342 {
35f258f4
JJ
3343 new_num_devs = current_device.get_num_devices_func ();
3344 if (new_num_devs >= 1)
1df3f842 3345 {
41dbbb37
TS
3346 /* Augment DEVICES and NUM_DEVICES. */
3347
35f258f4
JJ
3348 devs = realloc (devs, (num_devs + new_num_devs)
3349 * sizeof (struct gomp_device_descr));
3350 if (!devs)
1df3f842 3351 {
35f258f4 3352 num_devs = 0;
1df3f842
JJ
3353 free (plugin_name);
3354 break;
3355 }
3356
41dbbb37
TS
3357 current_device.name = current_device.get_name_func ();
3358 /* current_device.capabilities has already been set. */
1df3f842 3359 current_device.type = current_device.get_type_func ();
a51df54e 3360 current_device.mem_map.root = NULL;
d84ffc0a 3361 current_device.state = GOMP_DEVICE_UNINITIALIZED;
35f258f4 3362 for (i = 0; i < new_num_devs; i++)
1df3f842 3363 {
1df3f842 3364 current_device.target_id = i;
35f258f4
JJ
3365 devs[num_devs] = current_device;
3366 gomp_mutex_init (&devs[num_devs].lock);
3367 num_devs++;
1df3f842
JJ
3368 }
3369 }
3370 }
3371
3372 free (plugin_name);
3373 cur = next + 1;
3374 }
3375 while (next);
3376
41dbbb37
TS
3377 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3378 NUM_DEVICES_OPENMP. */
35f258f4
JJ
3379 struct gomp_device_descr *devs_s
3380 = malloc (num_devs * sizeof (struct gomp_device_descr));
3381 if (!devs_s)
3382 {
3383 num_devs = 0;
3384 free (devs);
3385 devs = NULL;
3386 }
3387 num_devs_openmp = 0;
3388 for (i = 0; i < num_devs; i++)
3389 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3390 devs_s[num_devs_openmp++] = devs[i];
3391 int num_devs_after_openmp = num_devs_openmp;
3392 for (i = 0; i < num_devs; i++)
3393 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3394 devs_s[num_devs_after_openmp++] = devs[i];
3395 free (devs);
3396 devs = devs_s;
3397
3398 for (i = 0; i < num_devs; i++)
41dbbb37 3399 {
41dbbb37
TS
3400 /* The 'devices' array can be moved (by the realloc call) until we have
3401 found all the plugins, so registering with the OpenACC runtime (which
3402 takes a copy of the pointer argument) must be delayed until now. */
35f258f4
JJ
3403 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3404 goacc_register (&devs[i]);
41dbbb37 3405 }
d84ffc0a 3406
35f258f4
JJ
3407 num_devices = num_devs;
3408 num_devices_openmp = num_devs_openmp;
3409 devices = devs;
d84ffc0a
IV
3410 if (atexit (gomp_target_fini) != 0)
3411 gomp_fatal ("atexit failed");
1df3f842
JJ
3412}
3413
3414#else /* PLUGIN_SUPPORT */
3415/* If dlfcn.h is unavailable we always fallback to host execution.
3416 GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* Without plugin support there are no offload devices to discover.  */
}
3421#endif /* PLUGIN_SUPPORT */