/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_c
#include "splay-tree.h"

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of requirements from the OpenMP 'requires' directive.  */
static int omp_requires_mask;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
                                 : omp_initial_device))
        return NULL;
      if (device_id == omp_invalid_device)
        gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }
  else if (device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != num_devices_openmp)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}
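
/* Editor's note: a minimal usage sketch (not part of the original source).
   resolve_device returns NULL when execution should fall back to the host
   (initial device, or device not found while offload is not mandatory), and
   otherwise returns a lazily initialized device descriptor.  Disabled
   illustration only; the helper name is hypothetical.  */
#if 0
static bool
offload_target_available_p (int device_id)
{
  /* REMAPPED is true for the OpenMP entry points, where GOMP_DEVICE_ICV
     and GOMP_DEVICE_HOST_FALLBACK have their special meanings.  */
  return resolve_device (device_id, true) != NULL;
}
#endif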

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
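
/* Editor's note: disabled sketch (not part of the original source) of the
   zero-length probing above.  A key [p, p) intersects no splay-tree range,
   so gomp_map_lookup widens it to [p, p+1) and then to [p-1, p) to also
   find mappings that end or begin exactly at P.  Assumes a hypothetical
   mapping [h, h+32) is already present in MEM_MAP.  */
#if 0
static void
zero_len_lookup_example (splay_tree mem_map, uintptr_t h)
{
  struct splay_tree_key_s key;
  key.host_start = h + 32;	/* One past the end of [h, h+32).  */
  key.host_end = key.host_start;
  /* The first probe [h+32, h+33) misses the mapping, but the second probe
     [h+31, h+32) overlaps it, so the lookup succeeds.  */
  splay_tree_key n = gomp_map_lookup (mem_map, &key);
  assert (n && n->host_start == h && n->host_end == h + 32);
}
#endif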

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
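
/* Editor's note: disabled sketch (not part of the original source) showing
   how gomp_coalesce_buf_add grows CBUF.  Regions added in increasing device
   offset order merge into the current chunk while the gap between them stays
   below MAX_COALESCE_BUF_GAP; a larger gap starts a new chunk, and an
   out-of-order offset disables coalescing altogether (chunk_cnt = -1).  */
#if 0
static void
coalesce_example (struct gomp_coalesce_buf *cbuf)
{
  gomp_coalesce_buf_add (cbuf, 0, 256);	    /* Chunk 0: [0, 256).  */
  gomp_coalesce_buf_add (cbuf, 512, 128);   /* Gap 256 < 4 KiB: chunk 0
					       grows to [0, 640).  */
  gomp_coalesce_buf_add (cbuf, 8192, 64);   /* Gap > 4 KiB: new chunk 1.  */
}
#endif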

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  Make a
             copy of the data into a temporary buffer in those cases.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free temporary buffer once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("internal libgomp cbuf error");
                    }
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
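
/* Editor's note: disabled sketch (not part of the original source) of the
   three paths through gomp_copy_host2dev: staging into CBUF when the target
   address falls inside a registered chunk, an asynchronous copy on AQ (with
   EPHEMERAL snapshotting short-lived host data first), or a direct
   synchronous device copy.  AQ and CBUF must not be combined.  */
#if 0
static void
copy_modes_example (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq, void *d, const void *h)
{
  /* Synchronous, no coalescing: one direct host-to-device transfer.  */
  gomp_copy_host2dev (devicep, NULL, d, h, 64, false, NULL);
  /* Asynchronous copy of data that may vanish (e.g. a stack local):
     EPHEMERAL = true makes a heap snapshot freed after completion.  */
  gomp_copy_host2dev (devicep, aq, d, h, 64, true, NULL);
}
#endif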

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}
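
/* Editor's note: disabled sketch (not part of the original source) of the
   REFCOUNT_SET protocol used by the two functions above.  Within a single
   construct, each distinct refcount is adjusted at most once even when
   several maps alias the same key (OpenMP 5.0 semantics).  */
#if 0
static void
refcount_set_example (splay_tree_key k)
{
  htab_t set = htab_create (8);
  gomp_increment_refcount (k, &set);  /* Adjusts K's refcount.  */
  gomp_increment_refcount (k, &set);  /* Already in SET: no-op.  */
  /* A matching unmap would call gomp_decrement_refcount with its own set,
     so the pair of maps above still balances to a single decrement.  */
}
#endif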

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  /* Attach maps are handled separately; neither kind may appear here.
     (The original used '||' here, which made the assertion a tautology.)  */
  assert (kind != GOMP_MAP_ATTACH
          && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
                          && newn->host_start <= oldn->host_start
                          && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
           /* Otherwise, new contained inside old is considered valid.  */
           || (oldn->host_start <= newn->host_start
               && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
         address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
        {
          /* We have to be careful not to overwrite still attached pointers
             during the copyback to host.  */
          uintptr_t addr = newn->host_start;
          while (addr < newn->host_end)
            {
              size_t i = (addr - oldn->host_start) / sizeof (void *);
              if (oldn->aux->attach_count[i] == 0)
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (oldn->tgt->tgt_start
                                              + oldn->tgt_offset
                                              + addr - oldn->host_start),
                                    (void *) addr,
                                    sizeof (void *), false, cbuf);
              addr += sizeof (void *);
            }
        }
      else
        gomp_copy_host2dev (devicep, aq,
                            (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                      + newn->host_start - oldn->host_start),
                            (void *) newn->host_start,
                            newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}


static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
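
/* Editor's note: disabled sketch (not part of the original source) of the
   short (16-bit) map-kind encoding decoded above: the low byte carries the
   GOMP_MAP_* code (get_kind also strips the implicit-map marker) and the
   high byte the log2 of the required alignment, matching the
   '(size_t) 1 << (kind >> rshift)' computations used later in this file.  */
#if 0
static void
kind_decoding_example (void *kinds, int i)
{
  const bool short_mapkind = true;
  int kind = get_kind (short_mapkind, kinds, i);
  bool implicit = get_implicit (short_mapkind, kinds, i);
  int map_code = kind & 0xff;		      /* Low byte: GOMP_MAP_* code.  */
  size_t align = (size_t) 1 << (kind >> 8);  /* High byte: log2 align.  */
  (void) implicit; (void) map_code; (void) align;
}
#endif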

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf,
                  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
        cur_node.tgt_offset = 0;
      else
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Pointer target of array section wasn't mapped");
        }
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
        = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
         array section.  Now subtract bias to get what we want
         to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, implicit, cbuf,
                              refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, implicit, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, implicit, cbuf,
                                  refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp,
                     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          if (allow_zero_length_array_sections)
            /* When allowing attachment to zero-length array sections, we
               allow attaching to NULL pointers when the target region is not
               mapped.  */
            data = 0;
          else
            {
              gomp_mutex_unlock (&devicep->lock);
              gomp_fatal ("pointer target not mapped for attach");
            }
        }
      else
        data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}
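
/* Editor's note: disabled sketch (not part of the original source) of the
   attach/detach pairing above.  Only the 0 -> 1 transition of the per-slot
   attach count rewrites the device pointer; the matching detach restores
   the host pointer value once the count drops back to 0.  */
#if 0
static void
attach_detach_example (struct gomp_device_descr *devicep, splay_tree mem_map,
		       splay_tree_key n, uintptr_t field_addr)
{
  /* Synchronous (AQ == NULL), no coalescing buffer, no bias, and no
     zero-length array section special-casing.  */
  gomp_attach_pointer (devicep, NULL, mem_map, n, field_addr, 0, NULL, false);
  gomp_detach_pointer (devicep, NULL, n, field_addr, false, NULL);
}
#endif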

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}
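
/* Editor's note: disabled sketch (not part of the original source).  The
   special tgt->list[i].offset encodings resolved by gomp_map_val are
   OFFSET_INLINED (hostaddrs[i] already holds the value to pass),
   OFFSET_POINTER (0; the pointer is initialized separately on the device)
   and OFFSET_STRUCT (derived from the following member's mapping).  The
   kernel argument block for GOMP_MAP_VARS_TARGET is filled exactly this
   way near the end of gomp_map_vars_internal below.  */
#if 0
static void
fill_arg_block_example (struct target_mem_desc *tgt, void **hostaddrs,
			uintptr_t *args, size_t mapnum)
{
  for (size_t i = 0; i < mapnum; i++)
    args[i] = gomp_map_val (tgt, hostaddrs, i);
}
#endif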

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH
               || ((kind & typemask)
                   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and now a nonnull one has been found;
                 that permits transparent re-mapping for Fortran array
                 descriptors which were previously mapped unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, implicit,
                                  NULL, refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp, false);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                /* Save device address in hostaddr to permit later
                   availability when doing a deep-firstprivate with pointer
                   attach.  */
                hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
                tgt_size += len;

                /* If followed by GOMP_MAP_ATTACH, pointer assign this
                   firstprivate to hostaddrs[i+1], which is assumed to contain
                   a device address.  */
                if (i + 1 < mapnum
                    && (GOMP_MAP_ATTACH
                        == (typemask & get_kind (short_mapkind, kinds, i+1))))
                  {
                    uintptr_t target = (uintptr_t) hostaddrs[i];
                    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
                    gomp_copy_host2dev (devicep, aq, devptr, &target,
                                        sizeof (void *), false, cbufp);
                    ++i;
                  }
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily
                   test 'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp, refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
              case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start
                                            - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      bool zlas
                        = ((kind & typemask)
                           == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp, zlas);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
1df3f842
JJ
1541 splay_tree_key k = &array->key;
1542 k->host_start = (uintptr_t) hostaddrs[i];
41dbbb37 1543 if (!GOMP_MAP_POINTER_P (kind & typemask))
1df3f842
JJ
1544 k->host_end = k->host_start + sizes[i];
1545 else
1546 k->host_end = k->host_start + sizeof (void *);
a51df54e 1547 splay_tree_key n = splay_tree_lookup (mem_map, k);
4a38b02b 1548 if (n && n->refcount != REFCOUNT_LINK)
1f4c5b9b 1549 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
b7e20480 1550 kind & typemask, false, implicit, cbufp,
275c736e 1551 refcount_set);
1df3f842
JJ
1552 else
1553 {
2a656a93 1554 k->aux = NULL;
4a38b02b
IV
1555 if (n && n->refcount == REFCOUNT_LINK)
1556 {
1557 /* Replace target address of the pointer with target address
1558 of mapped object in the splay tree. */
1559 splay_tree_remove (mem_map, n);
2a656a93
JB
1560 k->aux
1561 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1562 k->aux->link_key = n;
4a38b02b 1563 }
41dbbb37 1564 size_t align = (size_t) 1 << (kind >> rshift);
d9a6bd32 1565 tgt->list[i].key = k;
1df3f842 1566 k->tgt = tgt;
275c736e
CLT
1567 k->refcount = 0;
1568 k->dynamic_refcount = 0;
6c7e076b 1569 if (field_tgt_clear != FIELD_TGT_EMPTY)
d9a6bd32
JJ
1570 {
1571 k->tgt_offset = k->host_start - field_tgt_base
1572 + field_tgt_offset;
275c736e
CLT
1573 if (openmp_p)
1574 {
1575 k->refcount = REFCOUNT_STRUCTELEM;
1576 if (field_tgt_structelem_first == NULL)
1577 {
1578 /* Set to first structure element of sequence. */
1579 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1580 field_tgt_structelem_first = k;
1581 }
1582 else
1583 /* Point to refcount of leading element, but do not
1584 increment again. */
1585 k->structelem_refcount_ptr
1586 = &field_tgt_structelem_first->structelem_refcount;
1587
1588 if (i == field_tgt_clear)
1589 {
1590 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1591 field_tgt_structelem_first = NULL;
1592 }
1593 }
d9a6bd32 1594 if (i == field_tgt_clear)
6c7e076b 1595 field_tgt_clear = FIELD_TGT_EMPTY;
d9a6bd32
JJ
1596 }
1597 else
1598 {
1599 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1600 k->tgt_offset = tgt_size;
1601 tgt_size += k->host_end - k->host_start;
1602 }
275c736e
CLT
1603 /* First increment, from 0 to 1. gomp_increment_refcount
1604 encapsulates the different increment cases, so use this
1605 instead of directly setting 1 during initialization. */
1606 gomp_increment_refcount (k, refcount_set);
1607
d9a6bd32
JJ
1608 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1609 tgt->list[i].always_copy_from
1610 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
bc4ed079 1611 tgt->list[i].is_attach = false;
d9a6bd32
JJ
1612 tgt->list[i].offset = 0;
1613 tgt->list[i].length = k->host_end - k->host_start;
1df3f842
JJ
1614 tgt->refcount++;
1615 array->left = NULL;
1616 array->right = NULL;
a51df54e 1617 splay_tree_insert (mem_map, array);
41dbbb37 1618 switch (kind & typemask)
1df3f842 1619 {
41dbbb37
TS
1620 case GOMP_MAP_ALLOC:
1621 case GOMP_MAP_FROM:
1622 case GOMP_MAP_FORCE_ALLOC:
1623 case GOMP_MAP_FORCE_FROM:
d9a6bd32 1624 case GOMP_MAP_ALWAYS_FROM:
1df3f842 1625 break;
41dbbb37
TS
1626 case GOMP_MAP_TO:
1627 case GOMP_MAP_TOFROM:
1628 case GOMP_MAP_FORCE_TO:
1629 case GOMP_MAP_FORCE_TOFROM:
d9a6bd32
JJ
1630 case GOMP_MAP_ALWAYS_TO:
1631 case GOMP_MAP_ALWAYS_TOFROM:
1f4c5b9b 1632 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1633 (void *) (tgt->tgt_start
1634 + k->tgt_offset),
1635 (void *) k->host_start,
9c41f5b9
JB
1636 k->host_end - k->host_start,
1637 false, cbufp);
1df3f842 1638 break;
41dbbb37 1639 case GOMP_MAP_POINTER:
0ab29cf0
CLT
1640 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1641 gomp_map_pointer
1642 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1643 k->tgt_offset, sizes[i], cbufp,
1644 ((kind & typemask)
1645 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1df3f842 1646 break;
41dbbb37 1647 case GOMP_MAP_TO_PSET:
1f4c5b9b 1648 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1649 (void *) (tgt->tgt_start
1650 + k->tgt_offset),
1651 (void *) k->host_start,
9c41f5b9
JB
1652 k->host_end - k->host_start,
1653 false, cbufp);
972da557 1654 tgt->list[i].has_null_ptr_assoc = false;
41dbbb37 1655
1df3f842 1656 for (j = i + 1; j < mapnum; j++)
972da557
TB
1657 {
1658 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1659 & typemask);
1660 if (!GOMP_MAP_POINTER_P (ptr_kind)
1661 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1662 break;
1663 else if ((uintptr_t) hostaddrs[j] < k->host_start
1664 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1665 > k->host_end))
1666 break;
1667 else
1668 {
1669 tgt->list[j].key = k;
1670 tgt->list[j].copy_from = false;
1671 tgt->list[j].always_copy_from = false;
1672 tgt->list[j].is_attach = false;
1673 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
275c736e
CLT
1674 /* For OpenMP, the use of refcount_sets causes
1675 errors if we set k->refcount = 1 above but also
 1676			      increment it again here, because decrementing
 1677			      would then not match properly, since we decrement
 1678			      only once for each key's refcount. Therefore avoid
1679 increment for OpenMP constructs. */
1680 if (!openmp_p)
1681 gomp_increment_refcount (k, refcount_set);
972da557
TB
1682 gomp_map_pointer (tgt, aq,
1683 (uintptr_t) *(void **) hostaddrs[j],
1684 k->tgt_offset
1685 + ((uintptr_t) hostaddrs[j]
1686 - k->host_start),
0ab29cf0 1687 sizes[j], cbufp, false);
972da557 1688 }
ea4b23d9 1689 }
972da557 1690 i = j - 1;
41dbbb37
TS
1691 break;
1692 case GOMP_MAP_FORCE_PRESENT:
1693 {
1694 /* We already looked up the memory region above and it
1695 was missing. */
1696 size_t size = k->host_end - k->host_start;
a51df54e 1697 gomp_mutex_unlock (&devicep->lock);
01c0b3b0
KT
1698#ifdef HAVE_INTTYPES_H
1699 gomp_fatal ("present clause: !acc_is_present (%p, "
1700 "%"PRIu64" (0x%"PRIx64"))",
1701 (void *) k->host_start,
1702 (uint64_t) size, (uint64_t) size);
1703#else
41dbbb37 1704 gomp_fatal ("present clause: !acc_is_present (%p, "
01c0b3b0
KT
1705 "%lu (0x%lx))", (void *) k->host_start,
1706 (unsigned long) size, (unsigned long) size);
1707#endif
41dbbb37
TS
1708 }
1709 break;
1710 case GOMP_MAP_FORCE_DEVICEPTR:
1711 assert (k->host_end - k->host_start == sizeof (void *));
1f4c5b9b 1712 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1713 (void *) (tgt->tgt_start
1714 + k->tgt_offset),
1715 (void *) k->host_start,
9c41f5b9 1716 sizeof (void *), false, cbufp);
41dbbb37
TS
1717 break;
1718 default:
a51df54e 1719 gomp_mutex_unlock (&devicep->lock);
41dbbb37
TS
1720 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1721 kind);
1df3f842 1722 }
4a38b02b 1723
2a656a93 1724 if (k->aux && k->aux->link_key)
4a38b02b
IV
1725 {
1726 /* Set link pointer on target to the device address of the
1727 mapped object. */
1728 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
84ca08d2
TS
1729 /* We intentionally do not use coalescing here, as it's not
1730 data allocated by the current call to this function. */
1f4c5b9b 1731 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
9c41f5b9 1732 &tgt_addr, sizeof (void *), true, NULL);
4a38b02b 1733 }
1df3f842
JJ
1734 array++;
1735 }
1736 }
1737 }
41dbbb37 1738
d9a6bd32 1739 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1df3f842
JJ
1740 {
1741 for (i = 0; i < mapnum; i++)
1742 {
e01d41e5 1743 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1f4c5b9b 1744 gomp_copy_host2dev (devicep, aq,
6ce13072 1745 (void *) (tgt->tgt_start + i * sizeof (void *)),
7324369a 1746 (void *) &cur_node.tgt_offset, sizeof (void *),
9c41f5b9 1747 true, cbufp);
1df3f842
JJ
1748 }
1749 }
1750
7324369a
JJ
1751 if (cbufp)
1752 {
d88a6951
TS
1753 /* See 'gomp_coalesce_buf_add'. */
1754 assert (!aq);
1755
7324369a
JJ
1756 long c = 0;
1757 for (c = 0; c < cbuf.chunk_cnt; ++c)
1f4c5b9b 1758 gomp_copy_host2dev (devicep, aq,
a44c1790
TS
1759 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1760 (char *) cbuf.buf + (cbuf.chunks[c].start
1761 - cbuf.chunks[0].start),
9c41f5b9
JB
1762 cbuf.chunks[c].end - cbuf.chunks[c].start,
1763 true, NULL);
7324369a 1764 free (cbuf.buf);
a44c1790
TS
1765 cbuf.buf = NULL;
1766 cbufp = NULL;
7324369a
JJ
1767 }
1768
d9a6bd32
JJ
1769 /* If the variable from "omp target enter data" map-list was already mapped,
1770 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1771 gomp_exit_data. */
9e628024 1772 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
6f5b4b64 1773 {
d9a6bd32
JJ
1774 free (tgt);
1775 tgt = NULL;
1776 }
1777
a51df54e 1778 gomp_mutex_unlock (&devicep->lock);
1df3f842
JJ
1779 return tgt;
1780}
1781
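/* Example (illustrative sketch, not part of libgomp): the idea behind the
   'cbuf' coalescing flushed above.  Many small host-to-device copies are
   first staged into one host-side buffer and then transferred chunk by
   chunk, so the device sees a few large transfers instead of one per
   variable.  'device_memcpy' is a hypothetical stand-in for the plugin's
   copy hook.  */
#if 0
#include <stddef.h>
#include <stdint.h>

struct chunk { size_t start, end; };	/* [start, end) within the target block.  */

static void
flush_chunks (char *staging, struct chunk *chunks, size_t chunk_cnt,
	      uintptr_t tgt_start,
	      void (*device_memcpy) (uintptr_t dst, const void *src, size_t len))
{
  /* One device transfer per coalesced chunk.  */
  for (size_t c = 0; c < chunk_cnt; c++)
    device_memcpy (tgt_start + chunks[c].start,
		   staging + (chunks[c].start - chunks[0].start),
		   chunks[c].end - chunks[c].start);
}
#endif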
275c736e 1782static struct target_mem_desc *
1f4c5b9b
CLT
1783gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1784 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
275c736e
CLT
1785 bool short_mapkind, htab_t *refcount_set,
1786 enum gomp_map_vars_kind pragma_kind)
1f4c5b9b 1787{
275c736e
CLT
1788 /* This management of a local refcount_set is for convenience of callers
1789 who do not share a refcount_set over multiple map/unmap uses. */
1790 htab_t local_refcount_set = NULL;
1791 if (refcount_set == NULL)
1792 {
1793 local_refcount_set = htab_create (mapnum);
1794 refcount_set = &local_refcount_set;
1795 }
1796
1797 struct target_mem_desc *tgt;
1798 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1799 sizes, kinds, short_mapkind, refcount_set,
1800 pragma_kind);
1801 if (local_refcount_set)
1802 htab_free (local_refcount_set);
1803
1804 return tgt;
1f4c5b9b
CLT
1805}
1806
1807attribute_hidden struct target_mem_desc *
275c736e
CLT
1808goacc_map_vars (struct gomp_device_descr *devicep,
1809 struct goacc_asyncqueue *aq, size_t mapnum,
1810 void **hostaddrs, void **devaddrs, size_t *sizes,
1811 void *kinds, bool short_mapkind,
1812 enum gomp_map_vars_kind pragma_kind)
1f4c5b9b
CLT
1813{
1814 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
275c736e
CLT
1815 sizes, kinds, short_mapkind, NULL,
1816 GOMP_MAP_VARS_OPENACC | pragma_kind);
1f4c5b9b
CLT
1817}
1818
6278b549 1819static void
1df3f842
JJ
1820gomp_unmap_tgt (struct target_mem_desc *tgt)
1821{
1822 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1823 if (tgt->tgt_end)
6ce13072 1824 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1df3f842
JJ
1825
1826 free (tgt->array);
1827 free (tgt);
1828}
1829
1cbd94e8
JB
1830static bool
1831gomp_unref_tgt (void *ptr)
829c6349
CLT
1832{
1833 bool is_tgt_unmapped = false;
1cbd94e8
JB
1834
1835 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1836
1837 if (tgt->refcount > 1)
1838 tgt->refcount--;
829c6349
CLT
1839 else
1840 {
1cbd94e8 1841 gomp_unmap_tgt (tgt);
829c6349 1842 is_tgt_unmapped = true;
829c6349 1843 }
1cbd94e8 1844
829c6349
CLT
1845 return is_tgt_unmapped;
1846}
1847
1f4c5b9b 1848static void
1cbd94e8 1849gomp_unref_tgt_void (void *ptr)
1f4c5b9b 1850{
1cbd94e8
JB
1851 (void) gomp_unref_tgt (ptr);
1852}
1f4c5b9b 1853
275c736e
CLT
1854static void
1855gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1cbd94e8 1856{
275c736e 1857 splay_tree_remove (sp, k);
2a656a93
JB
1858 if (k->aux)
1859 {
1860 if (k->aux->link_key)
275c736e 1861 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
5d5be7bf
JB
1862 if (k->aux->attach_count)
1863 free (k->aux->attach_count);
2a656a93
JB
1864 free (k->aux);
1865 k->aux = NULL;
1866 }
275c736e
CLT
1867}
1868
1869static inline __attribute__((always_inline)) bool
1870gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1871 struct goacc_asyncqueue *aq)
1872{
1873 bool is_tgt_unmapped = false;
1874
1875 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1876 {
1877 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1878 /* Infer the splay_tree_key of the first structelem key using the
 1879	   pointer to the first structelem_refcount. */
1880 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1881 - offsetof (struct splay_tree_key_s,
1882 structelem_refcount));
1883 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1884
1885 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1886 with the splay_tree_keys embedded inside. */
1887 splay_tree_node node =
1888 (splay_tree_node) ((char *) k
1889 - offsetof (struct splay_tree_node_s, key));
1890 while (true)
1891 {
 1892	  /* Starting from the _FIRST key, continue for all following
1893 sibling keys. */
1894 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1895 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1896 break;
1897 else
1898 k = &(++node)->key;
1899 }
1900 }
1901 else
1902 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1903
1cbd94e8
JB
1904 if (aq)
1905 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1906 (void *) k->tgt);
1f4c5b9b 1907 else
1cbd94e8
JB
1908 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1909 return is_tgt_unmapped;
1910}
1911
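/* Example (illustrative, not part of libgomp): the offsetof arithmetic used
   above to step from structelem_refcount_ptr back to the enclosing
   splay_tree_key: subtracting a member's offsetof from a pointer to that
   member yields the enclosing object.  A self-contained demo with a
   hypothetical struct.  */
#if 0
#include <assert.h>
#include <stddef.h>

struct item { int id; unsigned long refcount; };

int
main (void)
{
  struct item it = { 42, 1 };
  unsigned long *member_ptr = &it.refcount;
  /* Recover the enclosing struct from the pointer to its member.  */
  struct item *recovered
    = (struct item *) ((char *) member_ptr - offsetof (struct item, refcount));
  assert (recovered == &it && recovered->id == 42);
  return 0;
}
#endif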
1912attribute_hidden bool
1913gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1914{
1915 return gomp_remove_var_internal (devicep, k, NULL);
1916}
1917
1918/* Remove a variable asynchronously. This actually removes the variable
1919 mapping immediately, but retains the linked target_mem_desc until the
1920 asynchronous operation has completed (as it may still refer to target
1921 memory). The device lock must be held before entry, and remains locked on
1922 exit. */
1923
1924attribute_hidden void
1925gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1926 struct goacc_asyncqueue *aq)
1927{
1928 (void) gomp_remove_var_internal (devicep, k, aq);
1f4c5b9b
CLT
1929}
1930
41dbbb37
TS
1931/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
 1932   variables back from device to host; if it is false, it is assumed that this
b4557008 1933 has been done already. */
41dbbb37 1934
1f4c5b9b
CLT
1935static inline __attribute__((always_inline)) void
1936gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
275c736e 1937 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1df3f842
JJ
1938{
1939 struct gomp_device_descr *devicep = tgt->device_descr;
1940
1941 if (tgt->list_count == 0)
1942 {
1943 free (tgt);
1944 return;
1945 }
1946
a51df54e 1947 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
1948 if (devicep->state == GOMP_DEVICE_FINALIZED)
1949 {
1950 gomp_mutex_unlock (&devicep->lock);
1951 free (tgt->array);
1952 free (tgt);
1953 return;
1954 }
41dbbb37 1955
1df3f842 1956 size_t i;
8e7e71ff
JB
1957
1958 /* We must perform detachments before any copies back to the host. */
1959 for (i = 0; i < tgt->list_count; i++)
1960 {
1961 splay_tree_key k = tgt->list[i].key;
1962
bc4ed079 1963 if (k != NULL && tgt->list[i].is_attach)
8e7e71ff
JB
1964 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1965 + tgt->list[i].offset,
25bce75c 1966 false, NULL);
8e7e71ff
JB
1967 }
1968
1df3f842 1969 for (i = 0; i < tgt->list_count; i++)
d9a6bd32
JJ
1970 {
1971 splay_tree_key k = tgt->list[i].key;
1972 if (k == NULL)
1973 continue;
1974
bc4ed079
JB
1975 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1976 counts ('n->refcount', 'n->dynamic_refcount'). */
1977 if (tgt->list[i].is_attach)
1978 continue;
1979
275c736e
CLT
1980 bool do_copy, do_remove;
1981 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
d9a6bd32 1982
275c736e 1983 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
d9a6bd32 1984 || tgt->list[i].always_copy_from)
1f4c5b9b 1985 gomp_copy_dev2host (devicep, aq,
6ce13072
CLT
1986 (void *) (k->host_start + tgt->list[i].offset),
1987 (void *) (k->tgt->tgt_start + k->tgt_offset
1988 + tgt->list[i].offset),
1989 tgt->list[i].length);
275c736e 1990 if (do_remove)
83d1d065
TS
1991 {
1992 struct target_mem_desc *k_tgt = k->tgt;
1993 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1994 /* It would be bad if TGT got unmapped while we're still iterating
 1995	     over its LIST_COUNT, as we also expect to use it in the following
1996 code. */
1997 assert (!is_tgt_unmapped
1998 || k_tgt != tgt);
1999 }
d9a6bd32 2000 }
1df3f842 2001
1f4c5b9b 2002 if (aq)
1cbd94e8 2003 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1f4c5b9b 2004 (void *) tgt);
1df3f842 2005 else
1f4c5b9b 2006 gomp_unref_tgt ((void *) tgt);
41dbbb37 2007
a51df54e 2008 gomp_mutex_unlock (&devicep->lock);
1df3f842
JJ
2009}
2010
275c736e
CLT
2011static void
2012gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2013 htab_t *refcount_set)
1f4c5b9b 2014{
275c736e
CLT
2015 /* This management of a local refcount_set is for convenience of callers
2016 who do not share a refcount_set over multiple map/unmap uses. */
2017 htab_t local_refcount_set = NULL;
2018 if (refcount_set == NULL)
2019 {
2020 local_refcount_set = htab_create (tgt->list_count);
2021 refcount_set = &local_refcount_set;
2022 }
2023
2024 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2025
2026 if (local_refcount_set)
2027 htab_free (local_refcount_set);
1f4c5b9b
CLT
2028}
2029
2030attribute_hidden void
275c736e
CLT
2031goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2032 struct goacc_asyncqueue *aq)
1f4c5b9b 2033{
275c736e 2034 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1f4c5b9b
CLT
2035}
2036
1df3f842 2037static void
a51df54e 2038gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
d9a6bd32 2039 size_t *sizes, void *kinds, bool short_mapkind)
1df3f842
JJ
2040{
2041 size_t i;
2042 struct splay_tree_key_s cur_node;
d9a6bd32 2043 const int typemask = short_mapkind ? 0xff : 0x7;
1df3f842
JJ
2044
2045 if (!devicep)
2046 return;
2047
2048 if (mapnum == 0)
2049 return;
2050
a51df54e 2051 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2052 if (devicep->state == GOMP_DEVICE_FINALIZED)
2053 {
2054 gomp_mutex_unlock (&devicep->lock);
2055 return;
2056 }
2057
1df3f842
JJ
2058 for (i = 0; i < mapnum; i++)
2059 if (sizes[i])
2060 {
2061 cur_node.host_start = (uintptr_t) hostaddrs[i];
2062 cur_node.host_end = cur_node.host_start + sizes[i];
a51df54e 2063 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1df3f842
JJ
2064 if (n)
2065 {
d9a6bd32 2066 int kind = get_kind (short_mapkind, kinds, i);
1df3f842
JJ
2067 if (n->host_start > cur_node.host_start
2068 || n->host_end < cur_node.host_end)
a51df54e
IV
2069 {
2070 gomp_mutex_unlock (&devicep->lock);
2071 gomp_fatal ("Trying to update [%p..%p) object when "
2072 "only [%p..%p) is mapped",
2073 (void *) cur_node.host_start,
2074 (void *) cur_node.host_end,
2075 (void *) n->host_start,
2076 (void *) n->host_end);
2077 }
6ce13072 2078
6c039937
CLT
2079 if (n->aux && n->aux->attach_count)
2080 {
2081 uintptr_t addr = cur_node.host_start;
2082 while (addr < cur_node.host_end)
2083 {
2084 /* We have to be careful not to overwrite still attached
2085 pointers during host<->device updates. */
2086 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2087 if (n->aux->attach_count[i] == 0)
2088 {
2089 void *devaddr = (void *) (n->tgt->tgt_start
2090 + n->tgt_offset
2091 + addr - n->host_start);
2092 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2093 gomp_copy_host2dev (devicep, NULL,
2094 devaddr, (void *) addr,
2095 sizeof (void *), false, NULL);
2096 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2097 gomp_copy_dev2host (devicep, NULL,
2098 (void *) addr, devaddr,
2099 sizeof (void *));
2100 }
2101 addr += sizeof (void *);
2102 }
2103 }
2104 else
2105 {
2106 void *hostaddr = (void *) cur_node.host_start;
2107 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2108 + cur_node.host_start
2109 - n->host_start);
2110 size_t size = cur_node.host_end - cur_node.host_start;
2111
2112 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2113 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2114 false, NULL);
2115 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2116 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2117 }
1df3f842 2118 }
1df3f842 2119 }
a51df54e
IV
2120 gomp_mutex_unlock (&devicep->lock);
2121}
2122
9f2fca56
MV
2123static struct gomp_offload_icv_list *
2124gomp_get_offload_icv_item (int dev_num)
2125{
2126 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2127 while (l != NULL && l->device_num != dev_num)
2128 l = l->next;
2129
2130 return l;
2131}
2132
2133/* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2134 depending on the device num and the variable hierarchy
2135 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2136 device and thus no item with that device number is contained in
2137 gomp_offload_icv_list, then a new item is created and added to the list. */
2138
2139static struct gomp_offload_icvs *
2140get_gomp_offload_icvs (int dev_num)
2141{
2142 struct gomp_icv_list *dev
2143 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2144 struct gomp_icv_list *all
2145 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2146 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2147 struct gomp_offload_icv_list *offload_icvs
2148 = gomp_get_offload_icv_item (dev_num);
2149
2150 if (offload_icvs != NULL)
2151 return &offload_icvs->icvs;
2152
2153 struct gomp_offload_icv_list *new
2154 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2155
2156 new->device_num = dev_num;
2157 new->icvs.device_num = dev_num;
2158 new->next = gomp_offload_icv_list;
2159
2160 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2161 new->icvs.nteams = dev_x->icvs.nteams_var;
2162 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2163 new->icvs.nteams = dev->icvs.nteams_var;
2164 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2165 new->icvs.nteams = all->icvs.nteams_var;
2166 else
2167 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2168
81476bc4
MV
2169 if (dev_x != NULL
2170 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2171 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2172 else if (dev != NULL
2173 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2174 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2175 else if (all != NULL
2176 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2177 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2178 else
2179 new->icvs.teams_thread_limit
2180 = gomp_default_icv_values.teams_thread_limit_var;
2181
9f2fca56
MV
2182 if (dev_x != NULL
2183 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2184 new->icvs.default_device = dev_x->icvs.default_device_var;
2185 else if (dev != NULL
2186 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2187 new->icvs.default_device = dev->icvs.default_device_var;
2188 else if (all != NULL
2189 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2190 new->icvs.default_device = all->icvs.default_device_var;
2191 else
2192 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2193
2194 gomp_offload_icv_list = new;
2195 return &new->icvs;
2196}
2197
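/* Example (illustrative sketch, not part of libgomp): the precedence chain
   implemented above.  A device-specific setting (_DEV_<num>) wins over the
   generic device setting (_DEV), which wins over _ALL, with a built-in
   default as the final fallback.  The types and names below are
   hypothetical simplifications.  */
#if 0
struct setting { int configured; int value; };

static int
resolve_icv (const struct setting *dev_x, const struct setting *dev,
	     const struct setting *all, int default_value)
{
  if (dev_x && dev_x->configured)	/* e.g. the _DEV_42 variant.  */
    return dev_x->value;
  if (dev && dev->configured)		/* e.g. the _DEV variant.  */
    return dev->value;
  if (all && all->configured)		/* e.g. the _ALL variant.  */
    return all->value;
  return default_value;
}
#endif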
a51df54e
IV
 2198/* Load the image pointed to by TARGET_DATA to the device specified by
 2199   DEVICEP. Insert into the splay tree the mapping between addresses from HOST_TABLE and
22be2349
NS
 2200   from the loaded target image. We rely on the host and device compilers
 2201   emitting variables and functions in the same order. */
a51df54e
IV
2202
2203static void
2a21ff19 2204gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
22be2349
NS
2205 const void *host_table, const void *target_data,
2206 bool is_register_lock)
a51df54e
IV
2207{
2208 void **host_func_table = ((void ***) host_table)[0];
2209 void **host_funcs_end = ((void ***) host_table)[1];
2210 void **host_var_table = ((void ***) host_table)[2];
2211 void **host_vars_end = ((void ***) host_table)[3];
2212
2213 /* The func table contains only addresses, the var table contains addresses
2214 and corresponding sizes. */
2215 int num_funcs = host_funcs_end - host_func_table;
2216 int num_vars = (host_vars_end - host_var_table) / 2;
2217
2218 /* Load image to device and get target addresses for the image. */
2219 struct addr_pair *target_table = NULL;
ea4b23d9 2220 uint64_t *rev_target_fn_table = NULL;
2a21ff19
NS
2221 int i, num_target_entries;
2222
ea4b23d9
TB
2223 /* With reverse offload, insert also target-host addresses. */
2224 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2225
2a21ff19
NS
2226 num_target_entries
2227 = devicep->load_image_func (devicep->target_id, version,
ea4b23d9
TB
2228 target_data, &target_table,
2229 rev_lookup ? &rev_target_fn_table : NULL);
a51df54e 2230
0bac793e 2231 if (num_target_entries != num_funcs + num_vars
9f2fca56
MV
2232 /* "+1" due to the additional ICV struct. */
2233 && num_target_entries != num_funcs + num_vars + 1)
a51df54e
IV
2234 {
2235 gomp_mutex_unlock (&devicep->lock);
2236 if (is_register_lock)
2237 gomp_mutex_unlock (&register_lock);
2a21ff19
NS
2238 gomp_fatal ("Cannot map target functions or variables"
2239 " (expected %u, have %u)", num_funcs + num_vars,
2240 num_target_entries);
a51df54e
IV
2241 }
2242
2243 /* Insert host-target address mapping into splay tree. */
2244 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
9f2fca56
MV
2245 /* "+1" due to the additional ICV struct. */
2246 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2247 * sizeof (*tgt->array));
ea4b23d9
TB
2248 if (rev_target_fn_table)
2249 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2250 else
2251 tgt->rev_array = NULL;
d9a6bd32 2252 tgt->refcount = REFCOUNT_INFINITY;
a51df54e
IV
2253 tgt->tgt_start = 0;
2254 tgt->tgt_end = 0;
2255 tgt->to_free = NULL;
2256 tgt->prev = NULL;
2257 tgt->list_count = 0;
2258 tgt->device_descr = devicep;
2259 splay_tree_node array = tgt->array;
ea4b23d9 2260 reverse_splay_tree_node rev_array = tgt->rev_array;
a51df54e
IV
2261
2262 for (i = 0; i < num_funcs; i++)
2263 {
2264 splay_tree_key k = &array->key;
2265 k->host_start = (uintptr_t) host_func_table[i];
2266 k->host_end = k->host_start + 1;
2267 k->tgt = tgt;
2268 k->tgt_offset = target_table[i].start;
d9a6bd32 2269 k->refcount = REFCOUNT_INFINITY;
6f5b4b64 2270 k->dynamic_refcount = 0;
2a656a93 2271 k->aux = NULL;
a51df54e
IV
2272 array->left = NULL;
2273 array->right = NULL;
2274 splay_tree_insert (&devicep->mem_map, array);
ea4b23d9
TB
2275 if (rev_target_fn_table)
2276 {
2277 reverse_splay_tree_key k2 = &rev_array->key;
2278 k2->dev = rev_target_fn_table[i];
2279 k2->k = k;
2280 rev_array->left = NULL;
2281 rev_array->right = NULL;
2282 if (k2->dev != 0)
2283 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2284 rev_array++;
2285 }
a51df54e
IV
2286 array++;
2287 }
2288
4a38b02b
IV
2289 /* Most significant bit of the size in host and target tables marks
2290 "omp declare target link" variables. */
2291 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2292 const uintptr_t size_mask = ~link_bit;
2293
a51df54e
IV
2294 for (i = 0; i < num_vars; i++)
2295 {
2296 struct addr_pair *target_var = &target_table[num_funcs + i];
4a38b02b 2297 uintptr_t target_size = target_var->end - target_var->start;
4897bb00 2298 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
4a38b02b 2299
4897bb00 2300 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
a51df54e
IV
2301 {
2302 gomp_mutex_unlock (&devicep->lock);
2303 if (is_register_lock)
2304 gomp_mutex_unlock (&register_lock);
4a38b02b 2305 gomp_fatal ("Cannot map target variables (size mismatch)");
a51df54e
IV
2306 }
2307
2308 splay_tree_key k = &array->key;
2309 k->host_start = (uintptr_t) host_var_table[i * 2];
4a38b02b
IV
2310 k->host_end
2311 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
a51df54e
IV
2312 k->tgt = tgt;
2313 k->tgt_offset = target_var->start;
4897bb00 2314 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
6f5b4b64 2315 k->dynamic_refcount = 0;
2a656a93 2316 k->aux = NULL;
a51df54e
IV
2317 array->left = NULL;
2318 array->right = NULL;
2319 splay_tree_insert (&devicep->mem_map, array);
2320 array++;
2321 }
2322
9f2fca56
MV
 2323  /* The last entry is for the ICVs variable.
 2324     Tolerate the case where the plugin does not return that entry. */
0bac793e
CLT
2325 if (num_funcs + num_vars < num_target_entries)
2326 {
9f2fca56
MV
2327 struct addr_pair *var = &target_table[num_funcs + num_vars];
2328
2329 /* Start address will be non-zero for the ICVs variable if
2330 the variable was found in this image. */
2331 if (var->start != 0)
0bac793e
CLT
2332 {
2333 /* The index of the devicep within devices[] is regarded as its
2334 'device number', which is different from the per-device type
2335 devicep->target_id. */
9f2fca56
MV
2336 int dev_num = (int) (devicep - &devices[0]);
2337 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2338 size_t var_size = var->end - var->start;
81476bc4
MV
2339 if (var_size != sizeof (struct gomp_offload_icvs))
2340 {
2341 gomp_mutex_unlock (&devicep->lock);
2342 if (is_register_lock)
2343 gomp_mutex_unlock (&register_lock);
2344 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2345 "format");
2346 }
9f2fca56
MV
 2347	  /* Copy the ICVs variable to its place in device memory, thereby
 2348	     actually putting its device number into effect. */
2349 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2350 var_size, false, NULL);
2351 splay_tree_key k = &array->key;
2352 k->host_start = (uintptr_t) icvs;
2353 k->host_end =
2354 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2355 k->tgt = tgt;
2356 k->tgt_offset = var->start;
2357 k->refcount = REFCOUNT_INFINITY;
2358 k->dynamic_refcount = 0;
2359 k->aux = NULL;
2360 array->left = NULL;
2361 array->right = NULL;
2362 splay_tree_insert (&devicep->mem_map, array);
2363 array++;
0bac793e
CLT
2364 }
2365 }
2366
a51df54e 2367 free (target_table);
1df3f842
JJ
2368}
2369
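/* Example (illustrative, not a public API): the host_table layout decoded at
   the top of gomp_load_image_to_device above.  Four pointers delimit a
   function-address table and a variable table of (address, size) pairs.  */
#if 0
static void
decode_host_table (const void *host_table, int *num_funcs, int *num_vars)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* Functions: one address each.  Variables: an address followed by a
     size, so two slots per variable.  */
  *num_funcs = host_funcs_end - host_func_table;
  *num_vars = (host_vars_end - host_var_table) / 2;
}
#endif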
22be2349
NS
 2370/* Unload the mappings described by TARGET_DATA from device DEVICEP.
2371 The device must be locked. */
2372
2373static void
2374gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2a21ff19 2375 unsigned version,
22be2349
NS
2376 const void *host_table, const void *target_data)
2377{
2378 void **host_func_table = ((void ***) host_table)[0];
2379 void **host_funcs_end = ((void ***) host_table)[1];
2380 void **host_var_table = ((void ***) host_table)[2];
2381 void **host_vars_end = ((void ***) host_table)[3];
2382
2383 /* The func table contains only addresses, the var table contains addresses
2384 and corresponding sizes. */
2385 int num_funcs = host_funcs_end - host_func_table;
2386 int num_vars = (host_vars_end - host_var_table) / 2;
2387
22be2349
NS
2388 struct splay_tree_key_s k;
2389 splay_tree_key node = NULL;
2390
 2391  /* Find the mapping at the start of the node array. */
2392 if (num_funcs || num_vars)
2393 {
2394 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2395 : (uintptr_t) host_var_table[0]);
2396 k.host_end = k.host_start + 1;
2397 node = splay_tree_lookup (&devicep->mem_map, &k);
2398 }
2a21ff19 2399
6ce13072
CLT
2400 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2401 {
2402 gomp_mutex_unlock (&devicep->lock);
2403 gomp_fatal ("image unload fail");
2404 }
ea4b23d9
TB
2405 if (devicep->mem_map_rev.root)
2406 {
2407 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2408 real allocation. */
2409 assert (node && node->tgt && node->tgt->rev_array);
2410 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2411 free (node->tgt->rev_array);
2412 devicep->mem_map_rev.root = NULL;
2413 }
22be2349
NS
2414
2415 /* Remove mappings from splay tree. */
4a38b02b
IV
2416 int i;
2417 for (i = 0; i < num_funcs; i++)
22be2349 2418 {
4a38b02b 2419 k.host_start = (uintptr_t) host_func_table[i];
22be2349
NS
2420 k.host_end = k.host_start + 1;
2421 splay_tree_remove (&devicep->mem_map, &k);
2422 }
2423
4a38b02b
IV
2424 /* Most significant bit of the size in host and target tables marks
2425 "omp declare target link" variables. */
2426 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2427 const uintptr_t size_mask = ~link_bit;
2428 bool is_tgt_unmapped = false;
2429
2430 for (i = 0; i < num_vars; i++)
22be2349 2431 {
4a38b02b
IV
2432 k.host_start = (uintptr_t) host_var_table[i * 2];
2433 k.host_end
2434 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2435
2436 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2437 splay_tree_remove (&devicep->mem_map, &k);
2438 else
2439 {
2440 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
829c6349 2441 is_tgt_unmapped = gomp_remove_var (devicep, n);
4a38b02b 2442 }
22be2349
NS
2443 }
2444
4a38b02b 2445 if (node && !is_tgt_unmapped)
22be2349
NS
2446 {
2447 free (node->tgt);
2448 free (node);
2449 }
2450}
2451
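/* Worked example of the link_bit/size_mask arithmetic used above: on a
   64-bit host the most significant bit of the recorded size flags
   "declare target link" variables, and masking it off recovers the real
   size.  A self-contained demo.  */
#if 0
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  const uintptr_t link_bit
    = (uintptr_t) 1 << (sizeof (uintptr_t) * CHAR_BIT - 1);
  const uintptr_t size_mask = ~link_bit;
  uintptr_t recorded = link_bit | 64;	/* A 64-byte "link" variable.  */

  printf ("is_link_var=%d size=%llu\n",
	  (recorded & link_bit) != 0,
	  (unsigned long long) (recorded & size_mask));	/* -> 1, 64 */
  return 0;
}
#endif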
683f1184
TB
2452static void
2453gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2454{
2455 char *end = buf + size, *p = buf;
2456 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2457 p += snprintf (p, end - p, "unified_address");
2458 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2459 p += snprintf (p, end - p, "%sunified_shared_memory",
2460 (p == buf ? "" : ", "));
2461 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2462 p += snprintf (p, end - p, "%sreverse_offload",
2463 (p == buf ? "" : ", "));
2464}
2465
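/* Example (self-contained demo) of the string-building pattern in
   gomp_requires_to_name above: advance the write pointer by snprintf's
   return value and prefix ", " once something has already been written.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char buf[64];
  char *end = buf + sizeof buf, *p = buf;
  const char *items[] = { "unified_address", "reverse_offload" };

  for (int i = 0; i < 2; i++)
    p += snprintf (p, end - p, "%s%s", (p == buf ? "" : ", "), items[i]);
  puts (buf);	/* -> unified_address, reverse_offload */
  return 0;
}
#endif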
a51df54e 2466/* This function should be called from every offload image while loading.
1df3f842 2467 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
3f05e03d 2468 the target, and DATA. */
1df3f842
JJ
2469
2470void
2a21ff19 2471GOMP_offload_register_ver (unsigned version, const void *host_table,
3f05e03d 2472 int target_type, const void *data)
1df3f842 2473{
a51df54e 2474 int i;
2a21ff19
NS
2475
2476 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2477 gomp_fatal ("Library too old for offload (version %u < %u)",
2478 GOMP_VERSION, GOMP_VERSION_LIB (version));
683f1184 2479
3f05e03d
TS
2480 int omp_req;
2481 const void *target_data;
683f1184
TB
2482 if (GOMP_VERSION_LIB (version) > 1)
2483 {
3f05e03d
TS
2484 omp_req = (int) (size_t) ((void **) data)[0];
2485 target_data = &((void **) data)[1];
2486 }
2487 else
2488 {
2489 omp_req = 0;
2490 target_data = data;
683f1184
TB
2491 }
2492
a51df54e
IV
2493 gomp_mutex_lock (&register_lock);
2494
683f1184
TB
2495 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2496 {
2497 char buf1[sizeof ("unified_address, unified_shared_memory, "
2498 "reverse_offload")];
2499 char buf2[sizeof ("unified_address, unified_shared_memory, "
2500 "reverse_offload")];
2501 gomp_requires_to_name (buf2, sizeof (buf2),
2502 omp_req != GOMP_REQUIRES_TARGET_USED
2503 ? omp_req : omp_requires_mask);
2504 if (omp_req != GOMP_REQUIRES_TARGET_USED
2505 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2506 {
2507 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2508 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2509 "in multiple compilation units: '%s' vs. '%s'",
2510 buf1, buf2);
2511 }
2512 else
2513 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2514 "some compilation units", buf2);
2515 }
2516 omp_requires_mask = omp_req;
2517
a51df54e
IV
2518 /* Load image to all initialized devices. */
2519 for (i = 0; i < num_devices; i++)
2520 {
2521 struct gomp_device_descr *devicep = &devices[i];
2522 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2523 if (devicep->type == target_type
2524 && devicep->state == GOMP_DEVICE_INITIALIZED)
2a21ff19
NS
2525 gomp_load_image_to_device (devicep, version,
2526 host_table, target_data, true);
a51df54e
IV
2527 gomp_mutex_unlock (&devicep->lock);
2528 }
1df3f842 2529
a51df54e
IV
2530 /* Insert image to array of pending images. */
2531 offload_images
2532 = gomp_realloc_unlock (offload_images,
2533 (num_offload_images + 1)
2534 * sizeof (struct offload_image_descr));
2a21ff19 2535 offload_images[num_offload_images].version = version;
1df3f842
JJ
2536 offload_images[num_offload_images].type = target_type;
2537 offload_images[num_offload_images].host_table = host_table;
2538 offload_images[num_offload_images].target_data = target_data;
2539
2540 num_offload_images++;
a51df54e 2541 gomp_mutex_unlock (&register_lock);
1df3f842
JJ
2542}
2543
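/* Example (illustrative sketch mirroring the code above): the DATA blob
   layout decoded by GOMP_offload_register_ver.  For descriptors with
   GOMP_VERSION_LIB > 1, slot 0 carries the omp_requires mask and the
   target data follows; older descriptors pass the target data directly.
   Assumes GOMP_VERSION_LIB from gomp-constants.h.  */
#if 0
#include <stddef.h>

static void
decode_offload_data (unsigned version, const void *data,
		     int *omp_req, const void **target_data)
{
  if (GOMP_VERSION_LIB (version) > 1)
    {
      /* Slot 0 carries the omp_requires mask; the target data follows.  */
      *omp_req = (int) (size_t) ((void **) data)[0];
      *target_data = &((void **) data)[1];
    }
  else
    {
      /* Legacy layout: no requires info, DATA is the target data itself.  */
      *omp_req = 0;
      *target_data = data;
    }
}
#endif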
9ef71453
TS
2544/* Legacy entry point. */
2545
2a21ff19
NS
2546void
2547GOMP_offload_register (const void *host_table, int target_type,
2548 const void *target_data)
2549{
2550 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2551}
2552
a51df54e
IV
2553/* This function should be called from every offload image while unloading.
2554 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
3f05e03d 2555 the target, and DATA. */
1df3f842 2556
a51df54e 2557void
2a21ff19 2558GOMP_offload_unregister_ver (unsigned version, const void *host_table,
3f05e03d 2559 int target_type, const void *data)
1df3f842 2560{
a51df54e
IV
2561 int i;
2562
3f05e03d
TS
2563 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2564 gomp_fatal ("Library too old for offload (version %u < %u)",
2565 GOMP_VERSION, GOMP_VERSION_LIB (version));
2566
2567 const void *target_data;
2568 if (GOMP_VERSION_LIB (version) > 1)
2569 target_data = &((void **) data)[1];
2570 else
2571 target_data = data;
2572
a51df54e
IV
2573 gomp_mutex_lock (&register_lock);
2574
2575 /* Unload image from all initialized devices. */
2576 for (i = 0; i < num_devices; i++)
2577 {
a51df54e
IV
2578 struct gomp_device_descr *devicep = &devices[i];
2579 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2580 if (devicep->type == target_type
2581 && devicep->state == GOMP_DEVICE_INITIALIZED)
2a21ff19
NS
2582 gomp_unload_image_from_device (devicep, version,
2583 host_table, target_data);
a51df54e
IV
2584 gomp_mutex_unlock (&devicep->lock);
2585 }
2586
2587 /* Remove image from array of pending images. */
2588 for (i = 0; i < num_offload_images; i++)
2589 if (offload_images[i].target_data == target_data)
2590 {
2591 offload_images[i] = offload_images[--num_offload_images];
2592 break;
2593 }
2594
2595 gomp_mutex_unlock (&register_lock);
41dbbb37 2596}
1df3f842 2597
9ef71453
TS
2598/* Legacy entry point. */
2599
2a21ff19
NS
2600void
2601GOMP_offload_unregister (const void *host_table, int target_type,
2602 const void *target_data)
2603{
2604 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2605}
2606
a51df54e
IV
2607/* This function initializes the target device, specified by DEVICEP. DEVICEP
2608 must be locked on entry, and remains locked on return. */
41dbbb37
TS
2609
2610attribute_hidden void
a51df54e 2611gomp_init_device (struct gomp_device_descr *devicep)
41dbbb37 2612{
1df3f842 2613 int i;
6ce13072
CLT
2614 if (!devicep->init_device_func (devicep->target_id))
2615 {
2616 gomp_mutex_unlock (&devicep->lock);
2617 gomp_fatal ("device initialization failed");
2618 }
a51df54e
IV
2619
 2620  /* Load to the device all images registered so far. */
2621 for (i = 0; i < num_offload_images; i++)
1df3f842 2622 {
a51df54e
IV
2623 struct offload_image_descr *image = &offload_images[i];
2624 if (image->type == devicep->type)
2a21ff19
NS
2625 gomp_load_image_to_device (devicep, image->version,
2626 image->host_table, image->target_data,
2627 false);
1df3f842
JJ
2628 }
2629
1f4c5b9b
CLT
2630 /* Initialize OpenACC asynchronous queues. */
2631 goacc_init_asyncqueues (devicep);
2632
d84ffc0a 2633 devicep->state = GOMP_DEVICE_INITIALIZED;
41dbbb37
TS
2634}
2635
1f4c5b9b
CLT
2636/* This function finalizes the target device, specified by DEVICEP. DEVICEP
2637 must be locked on entry, and remains locked on return. */
2638
2639attribute_hidden bool
2640gomp_fini_device (struct gomp_device_descr *devicep)
2641{
2642 bool ret = goacc_fini_asyncqueues (devicep);
2643 ret &= devicep->fini_device_func (devicep->target_id);
2644 devicep->state = GOMP_DEVICE_FINALIZED;
2645 return ret;
2646}
2647
22be2349
NS
2648attribute_hidden void
2649gomp_unload_device (struct gomp_device_descr *devicep)
2650{
d84ffc0a 2651 if (devicep->state == GOMP_DEVICE_INITIALIZED)
22be2349
NS
2652 {
2653 unsigned i;
2654
 2655      /* Unload from the device all images currently registered. */
2656 for (i = 0; i < num_offload_images; i++)
2657 {
2658 struct offload_image_descr *image = &offload_images[i];
2659 if (image->type == devicep->type)
2a21ff19
NS
2660 gomp_unload_image_from_device (devicep, image->version,
2661 image->host_table,
22be2349
NS
2662 image->target_data);
2663 }
2664 }
2665}
2666
e01d41e5 2667/* Host fallback for GOMP_target{,_ext} routines. */
d9a6bd32
JJ
2668
2669static void
1bfc07d1 2670gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
aea72386 2671 struct gomp_device_descr *devicep, void **args)
d9a6bd32
JJ
2672{
2673 struct gomp_thread old_thr, *thr = gomp_thread ();
1bfc07d1
KCY
2674
2675 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2676 && devicep != NULL)
2677 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2678 "be used for offloading");
2679
d9a6bd32
JJ
2680 old_thr = *thr;
2681 memset (thr, '\0', sizeof (*thr));
2682 if (gomp_places_list)
2683 {
2684 thr->place = old_thr.place;
2685 thr->ts.place_partition_len = gomp_places_list_len;
2686 }
aea72386
JJ
2687 if (args)
2688 while (*args)
2689 {
2690 intptr_t id = (intptr_t) *args++, val;
2691 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2692 val = (intptr_t) *args++;
2693 else
2694 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2695 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2696 continue;
2697 id &= GOMP_TARGET_ARG_ID_MASK;
2698 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2699 continue;
2700 val = val > INT_MAX ? INT_MAX : val;
2701 if (val)
2702 gomp_icv (true)->thread_limit_var = val;
2703 break;
2704 }
2705
d9a6bd32
JJ
2706 fn (hostaddrs);
2707 gomp_free_thread (thr);
2708 *thr = old_thr;
2709}
2710
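/* Example (illustrative sketch): a decoder for the ARGS encoding handled in
   gomp_target_fallback above.  Each entry packs a device selector and an ID
   into the low bits; the value either shares the word (shifted by
   GOMP_TARGET_ARG_VALUE_SHIFT) or, when GOMP_TARGET_ARG_SUBSEQUENT_PARAM is
   set, occupies the following array element.  Assumes the GOMP_TARGET_ARG_*
   constants from gomp-constants.h, exactly as used above.  */
#if 0
#include <stdint.h>

static void
walk_target_args (void **args)
{
  if (args)
    while (*args)
      {
	intptr_t id = (intptr_t) *args++, val;
	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
	  val = (intptr_t) *args++;	/* Value in the next element.  */
	else
	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;	/* Packed value.  */
	intptr_t device = id & GOMP_TARGET_ARG_DEVICE_MASK;
	intptr_t arg_id = id & GOMP_TARGET_ARG_ID_MASK;
	/* ... dispatch on (device, arg_id, val) ...  */
	(void) device; (void) arg_id; (void) val;
      }
}
#endif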
b2b40051
MJ
2711/* Calculate alignment and size requirements of a private copy of data shared
2712 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
e01d41e5 2713
b2b40051
MJ
2714static inline void
2715calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2716 unsigned short *kinds, size_t *tgt_align,
2717 size_t *tgt_size)
e01d41e5 2718{
b2b40051
MJ
2719 size_t i;
2720 for (i = 0; i < mapnum; i++)
2721 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2722 {
2723 size_t align = (size_t) 1 << (kinds[i] >> 8);
2724 if (*tgt_align < align)
2725 *tgt_align = align;
2726 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2727 *tgt_size += sizes[i];
2728 }
2729}
2730
2731/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2732
2733static inline void
2734copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2735 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2736 size_t tgt_size)
2737{
2738 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2739 if (al)
2740 tgt += tgt_align - al;
2741 tgt_size = 0;
2742 size_t i;
e01d41e5 2743 for (i = 0; i < mapnum; i++)
bbb7f860 2744 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
e01d41e5
JJ
2745 {
2746 size_t align = (size_t) 1 << (kinds[i] >> 8);
e01d41e5 2747 tgt_size = (tgt_size + align - 1) & ~(align - 1);
b2b40051
MJ
2748 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2749 hostaddrs[i] = tgt + tgt_size;
2750 tgt_size = tgt_size + sizes[i];
49d1a2f9
TB
2751 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2752 {
2753 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2754 ++i;
2755 }
e01d41e5 2756 }
b2b40051
MJ
2757}
2758
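/* Worked example of the align-up arithmetic used in both helpers above:
   rounding a running size up to a power-of-two alignment with
   (size + align - 1) & ~(align - 1).  A self-contained demo.  */
#if 0
#include <assert.h>
#include <stddef.h>

static size_t
align_up (size_t size, size_t align)	/* ALIGN must be a power of two.  */
{
  return (size + align - 1) & ~(align - 1);
}

int
main (void)
{
  assert (align_up (13, 8) == 16);	/* 13 rounded up to an 8-byte boundary.  */
  assert (align_up (16, 8) == 16);	/* Already aligned: unchanged.  */
  return 0;
}
#endif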
e01d41e5 2759/* Helper function of GOMP_target{,_ext} routines. */
d9a6bd32
JJ
2760
2761static void *
2762gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2763 void (*host_fn) (void *))
2764{
2765 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2766 return (void *) host_fn;
2767 else
2768 {
2769 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2770 if (devicep->state == GOMP_DEVICE_FINALIZED)
2771 {
2772 gomp_mutex_unlock (&devicep->lock);
2773 return NULL;
2774 }
2775
d9a6bd32
JJ
2776 struct splay_tree_key_s k;
2777 k.host_start = (uintptr_t) host_fn;
2778 k.host_end = k.host_start + 1;
2779 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2780 gomp_mutex_unlock (&devicep->lock);
2781 if (tgt_fn == NULL)
eb4048f2 2782 return NULL;
d9a6bd32
JJ
2783
2784 return (void *) tgt_fn->tgt_offset;
2785 }
2786}
2787
acf0174b 2788/* Called when encountering a target directive. If DEVICE
41dbbb37
TS
2789 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2790 GOMP_DEVICE_HOST_FALLBACK (or any value
2791 larger than last available hw device), use host fallback.
128b26dc
TS
2792 FN is address of host code, UNUSED is part of the current ABI, but
2793 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
acf0174b
JJ
2794 with MAPNUM entries, with addresses of the host objects,
2795 sizes of the host objects (resp. for pointer kind pointer bias
2796 and assumed sizeof (void *) size) and kinds. */
2797
2798void
128b26dc 2799GOMP_target (int device, void (*fn) (void *), const void *unused,
acf0174b
JJ
2800 size_t mapnum, void **hostaddrs, size_t *sizes,
2801 unsigned char *kinds)
2802{
1158fe43 2803 struct gomp_device_descr *devicep = resolve_device (device, true);
41dbbb37 2804
d84ffc0a 2805 void *fn_addr;
41dbbb37 2806 if (devicep == NULL
d84ffc0a 2807 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
b2b40051
MJ
2808 /* All shared memory devices should use the GOMP_target_ext function. */
2809 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
d84ffc0a 2810 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
aea72386 2811 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
d9a6bd32 2812
275c736e 2813 htab_t refcount_set = htab_create (mapnum);
d9a6bd32
JJ
2814 struct target_mem_desc *tgt_vars
2815 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
275c736e 2816 &refcount_set, GOMP_MAP_VARS_TARGET);
b2b40051
MJ
2817 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2818 NULL);
275c736e
CLT
2819 htab_clear (refcount_set);
2820 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2821 htab_free (refcount_set);
d9a6bd32 2822}
1df3f842 2823
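/* Rough sketch (hypothetical, heavily simplified) of the call a compiler
   might emit for '#pragma omp target map(tofrom: x)': one map entry with
   the host address, size, and a GOMP_MAP_TOFROM kind.  Real compiler output
   differs in detail; 'target_region_fn' stands in for the outlined target
   body, and GOMP_MAP_TOFROM is assumed from gomp-constants.h.  */
#if 0
#include <stddef.h>

extern void target_region_fn (void *);

static void
launch_example (int device, int *x)
{
  void *hostaddrs[1] = { x };
  size_t sizes[1] = { sizeof *x };
  unsigned char kinds[1] = { GOMP_MAP_TOFROM };
  GOMP_target (device, target_region_fn, NULL, 1, hostaddrs, sizes, kinds);
}
#endif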
001ab12e
FH
2824static inline unsigned int
2825clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2826{
2827 /* If we cannot run asynchronously, simply ignore nowait. */
2828 if (devicep != NULL && devicep->async_run_func == NULL)
2829 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2830
2831 return flags;
2832}
2833
81476bc4
MV
2834static void
2835gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2836{
2837 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2838 if (item == NULL)
2839 return;
2840
2841 void *host_ptr = &item->icvs;
2842 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2843 if (dev_ptr != NULL)
2844 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2845 sizeof (struct gomp_offload_icvs));
2846}
2847
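/* User-level illustration of omp_get_mapped_ptr, which gomp_copy_back_icvs
   uses above to locate the device copy of the ICVs block: given a host
   pointer and a device number, it returns the corresponding device pointer,
   or NULL if the object is not mapped.  A minimal sketch assuming an
   OpenMP 5.1 implementation that provides the routine.  */
#if 0
#include <omp.h>
#include <stdio.h>

int
main (void)
{
  int x = 7, dev = 0;
#pragma omp target enter data map(to: x) device(dev)
  void *dptr = omp_get_mapped_ptr (&x, dev);
  printf ("device copy of x: %p\n", dptr);	/* NULL if not mapped.  */
#pragma omp target exit data map(delete: x) device(dev)
  return 0;
}
#endif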
e01d41e5
JJ
2848/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2849 and several arguments have been added:
2850 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2851 DEPEND is array of dependencies, see GOMP_task for details.
b2b40051
MJ
2852
2853 ARGS is a pointer to an array consisting of a variable number of both
 2854   device-independent and device-specific arguments, which can take one or two
 2855   elements each, where the first specifies for which device it is intended, the
 2856   type, and optionally also the value. If the value is not present in the first
 2857   element, the whole second element holds the actual value. The last element of
 2858   the array is a single NULL. Among the device-independent arguments are, for
 2859   example, NUM_TEAMS and THREAD_LIMIT.
2860
e01d41e5
JJ
 2861   NUM_TEAMS is positive if GOMP_teams will be called in the body with
 2862   that value, or 1 if the teams construct is not present, or 0 if the
 2863   teams construct does not have a num_teams clause and so the choice is
 2864   implementation defined, and -1 if it can't be determined on the host
 2865   what value GOMP_teams will have on the device.
 2866   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
 2867   body with that value, or 0 if the teams construct does not have a
 2868   thread_limit clause or the teams construct is not present, or -1 if it
 2869   can't be determined on the host what value GOMP_teams will have on the device. */
2870
d9a6bd32 2871void
e01d41e5
JJ
2872GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2873 void **hostaddrs, size_t *sizes, unsigned short *kinds,
b2b40051 2874 unsigned int flags, void **depend, void **args)
d9a6bd32 2875{
1158fe43 2876 struct gomp_device_descr *devicep = resolve_device (device, true);
21f3a236
JJ
2877 size_t tgt_align = 0, tgt_size = 0;
2878 bool fpc_done = false;
41dbbb37 2879
81476bc4
MV
2880 /* Obtain the original TEAMS and THREADS values from ARGS. */
2881 intptr_t orig_teams = 1, orig_threads = 0;
2882 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2883 void **tmpargs = args;
2884 while (*tmpargs)
2885 {
2886 intptr_t id = (intptr_t) *tmpargs++, val;
2887 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2888 {
2889 val = (intptr_t) *tmpargs++;
2890 len = 2;
2891 }
2892 else
2893 {
2894 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2895 len = 1;
2896 }
2897 num_args += len;
2898 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2899 continue;
2900 val = val > INT_MAX ? INT_MAX : val;
2901 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2902 {
2903 orig_teams = val;
2904 teams_len = len;
2905 }
2906 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2907 {
2908 orig_threads = val;
2909 threads_len = len;
2910 }
2911 }
2912
2913 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2914 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2915 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2916 value could not be determined. No change.
2917 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2918 Set device-specific value.
2919 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2920 No change. */
2921 if (orig_teams == -2)
2922 new_teams = 1;
2923 else if (orig_teams == 0)
2924 {
2925 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2926 if (item != NULL)
2927 new_teams = item->icvs.nteams;
2928 }
2929 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2930 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2931 e.g. a THREAD_LIMIT clause. */
2932 if (orig_teams > -2 && orig_threads == 0)
2933 {
2934 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2935 if (item != NULL)
2936 new_threads = item->icvs.teams_thread_limit;
2937 }
2938
2939 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2940 updated. */
2941 void **new_args = args;
2942 if (orig_teams != new_teams || orig_threads != new_threads)
2943 {
2944 size_t tms_len = (orig_teams == new_teams
2945 ? teams_len
2946 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
2947 ? 1 : 2));
2948 size_t ths_len = (orig_threads == new_threads
2949 ? threads_len
2950 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
2951 ? 1 : 2));
2952 /* One additional item after the last arg must be NULL. */
2953 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
2954 + ths_len + 1;
2955 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
2956
2957 tmpargs = args;
2958 void **tmp_new_args = new_args;
2959 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
2960 too if they have not been changed and skipped otherwise. */
2961 while (*tmpargs)
2962 {
2963 intptr_t id = (intptr_t) *tmpargs;
2964 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
2965 && orig_teams != new_teams)
2966 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
2967 && orig_threads != new_threads))
2968 {
2969 tmpargs++;
2970 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2971 tmpargs++;
2972 }
2973 else
2974 {
2975 *tmp_new_args++ = *tmpargs++;
2976 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2977 *tmp_new_args++ = *tmpargs++;
2978 }
2979 }
2980
2981 /* Add the new TEAMS arg to the new args list if it has been changed. */
2982 if (orig_teams != new_teams)
2983 {
2984 intptr_t new_val = new_teams;
2985 if (tms_len == 1)
2986 {
2987 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
2988 | GOMP_TARGET_ARG_NUM_TEAMS;
2989 *tmp_new_args++ = (void *) new_val;
2990 }
2991 else
2992 {
2993 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
2994 | GOMP_TARGET_ARG_NUM_TEAMS);
2995 *tmp_new_args++ = (void *) new_val;
2996 }
2997 }
2998
2999 /* Add the new THREADS arg to the new args list if it has been changed. */
3000 if (orig_threads != new_threads)
3001 {
3002 intptr_t new_val = new_threads;
3003 if (ths_len == 1)
3004 {
3005 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3006 | GOMP_TARGET_ARG_THREAD_LIMIT;
3007 *tmp_new_args++ = (void *) new_val;
3008 }
3009 else
3010 {
3011 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3012 | GOMP_TARGET_ARG_THREAD_LIMIT);
3013 *tmp_new_args++ = (void *) new_val;
3014 }
3015 }
3016
3017 *tmp_new_args = NULL;
3018 }
3019
001ab12e
FH
3020 flags = clear_unsupported_flags (devicep, flags);
3021
e4606348
JJ
3022 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3023 {
3024 struct gomp_thread *thr = gomp_thread ();
3025 /* Create a team if we don't have any around, as nowait
3026 target tasks make sense to run asynchronously even when
3027 outside of any parallel. */
3028 if (__builtin_expect (thr->ts.team == NULL, 0))
3029 {
3030 struct gomp_team *team = gomp_new_team (1);
3031 struct gomp_task *task = thr->task;
a58a965e 3032 struct gomp_task **implicit_task = &task;
e4606348
JJ
3033 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3034 team->prev_ts = thr->ts;
3035 thr->ts.team = team;
3036 thr->ts.team_id = 0;
3037 thr->ts.work_share = &team->work_shares[0];
3038 thr->ts.last_work_share = NULL;
3039#ifdef HAVE_SYNC_BUILTINS
3040 thr->ts.single_count = 0;
3041#endif
3042 thr->ts.static_trip = 0;
3043 thr->task = &team->implicit_task[0];
3044 gomp_init_task (thr->task, NULL, icv);
a58a965e
JJ
3045 while (*implicit_task
3046 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3047 implicit_task = &(*implicit_task)->parent;
3048 if (*implicit_task)
e4606348 3049 {
a58a965e 3050 thr->task = *implicit_task;
e4606348 3051 gomp_end_task ();
a58a965e 3052 free (*implicit_task);
e4606348
JJ
3053 thr->task = &team->implicit_task[0];
3054 }
3055 else
3056 pthread_setspecific (gomp_thread_destructor, thr);
a58a965e
JJ
3057 if (implicit_task != &task)
3058 {
3059 *implicit_task = thr->task;
3060 thr->task = task;
3061 }
e4606348
JJ
3062 }
3063 if (thr->ts.team
3064 && !thr->task->final_task)
3065 {
3066 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
81476bc4 3067 sizes, kinds, flags, depend, new_args,
e4606348
JJ
3068 GOMP_TARGET_TASK_BEFORE_MAP);
3069 return;
3070 }
3071 }
3072
3073 /* If there are depend clauses, but nowait is not present
3074 (or we are in a final task), block the parent task until the
3075 dependencies are resolved and then just continue with the rest
3076 of the function as if it is a merged task. */
d9a6bd32
JJ
3077 if (depend != NULL)
3078 {
3079 struct gomp_thread *thr = gomp_thread ();
3080 if (thr->task && thr->task->depend_hash)
21f3a236
JJ
3081 {
3082 /* If we might need to wait, copy firstprivate now. */
3083 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3084 &tgt_align, &tgt_size);
3085 if (tgt_align)
3086 {
3087 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3088 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3089 tgt_align, tgt_size);
3090 }
3091 fpc_done = true;
3092 gomp_task_maybe_wait_for_dependencies (depend);
3093 }
d9a6bd32 3094 }
41dbbb37 3095
d84ffc0a 3096 void *fn_addr;
d9a6bd32 3097 if (devicep == NULL
d84ffc0a 3098 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
b2b40051
MJ
3099 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3100 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
41dbbb37 3101 {
21f3a236
JJ
3102 if (!fpc_done)
3103 {
3104 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3105 &tgt_align, &tgt_size);
3106 if (tgt_align)
3107 {
3108 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3109 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3110 tgt_align, tgt_size);
3111 }
3112 }
81476bc4 3113 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
d9a6bd32 3114 return;
41dbbb37 3115 }
1df3f842 3116
b2b40051 3117 struct target_mem_desc *tgt_vars;
275c736e
CLT
3118 htab_t refcount_set = NULL;
3119
b2b40051
MJ
3120 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3121 {
21f3a236
JJ
3122 if (!fpc_done)
3123 {
3124 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3125 &tgt_align, &tgt_size);
3126 if (tgt_align)
3127 {
3128 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3129 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3130 tgt_align, tgt_size);
3131 }
3132 }
b2b40051
MJ
3133 tgt_vars = NULL;
3134 }
3135 else
275c736e
CLT
3136 {
3137 refcount_set = htab_create (mapnum);
3138 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3139 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3140 }
b2b40051
MJ
3141 devicep->run_func (devicep->target_id, fn_addr,
3142 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
81476bc4 3143 new_args);
b2b40051 3144 if (tgt_vars)
275c736e
CLT
3145 {
3146 htab_clear (refcount_set);
3147 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3148 }
3149 if (refcount_set)
3150 htab_free (refcount_set);
81476bc4
MV
3151
3152 /* Copy back ICVs from device to host.
3153 HOST_PTR is expected to exist since it was added in
3154 gomp_load_image_to_device if not already available. */
3155 gomp_copy_back_icvs (devicep, device);
3156
acf0174b
JJ
3157}
3158
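/* Example (illustrative encoder mirroring the tms_len/ths_len logic above):
   values fitting a signed 16-bit range are packed into the ID word via
   GOMP_TARGET_ARG_VALUE_SHIFT; larger values get
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM plus a second element.  Assumes the
   GOMP_TARGET_ARG_* constants from gomp-constants.h.  */
#if 0
#include <stdint.h>

static void **
encode_arg (void **out, intptr_t arg_id, intptr_t val)
{
  if (val > -(1 << 15) && val < (1 << 15))
    /* Small value: pack it into the same word as the ID.  */
    *out++ = (void *) ((val << GOMP_TARGET_ARG_VALUE_SHIFT) | arg_id);
  else
    {
      /* Large value: flag a subsequent parameter and spend two elements.  */
      *out++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM | arg_id);
      *out++ = (void *) val;
    }
  return out;
}
#endif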
ea4b23d9
TB
3159
3160/* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
 3161   keeping track of all variable handling, assuming that reverse offload occurs
 3162   only very rarely. The downside is that the reverse search is slow. */
3163
3164struct gomp_splay_tree_rev_lookup_data {
3165 uintptr_t tgt_start;
3166 uintptr_t tgt_end;
3167 splay_tree_key key;
3168};
3169
3170static int
3171gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3172{
3173 struct gomp_splay_tree_rev_lookup_data *data;
3174 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3175 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3176
3177 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3178 return 0;
3179
3180 size_t j;
3181 for (j = 0; j < key->tgt->list_count; j++)
3182 if (key->tgt->list[j].key == key)
3183 break;
3184 assert (j < key->tgt->list_count);
3185 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3186
3187 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3188 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3189 {
3190 data->key = key;
3191 return 1;
3192 }
3193 return 0;
3194}
3195
3196static inline splay_tree_key
3197gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3198 bool zero_len)
3199{
3200 struct gomp_splay_tree_rev_lookup_data data;
3201 data.key = NULL;
3202 data.tgt_start = tgt_start;
3203 data.tgt_end = tgt_end;
3204
3205 if (tgt_start != tgt_end)
3206 {
3207 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3208 return data.key;
3209 }
3210
3211 data.tgt_end++;
3212 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3213 if (data.key != NULL || zero_len)
3214 return data.key;
3215 data.tgt_end--;
3216
3217 data.tgt_start--;
3218 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3219 return data.key;
3220}
3221
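/* Example (self-contained demo) of the half-open interval test used by the
   reverse lookup above, including the widening trick for zero-length
   lookups: when a boundary match fails, bump the end (and then the start)
   by one and retry.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
overlaps (uint64_t a_start, uint64_t a_end, uint64_t b_start, uint64_t b_end)
{
  /* Half-open intervals [start, end) overlap iff each starts before the
     other ends; the explicit equality also catches identical zero-length
     ranges.  */
  return (a_start == b_start && a_end == b_end)
	 || (a_end > b_start && a_start < b_end);
}

int
main (void)
{
  assert (overlaps (0, 8, 4, 12));	/* Partial overlap.  */
  assert (!overlaps (0, 8, 8, 16));	/* Touching ends do not overlap.  */
  assert (!overlaps (0, 0, 0, 8));	/* Zero-length at the boundary fails...  */
  assert (overlaps (0, 1, 0, 8));	/* ...until the end is bumped by one.  */
  return 0;
}
#endif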
3222struct cpy_data
3223{
3224 uint64_t devaddr;
3225 bool present, aligned;
3226};
3227
3228
 3229/* Search just-mapped reverse-offload data; returns the index if found,
3230 otherwise >= n. */
3231
3232static inline int
3233gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3234 unsigned short *kinds, uint64_t *sizes, size_t n,
3235 uint64_t tgt_start, uint64_t tgt_end)
3236{
3237 const bool short_mapkind = true;
3238 const int typemask = short_mapkind ? 0xff : 0x7;
3239 size_t i;
3240 for (i = 0; i < n; i++)
3241 {
3242 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3243 == GOMP_MAP_STRUCT);
3244 uint64_t dev_end;
3245 if (!is_struct)
3246 dev_end = d[i].devaddr + sizes[i];
3247 else
3248 {
3249 if (i + sizes[i] < n)
3250 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3251 else
3252 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3253 }
3254 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3255 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3256 break;
3257 if (is_struct)
3258 i += sizes[i];
3259 }
3260 return i;
3261}
3262
3263static inline int
3264gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3265 unsigned short *kinds, uint64_t *sizes,
3266 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3267 bool zero_len)
3268{
3269 size_t i;
3270 if (tgt_start != tgt_end)
3271 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3272 tgt_start, tgt_end);
3273 tgt_end++;
3274 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3275 tgt_start, tgt_end);
3276 if (i < n || zero_len)
3277 return i;
3278 tgt_end--;
3279
3280 tgt_start--;
3281 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3282 tgt_start, tgt_end);
3283}
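
/* Editorial note with a sketch, not part of libgomp: for a zero-length
 lookup (tgt_start == tgt_end), both lookup helpers above first widen
 the interval one byte upwards and then, if that fails, one byte
 downwards, so that a pointer to the first byte of a mapped block or
 just past its end still resolves. A zero-length query for a device
 address P would look like (illustrative only): */
#if 0
 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
			    p, p, /* zero_len */ true);
#endif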
3284
131d18e9 3285/* Handle reverse offload. This is called by the device plugins for a
ea4b23d9
TB
3286 reverse offload; it is not called if the outer target runs on the host.
3287 The mapping is simplified: device-affecting constructs (except for target
3288 with device(ancestor:1)) must not be encountered; in particular, no
3289 target (enter/exit) data constructs. */
131d18e9
TB
3290
3291void
ea4b23d9
TB
3292gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3293 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3294 void (*dev_to_host_cpy) (void *, const void *, size_t, void*),
3295 void (*host_to_dev_cpy) (void *, const void *, size_t, void*),
3296 void *token)
131d18e9 3297{
ea4b23d9
TB
3298 /* Return early if there is no offload code. */
3299 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3300 return;
3301 /* Currently, this fails because of calculate_firstprivate_requirements
3302 below; it could be fixed, but additional code would need to be updated to
3303 handle 32-bit hosts - thus, it is not worthwhile. */
3304 if (sizeof (void *) != sizeof (uint64_t))
3305 gomp_fatal ("Reverse offload on 32-bit hosts is not supported.");
3306
3307 struct cpy_data *cdata = NULL;
3308 uint64_t *devaddrs;
3309 uint64_t *sizes;
3310 unsigned short *kinds;
3311 const bool short_mapkind = true;
3312 const int typemask = short_mapkind ? 0xff : 0x7;
3313 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3314
3315 reverse_splay_tree_key n;
3316 struct reverse_splay_tree_key_s k;
3317 k.dev = fn_ptr;
3318
3319 gomp_mutex_lock (&devicep->lock);
3320 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3321 gomp_mutex_unlock (&devicep->lock);
3322
3323 if (n == NULL)
3324 gomp_fatal ("Cannot find reverse-offload function");
3325 void (*host_fn)() = (void (*)()) n->k->host_start;
3326
3327 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3328 {
3329 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3330 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3331 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3332 }
3333 else
3334 {
3335 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3336 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3337 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3338 if (dev_to_host_cpy)
3339 {
3340 dev_to_host_cpy (devaddrs, (const void *) (uintptr_t) devaddrs_ptr,
3341 mapnum * sizeof (uint64_t), token);
3342 dev_to_host_cpy (sizes, (const void *) (uintptr_t) sizes_ptr,
3343 mapnum * sizeof (uint64_t), token);
3344 dev_to_host_cpy (kinds, (const void *) (uintptr_t) kinds_ptr,
3345 mapnum * sizeof (unsigned short), token);
3346 }
3347 else
3348 {
3349 gomp_copy_dev2host (devicep, NULL, devaddrs,
3350 (const void *) (uintptr_t) devaddrs_ptr,
3351 mapnum * sizeof (uint64_t));
3352 gomp_copy_dev2host (devicep, NULL, sizes,
3353 (const void *) (uintptr_t) sizes_ptr,
3354 mapnum * sizeof (uint64_t));
3355 gomp_copy_dev2host (devicep, NULL, kinds, (const void *) (uintptr_t) kinds_ptr,
3356 mapnum * sizeof (unsigned short));
3357 }
3358 }
3359
3360 size_t tgt_align = 0, tgt_size = 0;
3361
3362 /* If actually executed on 32-bit systems, the casts lead to wrong code;
3363 but 32-bit with offloading is not supported; see the top of this function. */
3364 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3365 (void *) (uintptr_t) kinds,
3366 &tgt_align, &tgt_size);
3367
3368 if (tgt_align)
3369 {
3370 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3371 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3372 if (al)
3373 tgt += tgt_align - al;
3374 tgt_size = 0;
3375 for (uint64_t i = 0; i < mapnum; i++)
3376 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3377 && devaddrs[i] != 0)
3378 {
3379 size_t align = (size_t) 1 << (kinds[i] >> 8);
3380 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3381 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3382 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3383 (size_t) sizes[i]);
3384 else if (dev_to_host_cpy)
3385 dev_to_host_cpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3386 (size_t) sizes[i], token);
3387 else
3388 gomp_copy_dev2host (devicep, NULL, tgt + tgt_size,
3389 (void *) (uintptr_t) devaddrs[i],
3390 (size_t) sizes[i]);
3391 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3392 tgt_size = tgt_size + sizes[i];
3393 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3394 && i + 1 < mapnum
3395 && ((get_kind (short_mapkind, kinds, i) & typemask)
3396 == GOMP_MAP_ATTACH))
3397 {
3398 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3399 = (uint64_t) devaddrs[i];
3400 ++i;
3401 }
3402 }
3403 }
3404
3405 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3406 {
3407 size_t j, struct_cpy = 0;
3408 splay_tree_key n2;
3409 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3410 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3411 gomp_mutex_lock (&devicep->lock);
3412 for (uint64_t i = 0; i < mapnum; i++)
3413 {
3414 if (devaddrs[i] == 0)
3415 continue;
3416 n = NULL;
3417 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3418 switch (kind)
3419 {
3420 case GOMP_MAP_FIRSTPRIVATE:
3421 case GOMP_MAP_FIRSTPRIVATE_INT:
3422 continue;
3423
3424 case GOMP_MAP_DELETE:
3425 case GOMP_MAP_RELEASE:
3426 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3427 /* Assume it is present; look it up, but otherwise ignore it. */
3428 case GOMP_MAP_ALLOC:
3429 case GOMP_MAP_FROM:
3430 case GOMP_MAP_FORCE_ALLOC:
3431 case GOMP_MAP_FORCE_FROM:
3432 case GOMP_MAP_ALWAYS_FROM:
3433 case GOMP_MAP_TO:
3434 case GOMP_MAP_TOFROM:
3435 case GOMP_MAP_FORCE_TO:
3436 case GOMP_MAP_FORCE_TOFROM:
3437 case GOMP_MAP_ALWAYS_TO:
3438 case GOMP_MAP_ALWAYS_TOFROM:
3439 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3440 cdata[i].devaddr = devaddrs[i];
3441 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3442 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3443 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3444 devaddrs[i],
3445 devaddrs[i] + sizes[i], zero_len);
3446 if (j < i)
3447 {
3448 n2 = NULL;
3449 cdata[i].present = true;
3450 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3451 }
3452 else
3453 {
3454 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3455 devaddrs[i],
3456 devaddrs[i] + sizes[i], zero_len);
3457 cdata[i].present = n2 != NULL;
3458 }
3459 if (!cdata[i].present
3460 && kind != GOMP_MAP_DELETE
3461 && kind != GOMP_MAP_RELEASE
3462 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3463 {
3464 cdata[i].aligned = true;
3465 size_t align = (size_t) 1 << (kinds[i] >> 8);
3466 devaddrs[i]
3467 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3468 sizes[i]);
3469 }
3470 else if (n2 != NULL)
3471 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3472 - (n2->tgt->tgt_start + n2->tgt_offset));
3473 if (((!cdata[i].present || struct_cpy)
3474 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3475 || kind == GOMP_MAP_FORCE_TO
3476 || kind == GOMP_MAP_FORCE_TOFROM
3477 || kind == GOMP_MAP_ALWAYS_TO
3478 || kind == GOMP_MAP_ALWAYS_TOFROM)
3479 {
3480 if (dev_to_host_cpy)
3481 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3482 (void *) (uintptr_t) cdata[i].devaddr,
3483 sizes[i], token);
3484 else
3485 gomp_copy_dev2host (devicep, NULL,
3486 (void *) (uintptr_t) devaddrs[i],
3487 (void *) (uintptr_t) cdata[i].devaddr,
3488 sizes[i]);
3489 }
3490 if (struct_cpy)
3491 struct_cpy--;
3492 break;
3493 case GOMP_MAP_ATTACH:
3494 case GOMP_MAP_POINTER:
3495 case GOMP_MAP_ALWAYS_POINTER:
3496 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3497 devaddrs[i] + sizes[i],
3498 devaddrs[i] + sizes[i]
3499 + sizeof (void*), false);
3500 cdata[i].present = n2 != NULL;
3501 cdata[i].devaddr = devaddrs[i];
3502 if (n2)
3503 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3504 - (n2->tgt->tgt_start + n2->tgt_offset));
3505 else
3506 {
3507 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3508 devaddrs[i] + sizes[i],
3509 devaddrs[i] + sizes[i]
3510 + sizeof (void*), false);
3511 if (j < i)
3512 {
3513 cdata[i].present = true;
3514 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3515 - cdata[j].devaddr);
3516 }
3517 }
3518 if (!cdata[i].present)
3519 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3520 /* Assume that when present, the pointer is already correct. */
3521 if (!n2)
3522 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3523 = devaddrs[i-1];
3524 break;
3525 case GOMP_MAP_TO_PSET:
3526 /* Assume that when present, the pointers are fine and no 'to:'
3527 is required. */
3528 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3529 devaddrs[i], devaddrs[i] + sizes[i],
3530 false);
3531 cdata[i].present = n2 != NULL;
3532 cdata[i].devaddr = devaddrs[i];
3533 if (n2)
3534 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3535 - (n2->tgt->tgt_start + n2->tgt_offset));
3536 else
3537 {
3538 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3539 devaddrs[i],
3540 devaddrs[i] + sizes[i], false);
3541 if (j < i)
3542 {
3543 cdata[i].present = true;
3544 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3545 - cdata[j].devaddr);
3546 }
3547 }
3548 if (!cdata[i].present)
3549 {
3550 cdata[i].aligned = true;
3551 size_t align = (size_t) 1 << (kinds[i] >> 8);
3552 devaddrs[i]
3553 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3554 sizes[i]);
3555 if (dev_to_host_cpy)
3556 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3557 (void *) (uintptr_t) cdata[i].devaddr,
3558 sizes[i], token);
3559 else
3560 gomp_copy_dev2host (devicep, NULL,
3561 (void *) (uintptr_t) devaddrs[i],
3562 (void *) (uintptr_t) cdata[i].devaddr,
3563 sizes[i]);
3564 }
3565 for (j = i + 1; j < mapnum; j++)
3566 {
3567 kind = get_kind (short_mapkind, kinds, j) & typemask;
3568 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3569 && !GOMP_MAP_POINTER_P (kind))
3570 break;
3571 if (devaddrs[j] < devaddrs[i])
3572 break;
3573 if (cdata[i].present)
3574 continue;
3575 if (devaddrs[j] == 0)
3576 {
3577 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3578 continue;
3579 }
3580 int k;
3581 n2 = NULL;
3582 cdata[i].present = true;
3583 cdata[j].devaddr = devaddrs[j];
3584 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3585 devaddrs[j],
3586 devaddrs[j] + sizeof (void*),
3587 false);
3588 if (k < j)
3589 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3590 - cdata[k].devaddr);
3591 else
3592 {
3593 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3594 devaddrs[j],
3595 devaddrs[j] + sizeof (void*),
3596 false);
3597 if (n2 == NULL)
3598 {
3599 gomp_mutex_unlock (&devicep->lock);
3600 gomp_fatal ("Pointer target wasn't mapped");
3601 }
3602 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3603 - (n2->tgt->tgt_start + n2->tgt_offset));
3604 }
3605 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3606 = (void *) (uintptr_t) devaddrs[j];
3607 }
3608 i = j - 1;
3609 break;
3610 case GOMP_MAP_STRUCT:
3611 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3612 devaddrs[i + sizes[i]]
3613 + sizes[i + sizes[i]], false);
3614 cdata[i].present = n2 != NULL;
3615 cdata[i].devaddr = devaddrs[i];
3616 struct_cpy = cdata[i].present ? 0 : sizes[i];
3617 if (!n2)
3618 {
3619 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3620 - devaddrs[i+1]
3621 + sizes[i + sizes[i]]);
3622 size_t align = (size_t) 1 << (kinds[i] >> 8);
3623 cdata[i].aligned = true;
3624 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3625 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3626 }
3627 else
3628 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3629 - (n2->tgt->tgt_start + n2->tgt_offset));
3630 break;
3631 default:
3632 gomp_mutex_unlock (&devicep->lock);
3633 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3634 }
3635 }
3636 gomp_mutex_unlock (&devicep->lock);
3637 }
3638
3639 host_fn (devaddrs);
3640
3641 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3642 {
3643 uint64_t struct_cpy = 0;
3644 bool clean_struct = false;
3645 for (uint64_t i = 0; i < mapnum; i++)
3646 {
3647 if (cdata[i].devaddr == 0)
3648 continue;
3649 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3650 bool copy = !cdata[i].present || struct_cpy;
3651 switch (kind)
3652 {
3653 case GOMP_MAP_FORCE_FROM:
3654 case GOMP_MAP_FORCE_TOFROM:
3655 case GOMP_MAP_ALWAYS_FROM:
3656 case GOMP_MAP_ALWAYS_TOFROM:
3657 copy = true;
3658 /* FALLTHRU */
3659 case GOMP_MAP_FROM:
3660 case GOMP_MAP_TOFROM:
3661 if (copy && host_to_dev_cpy)
3662 host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr,
3663 (void *) (uintptr_t) devaddrs[i],
3664 sizes[i], token);
3665 else if (copy)
3666 gomp_copy_host2dev (devicep, NULL,
3667 (void *) (uintptr_t) cdata[i].devaddr,
3668 (void *) (uintptr_t) devaddrs[i],
3669 sizes[i], false, NULL);
3670 default:
3671 break;
3672 }
3673 if (struct_cpy)
3674 {
3675 struct_cpy--;
3676 continue;
3677 }
3678 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3679 {
3680 clean_struct = true;
3681 struct_cpy = sizes[i];
3682 }
3683 else if (cdata[i].aligned)
3684 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3685 else if (!cdata[i].present)
3686 free ((void *) (uintptr_t) devaddrs[i]);
3687 }
3688 if (clean_struct)
3689 for (uint64_t i = 0; i < mapnum; i++)
3690 if (!cdata[i].present
3691 && ((get_kind (short_mapkind, kinds, i) & typemask)
3692 == GOMP_MAP_STRUCT))
3693 {
3694 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3695 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3696 }
3697
3698 free (devaddrs);
3699 free (sizes);
3700 free (kinds);
3701 }
131d18e9
TB
3702}
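
/* Editorial sketch, not part of libgomp: gomp_target_rev serves a
 user-level reverse-offload region such as the following, where the
 inner construct runs back on the host while the outer one runs on
 the device. The function printf_from_host is a hypothetical
 host-side helper used only for illustration. */
#if 0
#pragma omp requires reverse_offload
void example (void)
{
 #pragma omp target
 {
   /* ... device code ... */
   #pragma omp target device (ancestor: 1)
   printf_from_host ();	/* Hypothetical host-side function. */
 }
}
#endif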
3703
e01d41e5 3704/* Host fallback for GOMP_target_data{,_ext} routines. */
d9a6bd32
JJ
3705
3706static void
1bfc07d1 3707gomp_target_data_fallback (struct gomp_device_descr *devicep)
d9a6bd32
JJ
3708{
3709 struct gomp_task_icv *icv = gomp_icv (false);
1bfc07d1
KCY
3710
3711 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3712 && devicep != NULL)
3713 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3714 "be used for offloading");
3715
d9a6bd32
JJ
3716 if (icv->target_data)
3717 {
3718 /* Even when doing a host fallback, if there are any active
3719 #pragma omp target data constructs, we need to remember the
3720 new #pragma omp target data; otherwise GOMP_target_end_data
3721 would get out of sync. */
3722 struct target_mem_desc *tgt
3723 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
275c736e 3724 NULL, GOMP_MAP_VARS_DATA);
d9a6bd32
JJ
3725 tgt->prev = icv->target_data;
3726 icv->target_data = tgt;
3727 }
3728}
3729
acf0174b 3730void
128b26dc 3731GOMP_target_data (int device, const void *unused, size_t mapnum,
acf0174b
JJ
3732 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3733{
1158fe43 3734 struct gomp_device_descr *devicep = resolve_device (device, true);
41dbbb37
TS
3735
3736 if (devicep == NULL
b2b40051
MJ
3737 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3738 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1bfc07d1 3739 return gomp_target_data_fallback (devicep);
41dbbb37 3740
1df3f842 3741 struct target_mem_desc *tgt
41dbbb37 3742 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
275c736e 3743 NULL, GOMP_MAP_VARS_DATA);
d9a6bd32
JJ
3744 struct gomp_task_icv *icv = gomp_icv (true);
3745 tgt->prev = icv->target_data;
3746 icv->target_data = tgt;
3747}
3748
3749void
e01d41e5
JJ
3750GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3751 size_t *sizes, unsigned short *kinds)
d9a6bd32 3752{
1158fe43 3753 struct gomp_device_descr *devicep = resolve_device (device, true);
d9a6bd32
JJ
3754
3755 if (devicep == NULL
b2b40051
MJ
3756 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3757 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1bfc07d1 3758 return gomp_target_data_fallback (devicep);
d9a6bd32
JJ
3759
3760 struct target_mem_desc *tgt
3761 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
275c736e 3762 NULL, GOMP_MAP_VARS_DATA);
1df3f842
JJ
3763 struct gomp_task_icv *icv = gomp_icv (true);
3764 tgt->prev = icv->target_data;
3765 icv->target_data = tgt;
acf0174b
JJ
3766}
3767
3768void
3769GOMP_target_end_data (void)
3770{
1df3f842
JJ
3771 struct gomp_task_icv *icv = gomp_icv (false);
3772 if (icv->target_data)
3773 {
3774 struct target_mem_desc *tgt = icv->target_data;
3775 icv->target_data = tgt->prev;
275c736e 3776 gomp_unmap_vars (tgt, true, NULL);
1df3f842 3777 }
acf0174b
JJ
3778}
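
/* Editorial sketch, not part of libgomp: for user code like
 "#pragma omp target data map(tofrom: a[0:n])" the compiler emits, in
 essence, the call pair below; this is why the host fallback above
 still queues a dummy target_mem_desc, so that GOMP_target_end_data
 pops the matching entry. Variable names are illustrative. */
#if 0
 void *hostaddrs[1] = { a };
 size_t sizes[1] = { n * sizeof (*a) };
 unsigned short kinds[1] = { GOMP_MAP_TOFROM };
 GOMP_target_data_ext (device, 1, hostaddrs, sizes, kinds);
 /* ... body of the data region ... */
 GOMP_target_end_data ();
#endif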
3779
3780void
128b26dc 3781GOMP_target_update (int device, const void *unused, size_t mapnum,
acf0174b
JJ
3782 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3783{
1158fe43 3784 struct gomp_device_descr *devicep = resolve_device (device, true);
41dbbb37
TS
3785
3786 if (devicep == NULL
b2b40051
MJ
3787 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3788 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1df3f842
JJ
3789 return;
3790
d9a6bd32
JJ
3791 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3792}
3793
3794void
e01d41e5
JJ
3795GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3796 size_t *sizes, unsigned short *kinds,
3797 unsigned int flags, void **depend)
d9a6bd32 3798{
1158fe43 3799 struct gomp_device_descr *devicep = resolve_device (device, true);
d9a6bd32
JJ
3800
3801 /* If there are depend clauses, but nowait is not present,
3802 block the parent task until the dependencies are resolved
3803 and then just continue with the rest of the function as if it
3804 is a merged task. Until we are able to schedule tasks during
3805 variable mapping or unmapping, ignore nowait if depend clauses
3806 are not present. */
3807 if (depend != NULL)
3808 {
3809 struct gomp_thread *thr = gomp_thread ();
3810 if (thr->task && thr->task->depend_hash)
3811 {
3812 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3813 && thr->ts.team
3814 && !thr->task->final_task)
3815 {
e4606348
JJ
3816 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3817 mapnum, hostaddrs, sizes, kinds,
3818 flags | GOMP_TARGET_FLAG_UPDATE,
b2b40051 3819 depend, NULL, GOMP_TARGET_TASK_DATA))
e4606348
JJ
3820 return;
3821 }
3822 else
3823 {
3824 struct gomp_team *team = thr->ts.team;
3825 /* If parallel or taskgroup has been cancelled, don't start new
3826 tasks. */
28567c40
JJ
3827 if (__builtin_expect (gomp_cancel_var, 0) && team)
3828 {
3829 if (gomp_team_barrier_cancelled (&team->barrier))
3830 return;
3831 if (thr->task->taskgroup)
3832 {
3833 if (thr->task->taskgroup->cancelled)
3834 return;
3835 if (thr->task->taskgroup->workshare
3836 && thr->task->taskgroup->prev
3837 && thr->task->taskgroup->prev->cancelled)
3838 return;
3839 }
3840 }
e4606348
JJ
3841
3842 gomp_task_maybe_wait_for_dependencies (depend);
d9a6bd32 3843 }
d9a6bd32
JJ
3844 }
3845 }
3846
3847 if (devicep == NULL
b2b40051
MJ
3848 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3849 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
3850 return;
3851
3852 struct gomp_thread *thr = gomp_thread ();
3853 struct gomp_team *team = thr->ts.team;
3854 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
28567c40
JJ
3855 if (__builtin_expect (gomp_cancel_var, 0) && team)
3856 {
3857 if (gomp_team_barrier_cancelled (&team->barrier))
3858 return;
3859 if (thr->task->taskgroup)
3860 {
3861 if (thr->task->taskgroup->cancelled)
3862 return;
3863 if (thr->task->taskgroup->workshare
3864 && thr->task->taskgroup->prev
3865 && thr->task->taskgroup->prev->cancelled)
3866 return;
3867 }
3868 }
d9a6bd32
JJ
3869
3870 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3871}
3872
3873static void
3874gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
275c736e
CLT
3875 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3876 htab_t *refcount_set)
d9a6bd32
JJ
3877{
3878 const int typemask = 0xff;
3879 size_t i;
41dbbb37 3880 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
3881 if (devicep->state == GOMP_DEVICE_FINALIZED)
3882 {
3883 gomp_mutex_unlock (&devicep->lock);
3884 return;
3885 }
3886
9e628024
CLT
3887 for (i = 0; i < mapnum; i++)
3888 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3889 {
3890 struct splay_tree_key_s cur_node;
3891 cur_node.host_start = (uintptr_t) hostaddrs[i];
3892 cur_node.host_end = cur_node.host_start + sizeof (void *);
3893 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3894
3895 if (n)
3896 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3897 false, NULL);
3898 }
3899
275c736e
CLT
3900 int nrmvars = 0;
3901 splay_tree_key remove_vars[mapnum];
3902
d9a6bd32
JJ
3903 for (i = 0; i < mapnum; i++)
3904 {
3905 struct splay_tree_key_s cur_node;
3906 unsigned char kind = kinds[i] & typemask;
3907 switch (kind)
3908 {
3909 case GOMP_MAP_FROM:
3910 case GOMP_MAP_ALWAYS_FROM:
3911 case GOMP_MAP_DELETE:
3912 case GOMP_MAP_RELEASE:
3913 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3914 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3915 cur_node.host_start = (uintptr_t) hostaddrs[i];
3916 cur_node.host_end = cur_node.host_start + sizes[i];
3917 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3918 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
e01d41e5 3919 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
d9a6bd32
JJ
3920 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3921 if (!k)
3922 continue;
3923
275c736e
CLT
3924 bool delete_p = (kind == GOMP_MAP_DELETE
3925 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3926 bool do_copy, do_remove;
3927 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3928 &do_remove);
d9a6bd32 3929
275c736e 3930 if ((kind == GOMP_MAP_FROM && do_copy)
d9a6bd32 3931 || kind == GOMP_MAP_ALWAYS_FROM)
6c039937
CLT
3932 {
3933 if (k->aux && k->aux->attach_count)
3934 {
3935 /* We have to be careful not to overwrite still attached
3936 pointers during the copyback to host. */
3937 uintptr_t addr = k->host_start;
3938 while (addr < k->host_end)
3939 {
3940 size_t i = (addr - k->host_start) / sizeof (void *);
3941 if (k->aux->attach_count[i] == 0)
3942 gomp_copy_dev2host (devicep, NULL, (void *) addr,
3943 (void *) (k->tgt->tgt_start
3944 + k->tgt_offset
3945 + addr - k->host_start),
3946 sizeof (void *));
3947 addr += sizeof (void *);
3948 }
3949 }
3950 else
3951 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3952 (void *) (k->tgt->tgt_start + k->tgt_offset
3953 + cur_node.host_start
3954 - k->host_start),
3955 cur_node.host_end - cur_node.host_start);
3956 }
275c736e
CLT
3957
3958 /* Structure element lists are removed altogether at once, which
3959 may cause immediate deallocation of the target_mem_desc, causing
3960 errors if we still have subsequent element siblings to copy back.
3961 While we're at it, it also seems more disciplined to simply
3962 queue all removals together for processing below.
3963
3964 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3965 not have this problem, since they maintain an additional
3966 tgt->refcount = 1 reference to the target_mem_desc to start with.
3967 */
3968 if (do_remove)
3969 remove_vars[nrmvars++] = k;
9e628024 3970 break;
d9a6bd32 3971
9e628024 3972 case GOMP_MAP_DETACH:
d9a6bd32
JJ
3973 break;
3974 default:
3975 gomp_mutex_unlock (&devicep->lock);
3976 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3977 kind);
3978 }
3979 }
3980
275c736e
CLT
3981 for (int i = 0; i < nrmvars; i++)
3982 gomp_remove_var (devicep, remove_vars[i]);
3983
41dbbb37 3984 gomp_mutex_unlock (&devicep->lock);
d9a6bd32 3985}
41dbbb37 3986
d9a6bd32
JJ
3987void
3988GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3989 size_t *sizes, unsigned short *kinds,
3990 unsigned int flags, void **depend)
3991{
1158fe43 3992 struct gomp_device_descr *devicep = resolve_device (device, true);
d9a6bd32
JJ
3993
3994 /* If there are depend clauses, but nowait is not present,
3995 block the parent task until the dependencies are resolved
3996 and then just continue with the rest of the function as if it
3997 is a merged task. Until we are able to schedule task during
3998 variable mapping or unmapping, ignore nowait if depend clauses
3999 are not present. */
4000 if (depend != NULL)
4001 {
4002 struct gomp_thread *thr = gomp_thread ();
4003 if (thr->task && thr->task->depend_hash)
4004 {
4005 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4006 && thr->ts.team
4007 && !thr->task->final_task)
4008 {
e4606348
JJ
4009 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4010 mapnum, hostaddrs, sizes, kinds,
b2b40051 4011 flags, depend, NULL,
e4606348
JJ
4012 GOMP_TARGET_TASK_DATA))
4013 return;
4014 }
4015 else
4016 {
4017 struct gomp_team *team = thr->ts.team;
4018 /* If parallel or taskgroup has been cancelled, don't start new
4019 tasks. */
28567c40
JJ
4020 if (__builtin_expect (gomp_cancel_var, 0) && team)
4021 {
4022 if (gomp_team_barrier_cancelled (&team->barrier))
4023 return;
4024 if (thr->task->taskgroup)
4025 {
4026 if (thr->task->taskgroup->cancelled)
4027 return;
4028 if (thr->task->taskgroup->workshare
4029 && thr->task->taskgroup->prev
4030 && thr->task->taskgroup->prev->cancelled)
4031 return;
4032 }
4033 }
e4606348
JJ
4034
4035 gomp_task_maybe_wait_for_dependencies (depend);
d9a6bd32 4036 }
d9a6bd32
JJ
4037 }
4038 }
4039
4040 if (devicep == NULL
b2b40051
MJ
4041 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4042 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4043 return;
4044
4045 struct gomp_thread *thr = gomp_thread ();
4046 struct gomp_team *team = thr->ts.team;
4047 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
28567c40
JJ
4048 if (__builtin_expect (gomp_cancel_var, 0) && team)
4049 {
4050 if (gomp_team_barrier_cancelled (&team->barrier))
4051 return;
4052 if (thr->task->taskgroup)
4053 {
4054 if (thr->task->taskgroup->cancelled)
4055 return;
4056 if (thr->task->taskgroup->workshare
4057 && thr->task->taskgroup->prev
4058 && thr->task->taskgroup->prev->cancelled)
4059 return;
4060 }
4061 }
d9a6bd32 4062
275c736e
CLT
4063 htab_t refcount_set = htab_create (mapnum);
4064
689418b9
TB
4065 /* The variables are mapped separately such that they can be released
4066 independently. */
4067 size_t i, j;
d9a6bd32
JJ
4068 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4069 for (i = 0; i < mapnum; i++)
4070 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4071 {
4072 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
275c736e
CLT
4073 &kinds[i], true, &refcount_set,
4074 GOMP_MAP_VARS_ENTER_DATA);
d9a6bd32
JJ
4075 i += sizes[i];
4076 }
689418b9
TB
4077 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4078 {
4079 for (j = i + 1; j < mapnum; j++)
972da557
TB
4080 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4081 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
689418b9
TB
4082 break;
4083 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
275c736e
CLT
4084 &kinds[i], true, &refcount_set,
4085 GOMP_MAP_VARS_ENTER_DATA);
689418b9
TB
4086 i += j - i - 1;
4087 }
9e628024
CLT
4088 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
4089 {
4090 /* An attach operation must be processed together with the mapped
4091 base-pointer list item. */
4092 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
275c736e 4093 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
9e628024
CLT
4094 i += 1;
4095 }
d9a6bd32
JJ
4096 else
4097 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
275c736e 4098 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
d9a6bd32 4099 else
275c736e
CLT
4100 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4101 htab_free (refcount_set);
d9a6bd32
JJ
4102}
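
/* Editorial sketch, not part of libgomp: roughly what the compiler
 emits for "#pragma omp target enter data map(to: a[0:n])" without
 depend/nowait clauses; setting GOMP_TARGET_FLAG_EXIT_DATA in FLAGS
 selects the unmapping path of gomp_exit_data instead. */
#if 0
 GOMP_target_enter_exit_data (device, 1, hostaddrs, sizes, kinds,
			      0 /* flags */, NULL /* depend */);
#endif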
4103
e4606348 4104bool
d9a6bd32
JJ
4105gomp_target_task_fn (void *data)
4106{
4107 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
e4606348
JJ
4108 struct gomp_device_descr *devicep = ttask->devicep;
4109
d9a6bd32
JJ
4110 if (ttask->fn != NULL)
4111 {
d84ffc0a 4112 void *fn_addr;
e4606348 4113 if (devicep == NULL
d84ffc0a 4114 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
b2b40051
MJ
4115 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4116 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
e4606348
JJ
4117 {
4118 ttask->state = GOMP_TARGET_TASK_FALLBACK;
aea72386
JJ
4119 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4120 ttask->args);
e4606348
JJ
4121 return false;
4122 }
4123
4124 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4125 {
b2b40051 4126 if (ttask->tgt)
275c736e 4127 gomp_unmap_vars (ttask->tgt, true, NULL);
e4606348
JJ
4128 return false;
4129 }
4130
b2b40051
MJ
4131 void *actual_arguments;
4132 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4133 {
4134 ttask->tgt = NULL;
b2b40051
MJ
4135 actual_arguments = ttask->hostaddrs;
4136 }
4137 else
4138 {
4139 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4140 NULL, ttask->sizes, ttask->kinds, true,
275c736e 4141 NULL, GOMP_MAP_VARS_TARGET);
b2b40051
MJ
4142 actual_arguments = (void *) ttask->tgt->tgt_start;
4143 }
e4606348
JJ
4144 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4145
001ab12e 4146 assert (devicep->async_run_func);
b2b40051
MJ
4147 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4148 ttask->args, (void *) ttask);
e4606348 4149 return true;
d9a6bd32 4150 }
e4606348 4151 else if (devicep == NULL
b2b40051
MJ
4152 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4153 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
e4606348 4154 return false;
d9a6bd32
JJ
4155
4156 size_t i;
4157 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
e4606348 4158 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
d9a6bd32 4159 ttask->kinds, true);
d9a6bd32 4160 else
275c736e
CLT
4161 {
4162 htab_t refcount_set = htab_create (ttask->mapnum);
4163 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4164 for (i = 0; i < ttask->mapnum; i++)
4165 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4166 {
4167 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4168 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4169 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4170 i += ttask->sizes[i];
4171 }
4172 else
4173 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4174 &ttask->kinds[i], true, &refcount_set,
4175 GOMP_MAP_VARS_ENTER_DATA);
4176 else
4177 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4178 ttask->kinds, &refcount_set);
4179 htab_free (refcount_set);
4180 }
e4606348 4181 return false;
acf0174b
JJ
4182}
4183
4184void
4185GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4186{
4187 if (thread_limit)
4188 {
4189 struct gomp_task_icv *icv = gomp_icv (true);
4190 icv->thread_limit_var
4191 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4192 }
4193 (void) num_teams;
4194}
1df3f842 4195
7d6da11f
JJ
4196bool
4197GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4198 unsigned int thread_limit, bool first)
4199{
4200 struct gomp_thread *thr = gomp_thread ();
4201 if (first)
4202 {
4203 if (thread_limit)
4204 {
4205 struct gomp_task_icv *icv = gomp_icv (true);
4206 icv->thread_limit_var
4207 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4208 }
4209 (void) num_teams_high;
4210 if (num_teams_low == 0)
4211 num_teams_low = 1;
4212 thr->num_teams = num_teams_low - 1;
4213 thr->team_num = 0;
4214 }
4215 else if (thr->team_num == thr->num_teams)
4216 return false;
4217 else
4218 ++thr->team_num;
4219 return true;
4220}
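
/* Editorial sketch, not part of libgomp: host-fallback code generated
 for "#pragma omp teams num_teams(lo:hi)" runs the team body
 sequentially, calling GOMP_teams4 once with FIRST set and then
 repeatedly with FIRST clear until it returns false; the exact shape
 of the generated code may differ between compiler versions. */
#if 0
 if (GOMP_teams4 (lo, hi, thread_limit, true))
   do
     {
	/* ... teams body; omp_get_team_num () reflects thr->team_num. */
     }
   while (GOMP_teams4 (lo, hi, thread_limit, false));
#endif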
4221
d9a6bd32
JJ
4222void *
4223omp_target_alloc (size_t size, int device_num)
4224{
1158fe43
JJ
4225 if (device_num == omp_initial_device
4226 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4227 return malloc (size);
4228
1158fe43 4229 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4230 if (devicep == NULL)
4231 return NULL;
4232
b2b40051
MJ
4233 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4234 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4235 return malloc (size);
4236
4237 gomp_mutex_lock (&devicep->lock);
4238 void *ret = devicep->alloc_func (devicep->target_id, size);
4239 gomp_mutex_unlock (&devicep->lock);
4240 return ret;
4241}
4242
4243void
4244omp_target_free (void *device_ptr, int device_num)
4245{
1158fe43
JJ
4246 if (device_num == omp_initial_device
4247 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4248 {
4249 free (device_ptr);
4250 return;
4251 }
4252
1158fe43
JJ
4253 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4254 if (devicep == NULL || device_ptr == NULL)
d9a6bd32
JJ
4255 return;
4256
b2b40051
MJ
4257 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4258 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4259 {
4260 free (device_ptr);
4261 return;
4262 }
4263
4264 gomp_mutex_lock (&devicep->lock);
6ce13072 4265 gomp_free_device_memory (devicep, device_ptr);
d9a6bd32
JJ
4266 gomp_mutex_unlock (&devicep->lock);
4267}
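
/* Editorial sketch, not part of libgomp: the typical user-level pairing
 of omp_target_alloc and omp_target_free; N is assumed to be in scope. */
#if 0
 int d = omp_get_default_device ();
 double *p = (double *) omp_target_alloc (n * sizeof (double), d);
 if (p != NULL)
   {
     #pragma omp target is_device_ptr(p) device(d)
     for (size_t i = 0; i < n; i++)
	p[i] = 0.0;
     omp_target_free (p, d);
   }
#endif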
4268
4269int
28567c40 4270omp_target_is_present (const void *ptr, int device_num)
d9a6bd32 4271{
1158fe43
JJ
4272 if (device_num == omp_initial_device
4273 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4274 return 1;
4275
1158fe43 4276 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4277 if (devicep == NULL)
4278 return 0;
4279
1158fe43
JJ
4280 if (ptr == NULL)
4281 return 1;
4282
b2b40051
MJ
4283 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4284 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4285 return 1;
4286
4287 gomp_mutex_lock (&devicep->lock);
4288 struct splay_tree_s *mem_map = &devicep->mem_map;
4289 struct splay_tree_key_s cur_node;
4290
4291 cur_node.host_start = (uintptr_t) ptr;
4292 cur_node.host_end = cur_node.host_start;
e01d41e5 4293 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
d9a6bd32
JJ
4294 int ret = n != NULL;
4295 gomp_mutex_unlock (&devicep->lock);
4296 return ret;
4297}
4298
6c420193
MV
4299static int
4300omp_target_memcpy_check (int dst_device_num, int src_device_num,
4301 struct gomp_device_descr **dst_devicep,
4302 struct gomp_device_descr **src_devicep)
d9a6bd32 4303{
1158fe43
JJ
4304 if (dst_device_num != gomp_get_num_devices ()
4305 /* The gomp_get_num_devices call above has to happen unconditionally. */
4306 && dst_device_num != omp_initial_device)
d9a6bd32 4307 {
1158fe43 4308 *dst_devicep = resolve_device (dst_device_num, false);
6c420193 4309 if (*dst_devicep == NULL)
d9a6bd32
JJ
4310 return EINVAL;
4311
6c420193
MV
4312 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4313 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4314 *dst_devicep = NULL;
d9a6bd32 4315 }
6c420193 4316
1158fe43
JJ
4317 if (src_device_num != num_devices_openmp
4318 && src_device_num != omp_initial_device)
d9a6bd32 4319 {
1158fe43 4320 *src_devicep = resolve_device (src_device_num, false);
6c420193 4321 if (*src_devicep == NULL)
d9a6bd32
JJ
4322 return EINVAL;
4323
6c420193
MV
4324 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4325 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4326 *src_devicep = NULL;
d9a6bd32 4327 }
6c420193
MV
4328
4329 return 0;
4330}
4331
4332static int
4333omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4334 size_t dst_offset, size_t src_offset,
4335 struct gomp_device_descr *dst_devicep,
4336 struct gomp_device_descr *src_devicep)
4337{
4338 bool ret;
d9a6bd32
JJ
4339 if (src_devicep == NULL && dst_devicep == NULL)
4340 {
4341 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4342 return 0;
4343 }
4344 if (src_devicep == NULL)
4345 {
4346 gomp_mutex_lock (&dst_devicep->lock);
6ce13072
CLT
4347 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4348 (char *) dst + dst_offset,
4349 (char *) src + src_offset, length);
d9a6bd32 4350 gomp_mutex_unlock (&dst_devicep->lock);
6ce13072 4351 return (ret ? 0 : EINVAL);
d9a6bd32
JJ
4352 }
4353 if (dst_devicep == NULL)
4354 {
4355 gomp_mutex_lock (&src_devicep->lock);
6ce13072
CLT
4356 ret = src_devicep->dev2host_func (src_devicep->target_id,
4357 (char *) dst + dst_offset,
4358 (char *) src + src_offset, length);
d9a6bd32 4359 gomp_mutex_unlock (&src_devicep->lock);
6ce13072 4360 return (ret ? 0 : EINVAL);
d9a6bd32
JJ
4361 }
4362 if (src_devicep == dst_devicep)
4363 {
4364 gomp_mutex_lock (&src_devicep->lock);
6ce13072
CLT
4365 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4366 (char *) dst + dst_offset,
4367 (char *) src + src_offset, length);
d9a6bd32 4368 gomp_mutex_unlock (&src_devicep->lock);
6ce13072 4369 return (ret ? 0 : EINVAL);
d9a6bd32
JJ
4370 }
4371 return EINVAL;
4372}
4373
6c420193
MV
4374int
4375omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4376 size_t src_offset, int dst_device_num, int src_device_num)
4377{
4378 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4379 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4380 &dst_devicep, &src_devicep);
4381
4382 if (ret)
4383 return ret;
4384
4385 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4386 dst_devicep, src_devicep);
4387
4388 return ret;
4389}
4390
4391typedef struct
4392{
4393 void *dst;
4394 const void *src;
4395 size_t length;
4396 size_t dst_offset;
4397 size_t src_offset;
4398 struct gomp_device_descr *dst_devicep;
4399 struct gomp_device_descr *src_devicep;
4400} omp_target_memcpy_data;
4401
4402static void
4403omp_target_memcpy_async_helper (void *args)
4404{
4405 omp_target_memcpy_data *a = args;
4406 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4407 a->src_offset, a->dst_devicep, a->src_devicep))
4408 gomp_fatal ("omp_target_memcpy failed");
4409}
4410
4411int
4412omp_target_memcpy_async (void *dst, const void *src, size_t length,
4413 size_t dst_offset, size_t src_offset,
4414 int dst_device_num, int src_device_num,
4415 int depobj_count, omp_depend_t *depobj_list)
4416{
4417 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4418 unsigned int flags = 0;
4419 void *depend[depobj_count + 5];
4420 int i;
4421 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4422 &dst_devicep, &src_devicep);
4423
4424 omp_target_memcpy_data s = {
4425 .dst = dst,
4426 .src = src,
4427 .length = length,
4428 .dst_offset = dst_offset,
4429 .src_offset = src_offset,
4430 .dst_devicep = dst_devicep,
4431 .src_devicep = src_devicep
4432 };
4433
4434 if (check)
4435 return check;
4436
4437 if (depobj_count > 0 && depobj_list != NULL)
4438 {
4439 flags |= GOMP_TASK_FLAG_DEPEND;
4440 depend[0] = 0;
4441 depend[1] = (void *) (uintptr_t) depobj_count;
4442 depend[2] = depend[3] = depend[4] = 0;
4443 for (i = 0; i < depobj_count; ++i)
4444 depend[i + 5] = &depobj_list[i];
4445 }
4446
4447 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4448 __alignof__ (s), true, flags, depend, 0, NULL);
4449
4450 return 0;
4451}
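
/* Editorial sketch, not part of libgomp: an asynchronous host-to-device
 copy ordered behind a depend object. DEV, DEV_PTR, BUF and NBYTES are
 assumed to be set up, and OBJ initialized beforehand via
 "#pragma omp depobj(obj) depend(inout: buf)". */
#if 0
 omp_target_memcpy_async (dev_ptr, buf, nbytes, 0, 0,
			  dev, omp_get_initial_device (),
			  1, &obj);
 #pragma omp taskwait
#endif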
4452
d9a6bd32 4453static int
28567c40 4454omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
d9a6bd32
JJ
4455 int num_dims, const size_t *volume,
4456 const size_t *dst_offsets,
4457 const size_t *src_offsets,
4458 const size_t *dst_dimensions,
4459 const size_t *src_dimensions,
4460 struct gomp_device_descr *dst_devicep,
4461 struct gomp_device_descr *src_devicep)
4462{
4463 size_t dst_slice = element_size;
4464 size_t src_slice = element_size;
4465 size_t j, dst_off, src_off, length;
4466 int i, ret;
4467
4468 if (num_dims == 1)
4469 {
4470 if (__builtin_mul_overflow (element_size, volume[0], &length)
4471 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4472 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4473 return EINVAL;
4474 if (dst_devicep == NULL && src_devicep == NULL)
6ce13072 4475 {
28567c40
JJ
4476 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4477 length);
6ce13072
CLT
4478 ret = 1;
4479 }
d9a6bd32 4480 else if (src_devicep == NULL)
6ce13072
CLT
4481 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4482 (char *) dst + dst_off,
28567c40
JJ
4483 (const char *) src + src_off,
4484 length);
d9a6bd32 4485 else if (dst_devicep == NULL)
6ce13072
CLT
4486 ret = src_devicep->dev2host_func (src_devicep->target_id,
4487 (char *) dst + dst_off,
28567c40
JJ
4488 (const char *) src + src_off,
4489 length);
d9a6bd32 4490 else if (src_devicep == dst_devicep)
6ce13072
CLT
4491 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4492 (char *) dst + dst_off,
28567c40
JJ
4493 (const char *) src + src_off,
4494 length);
d9a6bd32 4495 else
6ce13072
CLT
4496 ret = 0;
4497 return ret ? 0 : EINVAL;
d9a6bd32
JJ
4498 }
4499
4500 /* FIXME: it would be nice to have some plugin function to handle
4501 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4502 be handled in the generic recursion below, and for host-host it
4503 should be used even for any num_dims >= 2. */
4504
4505 for (i = 1; i < num_dims; i++)
4506 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4507 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4508 return EINVAL;
4509 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4510 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4511 return EINVAL;
4512 for (j = 0; j < volume[0]; j++)
4513 {
4514 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
28567c40 4515 (const char *) src + src_off,
d9a6bd32
JJ
4516 element_size, num_dims - 1,
4517 volume + 1, dst_offsets + 1,
4518 src_offsets + 1, dst_dimensions + 1,
4519 src_dimensions + 1, dst_devicep,
4520 src_devicep);
4521 if (ret)
4522 return ret;
4523 dst_off += dst_slice;
4524 src_off += src_slice;
4525 }
4526 return 0;
4527}
4528
6c420193
MV
4529static int
4530omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4531 int src_device_num,
4532 struct gomp_device_descr **dst_devicep,
4533 struct gomp_device_descr **src_devicep)
d9a6bd32 4534{
d9a6bd32
JJ
4535 if (!dst && !src)
4536 return INT_MAX;
4537
6c420193
MV
4538 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4539 dst_devicep, src_devicep);
4540 if (ret)
4541 return ret;
d9a6bd32 4542
6c420193 4543 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
d9a6bd32
JJ
4544 return EINVAL;
4545
6c420193
MV
4546 return 0;
4547}
4548
4549static int
4550omp_target_memcpy_rect_copy (void *dst, const void *src,
4551 size_t element_size, int num_dims,
4552 const size_t *volume, const size_t *dst_offsets,
4553 const size_t *src_offsets,
4554 const size_t *dst_dimensions,
4555 const size_t *src_dimensions,
4556 struct gomp_device_descr *dst_devicep,
4557 struct gomp_device_descr *src_devicep)
4558{
d9a6bd32
JJ
4559 if (src_devicep)
4560 gomp_mutex_lock (&src_devicep->lock);
4561 else if (dst_devicep)
4562 gomp_mutex_lock (&dst_devicep->lock);
4563 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4564 volume, dst_offsets, src_offsets,
4565 dst_dimensions, src_dimensions,
4566 dst_devicep, src_devicep);
4567 if (src_devicep)
4568 gomp_mutex_unlock (&src_devicep->lock);
4569 else if (dst_devicep)
4570 gomp_mutex_unlock (&dst_devicep->lock);
6c420193
MV
4571
4572 return ret;
4573}
4574
4575int
4576omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4577 int num_dims, const size_t *volume,
4578 const size_t *dst_offsets,
4579 const size_t *src_offsets,
4580 const size_t *dst_dimensions,
4581 const size_t *src_dimensions,
4582 int dst_device_num, int src_device_num)
4583{
4584 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4585
4586 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4587 src_device_num, &dst_devicep,
4588 &src_devicep);
4589
4590 if (check)
4591 return check;
4592
4593 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4594 volume, dst_offsets, src_offsets,
4595 dst_dimensions, src_dimensions,
4596 dst_devicep, src_devicep);
4597
d9a6bd32
JJ
4598 return ret;
4599}
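
/* Editorial sketch, not part of libgomp: copying a 2x3 sub-block of a
 4x5 host matrix of doubles into the top-left corner of a 4x5 device
 matrix; DST, SRC and the device numbers are assumed to be valid. */
#if 0
 const size_t volume[2]   = { 2, 3 };
 const size_t dst_off[2]  = { 0, 0 };
 const size_t src_off[2]  = { 1, 2 };
 const size_t dst_dims[2] = { 4, 5 };
 const size_t src_dims[2] = { 4, 5 };
 omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			 dst_off, src_off, dst_dims, src_dims,
			 dst_device, src_device);
#endif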
4600
6c420193
MV
4601typedef struct
4602{
4603 void *dst;
4604 const void *src;
4605 size_t element_size;
4606 const size_t *volume;
4607 const size_t *dst_offsets;
4608 const size_t *src_offsets;
4609 const size_t *dst_dimensions;
4610 const size_t *src_dimensions;
4611 struct gomp_device_descr *dst_devicep;
4612 struct gomp_device_descr *src_devicep;
4613 int num_dims;
4614} omp_target_memcpy_rect_data;
4615
4616static void
4617omp_target_memcpy_rect_async_helper (void *args)
4618{
4619 omp_target_memcpy_rect_data *a = args;
4620 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4621 a->num_dims, a->volume, a->dst_offsets,
4622 a->src_offsets, a->dst_dimensions,
4623 a->src_dimensions, a->dst_devicep,
4624 a->src_devicep);
4625 if (ret)
4626 gomp_fatal ("omp_target_memcpy_rect failed");
4627}
4628
4629int
4630omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4631 int num_dims, const size_t *volume,
4632 const size_t *dst_offsets,
4633 const size_t *src_offsets,
4634 const size_t *dst_dimensions,
4635 const size_t *src_dimensions,
4636 int dst_device_num, int src_device_num,
4637 int depobj_count, omp_depend_t *depobj_list)
4638{
4639 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4640 unsigned flags = 0;
4641 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4642 src_device_num, &dst_devicep,
4643 &src_devicep);
4644 void *depend[depobj_count + 5];
4645 int i;
4646
4647 omp_target_memcpy_rect_data s = {
4648 .dst = dst,
4649 .src = src,
4650 .element_size = element_size,
4651 .num_dims = num_dims,
4652 .volume = volume,
4653 .dst_offsets = dst_offsets,
4654 .src_offsets = src_offsets,
4655 .dst_dimensions = dst_dimensions,
4656 .src_dimensions = src_dimensions,
4657 .dst_devicep = dst_devicep,
4658 .src_devicep = src_devicep
4659 };
4660
4661 if (check)
4662 return check;
4663
4664 if (depobj_count > 0 && depobj_list != NULL)
4665 {
4666 flags |= GOMP_TASK_FLAG_DEPEND;
4667 depend[0] = 0;
4668 depend[1] = (void *) (uintptr_t) depobj_count;
4669 depend[2] = depend[3] = depend[4] = 0;
4670 for (i = 0; i < depobj_count; ++i)
4671 depend[i + 5] = &depobj_list[i];
4672 }
4673
4674 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4675 __alignof__ (s), true, flags, depend, 0, NULL);
4676
4677 return 0;
4678}
4679
d9a6bd32 4680int
28567c40
JJ
4681omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4682 size_t size, size_t device_offset, int device_num)
d9a6bd32 4683{
1158fe43
JJ
4684 if (device_num == omp_initial_device
4685 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4686 return EINVAL;
4687
1158fe43 4688 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4689 if (devicep == NULL)
4690 return EINVAL;
4691
b2b40051
MJ
4692 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4693 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4694 return EINVAL;
4695
4696 gomp_mutex_lock (&devicep->lock);
4697
4698 struct splay_tree_s *mem_map = &devicep->mem_map;
4699 struct splay_tree_key_s cur_node;
4700 int ret = EINVAL;
4701
4702 cur_node.host_start = (uintptr_t) host_ptr;
4703 cur_node.host_end = cur_node.host_start + size;
4704 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4705 if (n)
4706 {
4707 if (n->tgt->tgt_start + n->tgt_offset
4708 == (uintptr_t) device_ptr + device_offset
4709 && n->host_start <= cur_node.host_start
4710 && n->host_end >= cur_node.host_end)
4711 ret = 0;
4712 }
4713 else
4714 {
4715 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4716 tgt->array = gomp_malloc (sizeof (*tgt->array));
4717 tgt->refcount = 1;
4718 tgt->tgt_start = 0;
4719 tgt->tgt_end = 0;
4720 tgt->to_free = NULL;
4721 tgt->prev = NULL;
4722 tgt->list_count = 0;
4723 tgt->device_descr = devicep;
4724 splay_tree_node array = tgt->array;
4725 splay_tree_key k = &array->key;
4726 k->host_start = cur_node.host_start;
4727 k->host_end = cur_node.host_end;
4728 k->tgt = tgt;
4729 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4730 k->refcount = REFCOUNT_INFINITY;
6f5b4b64 4731 k->dynamic_refcount = 0;
2a656a93 4732 k->aux = NULL;
d9a6bd32
JJ
4733 array->left = NULL;
4734 array->right = NULL;
4735 splay_tree_insert (&devicep->mem_map, array);
4736 ret = 0;
4737 }
4738 gomp_mutex_unlock (&devicep->lock);
4739 return ret;
4740}
4741
4742int
28567c40 4743omp_target_disassociate_ptr (const void *ptr, int device_num)
d9a6bd32 4744{
1158fe43 4745 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4746 if (devicep == NULL)
4747 return EINVAL;
4748
4749 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4750 return EINVAL;
4751
4752 gomp_mutex_lock (&devicep->lock);
4753
4754 struct splay_tree_s *mem_map = &devicep->mem_map;
4755 struct splay_tree_key_s cur_node;
4756 int ret = EINVAL;
4757
4758 cur_node.host_start = (uintptr_t) ptr;
4759 cur_node.host_end = cur_node.host_start;
4760 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4761 if (n
4762 && n->host_start == cur_node.host_start
4763 && n->refcount == REFCOUNT_INFINITY
4764 && n->tgt->tgt_start == 0
4765 && n->tgt->to_free == NULL
4766 && n->tgt->refcount == 1
4767 && n->tgt->list_count == 0)
4768 {
4769 splay_tree_remove (&devicep->mem_map, n);
4770 gomp_unmap_tgt (n->tgt);
4771 ret = 0;
4772 }
4773
4774 gomp_mutex_unlock (&devicep->lock);
4775 return ret;
4776}
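
/* Editorial sketch, not part of libgomp: pairing omp_target_associate_ptr
 with omp_target_disassociate_ptr so that a separately allocated device
 buffer backs a host array; HOST, N and D are assumed to be set up. */
#if 0
 void *dev = omp_target_alloc (n * sizeof (double), d);
 if (dev != NULL
     && omp_target_associate_ptr (host, dev, n * sizeof (double), 0, d) == 0)
   {
     /* Mapping constructs now reuse DEV for HOST instead of allocating. */
     omp_target_disassociate_ptr (host, d);
   }
 omp_target_free (dev, d);
#endif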
4777
941cdc8b
MV
4778void *
4779omp_get_mapped_ptr (const void *ptr, int device_num)
4780{
1158fe43
JJ
4781 if (device_num == omp_initial_device
4782 || device_num == omp_get_initial_device ())
941cdc8b
MV
4783 return (void *) ptr;
4784
1158fe43 4785 struct gomp_device_descr *devicep = resolve_device (device_num, false);
941cdc8b
MV
4786 if (devicep == NULL)
4787 return NULL;
4788
4789 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4790 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4791 return (void *) ptr;
4792
4793 gomp_mutex_lock (&devicep->lock);
4794
4795 struct splay_tree_s *mem_map = &devicep->mem_map;
4796 struct splay_tree_key_s cur_node;
4797 void *ret = NULL;
4798
4799 cur_node.host_start = (uintptr_t) ptr;
4800 cur_node.host_end = cur_node.host_start;
4801 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4802
4803 if (n)
4804 {
4805 uintptr_t offset = cur_node.host_start - n->host_start;
4806 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4807 }
4808
4809 gomp_mutex_unlock (&devicep->lock);
4810
4811 return ret;
4812}
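
/* Editorial sketch, not part of libgomp: querying the device counterpart
 of a mapped host pointer; omp_get_mapped_ptr returns the host pointer
 itself on the initial device and NULL when nothing is mapped. */
#if 0
 if (omp_target_is_present (host, d))
   {
     void *dev = omp_get_mapped_ptr (host, d);
     /* DEV now addresses the device copy of HOST. */
   }
#endif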
4813
4043f53c
MV
4814int
4815omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4816{
1158fe43
JJ
4817 if (device_num == omp_initial_device
4818 || device_num == gomp_get_num_devices ())
4043f53c
MV
4819 return true;
4820
1158fe43 4821 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4043f53c
MV
4822 if (devicep == NULL)
4823 return false;
4824
4825 /* TODO: Unified shared memory must be handled when available. */
4826
4827 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4828}
4829
28567c40
JJ
4830int
4831omp_pause_resource (omp_pause_resource_t kind, int device_num)
4832{
4833 (void) kind;
1158fe43
JJ
4834 if (device_num == omp_initial_device
4835 || device_num == gomp_get_num_devices ())
28567c40 4836 return gomp_pause_host ();
1158fe43
JJ
4837
4838 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4839 if (devicep == NULL)
28567c40 4840 return -1;
1158fe43 4841
28567c40
JJ
4842 /* Do nothing for target devices for now. */
4843 return 0;
4844}
4845
4846int
4847omp_pause_resource_all (omp_pause_resource_t kind)
4848{
4849 (void) kind;
4850 if (gomp_pause_host ())
4851 return -1;
4852 /* Do nothing for target devices for now. */
4853 return 0;
4854}
4855
4856ialias (omp_pause_resource)
4857ialias (omp_pause_resource_all)
4858
1df3f842
JJ
4859#ifdef PLUGIN_SUPPORT
4860
4861/* This function tries to load a plugin for DEVICE. The name of the plugin
4862 is passed in PLUGIN_NAME.
4863 The handles of the found functions are stored in the corresponding fields
4864 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4865
4866static bool
4867gomp_load_plugin_for_device (struct gomp_device_descr *device,
4868 const char *plugin_name)
4869{
196904d8 4870 const char *err = NULL, *last_missing = NULL;
41dbbb37 4871
1df3f842
JJ
4872 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
4873 if (!plugin_handle)
fe5bfa67
TB
4874#if OFFLOAD_DEFAULTED
4875 return 0;
4876#else
2a21ff19 4877 goto dl_fail;
fe5bfa67 4878#endif
1df3f842
JJ
4879
4880 /* Check if all required functions are available in the plugin and store
2a21ff19
NS
4881 their handlers. None of the symbols can legitimately be NULL,
4882 so we don't need to check dlerror all the time. */
41dbbb37 4883#define DLSYM(f) \
2a21ff19
NS
4884 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4885 goto dl_fail
4886 /* Similar, but missing functions are not an error. Return false if
4887 failed, true otherwise. */
4888#define DLSYM_OPT(f, n) \
4889 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4890 || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
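
/* For illustration only, not part of the original source: a sketch of some
   of the entry points a plugin must export so that the required DLSYM
   lookups above succeed.  The authoritative prototypes live in
   libgomp-plugin.h; the signatures below are assumptions made for this
   sketch (in particular, GOMP_OFFLOAD_get_num_devices taking the
   'omp requires' mask mirrors the get_num_devices_func call in
   gomp_target_init below).  */
#if 0 /* Sketch, not built.  */
unsigned
GOMP_OFFLOAD_version (void)
{
  /* Must equal GOMP_VERSION, or the version check above rejects us.  */
  return GOMP_VERSION;
}

const char *
GOMP_OFFLOAD_get_name (void)
{
  return "example";	/* Hypothetical device name.  */
}

unsigned
GOMP_OFFLOAD_get_caps (void)
{
  /* Advertising OpenMP offloading makes the run/dev2dev hooks above
     mandatory as well.  */
  return GOMP_OFFLOAD_CAP_OPENMP_400;
}

int
GOMP_OFFLOAD_get_num_devices (unsigned omp_requires_mask)
{
  /* Report one device, but only if no unsupported 'omp requires'
     clause is in effect.  */
  return omp_requires_mask == 0 ? 1 : -1;
}

/* ... plus get_type, init_device, fini_device, load_image, unload_image,
   alloc, free, dev2host and host2dev, as dlsym'd above.  */
#endif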

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.
   It parses the list of offload plugins and tries to load each of them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors
   for the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400
   ones, followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devs = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
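	/* Illustration (assuming a typical ELF target, where plugin-suffix.h
	   makes SONAME_SUFFIX (1) expand to ".so.1"): an OFFLOAD_PLUGINS
	   entry "nvptx" is turned into the name "libgomp-plugin-nvptx.so.1"
	   for the dlopen call in gomp_load_plugin_for_device.  */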

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
	    new_num_devs = current_device.get_num_devices_func (omp_req);
	    if (gomp_debug_var > 0 && new_num_devs < 0)
	      {
		bool found = false;
		int type = current_device.get_type_func ();
		for (int img = 0; img < num_offload_images; img++)
		  if (type == offload_images[img].type)
		    found = true;
		if (found)
		  {
		    char buf[sizeof ("unified_address, unified_shared_memory, "
				     "reverse_offload")];
		    gomp_requires_to_name (buf, sizeof (buf), omp_req);
		    char *name = (char *) malloc (cur_len + 1);
		    /* Guard against malloc failure on this debug path.  */
		    if (name)
		      {
			memcpy (name, cur, cur_len);
			name[cur_len] = '\0';
			gomp_debug (1,
				    "%s devices present but 'omp requires %s' "
				    "cannot be fulfilled\n", name, buf);
			free (name);
		      }
		  }
	      }
	    else if (new_num_devs >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		struct gomp_device_descr *new_devs
		  = realloc (devs, (num_devs + new_num_devs)
				   * sizeof (struct gomp_device_descr));
		if (!new_devs)
		  {
		    /* Don't leak the old array on realloc failure.  */
		    free (devs);
		    devs = NULL;
		    num_devs = 0;
		    free (plugin_name);
		    break;
		  }
		devs = new_devs;

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.mem_map_rev.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		for (i = 0; i < new_num_devs; i++)
		  {
		    current_device.target_id = i;
		    devs[num_devs] = current_device;
		    gomp_mutex_init (&devs[num_devs].lock);
		    num_devs++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;
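  /* Illustration (hypothetical capabilities): if DEVS held
     [acc-only, omp, omp+acc], the two passes above repack it as
     [omp, omp+acc, acc-only], giving num_devs_openmp == 2 while
     num_devs stays 3 -- a stable two-pass partition, not a full sort.  */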

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devs[i]);
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
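
/* For illustration only, not part of the original source: with offloading
   configured, even a minimal OpenMP program exercises the lifecycle above --
   gomp_target_init runs once before the first target region is executed,
   and gomp_target_fini runs via the atexit hook after main returns.  */
#if 0 /* Sketch, not built.  */
int
main (void)
{
  int x = 0;
  /* The first target construct triggers plugin loading and device init.  */
#pragma omp target map(tofrom: x)
  x = 1;
  return !x;	/* Devices are finalized after main, via atexit.  */
}
#endif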

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution;
   the GOMP_target* routines are just stubs in that case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */