/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <stdio.h>  /* For snprintf.  */
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

/* Define another splay tree instantiation - for reverse offload.  */
#define splay_tree_prefix reverse
#define splay_tree_c
#include "splay-tree.h"


typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

ialias_redirect (GOMP_task)

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Mask of active OpenMP 'requires' directive bits.  */
static int omp_requires_mask;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id, bool remapped)
{
  /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
     called, which must be done before using default_device_var.  */
  int num_devices = gomp_get_num_devices ();

  if (remapped && device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
      remapped = false;
    }

  if (device_id < 0)
    {
      if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
			: omp_initial_device))
	return NULL;
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && num_devices == 0)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but only the host device is available");
      else if (device_id == omp_invalid_device)
	gomp_fatal ("omp_invalid_device encountered");
      else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }
  else if (device_id >= num_devices)
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != num_devices)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}
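
/* Illustrative note (not in the original source): for a zero-length key
   [p, p) the lookup above first tries [p, p + 1), finding a region that
   covers P, then [p - 1, p), finding one that ends at P, and finally the
   empty range itself.  A hypothetical caller might do:

     struct splay_tree_key_s k;
     k.host_start = k.host_end = (uintptr_t) ptr;
     splay_tree_key n = gomp_map_lookup (mem_map, &k);

   so that a zero-length array section still resolves to an adjacent or
   enclosing mapping.  */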

static inline reverse_splay_tree_key
gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
{
  return reverse_splay_tree_lookup (mem_map_rev, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets: chunks[i].start is the starting offset and
     chunks[i].end the ending offset, relative to the tgt->tgt_start device
     address, of chunks which are to be copied to buf and later copied to
     the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in the chunks array, or -1 if coalesce buffering should
     not be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, how many memory regions are
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

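/* Illustrative example (not in the original source): with the limits above,
   regions at device offsets [0, 100) and [2000, 2100) coalesce into one
   chunk [0, 2100), since the 1900-byte gap is below MAX_COALESCE_BUF_GAP,
   and are copied in a single host-to-device transfer; a region starting at
   offset 10000 would instead open a new chunk.  */
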
/* Add a region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).  The exception is for EPHEMERAL data, that we know is available
   already "by construction".  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

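/* Hypothetical usage sketch (not in the original source): callers size the
   chunks array for the worst case and record regions in device-offset
   order, along the lines of

     struct gomp_coalesce_buf cbuf;
     cbuf.chunks = gomp_alloca ((mapnum + 1)
				* sizeof (struct gomp_coalesce_chunk));
     cbuf.chunk_cnt = 0;
     cbuf.use_cnt = 0;
     gomp_coalesce_buf_add (&cbuf, tgt_offset, len);

   Offsets must be non-decreasing; an out-of-order region disables
   coalescing by setting chunk_cnt to -1.  */
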
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
    case GOMP_MAP_ALWAYS_PRESENT_FROM:
    case GOMP_MAP_FORCE_PRESENT:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }

		  /* In an asynchronous context, verify that CBUF isn't used
		     with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'.  */
		  if (__builtin_expect (aq != NULL, 0))
		    assert (ephemeral);

		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  if (__builtin_expect (aq != NULL, 0))
    {
      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  As we've
	     not been able to use CBUF, make a copy of the data into a
	     temporary buffer.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
    }
  else
    gomp_device_copy (devicep, devicep->host2dev_func,
		      "dev", d, "host", h, sz);
}

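/* Illustrative example (not in the original source): when an asynchronous
   queue AQ is given and the source is a stack local, EPHEMERAL must be
   true so that a snapshot of the data is taken before the caller's frame
   can vanish (DEVPTR here is a hypothetical device address):

     int v = 42;
     gomp_copy_host2dev (devicep, aq, devptr, &v, sizeof v, true, NULL);

   With EPHEMERAL false, the queued transfer could read V after the
   enclosing function has already returned.  */
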
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
}

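/* Illustrative example (not in the original source): when the same key K is
   referenced twice by one construct, the set makes the second call a no-op:

     gomp_increment_refcount (k, &refcount_set);
     gomp_increment_refcount (k, &refcount_set);

   K's refcount grows by one, matching the OpenMP 5.0 rule that a construct
   adjusts a region's reference count at most once.  */
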
/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set is being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}

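/* Illustrative note (not in the original source): on the first decrement
   that takes a refcount from 1 to 0, both *DO_COPY and *DO_REMOVE become
   true; if a sibling structure-element map then reaches the same shared
   refcount within the same construct, only *DO_COPY remains true, so each
   element can still be copied back while the key is removed just once.  */
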
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag, bool implicit,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
	  && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
			  && newn->host_start <= oldn->host_start
			  && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if (GOMP_MAP_FORCE_P (kind)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
	   /* Otherwise, new contained inside old is considered valid.  */
	   || (oldn->host_start <= newn->host_start
	       && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If this does occur, below
	 address/length adjustment is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
	{
	  /* We have to be careful not to overwrite still attached pointers
	     while copying the new host data to the device.  */
	  uintptr_t addr = newn->host_start;
	  while (addr < newn->host_end)
	    {
	      size_t i = (addr - oldn->host_start) / sizeof (void *);
	      if (oldn->aux->attach_count[i] == 0)
		gomp_copy_host2dev (devicep, aq,
				    (void *) (oldn->tgt->tgt_start
					      + oldn->tgt_offset
					      + addr - oldn->host_start),
				    (void *) addr,
				    sizeof (void *), false, cbuf);
	      addr += sizeof (void *);
	    }
	}
      else
	gomp_copy_host2dev (devicep, aq,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}


static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}

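/* Illustrative note (not in the original source): with short map kinds the
   low byte encodes the GOMP_MAP_* type and the high bits carry the log2 of
   the alignment, which the mapping loops below recover with

     size_t align = (size_t) 1 << (kind >> rshift);

   where rshift is 8 for short kinds and 3 for the old one-byte encoding.  */
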
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is the target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

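/* Illustrative note (not in the original source): attach counts are kept per
   pointer-sized slot of the enclosing mapping.  For a struct

     struct s { int *p; };

   already mapped on the device, attaching S.P rewrites the device copy of
   the P slot to point at the device translation of its target; only the
   first attach (count 0 -> 1) performs the write, later attaches merely
   bump the counter.  */
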
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

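/* Illustrative note (not in the original source): the OFFSET_* markers
   distinguish entries that have no mapping of their own.  E.g. for
   OFFSET_STRUCT the device address of a GOMP_MAP_STRUCT entry is derived
   from its first member's mapping plus the host-side distance

     (uintptr_t) hostaddrs[i] - (uintptr_t) hostaddrs[i + 1]

   between the struct base and that member.  */
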
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH
	       || ((kind & typemask)
		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_INLINED;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should point
		     to the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier or if a
		 null pointer was there and now a nonnull one has been found;
		 that permits transparent re-mapping for Fortran array
		 descriptors which were previously mapped unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, implicit,
				  NULL, refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence, skip entry - including its MAP_POINTER,
		 when existing.  */
	      tgt->list[i].offset = OFFSET_INLINED;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
					  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind & GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

      for (i = 0; i < mapnum; i++)
	if (has_always_ptrset
	    && tgt->list[i].key
	    && (get_kind (short_mapkind, kinds, i) & typemask)
	       == GOMP_MAP_TO_PSET)
	  {
	    splay_tree_key k = tgt->list[i].key;
	    bool has_nullptr = false;
	    size_t j;
	    for (j = 0; j < k->tgt->list_count; j++)
	      if (k->tgt->list[j].key == k)
		{
		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
		  break;
		}
	    if (k->tgt->list_count == 0)
	      has_nullptr = true;
	    else
	      assert (j < k->tgt->list_count);

	    tgt->list[i].has_null_ptr_assoc = false;
	    for (j = i + 1; j < mapnum; j++)
	      {
		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		    && (!has_nullptr
			|| !GOMP_MAP_POINTER_P (ptr_kind)
			|| *(void **) hostaddrs[j] == NULL))
		  break;
		else if ((uintptr_t) hostaddrs[j] < k->host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > k->host_end))
		  break;
		else
		  {
		    if (*(void **) hostaddrs[j] == NULL)
		      tgt->list[i].has_null_ptr_assoc = true;
		    tgt->list[j].key = k;
		    tgt->list[j].copy_from = false;
		    tgt->list[j].always_copy_from = false;
		    tgt->list[j].is_attach = false;
		    gomp_increment_refcount (k, refcount_set);
		    gomp_map_pointer (k->tgt, aq,
				      (uintptr_t) *(void **) hostaddrs[j],
				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
						       - k->host_start),
				      sizes[j], cbufp, false);
		  }
	      }
	    i = j - 1;
	  }
	else if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    bool implicit = get_implicit (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, false, cbufp);
		/* Save the device address in hostaddrs to permit later
		   availability when doing a deep firstprivate with pointer
		   attach.  */
		hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
		tgt_size += len;

		/* If followed by GOMP_MAP_ATTACH, pointer assign this
		   firstprivate to hostaddrs[i+1], which is assumed to contain
		   a device address.  */
		if (i + 1 < mapnum
		    && (GOMP_MAP_ATTACH
			== (typemask & get_kind (short_mapkind, kinds, i+1))))
		  {
		    uintptr_t target = (uintptr_t) hostaddrs[i];
		    void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
		    /* Per
		       <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
		       "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
		       this probably needs revision for 'aq' usage.  */
		    assert (!aq);
		    gomp_copy_host2dev (devicep, aq, devptr, &target,
					sizeof (void *), false, cbufp);
		    ++i;
		  }
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
		/* The OpenACC 'host_data' construct only allows 'use_device'
		   "mapping" clauses, so in the first loop, 'not_found_cnt'
		   must always have been zero, so all OpenACC 'use_device'
		   clauses have already been handled.  (We can only easily test
		   'use_device' with 'if_present' clause here.)  */
		assert (tgt->list[i].offset == OFFSET_INLINED);
		/* Nevertheless, FALLTHRU to the normal handling, to keep the
		   code conceptually simple, similar to the first loop.  */
	      case GOMP_MAP_USE_DEVICE_PTR:
		if (tgt->list[i].offset == 0)
		  {
		    cur_node.host_start = (uintptr_t) hostaddrs[i];
		    cur_node.host_end = cur_node.host_start;
		    n = gomp_map_lookup (mem_map, &cur_node);
		    if (n != NULL)
		      {
			cur_node.host_start -= n->host_start;
			hostaddrs[i]
			  = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start);
		      }
		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("use_device_ptr pointer wasn't mapped");
		      }
		    else if ((kind & typemask)
			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		      /* If not present, continue using the host address.  */
		      ;
		    else
		      __builtin_unreachable ();
		    tgt->list[i].offset = OFFSET_INLINED;
		  }
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    field_tgt_structelem_first = NULL;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp, refcount_set);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if (i > 0
		    && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
			!= GOMP_MAP_ALWAYS_POINTER))
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), true, cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      case GOMP_MAP_IF_PRESENT:
		/* Not present - otherwise handled above.  Skip over its
		   MAP_POINTER as well.  */
		if (i + 1 < mapnum
		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
			== GOMP_MAP_POINTER))
		  ++i;
		continue;
	      case GOMP_MAP_ATTACH:
	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
		{
		  cur_node.host_start = (uintptr_t) hostaddrs[i];
		  cur_node.host_end = cur_node.host_start + sizeof (void *);
		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
		  if (n != NULL)
		    {
		      tgt->list[i].key = n;
		      tgt->list[i].offset = cur_node.host_start - n->host_start;
		      tgt->list[i].length = n->host_end - n->host_start;
		      tgt->list[i].copy_from = false;
		      tgt->list[i].always_copy_from = false;
		      tgt->list[i].is_attach = true;
		      /* OpenACC 'attach'/'detach' doesn't affect
			 structured/dynamic reference counts ('n->refcount',
			 'n->dynamic_refcount').  */

		      bool zlas
			= ((kind & typemask)
			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
		      gomp_attach_pointer (devicep, aq, mem_map, n,
					   (uintptr_t) hostaddrs[i], sizes[i],
					   cbufp, zlas);
		    }
		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("outer struct not mapped for attach");
		    }
		  continue;
		}
	      default:
		break;
	      }
1df3f842
JJ
1558 splay_tree_key k = &array->key;
1559 k->host_start = (uintptr_t) hostaddrs[i];
41dbbb37 1560 if (!GOMP_MAP_POINTER_P (kind & typemask))
1df3f842
JJ
1561 k->host_end = k->host_start + sizes[i];
1562 else
1563 k->host_end = k->host_start + sizeof (void *);
a51df54e 1564 splay_tree_key n = splay_tree_lookup (mem_map, k);
4a38b02b 1565 if (n && n->refcount != REFCOUNT_LINK)
1f4c5b9b 1566 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
b7e20480 1567 kind & typemask, false, implicit, cbufp,
275c736e 1568 refcount_set);
1df3f842
JJ
1569 else
1570 {
2a656a93 1571 k->aux = NULL;
4a38b02b
IV
1572 if (n && n->refcount == REFCOUNT_LINK)
1573 {
1574 /* Replace target address of the pointer with target address
1575 of mapped object in the splay tree. */
1576 splay_tree_remove (mem_map, n);
2a656a93
JB
1577 k->aux
1578 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1579 k->aux->link_key = n;
4a38b02b 1580 }
41dbbb37 1581 size_t align = (size_t) 1 << (kind >> rshift);
d9a6bd32 1582 tgt->list[i].key = k;
1df3f842 1583 k->tgt = tgt;
275c736e
CLT
1584 k->refcount = 0;
1585 k->dynamic_refcount = 0;
6c7e076b 1586 if (field_tgt_clear != FIELD_TGT_EMPTY)
d9a6bd32
JJ
1587 {
1588 k->tgt_offset = k->host_start - field_tgt_base
1589 + field_tgt_offset;
275c736e
CLT
1590 if (openmp_p)
1591 {
1592 k->refcount = REFCOUNT_STRUCTELEM;
1593 if (field_tgt_structelem_first == NULL)
1594 {
1595 /* Set to first structure element of sequence. */
1596 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1597 field_tgt_structelem_first = k;
1598 }
1599 else
1600 /* Point to refcount of leading element, but do not
1601 increment again. */
1602 k->structelem_refcount_ptr
1603 = &field_tgt_structelem_first->structelem_refcount;
1604
1605 if (i == field_tgt_clear)
1606 {
1607 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1608 field_tgt_structelem_first = NULL;
1609 }
1610 }
d9a6bd32 1611 if (i == field_tgt_clear)
6c7e076b 1612 field_tgt_clear = FIELD_TGT_EMPTY;
d9a6bd32
JJ
1613 }
1614 else
1615 {
1616 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1617 k->tgt_offset = tgt_size;
1618 tgt_size += k->host_end - k->host_start;
1619 }
275c736e
CLT
1620 /* First increment, from 0 to 1. gomp_increment_refcount
1621 encapsulates the different increment cases, so use this
1622 instead of directly setting 1 during initialization. */
1623 gomp_increment_refcount (k, refcount_set);
1624
d9a6bd32
JJ
1625 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1626 tgt->list[i].always_copy_from
1627 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
bc4ed079 1628 tgt->list[i].is_attach = false;
d9a6bd32
JJ
1629 tgt->list[i].offset = 0;
1630 tgt->list[i].length = k->host_end - k->host_start;
1df3f842
JJ
1631 tgt->refcount++;
1632 array->left = NULL;
1633 array->right = NULL;
a51df54e 1634 splay_tree_insert (mem_map, array);
41dbbb37 1635 switch (kind & typemask)
1df3f842 1636 {
41dbbb37
TS
1637 case GOMP_MAP_ALLOC:
1638 case GOMP_MAP_FROM:
1639 case GOMP_MAP_FORCE_ALLOC:
1640 case GOMP_MAP_FORCE_FROM:
d9a6bd32 1641 case GOMP_MAP_ALWAYS_FROM:
1df3f842 1642 break;
41dbbb37
TS
1643 case GOMP_MAP_TO:
1644 case GOMP_MAP_TOFROM:
1645 case GOMP_MAP_FORCE_TO:
1646 case GOMP_MAP_FORCE_TOFROM:
d9a6bd32
JJ
1647 case GOMP_MAP_ALWAYS_TO:
1648 case GOMP_MAP_ALWAYS_TOFROM:
1f4c5b9b 1649 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1650 (void *) (tgt->tgt_start
1651 + k->tgt_offset),
1652 (void *) k->host_start,
9c41f5b9
JB
1653 k->host_end - k->host_start,
1654 false, cbufp);
1df3f842 1655 break;
41dbbb37 1656 case GOMP_MAP_POINTER:
0ab29cf0
CLT
1657 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1658 gomp_map_pointer
1659 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1660 k->tgt_offset, sizes[i], cbufp,
1661 ((kind & typemask)
1662 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1df3f842 1663 break;
41dbbb37 1664 case GOMP_MAP_TO_PSET:
1f4c5b9b 1665 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1666 (void *) (tgt->tgt_start
1667 + k->tgt_offset),
1668 (void *) k->host_start,
9c41f5b9
JB
1669 k->host_end - k->host_start,
1670 false, cbufp);
972da557 1671 tgt->list[i].has_null_ptr_assoc = false;
41dbbb37 1672
1df3f842 1673 for (j = i + 1; j < mapnum; j++)
972da557
TB
1674 {
1675 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1676 & typemask);
1677 if (!GOMP_MAP_POINTER_P (ptr_kind)
1678 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1679 break;
1680 else if ((uintptr_t) hostaddrs[j] < k->host_start
1681 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1682 > k->host_end))
1683 break;
1684 else
1685 {
1686 tgt->list[j].key = k;
1687 tgt->list[j].copy_from = false;
1688 tgt->list[j].always_copy_from = false;
1689 tgt->list[j].is_attach = false;
1690 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
275c736e
CLT
1691 /* For OpenMP, the use of refcount_sets causes
1692 errors if we set k->refcount = 1 above but also
1693 increment it again here, because the decrements
1694 would then not match up, since we decrement only
1695 once for each key's refcount. Therefore avoid
1696 this increment for OpenMP constructs. */
1697 if (!openmp_p)
1698 gomp_increment_refcount (k, refcount_set);
972da557
TB
1699 gomp_map_pointer (tgt, aq,
1700 (uintptr_t) *(void **) hostaddrs[j],
1701 k->tgt_offset
1702 + ((uintptr_t) hostaddrs[j]
1703 - k->host_start),
0ab29cf0 1704 sizes[j], cbufp, false);
972da557 1705 }
ea4b23d9 1706 }
972da557 1707 i = j - 1;
41dbbb37
TS
1708 break;
1709 case GOMP_MAP_FORCE_PRESENT:
38944ec2
TB
1710 case GOMP_MAP_ALWAYS_PRESENT_TO:
1711 case GOMP_MAP_ALWAYS_PRESENT_FROM:
1712 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
41dbbb37
TS
1713 {
1714 /* We already looked up the memory region above and it
1715 was missing. */
1716 size_t size = k->host_end - k->host_start;
a51df54e 1717 gomp_mutex_unlock (&devicep->lock);
01c0b3b0 1718#ifdef HAVE_INTTYPES_H
38944ec2
TB
1719 gomp_fatal ("present clause: not present on the device "
1720 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
1721 "dev: %d)", (void *) k->host_start,
1722 (uint64_t) size, (uint64_t) size,
1723 devicep->target_id);
01c0b3b0 1724#else
38944ec2
TB
1725 gomp_fatal ("present clause: not present on the device "
1726 "(addr: %p, size: %lu (0x%lx), dev: %d)",
1727 (void *) k->host_start,
1728 (unsigned long) size, (unsigned long) size,
1729 devicep->target_id);
01c0b3b0 1730#endif
41dbbb37
TS
1731 }
1732 break;
1733 case GOMP_MAP_FORCE_DEVICEPTR:
1734 assert (k->host_end - k->host_start == sizeof (void *));
1f4c5b9b 1735 gomp_copy_host2dev (devicep, aq,
6ce13072
CLT
1736 (void *) (tgt->tgt_start
1737 + k->tgt_offset),
1738 (void *) k->host_start,
9c41f5b9 1739 sizeof (void *), false, cbufp);
41dbbb37
TS
1740 break;
1741 default:
a51df54e 1742 gomp_mutex_unlock (&devicep->lock);
41dbbb37
TS
1743 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1744 kind);
1df3f842 1745 }
4a38b02b 1746
2a656a93 1747 if (k->aux && k->aux->link_key)
4a38b02b
IV
1748 {
1749 /* Set link pointer on target to the device address of the
1750 mapped object. */
1751 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
84ca08d2
TS
1752 /* We intentionally do not use coalescing here, as it's not
1753 data allocated by the current call to this function. */
1f4c5b9b 1754 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
9c41f5b9 1755 &tgt_addr, sizeof (void *), true, NULL);
4a38b02b 1756 }
1df3f842
JJ
1757 array++;
1758 }
1759 }
1760 }
41dbbb37 1761
f8332e52 1762 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1df3f842
JJ
1763 {
1764 for (i = 0; i < mapnum; i++)
1765 {
e01d41e5 1766 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1f4c5b9b 1767 gomp_copy_host2dev (devicep, aq,
6ce13072 1768 (void *) (tgt->tgt_start + i * sizeof (void *)),
7324369a 1769 (void *) &cur_node.tgt_offset, sizeof (void *),
9c41f5b9 1770 true, cbufp);
1df3f842
JJ
1771 }
1772 }
1773
7324369a
JJ
1774 if (cbufp)
1775 {
1776 long c = 0;
1777 for (c = 0; c < cbuf.chunk_cnt; ++c)
1f4c5b9b 1778 gomp_copy_host2dev (devicep, aq,
a44c1790
TS
1779 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1780 (char *) cbuf.buf + (cbuf.chunks[c].start
1781 - cbuf.chunks[0].start),
9c41f5b9 1782 cbuf.chunks[c].end - cbuf.chunks[c].start,
2b2340e2
TS
1783 false, NULL);
1784 if (aq)
1785 /* Free once the transfer has completed. */
1786 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1787 else
1788 free (cbuf.buf);
a44c1790
TS
1789 cbuf.buf = NULL;
1790 cbufp = NULL;
7324369a
JJ
1791 }
1792
d9a6bd32
JJ
1793 /* If the variable from "omp target enter data" map-list was already mapped,
1794 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1795 gomp_exit_data. */
9e628024 1796 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
6f5b4b64 1797 {
d9a6bd32
JJ
1798 free (tgt);
1799 tgt = NULL;
1800 }
1801
a51df54e 1802 gomp_mutex_unlock (&devicep->lock);
1df3f842
JJ
1803 return tgt;
1804}
1805
275c736e 1806static struct target_mem_desc *
1f4c5b9b
CLT
1807gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1808 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
275c736e
CLT
1809 bool short_mapkind, htab_t *refcount_set,
1810 enum gomp_map_vars_kind pragma_kind)
1f4c5b9b 1811{
275c736e
CLT
1812 /* This management of a local refcount_set is for convenience of callers
1813 who do not share a refcount_set over multiple map/unmap uses. */
1814 htab_t local_refcount_set = NULL;
1815 if (refcount_set == NULL)
1816 {
1817 local_refcount_set = htab_create (mapnum);
1818 refcount_set = &local_refcount_set;
1819 }
1820
1821 struct target_mem_desc *tgt;
1822 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1823 sizes, kinds, short_mapkind, refcount_set,
1824 pragma_kind);
1825 if (local_refcount_set)
1826 htab_free (local_refcount_set);
1827
1828 return tgt;
1f4c5b9b
CLT
1829}
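
/* Usage sketch (illustrative only; this mirrors what GOMP_target below
   actually does): callers that pair a mapping with a later unmapping share
   one refcount set across both calls instead of passing NULL:

     htab_t refcount_set = htab_create (mapnum);
     struct target_mem_desc *tgt
       = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			false, &refcount_set, GOMP_MAP_VARS_TARGET);
     ...
     htab_clear (refcount_set);
     gomp_unmap_vars (tgt, true, &refcount_set);
     htab_free (refcount_set);

   Passing NULL for REFCOUNT_SET instead makes each call create and free a
   one-shot set internally, as above.  */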
1830
1831attribute_hidden struct target_mem_desc *
275c736e
CLT
1832goacc_map_vars (struct gomp_device_descr *devicep,
1833 struct goacc_asyncqueue *aq, size_t mapnum,
1834 void **hostaddrs, void **devaddrs, size_t *sizes,
1835 void *kinds, bool short_mapkind,
1836 enum gomp_map_vars_kind pragma_kind)
1f4c5b9b
CLT
1837{
1838 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
275c736e
CLT
1839 sizes, kinds, short_mapkind, NULL,
1840 GOMP_MAP_VARS_OPENACC | pragma_kind);
1f4c5b9b
CLT
1841}
1842
6278b549 1843static void
1df3f842
JJ
1844gomp_unmap_tgt (struct target_mem_desc *tgt)
1845{
1846 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1847 if (tgt->tgt_end)
6ce13072 1848 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1df3f842
JJ
1849
1850 free (tgt->array);
1851 free (tgt);
1852}
1853
1cbd94e8
JB
1854static bool
1855gomp_unref_tgt (void *ptr)
829c6349
CLT
1856{
1857 bool is_tgt_unmapped = false;
1cbd94e8
JB
1858
1859 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1860
1861 if (tgt->refcount > 1)
1862 tgt->refcount--;
829c6349
CLT
1863 else
1864 {
1cbd94e8 1865 gomp_unmap_tgt (tgt);
829c6349 1866 is_tgt_unmapped = true;
829c6349 1867 }
1cbd94e8 1868
829c6349
CLT
1869 return is_tgt_unmapped;
1870}
1871
1f4c5b9b 1872static void
1cbd94e8 1873gomp_unref_tgt_void (void *ptr)
1f4c5b9b 1874{
1cbd94e8
JB
1875 (void) gomp_unref_tgt (ptr);
1876}
1f4c5b9b 1877
275c736e
CLT
1878static void
1879gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1cbd94e8 1880{
275c736e 1881 splay_tree_remove (sp, k);
2a656a93
JB
1882 if (k->aux)
1883 {
1884 if (k->aux->link_key)
275c736e 1885 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
5d5be7bf
JB
1886 if (k->aux->attach_count)
1887 free (k->aux->attach_count);
2a656a93
JB
1888 free (k->aux);
1889 k->aux = NULL;
1890 }
275c736e
CLT
1891}
1892
1893static inline __attribute__((always_inline)) bool
1894gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1895 struct goacc_asyncqueue *aq)
1896{
1897 bool is_tgt_unmapped = false;
1898
1899 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1900 {
1901 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1902 /* Infer the splay_tree_key of the first structelem key using the
1903 pointer to the first structelem_refcount. */
1904 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1905 - offsetof (struct splay_tree_key_s,
1906 structelem_refcount));
1907 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1908
1909 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1910 with the splay_tree_keys embedded inside. */
1911 splay_tree_node node =
1912 (splay_tree_node) ((char *) k
1913 - offsetof (struct splay_tree_node_s, key));
1914 while (true)
1915 {
1916 /* Start from the _FIRST key, and continue for all following
1917 sibling keys. */
1918 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1919 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1920 break;
1921 else
1922 k = &(++node)->key;
1923 }
1924 }
1925 else
1926 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1927
1cbd94e8
JB
1928 if (aq)
1929 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1930 (void *) k->tgt);
1f4c5b9b 1931 else
1cbd94e8
JB
1932 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1933 return is_tgt_unmapped;
1934}
1935
1936attribute_hidden bool
1937gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1938{
1939 return gomp_remove_var_internal (devicep, k, NULL);
1940}
1941
1942/* Remove a variable asynchronously. This actually removes the variable
1943 mapping immediately, but retains the linked target_mem_desc until the
1944 asynchronous operation has completed (as it may still refer to target
1945 memory). The device lock must be held before entry, and remains locked on
1946 exit. */
1947
1948attribute_hidden void
1949gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1950 struct goacc_asyncqueue *aq)
1951{
1952 (void) gomp_remove_var_internal (devicep, k, aq);
1f4c5b9b
CLT
1953}
1954
41dbbb37
TS
1955/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1956 variables back from device to host; if it is false, it is assumed that this
b4557008 1957 has been done already. */
41dbbb37 1958
1f4c5b9b
CLT
1959static inline __attribute__((always_inline)) void
1960gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
275c736e 1961 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1df3f842
JJ
1962{
1963 struct gomp_device_descr *devicep = tgt->device_descr;
1964
1965 if (tgt->list_count == 0)
1966 {
1967 free (tgt);
1968 return;
1969 }
1970
a51df54e 1971 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
1972 if (devicep->state == GOMP_DEVICE_FINALIZED)
1973 {
1974 gomp_mutex_unlock (&devicep->lock);
1975 free (tgt->array);
1976 free (tgt);
1977 return;
1978 }
41dbbb37 1979
1df3f842 1980 size_t i;
8e7e71ff
JB
1981
1982 /* We must perform detachments before any copies back to the host. */
1983 for (i = 0; i < tgt->list_count; i++)
1984 {
1985 splay_tree_key k = tgt->list[i].key;
1986
bc4ed079 1987 if (k != NULL && tgt->list[i].is_attach)
8e7e71ff
JB
1988 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1989 + tgt->list[i].offset,
25bce75c 1990 false, NULL);
8e7e71ff
JB
1991 }
1992
1df3f842 1993 for (i = 0; i < tgt->list_count; i++)
d9a6bd32
JJ
1994 {
1995 splay_tree_key k = tgt->list[i].key;
1996 if (k == NULL)
1997 continue;
1998
bc4ed079
JB
1999 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2000 counts ('n->refcount', 'n->dynamic_refcount'). */
2001 if (tgt->list[i].is_attach)
2002 continue;
2003
275c736e
CLT
2004 bool do_copy, do_remove;
2005 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
d9a6bd32 2006
275c736e 2007 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
d9a6bd32 2008 || tgt->list[i].always_copy_from)
1f4c5b9b 2009 gomp_copy_dev2host (devicep, aq,
6ce13072
CLT
2010 (void *) (k->host_start + tgt->list[i].offset),
2011 (void *) (k->tgt->tgt_start + k->tgt_offset
2012 + tgt->list[i].offset),
2013 tgt->list[i].length);
275c736e 2014 if (do_remove)
83d1d065
TS
2015 {
2016 struct target_mem_desc *k_tgt = k->tgt;
2017 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2018 /* It would be bad if TGT got unmapped while we're still iterating
2019 over its LIST_COUNT, as we also expect to use it in the following
2020 code. */
2021 assert (!is_tgt_unmapped
2022 || k_tgt != tgt);
2023 }
d9a6bd32 2024 }
1df3f842 2025
1f4c5b9b 2026 if (aq)
1cbd94e8 2027 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1f4c5b9b 2028 (void *) tgt);
1df3f842 2029 else
1f4c5b9b 2030 gomp_unref_tgt ((void *) tgt);
41dbbb37 2031
a51df54e 2032 gomp_mutex_unlock (&devicep->lock);
1df3f842
JJ
2033}
2034
275c736e
CLT
2035static void
2036gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2037 htab_t *refcount_set)
1f4c5b9b 2038{
275c736e
CLT
2039 /* This management of a local refcount_set is for convenience of callers
2040 who do not share a refcount_set over multiple map/unmap uses. */
2041 htab_t local_refcount_set = NULL;
2042 if (refcount_set == NULL)
2043 {
2044 local_refcount_set = htab_create (tgt->list_count);
2045 refcount_set = &local_refcount_set;
2046 }
2047
2048 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2049
2050 if (local_refcount_set)
2051 htab_free (local_refcount_set);
1f4c5b9b
CLT
2052}
2053
2054attribute_hidden void
275c736e
CLT
2055goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2056 struct goacc_asyncqueue *aq)
1f4c5b9b 2057{
275c736e 2058 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1f4c5b9b
CLT
2059}
2060
1df3f842 2061static void
a51df54e 2062gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
d9a6bd32 2063 size_t *sizes, void *kinds, bool short_mapkind)
1df3f842
JJ
2064{
2065 size_t i;
2066 struct splay_tree_key_s cur_node;
d9a6bd32 2067 const int typemask = short_mapkind ? 0xff : 0x7;
1df3f842
JJ
2068
2069 if (!devicep)
2070 return;
2071
2072 if (mapnum == 0)
2073 return;
2074
a51df54e 2075 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2076 if (devicep->state == GOMP_DEVICE_FINALIZED)
2077 {
2078 gomp_mutex_unlock (&devicep->lock);
2079 return;
2080 }
2081
1df3f842
JJ
2082 for (i = 0; i < mapnum; i++)
2083 if (sizes[i])
2084 {
2085 cur_node.host_start = (uintptr_t) hostaddrs[i];
2086 cur_node.host_end = cur_node.host_start + sizes[i];
a51df54e 2087 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1df3f842
JJ
2088 if (n)
2089 {
d9a6bd32 2090 int kind = get_kind (short_mapkind, kinds, i);
1df3f842
JJ
2091 if (n->host_start > cur_node.host_start
2092 || n->host_end < cur_node.host_end)
a51df54e
IV
2093 {
2094 gomp_mutex_unlock (&devicep->lock);
2095 gomp_fatal ("Trying to update [%p..%p) object when "
2096 "only [%p..%p) is mapped",
2097 (void *) cur_node.host_start,
2098 (void *) cur_node.host_end,
2099 (void *) n->host_start,
2100 (void *) n->host_end);
2101 }
6ce13072 2102
6c039937
CLT
2103 if (n->aux && n->aux->attach_count)
2104 {
2105 uintptr_t addr = cur_node.host_start;
2106 while (addr < cur_node.host_end)
2107 {
2108 /* We have to be careful not to overwrite still attached
2109 pointers during host<->device updates. */
2110 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2111 if (n->aux->attach_count[i] == 0)
2112 {
2113 void *devaddr = (void *) (n->tgt->tgt_start
2114 + n->tgt_offset
2115 + addr - n->host_start);
2116 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2117 gomp_copy_host2dev (devicep, NULL,
2118 devaddr, (void *) addr,
2119 sizeof (void *), false, NULL);
2120 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2121 gomp_copy_dev2host (devicep, NULL,
2122 (void *) addr, devaddr,
2123 sizeof (void *));
2124 }
2125 addr += sizeof (void *);
2126 }
2127 }
2128 else
2129 {
2130 void *hostaddr = (void *) cur_node.host_start;
2131 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2132 + cur_node.host_start
2133 - n->host_start);
2134 size_t size = cur_node.host_end - cur_node.host_start;
2135
2136 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2137 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2138 false, NULL);
2139 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2140 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2141 }
1df3f842 2142 }
4ede915d
TB
2143 else
2144 {
2145 int kind = get_kind (short_mapkind, kinds, i);
2146
2147 if (GOMP_MAP_PRESENT_P (kind))
2148 {
2149 /* We already looked up the memory region above and it
2150 was missing. */
2151 gomp_mutex_unlock (&devicep->lock);
38944ec2 2152#ifdef HAVE_INTTYPES_H
4ede915d 2153 gomp_fatal ("present clause: not present on the device "
38944ec2
TB
2154 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2155 "dev: %d)", (void *) hostaddrs[i],
2156 (uint64_t) sizes[i], (uint64_t) sizes[i],
2157 devicep->target_id);
2158#else
2159 gomp_fatal ("present clause: not present on the device "
2160 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2161 (void *) hostaddrs[i], (unsigned long) sizes[i],
2162 (unsigned long) sizes[i], devicep->target_id);
2163#endif
4ede915d
TB
2164 }
2165 }
1df3f842 2166 }
a51df54e
IV
2167 gomp_mutex_unlock (&devicep->lock);
2168}
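
/* For instance (an informal sketch, not a literal trace): a directive such
   as

     #pragma omp target update from(x)

   ends up in gomp_update with MAPNUM == 1 and a kind for which
   GOMP_MAP_COPY_FROM_P is true, so the mapped device copy of X is
   transferred back to the host, except for any pointer-sized slots that
   are still attached (see the attach_count handling above).  */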
2169
9f2fca56
MV
2170static struct gomp_offload_icv_list *
2171gomp_get_offload_icv_item (int dev_num)
2172{
2173 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2174 while (l != NULL && l->device_num != dev_num)
2175 l = l->next;
2176
2177 return l;
2178}
2179
2180/* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2181 depending on the device num and the variable hierarchy
2182 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2183 device and thus no item with that device number is contained in
2184 gomp_offload_icv_list, then a new item is created and added to the list. */
2185
2186static struct gomp_offload_icvs *
2187get_gomp_offload_icvs (int dev_num)
2188{
2189 struct gomp_icv_list *dev
2190 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2191 struct gomp_icv_list *all
2192 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2193 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2194 struct gomp_offload_icv_list *offload_icvs
2195 = gomp_get_offload_icv_item (dev_num);
2196
2197 if (offload_icvs != NULL)
2198 return &offload_icvs->icvs;
2199
2200 struct gomp_offload_icv_list *new
2201 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2202
2203 new->device_num = dev_num;
2204 new->icvs.device_num = dev_num;
2205 new->next = gomp_offload_icv_list;
2206
2207 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2208 new->icvs.nteams = dev_x->icvs.nteams_var;
2209 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2210 new->icvs.nteams = dev->icvs.nteams_var;
2211 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2212 new->icvs.nteams = all->icvs.nteams_var;
2213 else
2214 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2215
81476bc4
MV
2216 if (dev_x != NULL
2217 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2218 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2219 else if (dev != NULL
2220 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2221 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2222 else if (all != NULL
2223 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2224 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2225 else
2226 new->icvs.teams_thread_limit
2227 = gomp_default_icv_values.teams_thread_limit_var;
2228
9f2fca56
MV
2229 if (dev_x != NULL
2230 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2231 new->icvs.default_device = dev_x->icvs.default_device_var;
2232 else if (dev != NULL
2233 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2234 new->icvs.default_device = dev->icvs.default_device_var;
2235 else if (all != NULL
2236 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2237 new->icvs.default_device = all->icvs.default_device_var;
2238 else
2239 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2240
2241 gomp_offload_icv_list = new;
2242 return &new->icvs;
2243}
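
/* Example (a sketch of the lookup order only): with

     OMP_NUM_TEAMS_DEV_0=4 OMP_NUM_TEAMS_DEV=8 OMP_NUM_TEAMS_ALL=16

   in the environment, the item created for device 0 gets nteams == 4 from
   the _DEV_0 variant, any other device gets 8 from _DEV, and 16 from _ALL
   would apply only where no more specific variant is set; failing all
   three, gomp_default_icv_values.nteams_var is used.  */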
2244
a51df54e
IV
2245/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2246 And insert into the splay tree the mapping between addresses from HOST_TABLE and
22be2349
NS
2247 from the loaded target image. We rely on the host and device compilers
2248 emitting variables and functions in the same order. */
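
/* Illustrative sketch of the layout assumed for HOST_TABLE below
   (hypothetical names; the real tables are emitted by the compiler):

     void *host_table_sketch[] = {
       funcs_begin, funcs_end,		function addresses
       vars_begin, vars_end,		address/size pairs for variables
       ind_funcs_begin, ind_funcs_end	only for image versions with
					indirect-function support
     };  */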
a51df54e
IV
2249
2250static void
2a21ff19 2251gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
22be2349
NS
2252 const void *host_table, const void *target_data,
2253 bool is_register_lock)
a51df54e
IV
2254{
2255 void **host_func_table = ((void ***) host_table)[0];
2256 void **host_funcs_end = ((void ***) host_table)[1];
2257 void **host_var_table = ((void ***) host_table)[2];
2258 void **host_vars_end = ((void ***) host_table)[3];
a49c7d31
KCY
2259 void **host_ind_func_table = NULL;
2260 void **host_ind_funcs_end = NULL;
a51df54e 2261
a49c7d31
KCY
2262 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2263 {
2264 host_ind_func_table = ((void ***) host_table)[4];
2265 host_ind_funcs_end = ((void ***) host_table)[5];
2266 }
2267
2268 /* The func and ind_func tables contain only addresses, the var table
2269 contains addresses and corresponding sizes. */
a51df54e
IV
2270 int num_funcs = host_funcs_end - host_func_table;
2271 int num_vars = (host_vars_end - host_var_table) / 2;
a49c7d31 2272 int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
a51df54e
IV
2273
2274 /* Load image to device and get target addresses for the image. */
2275 struct addr_pair *target_table = NULL;
ea4b23d9 2276 uint64_t *rev_target_fn_table = NULL;
2a21ff19
NS
2277 int i, num_target_entries;
2278
ea4b23d9
TB
2279 /* With reverse offload, also insert target-to-host address mappings. */
2280 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2281
2a21ff19
NS
2282 num_target_entries
2283 = devicep->load_image_func (devicep->target_id, version,
ea4b23d9 2284 target_data, &target_table,
a49c7d31
KCY
2285 rev_lookup ? &rev_target_fn_table : NULL,
2286 num_ind_funcs
2287 ? (uint64_t *) host_ind_func_table : NULL);
a51df54e 2288
0bac793e 2289 if (num_target_entries != num_funcs + num_vars
9f2fca56
MV
2290 /* "+1" due to the additional ICV struct. */
2291 && num_target_entries != num_funcs + num_vars + 1)
a51df54e
IV
2292 {
2293 gomp_mutex_unlock (&devicep->lock);
2294 if (is_register_lock)
2295 gomp_mutex_unlock (&register_lock);
2a21ff19
NS
2296 gomp_fatal ("Cannot map target functions or variables"
2297 " (expected %u, have %u)", num_funcs + num_vars,
2298 num_target_entries);
a51df54e
IV
2299 }
2300
2301 /* Insert host-target address mapping into splay tree. */
2302 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
9f2fca56
MV
2303 /* "+1" due to the additional ICV struct. */
2304 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2305 * sizeof (*tgt->array));
ea4b23d9
TB
2306 if (rev_target_fn_table)
2307 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2308 else
2309 tgt->rev_array = NULL;
d9a6bd32 2310 tgt->refcount = REFCOUNT_INFINITY;
a51df54e
IV
2311 tgt->tgt_start = 0;
2312 tgt->tgt_end = 0;
2313 tgt->to_free = NULL;
2314 tgt->prev = NULL;
2315 tgt->list_count = 0;
2316 tgt->device_descr = devicep;
2317 splay_tree_node array = tgt->array;
ea4b23d9 2318 reverse_splay_tree_node rev_array = tgt->rev_array;
a51df54e
IV
2319
2320 for (i = 0; i < num_funcs; i++)
2321 {
2322 splay_tree_key k = &array->key;
2323 k->host_start = (uintptr_t) host_func_table[i];
2324 k->host_end = k->host_start + 1;
2325 k->tgt = tgt;
2326 k->tgt_offset = target_table[i].start;
d9a6bd32 2327 k->refcount = REFCOUNT_INFINITY;
6f5b4b64 2328 k->dynamic_refcount = 0;
2a656a93 2329 k->aux = NULL;
a51df54e
IV
2330 array->left = NULL;
2331 array->right = NULL;
2332 splay_tree_insert (&devicep->mem_map, array);
ea4b23d9
TB
2333 if (rev_target_fn_table)
2334 {
2335 reverse_splay_tree_key k2 = &rev_array->key;
2336 k2->dev = rev_target_fn_table[i];
2337 k2->k = k;
2338 rev_array->left = NULL;
2339 rev_array->right = NULL;
2340 if (k2->dev != 0)
2341 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2342 rev_array++;
2343 }
a51df54e
IV
2344 array++;
2345 }
2346
4a38b02b
IV
2347 /* Most significant bit of the size in host and target tables marks
2348 "omp declare target link" variables. */
2349 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2350 const uintptr_t size_mask = ~link_bit;
2351
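/* E.g. (sketch): with a 64-bit uintptr_t, LINK_BIT is 1ULL << 63, so an
   8-byte "omp declare target link" variable is recorded with the size word
   0x8000000000000008; SIZE_MASK then recovers the real size and LINK_BIT
   the flag.  */
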
a51df54e
IV
2352 for (i = 0; i < num_vars; i++)
2353 {
2354 struct addr_pair *target_var = &target_table[num_funcs + i];
4a38b02b 2355 uintptr_t target_size = target_var->end - target_var->start;
4897bb00 2356 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
4a38b02b 2357
4897bb00 2358 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
a51df54e
IV
2359 {
2360 gomp_mutex_unlock (&devicep->lock);
2361 if (is_register_lock)
2362 gomp_mutex_unlock (&register_lock);
4a38b02b 2363 gomp_fatal ("Cannot map target variables (size mismatch)");
a51df54e
IV
2364 }
2365
2366 splay_tree_key k = &array->key;
2367 k->host_start = (uintptr_t) host_var_table[i * 2];
4a38b02b
IV
2368 k->host_end
2369 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
a51df54e
IV
2370 k->tgt = tgt;
2371 k->tgt_offset = target_var->start;
4897bb00 2372 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
6f5b4b64 2373 k->dynamic_refcount = 0;
2a656a93 2374 k->aux = NULL;
a51df54e
IV
2375 array->left = NULL;
2376 array->right = NULL;
2377 splay_tree_insert (&devicep->mem_map, array);
2378 array++;
2379 }
2380
9f2fca56
MV
2381 /* The last entry is for an ICVs variable.
2382 Tolerate the case where the plugin does not return that entry. */
0bac793e
CLT
2383 if (num_funcs + num_vars < num_target_entries)
2384 {
9f2fca56
MV
2385 struct addr_pair *var = &target_table[num_funcs + num_vars];
2386
2387 /* Start address will be non-zero for the ICVs variable if
2388 the variable was found in this image. */
2389 if (var->start != 0)
0bac793e
CLT
2390 {
2391 /* The index of the devicep within devices[] is regarded as its
2392 'device number', which is different from the per-device type
2393 devicep->target_id. */
9f2fca56
MV
2394 int dev_num = (int) (devicep - &devices[0]);
2395 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2396 size_t var_size = var->end - var->start;
81476bc4
MV
2397 if (var_size != sizeof (struct gomp_offload_icvs))
2398 {
2399 gomp_mutex_unlock (&devicep->lock);
2400 if (is_register_lock)
2401 gomp_mutex_unlock (&register_lock);
2402 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2403 "format");
2404 }
9f2fca56
MV
2405 /* Copy the ICVs variable to its place in device memory, thereby
2406 actually putting its device number into effect. */
2407 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2408 var_size, false, NULL);
2409 splay_tree_key k = &array->key;
2410 k->host_start = (uintptr_t) icvs;
2411 k->host_end =
2412 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2413 k->tgt = tgt;
2414 k->tgt_offset = var->start;
2415 k->refcount = REFCOUNT_INFINITY;
2416 k->dynamic_refcount = 0;
2417 k->aux = NULL;
2418 array->left = NULL;
2419 array->right = NULL;
2420 splay_tree_insert (&devicep->mem_map, array);
2421 array++;
0bac793e
CLT
2422 }
2423 }
2424
a51df54e 2425 free (target_table);
1df3f842
JJ
2426}
2427
22be2349
NS
2428/* Unload the mappings described by target_data from device DEVICE_P.
2429 The device must be locked. */
2430
2431static void
2432gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2a21ff19 2433 unsigned version,
22be2349
NS
2434 const void *host_table, const void *target_data)
2435{
2436 void **host_func_table = ((void ***) host_table)[0];
2437 void **host_funcs_end = ((void ***) host_table)[1];
2438 void **host_var_table = ((void ***) host_table)[2];
2439 void **host_vars_end = ((void ***) host_table)[3];
2440
2441 /* The func table contains only addresses, the var table contains addresses
2442 and corresponding sizes. */
2443 int num_funcs = host_funcs_end - host_func_table;
2444 int num_vars = (host_vars_end - host_var_table) / 2;
2445
22be2349
NS
2446 struct splay_tree_key_s k;
2447 splay_tree_key node = NULL;
2448
2449 /* Find the mapping at the start of the node array. */
2450 if (num_funcs || num_vars)
2451 {
2452 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2453 : (uintptr_t) host_var_table[0]);
2454 k.host_end = k.host_start + 1;
2455 node = splay_tree_lookup (&devicep->mem_map, &k);
2456 }
2a21ff19 2457
6ce13072
CLT
2458 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2459 {
2460 gomp_mutex_unlock (&devicep->lock);
2461 gomp_fatal ("image unload fail");
2462 }
ea4b23d9
TB
2463 if (devicep->mem_map_rev.root)
2464 {
2465 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2466 real allocation. */
2467 assert (node && node->tgt && node->tgt->rev_array);
2468 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2469 free (node->tgt->rev_array);
2470 devicep->mem_map_rev.root = NULL;
2471 }
22be2349
NS
2472
2473 /* Remove mappings from splay tree. */
4a38b02b
IV
2474 int i;
2475 for (i = 0; i < num_funcs; i++)
22be2349 2476 {
4a38b02b 2477 k.host_start = (uintptr_t) host_func_table[i];
22be2349
NS
2478 k.host_end = k.host_start + 1;
2479 splay_tree_remove (&devicep->mem_map, &k);
2480 }
2481
4a38b02b
IV
2482 /* Most significant bit of the size in host and target tables marks
2483 "omp declare target link" variables. */
2484 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2485 const uintptr_t size_mask = ~link_bit;
2486 bool is_tgt_unmapped = false;
2487
2488 for (i = 0; i < num_vars; i++)
22be2349 2489 {
4a38b02b
IV
2490 k.host_start = (uintptr_t) host_var_table[i * 2];
2491 k.host_end
2492 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2493
2494 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2495 splay_tree_remove (&devicep->mem_map, &k);
2496 else
2497 {
2498 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
829c6349 2499 is_tgt_unmapped = gomp_remove_var (devicep, n);
4a38b02b 2500 }
22be2349
NS
2501 }
2502
4a38b02b 2503 if (node && !is_tgt_unmapped)
22be2349
NS
2504 {
2505 free (node->tgt);
2506 free (node);
2507 }
2508}
2509
683f1184
TB
2510static void
2511gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2512{
2513 char *end = buf + size, *p = buf;
2514 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2515 p += snprintf (p, end - p, "unified_address");
2516 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2517 p += snprintf (p, end - p, "%sunified_shared_memory",
2518 (p == buf ? "" : ", "));
2519 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2520 p += snprintf (p, end - p, "%sreverse_offload",
2521 (p == buf ? "" : ", "));
2522}
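
/* E.g. (sketch): for REQUIRES_MASK == (GOMP_REQUIRES_UNIFIED_ADDRESS
   | GOMP_REQUIRES_REVERSE_OFFLOAD), BUF ends up containing
   "unified_address, reverse_offload".  */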
2523
a51df54e 2524/* This function should be called from every offload image while loading.
1df3f842 2525 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
3f05e03d 2526 the target, and DATA. */
1df3f842
JJ
2527
2528void
2a21ff19 2529GOMP_offload_register_ver (unsigned version, const void *host_table,
3f05e03d 2530 int target_type, const void *data)
1df3f842 2531{
a51df54e 2532 int i;
2a21ff19
NS
2533
2534 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2535 gomp_fatal ("Library too old for offload (version %u < %u)",
2536 GOMP_VERSION, GOMP_VERSION_LIB (version));
683f1184 2537
3f05e03d
TS
2538 int omp_req;
2539 const void *target_data;
683f1184
TB
2540 if (GOMP_VERSION_LIB (version) > 1)
2541 {
3f05e03d
TS
2542 omp_req = (int) (size_t) ((void **) data)[0];
2543 target_data = &((void **) data)[1];
2544 }
2545 else
2546 {
2547 omp_req = 0;
2548 target_data = data;
683f1184
TB
2549 }
2550
a51df54e
IV
2551 gomp_mutex_lock (&register_lock);
2552
683f1184
TB
2553 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2554 {
2555 char buf1[sizeof ("unified_address, unified_shared_memory, "
2556 "reverse_offload")];
2557 char buf2[sizeof ("unified_address, unified_shared_memory, "
2558 "reverse_offload")];
2559 gomp_requires_to_name (buf2, sizeof (buf2),
2560 omp_req != GOMP_REQUIRES_TARGET_USED
2561 ? omp_req : omp_requires_mask);
2562 if (omp_req != GOMP_REQUIRES_TARGET_USED
2563 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2564 {
2565 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2566 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2567 "in multiple compilation units: '%s' vs. '%s'",
2568 buf1, buf2);
2569 }
2570 else
2571 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2572 "some compilation units", buf2);
2573 }
2574 omp_requires_mask = omp_req;
2575
a51df54e
IV
2576 /* Load image to all initialized devices. */
2577 for (i = 0; i < num_devices; i++)
2578 {
2579 struct gomp_device_descr *devicep = &devices[i];
2580 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2581 if (devicep->type == target_type
2582 && devicep->state == GOMP_DEVICE_INITIALIZED)
2a21ff19
NS
2583 gomp_load_image_to_device (devicep, version,
2584 host_table, target_data, true);
a51df54e
IV
2585 gomp_mutex_unlock (&devicep->lock);
2586 }
1df3f842 2587
a51df54e
IV
2588 /* Insert image to array of pending images. */
2589 offload_images
2590 = gomp_realloc_unlock (offload_images,
2591 (num_offload_images + 1)
2592 * sizeof (struct offload_image_descr));
2a21ff19 2593 offload_images[num_offload_images].version = version;
1df3f842
JJ
2594 offload_images[num_offload_images].type = target_type;
2595 offload_images[num_offload_images].host_table = host_table;
2596 offload_images[num_offload_images].target_data = target_data;
2597
2598 num_offload_images++;
a51df54e 2599 gomp_mutex_unlock (&register_lock);
1df3f842
JJ
2600}
2601
9ef71453
TS
2602/* Legacy entry point. */
2603
2a21ff19
NS
2604void
2605GOMP_offload_register (const void *host_table, int target_type,
2606 const void *target_data)
2607{
2608 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2609}
2610
a51df54e
IV
2611/* This function should be called from every offload image while unloading.
2612 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
3f05e03d 2613 the target, and DATA. */
1df3f842 2614
a51df54e 2615void
2a21ff19 2616GOMP_offload_unregister_ver (unsigned version, const void *host_table,
3f05e03d 2617 int target_type, const void *data)
1df3f842 2618{
a51df54e
IV
2619 int i;
2620
3f05e03d
TS
2621 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2622 gomp_fatal ("Library too old for offload (version %u < %u)",
2623 GOMP_VERSION, GOMP_VERSION_LIB (version));
2624
2625 const void *target_data;
2626 if (GOMP_VERSION_LIB (version) > 1)
2627 target_data = &((void **) data)[1];
2628 else
2629 target_data = data;
2630
a51df54e
IV
2631 gomp_mutex_lock (&register_lock);
2632
2633 /* Unload image from all initialized devices. */
2634 for (i = 0; i < num_devices; i++)
2635 {
a51df54e
IV
2636 struct gomp_device_descr *devicep = &devices[i];
2637 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2638 if (devicep->type == target_type
2639 && devicep->state == GOMP_DEVICE_INITIALIZED)
2a21ff19
NS
2640 gomp_unload_image_from_device (devicep, version,
2641 host_table, target_data);
a51df54e
IV
2642 gomp_mutex_unlock (&devicep->lock);
2643 }
2644
2645 /* Remove image from array of pending images. */
2646 for (i = 0; i < num_offload_images; i++)
2647 if (offload_images[i].target_data == target_data)
2648 {
2649 offload_images[i] = offload_images[--num_offload_images];
2650 break;
2651 }
2652
2653 gomp_mutex_unlock (&register_lock);
41dbbb37 2654}
1df3f842 2655
9ef71453
TS
2656/* Legacy entry point. */
2657
2a21ff19
NS
2658void
2659GOMP_offload_unregister (const void *host_table, int target_type,
2660 const void *target_data)
2661{
2662 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2663}
2664
a51df54e
IV
2665/* This function initializes the target device, specified by DEVICEP. DEVICEP
2666 must be locked on entry, and remains locked on return. */
41dbbb37
TS
2667
2668attribute_hidden void
a51df54e 2669gomp_init_device (struct gomp_device_descr *devicep)
41dbbb37 2670{
1df3f842 2671 int i;
6ce13072
CLT
2672 if (!devicep->init_device_func (devicep->target_id))
2673 {
2674 gomp_mutex_unlock (&devicep->lock);
2675 gomp_fatal ("device initialization failed");
2676 }
a51df54e
IV
2677
2678 /* Load to the device all images registered up to this moment. */
2679 for (i = 0; i < num_offload_images; i++)
1df3f842 2680 {
a51df54e
IV
2681 struct offload_image_descr *image = &offload_images[i];
2682 if (image->type == devicep->type)
2a21ff19
NS
2683 gomp_load_image_to_device (devicep, image->version,
2684 image->host_table, image->target_data,
2685 false);
1df3f842
JJ
2686 }
2687
1f4c5b9b
CLT
2688 /* Initialize OpenACC asynchronous queues. */
2689 goacc_init_asyncqueues (devicep);
2690
d84ffc0a 2691 devicep->state = GOMP_DEVICE_INITIALIZED;
41dbbb37
TS
2692}
2693
1f4c5b9b
CLT
2694/* This function finalizes the target device, specified by DEVICEP. DEVICEP
2695 must be locked on entry, and remains locked on return. */
2696
2697attribute_hidden bool
2698gomp_fini_device (struct gomp_device_descr *devicep)
2699{
2700 bool ret = goacc_fini_asyncqueues (devicep);
2701 ret &= devicep->fini_device_func (devicep->target_id);
2702 devicep->state = GOMP_DEVICE_FINALIZED;
2703 return ret;
2704}
2705
22be2349
NS
2706attribute_hidden void
2707gomp_unload_device (struct gomp_device_descr *devicep)
2708{
d84ffc0a 2709 if (devicep->state == GOMP_DEVICE_INITIALIZED)
22be2349
NS
2710 {
2711 unsigned i;
d22cd774 2712
22be2349
NS
2713 /* Unload from the device all currently registered images. */
2714 for (i = 0; i < num_offload_images; i++)
2715 {
2716 struct offload_image_descr *image = &offload_images[i];
2717 if (image->type == devicep->type)
2a21ff19
NS
2718 gomp_unload_image_from_device (devicep, image->version,
2719 image->host_table,
22be2349
NS
2720 image->target_data);
2721 }
2722 }
2723}
2724
e01d41e5 2725/* Host fallback for GOMP_target{,_ext} routines. */
d9a6bd32
JJ
2726
2727static void
1bfc07d1 2728gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
aea72386 2729 struct gomp_device_descr *devicep, void **args)
d9a6bd32
JJ
2730{
2731 struct gomp_thread old_thr, *thr = gomp_thread ();
1bfc07d1
KCY
2732
2733 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2734 && devicep != NULL)
2735 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2736 "be used for offloading");
2737
d9a6bd32
JJ
2738 old_thr = *thr;
2739 memset (thr, '\0', sizeof (*thr));
2740 if (gomp_places_list)
2741 {
2742 thr->place = old_thr.place;
2743 thr->ts.place_partition_len = gomp_places_list_len;
2744 }
aea72386
JJ
2745 if (args)
2746 while (*args)
2747 {
2748 intptr_t id = (intptr_t) *args++, val;
2749 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2750 val = (intptr_t) *args++;
2751 else
2752 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2753 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2754 continue;
2755 id &= GOMP_TARGET_ARG_ID_MASK;
2756 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2757 continue;
2758 val = val > INT_MAX ? INT_MAX : val;
2759 if (val)
2760 gomp_icv (true)->thread_limit_var = val;
2761 break;
2762 }
2763
d9a6bd32
JJ
2764 fn (hostaddrs);
2765 gomp_free_thread (thr);
2766 *thr = old_thr;
2767}
2768
b2b40051
MJ
2769/* Calculate alignment and size requirements of a private copy of data shared
2770 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
e01d41e5 2771
b2b40051
MJ
2772static inline void
2773calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2774 unsigned short *kinds, size_t *tgt_align,
2775 size_t *tgt_size)
e01d41e5 2776{
b2b40051
MJ
2777 size_t i;
2778 for (i = 0; i < mapnum; i++)
2779 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2780 {
2781 size_t align = (size_t) 1 << (kinds[i] >> 8);
2782 if (*tgt_align < align)
2783 *tgt_align = align;
2784 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2785 *tgt_size += sizes[i];
2786 }
2787}
2788
2789/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2790
2791static inline void
2792copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2793 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2794 size_t tgt_size)
2795{
2796 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2797 if (al)
2798 tgt += tgt_align - al;
2799 tgt_size = 0;
2800 size_t i;
e01d41e5 2801 for (i = 0; i < mapnum; i++)
bbb7f860 2802 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
e01d41e5
JJ
2803 {
2804 size_t align = (size_t) 1 << (kinds[i] >> 8);
e01d41e5 2805 tgt_size = (tgt_size + align - 1) & ~(align - 1);
b2b40051
MJ
2806 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2807 hostaddrs[i] = tgt + tgt_size;
2808 tgt_size = tgt_size + sizes[i];
49d1a2f9
TB
2809 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2810 {
2811 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2812 ++i;
2813 }
e01d41e5 2814 }
b2b40051
MJ
2815}
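
/* The two helpers above are used as a pair; a sketch of the pattern (the
   same sequence appears in GOMP_target_ext below):

     size_t tgt_align = 0, tgt_size = 0;
     calculate_firstprivate_requirements (mapnum, sizes, kinds,
					  &tgt_align, &tgt_size);
     if (tgt_align)
       {
	 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				 tgt_align, tgt_size);
       }  */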
2816
e01d41e5 2817/* Helper function of GOMP_target{,_ext} routines. */
d9a6bd32
JJ
2818
2819static void *
2820gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2821 void (*host_fn) (void *))
2822{
2823 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2824 return (void *) host_fn;
2825 else
2826 {
2827 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
2828 if (devicep->state == GOMP_DEVICE_FINALIZED)
2829 {
2830 gomp_mutex_unlock (&devicep->lock);
2831 return NULL;
2832 }
2833
d9a6bd32
JJ
2834 struct splay_tree_key_s k;
2835 k.host_start = (uintptr_t) host_fn;
2836 k.host_end = k.host_start + 1;
2837 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2838 gomp_mutex_unlock (&devicep->lock);
2839 if (tgt_fn == NULL)
eb4048f2 2840 return NULL;
d9a6bd32
JJ
2841
2842 return (void *) tgt_fn->tgt_offset;
2843 }
2844}
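
/* Note: functions are registered by gomp_load_image_to_device as one-byte
   ranges [host_fn, host_fn + 1), which is why the lookup above probes
   exactly that interval; for such keys TGT_OFFSET holds the device entry
   point itself rather than an offset into a mapped block.  */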
2845
acf0174b 2846/* Called when encountering a target directive. If DEVICE
41dbbb37
TS
2847 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2848 GOMP_DEVICE_HOST_FALLBACK (or any value
2849 larger than last available hw device), use host fallback.
128b26dc
TS
2850 FN is address of host code, UNUSED is part of the current ABI, but
2851 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
acf0174b
JJ
2852 with MAPNUM entries, with addresses of the host objects,
2853 sizes of the host objects (resp. for pointer kind pointer bias
2854 and assumed sizeof (void *) size) and kinds. */
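
/* For instance (an informal sketch with hypothetical compiler-generated
   names, not a literal dump): for

     #pragma omp target map(tofrom: x)
     x += 1;

   the compiler outlines the body into a function fn.0 and emits roughly

     GOMP_target (GOMP_DEVICE_ICV, fn.0, NULL, 1, hostaddrs, sizes, kinds);

   where the three arrays each contain a single entry describing X.  */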
2855
2856void
128b26dc 2857GOMP_target (int device, void (*fn) (void *), const void *unused,
acf0174b
JJ
2858 size_t mapnum, void **hostaddrs, size_t *sizes,
2859 unsigned char *kinds)
2860{
1158fe43 2861 struct gomp_device_descr *devicep = resolve_device (device, true);
41dbbb37 2862
d84ffc0a 2863 void *fn_addr;
41dbbb37 2864 if (devicep == NULL
d84ffc0a 2865 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
b2b40051
MJ
2866 /* All shared memory devices should use the GOMP_target_ext function. */
2867 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
d84ffc0a 2868 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
aea72386 2869 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
d9a6bd32 2870
275c736e 2871 htab_t refcount_set = htab_create (mapnum);
d9a6bd32
JJ
2872 struct target_mem_desc *tgt_vars
2873 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
275c736e 2874 &refcount_set, GOMP_MAP_VARS_TARGET);
b2b40051
MJ
2875 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2876 NULL);
275c736e
CLT
2877 htab_clear (refcount_set);
2878 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2879 htab_free (refcount_set);
d9a6bd32 2880}
1df3f842 2881
001ab12e
FH
2882static inline unsigned int
2883clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2884{
2885 /* If we cannot run asynchronously, simply ignore nowait. */
2886 if (devicep != NULL && devicep->async_run_func == NULL)
2887 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2888
2889 return flags;
2890}
2891
81476bc4
MV
2892static void
2893gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2894{
2895 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2896 if (item == NULL)
2897 return;
2898
2899 void *host_ptr = &item->icvs;
2900 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2901 if (dev_ptr != NULL)
2902 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2903 sizeof (struct gomp_offload_icvs));
2904}
2905
e01d41e5
JJ
2906/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2907 and several arguments have been added:
2908 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2909 DEPEND is array of dependencies, see GOMP_task for details.
b2b40051
MJ
2910
2911 ARGS is a pointer to an array consisting of a variable number of both
2912 device-independent and device-specific arguments, each of which can take
2913 one or two elements, where the first element specifies for which device
2914 it is intended, the type, and optionally also the value. If the value is
2915 not present in the first element, the whole second element contains the
2916 actual value. The last element of the array is a single NULL. Among the
2917 device-independent arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
2918
e01d41e5
JJ
2919 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2920 that value, or 1 if the teams construct is not present, or 0 if the
2921 teams construct does not have a num_teams clause and so the choice is
2922 implementation defined, and -1 if it can't be determined on the host
2923 what value GOMP_teams will have on the device.
2924 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2925 body with that value, or 0 if the teams construct does not have a
2926 thread_limit clause or the teams construct is not present, or -1 if it
2927 can't be determined on the host what value GOMP_teams will have on the device. */
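
/* As an illustration (a sketch mirroring the decoding loops below, not a
   literal compiler dump): an ARGS array requesting 8 teams on all devices
   fits in a single element,

     void *args[] = { (void *) (((uintptr_t) 8 << GOMP_TARGET_ARG_VALUE_SHIFT)
				| GOMP_TARGET_ARG_DEVICE_ALL
				| GOMP_TARGET_ARG_NUM_TEAMS),
		      NULL };

   whereas a value too wide for the shifted form is passed as two elements
   using GOMP_TARGET_ARG_SUBSEQUENT_PARAM.  */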
2928
d9a6bd32 2929void
e01d41e5
JJ
2930GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2931 void **hostaddrs, size_t *sizes, unsigned short *kinds,
b2b40051 2932 unsigned int flags, void **depend, void **args)
d9a6bd32 2933{
1158fe43 2934 struct gomp_device_descr *devicep = resolve_device (device, true);
21f3a236
JJ
2935 size_t tgt_align = 0, tgt_size = 0;
2936 bool fpc_done = false;
41dbbb37 2937
81476bc4
MV
2938 /* Obtain the original TEAMS and THREADS values from ARGS. */
2939 intptr_t orig_teams = 1, orig_threads = 0;
2940 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2941 void **tmpargs = args;
2942 while (*tmpargs)
2943 {
2944 intptr_t id = (intptr_t) *tmpargs++, val;
2945 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2946 {
2947 val = (intptr_t) *tmpargs++;
2948 len = 2;
2949 }
2950 else
2951 {
2952 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2953 len = 1;
2954 }
2955 num_args += len;
2956 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2957 continue;
2958 val = val > INT_MAX ? INT_MAX : val;
2959 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2960 {
2961 orig_teams = val;
2962 teams_len = len;
2963 }
2964 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2965 {
2966 orig_threads = val;
2967 threads_len = len;
2968 }
2969 }
2970
2971 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2972 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2973 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2974 value could not be determined. No change.
2975 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2976 Set device-specific value.
2977 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2978 No change. */
2979 if (orig_teams == -2)
2980 new_teams = 1;
2981 else if (orig_teams == 0)
2982 {
2983 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2984 if (item != NULL)
2985 new_teams = item->icvs.nteams;
2986 }
2987 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2988 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2989 e.g. a THREAD_LIMIT clause. */
2990 if (orig_teams > -2 && orig_threads == 0)
2991 {
2992 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2993 if (item != NULL)
2994 new_threads = item->icvs.teams_thread_limit;
2995 }
2996
2997 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2998 updated. */
2999 void **new_args = args;
3000 if (orig_teams != new_teams || orig_threads != new_threads)
3001 {
3002 size_t tms_len = (orig_teams == new_teams
3003 ? teams_len
3004 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3005 ? 1 : 2));
3006 size_t ths_len = (orig_threads == new_threads
3007 ? threads_len
3008 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3009 ? 1 : 2));
3010 /* One additional item after the last arg must be NULL. */
3011 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3012 + ths_len + 1;
3013 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3014
3015 tmpargs = args;
3016 void **tmp_new_args = new_args;
3017 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3018 too if they have not been changed, and are skipped otherwise. */
3019 while (*tmpargs)
3020 {
3021 intptr_t id = (intptr_t) *tmpargs;
3022 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3023 && orig_teams != new_teams)
3024 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3025 && orig_threads != new_threads))
3026 {
3027 tmpargs++;
3028 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3029 tmpargs++;
3030 }
3031 else
3032 {
3033 *tmp_new_args++ = *tmpargs++;
3034 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3035 *tmp_new_args++ = *tmpargs++;
3036 }
3037 }
3038
3039 /* Add the new TEAMS arg to the new args list if it has been changed. */
3040 if (orig_teams != new_teams)
3041 {
3042 intptr_t new_val = new_teams;
3043 if (tms_len == 1)
3044 {
3045 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3046 | GOMP_TARGET_ARG_NUM_TEAMS;
3047 *tmp_new_args++ = (void *) new_val;
3048 }
3049 else
3050 {
3051 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3052 | GOMP_TARGET_ARG_NUM_TEAMS);
3053 *tmp_new_args++ = (void *) new_val;
3054 }
3055 }
3056
3057 /* Add the new THREADS arg to the new args list if it has been changed. */
3058 if (orig_threads != new_threads)
3059 {
3060 intptr_t new_val = new_threads;
3061 if (ths_len == 1)
3062 {
3063 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3064 | GOMP_TARGET_ARG_THREAD_LIMIT;
3065 *tmp_new_args++ = (void *) new_val;
3066 }
3067 else
3068 {
3069 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3070 | GOMP_TARGET_ARG_THREAD_LIMIT);
3071 *tmp_new_args++ = (void *) new_val;
3072 }
3073 }
3074
3075 *tmp_new_args = NULL;
3076 }
3077
001ab12e
FH
3078 flags = clear_unsupported_flags (devicep, flags);
3079
e4606348
JJ
3080 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3081 {
3082 struct gomp_thread *thr = gomp_thread ();
3083 /* Create a team if we don't have any around, as nowait
3084 target tasks make sense to run asynchronously even when
3085 outside of any parallel. */
3086 if (__builtin_expect (thr->ts.team == NULL, 0))
3087 {
3088 struct gomp_team *team = gomp_new_team (1);
3089 struct gomp_task *task = thr->task;
a58a965e 3090 struct gomp_task **implicit_task = &task;
e4606348
JJ
3091 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3092 team->prev_ts = thr->ts;
3093 thr->ts.team = team;
3094 thr->ts.team_id = 0;
3095 thr->ts.work_share = &team->work_shares[0];
3096 thr->ts.last_work_share = NULL;
3097#ifdef HAVE_SYNC_BUILTINS
3098 thr->ts.single_count = 0;
3099#endif
3100 thr->ts.static_trip = 0;
3101 thr->task = &team->implicit_task[0];
3102 gomp_init_task (thr->task, NULL, icv);
a58a965e
JJ
3103 while (*implicit_task
3104 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3105 implicit_task = &(*implicit_task)->parent;
3106 if (*implicit_task)
e4606348 3107 {
a58a965e 3108 thr->task = *implicit_task;
e4606348 3109 gomp_end_task ();
a58a965e 3110 free (*implicit_task);
e4606348
JJ
3111 thr->task = &team->implicit_task[0];
3112 }
3113 else
3114 pthread_setspecific (gomp_thread_destructor, thr);
a58a965e
JJ
3115 if (implicit_task != &task)
3116 {
3117 *implicit_task = thr->task;
3118 thr->task = task;
3119 }
e4606348
JJ
3120 }
3121 if (thr->ts.team
3122 && !thr->task->final_task)
3123 {
3124 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
81476bc4 3125 sizes, kinds, flags, depend, new_args,
e4606348
JJ
3126 GOMP_TARGET_TASK_BEFORE_MAP);
3127 return;
3128 }
3129 }
3130
3131 /* If there are depend clauses, but nowait is not present
3132 (or we are in a final task), block the parent task until the
3133 dependencies are resolved and then just continue with the rest
3134 of the function as if it is a merged task. */
d9a6bd32
JJ
3135 if (depend != NULL)
3136 {
3137 struct gomp_thread *thr = gomp_thread ();
3138 if (thr->task && thr->task->depend_hash)
21f3a236
JJ
3139 {
3140 /* If we might need to wait, copy firstprivate now. */
3141 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3142 &tgt_align, &tgt_size);
3143 if (tgt_align)
3144 {
3145 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3146 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3147 tgt_align, tgt_size);
3148 }
3149 fpc_done = true;
3150 gomp_task_maybe_wait_for_dependencies (depend);
3151 }
d9a6bd32 3152 }
41dbbb37 3153
d84ffc0a 3154 void *fn_addr;
d9a6bd32 3155 if (devicep == NULL
d84ffc0a 3156 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
b2b40051
MJ
3157 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3158 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
41dbbb37 3159 {
21f3a236
JJ
3160 if (!fpc_done)
3161 {
3162 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3163 &tgt_align, &tgt_size);
3164 if (tgt_align)
3165 {
3166 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3167 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3168 tgt_align, tgt_size);
3169 }
3170 }
81476bc4 3171 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
d9a6bd32 3172 return;
41dbbb37 3173 }
1df3f842 3174
b2b40051 3175 struct target_mem_desc *tgt_vars;
275c736e
CLT
3176 htab_t refcount_set = NULL;
3177
b2b40051
MJ
3178 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3179 {
21f3a236
JJ
3180 if (!fpc_done)
3181 {
3182 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3183 &tgt_align, &tgt_size);
3184 if (tgt_align)
3185 {
3186 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3187 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3188 tgt_align, tgt_size);
3189 }
3190 }
b2b40051
MJ
3191 tgt_vars = NULL;
3192 }
3193 else
275c736e
CLT
3194 {
3195 refcount_set = htab_create (mapnum);
3196 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3197 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3198 }
b2b40051
MJ
3199 devicep->run_func (devicep->target_id, fn_addr,
3200 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
81476bc4 3201 new_args);
b2b40051 3202 if (tgt_vars)
275c736e
CLT
3203 {
3204 htab_clear (refcount_set);
3205 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3206 }
3207 if (refcount_set)
3208 htab_free (refcount_set);
81476bc4
MV
3209
3210 /* Copy back ICVs from device to host.
3211 HOST_PTR is expected to exist since it was added in
3212 gomp_load_image_to_device if not already available. */
3213 gomp_copy_back_icvs (devicep, device);
3214
acf0174b
JJ
3215}
3216
ea4b23d9
TB
3217
3218/* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3219   tracking the mapping of every variable, assuming that reverse offload
3220   occurs only very rarely. The downside is that the reverse search is slow. */
3221
3222struct gomp_splay_tree_rev_lookup_data {
3223 uintptr_t tgt_start;
3224 uintptr_t tgt_end;
3225 splay_tree_key key;
3226};
3227
3228static int
3229gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3230{
3231 struct gomp_splay_tree_rev_lookup_data *data;
3232 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3233 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3234
3235 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3236 return 0;
3237
3238 size_t j;
3239 for (j = 0; j < key->tgt->list_count; j++)
3240 if (key->tgt->list[j].key == key)
3241 break;
3242 assert (j < key->tgt->list_count);
3243 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3244
3245 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3246 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3247 {
3248 data->key = key;
3249 return 1;
3250 }
3251 return 0;
3252}
3253
3254static inline splay_tree_key
3255gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3256 bool zero_len)
3257{
3258 struct gomp_splay_tree_rev_lookup_data data;
3259 data.key = NULL;
3260 data.tgt_start = tgt_start;
3261 data.tgt_end = tgt_end;
3262
3263 if (tgt_start != tgt_end)
3264 {
3265 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3266 return data.key;
3267 }
3268
3269 data.tgt_end++;
3270 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3271 if (data.key != NULL || zero_len)
3272 return data.key;
3273 data.tgt_end--;
3274
3275 data.tgt_start--;
3276 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3277 return data.key;
3278}
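/* Editorial sketch, not part of libgomp: a standalone restatement of the
   match test used by gomp_splay_tree_rev_lookup above.  The first clause
   (exact interval equality) is what makes zero-length keys findable at
   all, since the generic overlap test in the second clause can never hold
   for an empty interval; that is why gomp_map_rev_lookup retries with
   tgt_end+1 and then tgt_start-1.  The function name is hypothetical.  */
#if 0
static int
rev_match (uintptr_t key_start, uintptr_t key_end,
           uintptr_t want_start, uintptr_t want_end)
{
  return (key_start == want_start && key_end == want_end)
         || (key_end > want_start && key_start < want_end);
}
#endif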
3279
3280struct cpy_data
3281{
3282 uint64_t devaddr;
3283 bool present, aligned;
3284};
3285
3286
3287/* Search just mapped reverse-offload data; returns index if found,
3288 otherwise >= n. */
3289
3290static inline int
3291gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3292 unsigned short *kinds, uint64_t *sizes, size_t n,
3293 uint64_t tgt_start, uint64_t tgt_end)
3294{
3295 const bool short_mapkind = true;
3296 const int typemask = short_mapkind ? 0xff : 0x7;
3297 size_t i;
3298 for (i = 0; i < n; i++)
3299 {
3300 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3301 == GOMP_MAP_STRUCT);
3302 uint64_t dev_end;
3303 if (!is_struct)
3304 dev_end = d[i].devaddr + sizes[i];
3305 else
3306 {
3307 if (i + sizes[i] < n)
3308 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3309 else
3310 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3311 }
3312 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3313 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3314 break;
3315 if (is_struct)
3316 i += sizes[i];
3317 }
3318 return i;
3319}
3320
3321static inline int
3322gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3323 unsigned short *kinds, uint64_t *sizes,
3324 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3325 bool zero_len)
3326{
3327 size_t i;
3328 if (tgt_start != tgt_end)
3329 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3330 tgt_start, tgt_end);
3331 tgt_end++;
3332 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3333 tgt_start, tgt_end);
3334 if (i < n || zero_len)
3335 return i;
3336 tgt_end--;
3337
3338 tgt_start--;
3339 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3340 tgt_start, tgt_end);
3341}
3342
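/* Editorial sketch, not part of libgomp: the user-level construct that
   ends up in gomp_target_rev below.  A minimal example, assuming a
   toolchain configured for offloading; the function name is made up.  */
#if 0
#pragma omp requires reverse_offload

void
offload_with_host_callback (int *a, int n)
{
  #pragma omp target map(tofrom: a[0:n])
  {
    /* ... device code ... */
    #pragma omp target device(ancestor: 1) map(always, tofrom: a[0:n])
    {
      /* This block runs back on the host; the device plugin calls into
         gomp_target_rev to map the arguments and invoke it.  */
    }
  }
}
#endif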
131d18e9 3343/* Handle reverse offload. This is called by the device plugins for a
ea4b23d9
TB
3344 reverse offload; it is not called if the outer target runs on the host.
3345 The mapping is simplified: device-affecting constructs (except for target
3346 with device(ancestor:1)) must not be encountered; in particular not
3347 target (enter/exit) data. */
131d18e9
TB
3348
3349void
ea4b23d9
TB
3350gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3351 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
130c2f3c 3352 struct goacc_asyncqueue *aq)
131d18e9 3353{
ea4b23d9
TB
3354 /* Return early if there is no offload code. */
3355 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3356 return;
3357 /* Currently, this fails because of calculate_firstprivate_requirements
3358 below; it could be fixed, but additional code would need to be updated
3359 to handle 32bit hosts - thus, it is not worthwhile. */
3360 if (sizeof (void *) != sizeof (uint64_t))
3361 gomp_fatal ("Reverse offload on 32bit hosts is not supported.");
3362
3363 struct cpy_data *cdata = NULL;
3364 uint64_t *devaddrs;
3365 uint64_t *sizes;
3366 unsigned short *kinds;
3367 const bool short_mapkind = true;
3368 const int typemask = short_mapkind ? 0xff : 0x7;
3369 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3370
3371 reverse_splay_tree_key n;
3372 struct reverse_splay_tree_key_s k;
3373 k.dev = fn_ptr;
3374
3375 gomp_mutex_lock (&devicep->lock);
3376 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3377 gomp_mutex_unlock (&devicep->lock);
3378
3379 if (n == NULL)
3380 gomp_fatal ("Cannot find reverse-offload function");
3381 void (*host_fn)() = (void (*)()) n->k->host_start;
3382
0b1ce70a 3383 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
ea4b23d9
TB
3384 {
3385 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3386 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3387 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3388 }
3389 else
3390 {
3391 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3392 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3393 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
130c2f3c
TS
3394 gomp_copy_dev2host (devicep, aq, devaddrs,
3395 (const void *) (uintptr_t) devaddrs_ptr,
3396 mapnum * sizeof (uint64_t));
3397 gomp_copy_dev2host (devicep, aq, sizes,
3398 (const void *) (uintptr_t) sizes_ptr,
3399 mapnum * sizeof (uint64_t));
3400 gomp_copy_dev2host (devicep, aq, kinds,
3401 (const void *) (uintptr_t) kinds_ptr,
3402 mapnum * sizeof (unsigned short));
3403 if (aq && !devicep->openacc.async.synchronize_func (aq))
3404 exit (EXIT_FAILURE);
ea4b23d9
TB
3405 }
3406
3407 size_t tgt_align = 0, tgt_size = 0;
3408
3409 /* If actually executed on 32bit systems, the casts lead to wrong code;
3410 but offloading from 32bit hosts is not supported; see top of this function. */
3411 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3412 (void *) (uintptr_t) kinds,
3413 &tgt_align, &tgt_size);
3414
3415 if (tgt_align)
3416 {
3417 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3418 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3419 if (al)
3420 tgt += tgt_align - al;
3421 tgt_size = 0;
3422 for (uint64_t i = 0; i < mapnum; i++)
3423 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3424 && devaddrs[i] != 0)
3425 {
3426 size_t align = (size_t) 1 << (kinds[i] >> 8);
3427 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3428 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3429 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3430 (size_t) sizes[i]);
ea4b23d9 3431 else
130c2f3c
TS
3432 {
3433 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3434 (void *) (uintptr_t) devaddrs[i],
3435 (size_t) sizes[i]);
3436 if (aq && !devicep->openacc.async.synchronize_func (aq))
3437 exit (EXIT_FAILURE);
3438 }
ea4b23d9
TB
3439 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3440 tgt_size = tgt_size + sizes[i];
3441 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3442 && i + 1 < mapnum
3443 && ((get_kind (short_mapkind, kinds, i) & typemask)
3444 == GOMP_MAP_ATTACH))
3445 {
3446 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3447 = (uint64_t) devaddrs[i];
3448 ++i;
3449 }
3450 }
3451 }
3452
0b1ce70a 3453 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
ea4b23d9
TB
3454 {
3455 size_t j, struct_cpy = 0;
3456 splay_tree_key n2;
3457 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3458 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3459 gomp_mutex_lock (&devicep->lock);
3460 for (uint64_t i = 0; i < mapnum; i++)
3461 {
3462 if (devaddrs[i] == 0)
3463 continue;
3464 n = NULL;
3465 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3466 switch (kind)
3467 {
3468 case GOMP_MAP_FIRSTPRIVATE:
3469 case GOMP_MAP_FIRSTPRIVATE_INT:
3470 continue;
3471
3472 case GOMP_MAP_DELETE:
3473 case GOMP_MAP_RELEASE:
3474 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4ede915d
TB
3475 /* Assume it is present and look it up, but only diagnose a failed
3476 lookup when a 'present' map modifier requires it. */
ea4b23d9
TB
3477 case GOMP_MAP_ALLOC:
3478 case GOMP_MAP_FROM:
3479 case GOMP_MAP_FORCE_ALLOC:
3480 case GOMP_MAP_FORCE_FROM:
3481 case GOMP_MAP_ALWAYS_FROM:
3482 case GOMP_MAP_TO:
3483 case GOMP_MAP_TOFROM:
3484 case GOMP_MAP_FORCE_TO:
3485 case GOMP_MAP_FORCE_TOFROM:
3486 case GOMP_MAP_ALWAYS_TO:
3487 case GOMP_MAP_ALWAYS_TOFROM:
38944ec2 3488 case GOMP_MAP_FORCE_PRESENT:
4ede915d
TB
3489 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3490 case GOMP_MAP_ALWAYS_PRESENT_TO:
3491 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
ea4b23d9
TB
3492 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3493 cdata[i].devaddr = devaddrs[i];
3494 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3495 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3496 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3497 devaddrs[i],
3498 devaddrs[i] + sizes[i], zero_len);
3499 if (j < i)
3500 {
3501 n2 = NULL;
3502 cdata[i].present = true;
3503 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3504 }
3505 else
3506 {
3507 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3508 devaddrs[i],
3509 devaddrs[i] + sizes[i], zero_len);
3510 cdata[i].present = n2 != NULL;
3511 }
4ede915d
TB
3512 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3513 {
3514 gomp_mutex_unlock (&devicep->lock);
3515#ifdef HAVE_INTTYPES_H
3516 gomp_fatal ("present clause: no corresponding data on "
3517 "parent device at %p with size %"PRIu64,
3518 (void *) (uintptr_t) devaddrs[i],
3519 (uint64_t) sizes[i]);
3520#else
3521 gomp_fatal ("present clause: no corresponding data on "
3522 "parent device at %p with size %lu",
3523 (void *) (uintptr_t) devaddrs[i],
3524 (unsigned long) sizes[i]);
3525#endif
3526 break;
3527 }
3528 else if (!cdata[i].present
ea4b23d9
TB
3529 && kind != GOMP_MAP_DELETE
3530 && kind != GOMP_MAP_RELEASE
3531 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3532 {
3533 cdata[i].aligned = true;
3534 size_t align = (size_t) 1 << (kinds[i] >> 8);
3535 devaddrs[i]
3536 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3537 sizes[i]);
3538 }
3539 else if (n2 != NULL)
3540 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3541 - (n2->tgt->tgt_start + n2->tgt_offset));
3542 if (((!cdata[i].present || struct_cpy)
3543 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3544 || kind == GOMP_MAP_FORCE_TO
3545 || kind == GOMP_MAP_FORCE_TOFROM
4ede915d 3546 || GOMP_MAP_ALWAYS_TO_P (kind))
ea4b23d9 3547 {
130c2f3c
TS
3548 gomp_copy_dev2host (devicep, aq,
3549 (void *) (uintptr_t) devaddrs[i],
3550 (void *) (uintptr_t) cdata[i].devaddr,
3551 sizes[i]);
3552 if (aq && !devicep->openacc.async.synchronize_func (aq))
3553 {
3554 gomp_mutex_unlock (&devicep->lock);
3555 exit (EXIT_FAILURE);
3556 }
ea4b23d9
TB
3557 }
3558 if (struct_cpy)
3559 struct_cpy--;
3560 break;
3561 case GOMP_MAP_ATTACH:
3562 case GOMP_MAP_POINTER:
3563 case GOMP_MAP_ALWAYS_POINTER:
3564 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3565 devaddrs[i] + sizes[i],
3566 devaddrs[i] + sizes[i]
3567 + sizeof (void*), false);
3568 cdata[i].present = n2 != NULL;
3569 cdata[i].devaddr = devaddrs[i];
3570 if (n2)
3571 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3572 - (n2->tgt->tgt_start + n2->tgt_offset));
3573 else
3574 {
3575 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3576 devaddrs[i] + sizes[i],
3577 devaddrs[i] + sizes[i]
3578 + sizeof (void*), false);
3579 if (j < i)
3580 {
3581 cdata[i].present = true;
3582 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3583 - cdata[j].devaddr);
3584 }
3585 }
3586 if (!cdata[i].present)
3587 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3588 /* Assume that when present, the pointer is already correct. */
3589 if (!n2)
3590 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3591 = devaddrs[i-1];
3592 break;
3593 case GOMP_MAP_TO_PSET:
3594 /* Assume that when present, the pointers are fine and no 'to:'
3595 is required. */
3596 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3597 devaddrs[i], devaddrs[i] + sizes[i],
3598 false);
3599 cdata[i].present = n2 != NULL;
3600 cdata[i].devaddr = devaddrs[i];
3601 if (n2)
3602 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3603 - (n2->tgt->tgt_start + n2->tgt_offset));
3604 else
3605 {
3606 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3607 devaddrs[i],
3608 devaddrs[i] + sizes[i], false);
3609 if (j < i)
3610 {
3611 cdata[i].present = true;
3612 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3613 - cdata[j].devaddr);
3614 }
3615 }
3616 if (!cdata[i].present)
3617 {
3618 cdata[i].aligned = true;
3619 size_t align = (size_t) 1 << (kinds[i] >> 8);
3620 devaddrs[i]
3621 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3622 sizes[i]);
130c2f3c
TS
3623 gomp_copy_dev2host (devicep, aq,
3624 (void *) (uintptr_t) devaddrs[i],
3625 (void *) (uintptr_t) cdata[i].devaddr,
3626 sizes[i]);
3627 if (aq && !devicep->openacc.async.synchronize_func (aq))
3628 {
3629 gomp_mutex_unlock (&devicep->lock);
3630 exit (EXIT_FAILURE);
3631 }
ea4b23d9
TB
3632 }
3633 for (j = i + 1; j < mapnum; j++)
3634 {
3635 kind = get_kind (short_mapkind, kinds, j) & typemask;
3636 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3637 && !GOMP_MAP_POINTER_P (kind))
3638 break;
3639 if (devaddrs[j] < devaddrs[i])
3640 break;
3641 if (cdata[i].present)
3642 continue;
3643 if (devaddrs[j] == 0)
3644 {
3645 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3646 continue;
3647 }
3648 int k;
3649 n2 = NULL;
edaf1d60
TB
3650 /* Dereference devaddrs[j] to get the device addr. */
3651 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3652 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3653 + sizes[j]);
3654 cdata[j].present = true;
ea4b23d9 3655 cdata[j].devaddr = devaddrs[j];
edaf1d60
TB
3656 if (devaddrs[j] == 0)
3657 continue;
ea4b23d9
TB
3658 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3659 devaddrs[j],
3660 devaddrs[j] + sizeof (void*),
3661 false);
3662 if (k < j)
3663 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3664 - cdata[k].devaddr);
3665 else
3666 {
3667 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3668 devaddrs[j],
3669 devaddrs[j] + sizeof (void*),
3670 false);
3671 if (n2 == NULL)
3672 {
3673 gomp_mutex_unlock (&devicep->lock);
3674 gomp_fatal ("Pointer target wasn't mapped");
3675 }
3676 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3677 - (n2->tgt->tgt_start + n2->tgt_offset));
3678 }
3679 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3680 = (void *) (uintptr_t) devaddrs[j];
3681 }
3682 i = j - 1;
3683 break;
3684 case GOMP_MAP_STRUCT:
3685 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3686 devaddrs[i + sizes[i]]
3687 + sizes[i + sizes[i]], false);
3688 cdata[i].present = n2 != NULL;
3689 cdata[i].devaddr = devaddrs[i];
3690 struct_cpy = cdata[i].present ? 0 : sizes[i];
3691 if (!n2)
3692 {
3693 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3694 - devaddrs[i+1]
3695 + sizes[i + sizes[i]]);
3696 size_t align = (size_t) 1 << (kinds[i] >> 8);
3697 cdata[i].aligned = true;
3698 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3699 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3700 }
3701 else
3702 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3703 - (n2->tgt->tgt_start + n2->tgt_offset));
3704 break;
3705 default:
3706 gomp_mutex_unlock (&devicep->lock);
3707 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3708 }
3709 }
3710 gomp_mutex_unlock (&devicep->lock);
3711 }
3712
3713 host_fn (devaddrs);
3714
0b1ce70a 3715 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
ea4b23d9
TB
3716 {
3717 uint64_t struct_cpy = 0;
3718 bool clean_struct = false;
3719 for (uint64_t i = 0; i < mapnum; i++)
3720 {
3721 if (cdata[i].devaddr == 0)
3722 continue;
3723 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3724 bool copy = !cdata[i].present || struct_cpy;
3725 switch (kind)
3726 {
3727 case GOMP_MAP_FORCE_FROM:
3728 case GOMP_MAP_FORCE_TOFROM:
3729 case GOMP_MAP_ALWAYS_FROM:
3730 case GOMP_MAP_ALWAYS_TOFROM:
4ede915d
TB
3731 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3732 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
ea4b23d9
TB
3733 copy = true;
3734 /* FALLTHRU */
3735 case GOMP_MAP_FROM:
3736 case GOMP_MAP_TOFROM:
130c2f3c
TS
3737 if (copy)
3738 {
3739 gomp_copy_host2dev (devicep, aq,
3740 (void *) (uintptr_t) cdata[i].devaddr,
3741 (void *) (uintptr_t) devaddrs[i],
3742 sizes[i], false, NULL);
3743 if (aq && !devicep->openacc.async.synchronize_func (aq))
3744 exit (EXIT_FAILURE);
3745 }
ea4b23d9
TB
3746 default:
3747 break;
3748 }
3749 if (struct_cpy)
3750 {
3751 struct_cpy--;
3752 continue;
3753 }
3754 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3755 {
3756 clean_struct = true;
3757 struct_cpy = sizes[i];
3758 }
0b1ce70a 3759 else if (!cdata[i].present && cdata[i].aligned)
ea4b23d9
TB
3760 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3761 else if (!cdata[i].present)
3762 free ((void *) (uintptr_t) devaddrs[i]);
3763 }
3764 if (clean_struct)
3765 for (uint64_t i = 0; i < mapnum; i++)
3766 if (!cdata[i].present
3767 && ((get_kind (short_mapkind, kinds, i) & typemask)
3768 == GOMP_MAP_STRUCT))
3769 {
3770 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3771 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3772 }
3773
3774 free (devaddrs);
3775 free (sizes);
3776 free (kinds);
3777 }
131d18e9
TB
3778}
3779
e01d41e5 3780/* Host fallback for GOMP_target_data{,_ext} routines. */
d9a6bd32
JJ
3781
3782static void
1bfc07d1 3783gomp_target_data_fallback (struct gomp_device_descr *devicep)
d9a6bd32
JJ
3784{
3785 struct gomp_task_icv *icv = gomp_icv (false);
1bfc07d1
KCY
3786
3787 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3788 && devicep != NULL)
3789 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3790 "be used for offloading");
3791
d9a6bd32
JJ
3792 if (icv->target_data)
3793 {
3794 /* Even when doing a host fallback, if there are any active
3795 #pragma omp target data constructs, need to remember the
3796 new #pragma omp target data, otherwise GOMP_target_end_data
3797 would get out of sync. */
3798 struct target_mem_desc *tgt
3799 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
275c736e 3800 NULL, GOMP_MAP_VARS_DATA);
d9a6bd32
JJ
3801 tgt->prev = icv->target_data;
3802 icv->target_data = tgt;
3803 }
3804}
3805
acf0174b 3806void
128b26dc 3807GOMP_target_data (int device, const void *unused, size_t mapnum,
acf0174b
JJ
3808 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3809{
1158fe43 3810 struct gomp_device_descr *devicep = resolve_device (device, true);
41dbbb37
TS
3811
3812 if (devicep == NULL
b2b40051
MJ
3813 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3814 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1bfc07d1 3815 return gomp_target_data_fallback (devicep);
41dbbb37 3816
1df3f842 3817 struct target_mem_desc *tgt
41dbbb37 3818 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
275c736e 3819 NULL, GOMP_MAP_VARS_DATA);
d9a6bd32
JJ
3820 struct gomp_task_icv *icv = gomp_icv (true);
3821 tgt->prev = icv->target_data;
3822 icv->target_data = tgt;
3823}
3824
3825void
e01d41e5
JJ
3826GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3827 size_t *sizes, unsigned short *kinds)
d9a6bd32 3828{
1158fe43 3829 struct gomp_device_descr *devicep = resolve_device (device, true);
d9a6bd32
JJ
3830
3831 if (devicep == NULL
b2b40051
MJ
3832 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3833 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1bfc07d1 3834 return gomp_target_data_fallback (devicep);
d9a6bd32
JJ
3835
3836 struct target_mem_desc *tgt
3837 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
275c736e 3838 NULL, GOMP_MAP_VARS_DATA);
1df3f842
JJ
3839 struct gomp_task_icv *icv = gomp_icv (true);
3840 tgt->prev = icv->target_data;
3841 icv->target_data = tgt;
acf0174b
JJ
3842}
3843
3844void
3845GOMP_target_end_data (void)
3846{
1df3f842
JJ
3847 struct gomp_task_icv *icv = gomp_icv (false);
3848 if (icv->target_data)
3849 {
3850 struct target_mem_desc *tgt = icv->target_data;
3851 icv->target_data = tgt->prev;
275c736e 3852 gomp_unmap_vars (tgt, true, NULL);
1df3f842 3853 }
acf0174b
JJ
3854}
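/* Editorial sketch, not part of libgomp and not actual compiler output:
   roughly how '#pragma omp target data device(d) map(tofrom: a[0:N])'
   maps onto the two entry points above.  The variables a, N and d are
   placeholders, and the alignment bits normally encoded in the upper
   byte of each kind are omitted for clarity.  */
#if 0
void *hostaddrs[1] = { a };
size_t sizes[1] = { N * sizeof (*a) };
unsigned short kinds[1] = { GOMP_MAP_TOFROM };
GOMP_target_data_ext (d, 1, hostaddrs, sizes, kinds);
/* ... structured block body ... */
GOMP_target_end_data ();
#endif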
3855
3856void
128b26dc 3857GOMP_target_update (int device, const void *unused, size_t mapnum,
acf0174b
JJ
3858 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3859{
1158fe43 3860 struct gomp_device_descr *devicep = resolve_device (device, true);
41dbbb37
TS
3861
3862 if (devicep == NULL
b2b40051
MJ
3863 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3864 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1df3f842
JJ
3865 return;
3866
d9a6bd32
JJ
3867 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3868}
3869
3870void
e01d41e5
JJ
3871GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3872 size_t *sizes, unsigned short *kinds,
3873 unsigned int flags, void **depend)
d9a6bd32 3874{
1158fe43 3875 struct gomp_device_descr *devicep = resolve_device (device, true);
d9a6bd32
JJ
3876
3877 /* If there are depend clauses, but nowait is not present,
3878 block the parent task until the dependencies are resolved
3879 and then just continue with the rest of the function as if it
3880 were a merged task. Until we are able to schedule tasks during
3881 variable mapping or unmapping, ignore nowait if depend clauses
3882 are not present. */
3883 if (depend != NULL)
3884 {
3885 struct gomp_thread *thr = gomp_thread ();
3886 if (thr->task && thr->task->depend_hash)
3887 {
3888 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3889 && thr->ts.team
3890 && !thr->task->final_task)
3891 {
e4606348
JJ
3892 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3893 mapnum, hostaddrs, sizes, kinds,
3894 flags | GOMP_TARGET_FLAG_UPDATE,
b2b40051 3895 depend, NULL, GOMP_TARGET_TASK_DATA))
e4606348
JJ
3896 return;
3897 }
3898 else
3899 {
3900 struct gomp_team *team = thr->ts.team;
3901 /* If parallel or taskgroup has been cancelled, don't start new
3902 tasks. */
28567c40
JJ
3903 if (__builtin_expect (gomp_cancel_var, 0) && team)
3904 {
3905 if (gomp_team_barrier_cancelled (&team->barrier))
3906 return;
3907 if (thr->task->taskgroup)
3908 {
3909 if (thr->task->taskgroup->cancelled)
3910 return;
3911 if (thr->task->taskgroup->workshare
3912 && thr->task->taskgroup->prev
3913 && thr->task->taskgroup->prev->cancelled)
3914 return;
3915 }
3916 }
e4606348
JJ
3917
3918 gomp_task_maybe_wait_for_dependencies (depend);
d9a6bd32 3919 }
d9a6bd32
JJ
3920 }
3921 }
3922
3923 if (devicep == NULL
b2b40051
MJ
3924 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3925 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
3926 return;
3927
3928 struct gomp_thread *thr = gomp_thread ();
3929 struct gomp_team *team = thr->ts.team;
3930 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
28567c40
JJ
3931 if (__builtin_expect (gomp_cancel_var, 0) && team)
3932 {
3933 if (gomp_team_barrier_cancelled (&team->barrier))
3934 return;
3935 if (thr->task->taskgroup)
3936 {
3937 if (thr->task->taskgroup->cancelled)
3938 return;
3939 if (thr->task->taskgroup->workshare
3940 && thr->task->taskgroup->prev
3941 && thr->task->taskgroup->prev->cancelled)
3942 return;
3943 }
3944 }
d9a6bd32
JJ
3945
3946 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3947}
3948
3949static void
3950gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
275c736e
CLT
3951 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3952 htab_t *refcount_set)
d9a6bd32
JJ
3953{
3954 const int typemask = 0xff;
3955 size_t i;
41dbbb37 3956 gomp_mutex_lock (&devicep->lock);
d84ffc0a
IV
3957 if (devicep->state == GOMP_DEVICE_FINALIZED)
3958 {
3959 gomp_mutex_unlock (&devicep->lock);
3960 return;
3961 }
3962
9e628024
CLT
3963 for (i = 0; i < mapnum; i++)
3964 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3965 {
3966 struct splay_tree_key_s cur_node;
3967 cur_node.host_start = (uintptr_t) hostaddrs[i];
3968 cur_node.host_end = cur_node.host_start + sizeof (void *);
3969 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3970
3971 if (n)
3972 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3973 false, NULL);
3974 }
3975
275c736e
CLT
3976 int nrmvars = 0;
3977 splay_tree_key remove_vars[mapnum];
3978
d9a6bd32
JJ
3979 for (i = 0; i < mapnum; i++)
3980 {
3981 struct splay_tree_key_s cur_node;
3982 unsigned char kind = kinds[i] & typemask;
3983 switch (kind)
3984 {
3985 case GOMP_MAP_FROM:
3986 case GOMP_MAP_ALWAYS_FROM:
3987 case GOMP_MAP_DELETE:
3988 case GOMP_MAP_RELEASE:
3989 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3990 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3991 cur_node.host_start = (uintptr_t) hostaddrs[i];
3992 cur_node.host_end = cur_node.host_start + sizes[i];
3993 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3994 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
e01d41e5 3995 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
d9a6bd32
JJ
3996 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3997 if (!k)
3998 continue;
3999
275c736e
CLT
4000 bool delete_p = (kind == GOMP_MAP_DELETE
4001 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4002 bool do_copy, do_remove;
4003 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4004 &do_remove);
d9a6bd32 4005
275c736e 4006 if ((kind == GOMP_MAP_FROM && do_copy)
d9a6bd32 4007 || kind == GOMP_MAP_ALWAYS_FROM)
6c039937
CLT
4008 {
4009 if (k->aux && k->aux->attach_count)
4010 {
4011 /* We have to be careful not to overwrite still attached
4012 pointers during the copyback to host. */
4013 uintptr_t addr = k->host_start;
4014 while (addr < k->host_end)
4015 {
4016 size_t i = (addr - k->host_start) / sizeof (void *);
4017 if (k->aux->attach_count[i] == 0)
4018 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4019 (void *) (k->tgt->tgt_start
4020 + k->tgt_offset
4021 + addr - k->host_start),
4022 sizeof (void *));
4023 addr += sizeof (void *);
4024 }
4025 }
4026 else
4027 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4028 (void *) (k->tgt->tgt_start + k->tgt_offset
4029 + cur_node.host_start
4030 - k->host_start),
4031 cur_node.host_end - cur_node.host_start);
4032 }
275c736e
CLT
4033
4034 /* Structure elements lists are removed altogether at once, which
4035 may cause immediate deallocation of the target_mem_desc, causing
4036 errors if we still have following element siblings to copy back.
4037 While we're at it, it also seems more disciplined to simply
4038 queue all removals together for processing below.
4039
4040 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4041 not have this problem, since they maintain an additional
4042 tgt->refcount = 1 reference to the target_mem_desc to start with.
4043 */
4044 if (do_remove)
4045 remove_vars[nrmvars++] = k;
9e628024 4046 break;
d9a6bd32 4047
9e628024 4048 case GOMP_MAP_DETACH:
d9a6bd32
JJ
4049 break;
4050 default:
4051 gomp_mutex_unlock (&devicep->lock);
4052 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4053 kind);
4054 }
4055 }
4056
275c736e
CLT
4057 for (int i = 0; i < nrmvars; i++)
4058 gomp_remove_var (devicep, remove_vars[i]);
4059
41dbbb37 4060 gomp_mutex_unlock (&devicep->lock);
d9a6bd32 4061}
41dbbb37 4062
d9a6bd32
JJ
4063void
4064GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4065 size_t *sizes, unsigned short *kinds,
4066 unsigned int flags, void **depend)
4067{
1158fe43 4068 struct gomp_device_descr *devicep = resolve_device (device, true);
d9a6bd32
JJ
4069
4070 /* If there are depend clauses, but nowait is not present,
4071 block the parent task until the dependencies are resolved
4072 and then just continue with the rest of the function as if it
4073 were a merged task. Until we are able to schedule tasks during
4074 variable mapping or unmapping, ignore nowait if depend clauses
4075 are not present. */
4076 if (depend != NULL)
4077 {
4078 struct gomp_thread *thr = gomp_thread ();
4079 if (thr->task && thr->task->depend_hash)
4080 {
4081 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4082 && thr->ts.team
4083 && !thr->task->final_task)
4084 {
e4606348
JJ
4085 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4086 mapnum, hostaddrs, sizes, kinds,
b2b40051 4087 flags, depend, NULL,
e4606348
JJ
4088 GOMP_TARGET_TASK_DATA))
4089 return;
4090 }
4091 else
4092 {
4093 struct gomp_team *team = thr->ts.team;
4094 /* If parallel or taskgroup has been cancelled, don't start new
4095 tasks. */
28567c40
JJ
4096 if (__builtin_expect (gomp_cancel_var, 0) && team)
4097 {
4098 if (gomp_team_barrier_cancelled (&team->barrier))
4099 return;
4100 if (thr->task->taskgroup)
4101 {
4102 if (thr->task->taskgroup->cancelled)
4103 return;
4104 if (thr->task->taskgroup->workshare
4105 && thr->task->taskgroup->prev
4106 && thr->task->taskgroup->prev->cancelled)
4107 return;
4108 }
4109 }
e4606348
JJ
4110
4111 gomp_task_maybe_wait_for_dependencies (depend);
d9a6bd32 4112 }
d9a6bd32
JJ
4113 }
4114 }
4115
4116 if (devicep == NULL
b2b40051
MJ
4117 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4118 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4119 return;
4120
4121 struct gomp_thread *thr = gomp_thread ();
4122 struct gomp_team *team = thr->ts.team;
4123 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
28567c40
JJ
4124 if (__builtin_expect (gomp_cancel_var, 0) && team)
4125 {
4126 if (gomp_team_barrier_cancelled (&team->barrier))
4127 return;
4128 if (thr->task->taskgroup)
4129 {
4130 if (thr->task->taskgroup->cancelled)
4131 return;
4132 if (thr->task->taskgroup->workshare
4133 && thr->task->taskgroup->prev
4134 && thr->task->taskgroup->prev->cancelled)
4135 return;
4136 }
4137 }
d9a6bd32 4138
275c736e
CLT
4139 htab_t refcount_set = htab_create (mapnum);
4140
689418b9
TB
4141 /* The variables are mapped separately such that they can be released
4142 independently. */
4143 size_t i, j;
d9a6bd32
JJ
4144 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4145 for (i = 0; i < mapnum; i++)
4146 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4147 {
4148 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
275c736e
CLT
4149 &kinds[i], true, &refcount_set,
4150 GOMP_MAP_VARS_ENTER_DATA);
d9a6bd32
JJ
4151 i += sizes[i];
4152 }
689418b9
TB
4153 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4154 {
4155 for (j = i + 1; j < mapnum; j++)
972da557
TB
4156 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4157 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
689418b9
TB
4158 break;
4159 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
275c736e
CLT
4160 &kinds[i], true, &refcount_set,
4161 GOMP_MAP_VARS_ENTER_DATA);
689418b9
TB
4162 i += j - i - 1;
4163 }
c7a9655b
TB
4164 else if (i + 1 < mapnum
4165 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4166 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4167 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
9e628024
CLT
4168 {
4169 /* An attach operation must be processed together with the mapped
4170 base-pointer list item. */
4171 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
275c736e 4172 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
9e628024
CLT
4173 i += 1;
4174 }
d9a6bd32
JJ
4175 else
4176 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
275c736e 4177 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
d9a6bd32 4178 else
275c736e
CLT
4179 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4180 htab_free (refcount_set);
d9a6bd32
JJ
4181}
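/* Editorial sketch, not part of libgomp and not actual compiler output:
   roughly how unstructured data mapping is lowered onto the entry point
   above.  The variables a, N and device are placeholders.  'enter data'
   passes flags == 0; 'exit data' passes GOMP_TARGET_FLAG_EXIT_DATA and
   ends up in gomp_exit_data.  */
#if 0
void *hostaddrs[1] = { a };
size_t sizes[1] = { N * sizeof (*a) };
unsigned short kinds[1] = { GOMP_MAP_TO };
/* #pragma omp target enter data map(to: a[0:N])  */
GOMP_target_enter_exit_data (device, 1, hostaddrs, sizes, kinds, 0, NULL);
/* ... */
/* #pragma omp target exit data map(from: a[0:N])  */
kinds[0] = GOMP_MAP_FROM;
GOMP_target_enter_exit_data (device, 1, hostaddrs, sizes, kinds,
                             GOMP_TARGET_FLAG_EXIT_DATA, NULL);
#endif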
4182
e4606348 4183bool
d9a6bd32
JJ
4184gomp_target_task_fn (void *data)
4185{
4186 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
e4606348
JJ
4187 struct gomp_device_descr *devicep = ttask->devicep;
4188
d9a6bd32
JJ
4189 if (ttask->fn != NULL)
4190 {
d84ffc0a 4191 void *fn_addr;
e4606348 4192 if (devicep == NULL
d84ffc0a 4193 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
b2b40051
MJ
4194 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4195 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
e4606348
JJ
4196 {
4197 ttask->state = GOMP_TARGET_TASK_FALLBACK;
aea72386
JJ
4198 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4199 ttask->args);
e4606348
JJ
4200 return false;
4201 }
4202
4203 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4204 {
b2b40051 4205 if (ttask->tgt)
275c736e 4206 gomp_unmap_vars (ttask->tgt, true, NULL);
e4606348
JJ
4207 return false;
4208 }
4209
b2b40051
MJ
4210 void *actual_arguments;
4211 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4212 {
4213 ttask->tgt = NULL;
b2b40051
MJ
4214 actual_arguments = ttask->hostaddrs;
4215 }
4216 else
4217 {
4218 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4219 NULL, ttask->sizes, ttask->kinds, true,
275c736e 4220 NULL, GOMP_MAP_VARS_TARGET);
b2b40051
MJ
4221 actual_arguments = (void *) ttask->tgt->tgt_start;
4222 }
e4606348
JJ
4223 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4224
001ab12e 4225 assert (devicep->async_run_func);
b2b40051
MJ
4226 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4227 ttask->args, (void *) ttask);
e4606348 4228 return true;
d9a6bd32 4229 }
e4606348 4230 else if (devicep == NULL
b2b40051
MJ
4231 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4232 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
e4606348 4233 return false;
d9a6bd32
JJ
4234
4235 size_t i;
4236 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
e4606348 4237 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
d9a6bd32 4238 ttask->kinds, true);
d9a6bd32 4239 else
275c736e
CLT
4240 {
4241 htab_t refcount_set = htab_create (ttask->mapnum);
4242 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4243 for (i = 0; i < ttask->mapnum; i++)
4244 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4245 {
4246 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4247 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4248 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4249 i += ttask->sizes[i];
4250 }
4251 else
4252 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4253 &ttask->kinds[i], true, &refcount_set,
4254 GOMP_MAP_VARS_ENTER_DATA);
4255 else
4256 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4257 ttask->kinds, &refcount_set);
4258 htab_free (refcount_set);
4259 }
e4606348 4260 return false;
acf0174b
JJ
4261}
4262
4263void
4264GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4265{
4266 if (thread_limit)
4267 {
4268 struct gomp_task_icv *icv = gomp_icv (true);
4269 icv->thread_limit_var
4270 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4271 }
4272 (void) num_teams;
4273}
1df3f842 4274
7d6da11f
JJ
4275bool
4276GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4277 unsigned int thread_limit, bool first)
4278{
4279 struct gomp_thread *thr = gomp_thread ();
4280 if (first)
4281 {
4282 if (thread_limit)
4283 {
4284 struct gomp_task_icv *icv = gomp_icv (true);
4285 icv->thread_limit_var
4286 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4287 }
4288 (void) num_teams_high;
4289 if (num_teams_low == 0)
4290 num_teams_low = 1;
4291 thr->num_teams = num_teams_low - 1;
4292 thr->team_num = 0;
4293 }
4294 else if (thr->team_num == thr->num_teams)
4295 return false;
4296 else
4297 ++thr->team_num;
4298 return true;
4299}
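/* Editorial sketch, not part of libgomp: a hand-written approximation of
   how the host-fallback lowering of a 'teams' construct drives
   GOMP_teams4 above.  The first call initializes the team count and
   returns true; each later call advances thr->team_num until all teams
   have executed sequentially.  */
#if 0
if (GOMP_teams4 (num_teams_lo, num_teams_hi, thread_limit, true))
  do
    {
      /* Body of the teams region; one sequential "team" per iteration,
         with omp_get_team_num () reporting thr->team_num.  */
    }
  while (GOMP_teams4 (num_teams_lo, num_teams_hi, thread_limit, false));
#endif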
4300
d9a6bd32
JJ
4301void *
4302omp_target_alloc (size_t size, int device_num)
4303{
1158fe43
JJ
4304 if (device_num == omp_initial_device
4305 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4306 return malloc (size);
4307
1158fe43 4308 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4309 if (devicep == NULL)
4310 return NULL;
4311
b2b40051
MJ
4312 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4313 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4314 return malloc (size);
4315
4316 gomp_mutex_lock (&devicep->lock);
4317 void *ret = devicep->alloc_func (devicep->target_id, size);
4318 gomp_mutex_unlock (&devicep->lock);
4319 return ret;
4320}
4321
4322void
4323omp_target_free (void *device_ptr, int device_num)
4324{
1158fe43
JJ
4325 if (device_num == omp_initial_device
4326 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4327 {
4328 free (device_ptr);
4329 return;
4330 }
4331
1158fe43
JJ
4332 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4333 if (devicep == NULL || device_ptr == NULL)
d9a6bd32
JJ
4334 return;
4335
b2b40051
MJ
4336 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4337 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4338 {
4339 free (device_ptr);
4340 return;
4341 }
4342
4343 gomp_mutex_lock (&devicep->lock);
6ce13072 4344 gomp_free_device_memory (devicep, device_ptr);
d9a6bd32
JJ
4345 gomp_mutex_unlock (&devicep->lock);
4346}
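/* Editorial sketch, not part of libgomp: minimal use of the two
   allocator entry points above.  For the initial device or a
   shared-memory device they degenerate to plain malloc/free, as the
   code shows.  */
#if 0
#include <omp.h>

int
main (void)
{
  int dev = omp_get_default_device ();
  void *p = omp_target_alloc (1024, dev);
  if (p == NULL)
    return 1;   /* No device available or allocation failed.  */
  /* ... hand p to target regions via is_device_ptr (p) ... */
  omp_target_free (p, dev);
  return 0;
}
#endif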
4347
4348int
28567c40 4349omp_target_is_present (const void *ptr, int device_num)
d9a6bd32 4350{
1158fe43
JJ
4351 if (device_num == omp_initial_device
4352 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4353 return 1;
4354
1158fe43 4355 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4356 if (devicep == NULL)
4357 return 0;
4358
1158fe43
JJ
4359 if (ptr == NULL)
4360 return 1;
4361
b2b40051
MJ
4362 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4363 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4364 return 1;
4365
4366 gomp_mutex_lock (&devicep->lock);
4367 struct splay_tree_s *mem_map = &devicep->mem_map;
4368 struct splay_tree_key_s cur_node;
4369
4370 cur_node.host_start = (uintptr_t) ptr;
4371 cur_node.host_end = cur_node.host_start;
e01d41e5 4372 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
d9a6bd32
JJ
4373 int ret = n != NULL;
4374 gomp_mutex_unlock (&devicep->lock);
4375 return ret;
4376}
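/* Editorial sketch, not part of libgomp: omp_target_is_present observed
   around unstructured mapping.  On the initial device or a shared-memory
   device it always reports 1, matching the code above.  */
#if 0
#include <omp.h>
#include <assert.h>

int
main (void)
{
  static double x[100];
  int dev = omp_get_default_device ();
  #pragma omp target enter data device(dev) map(to: x)
  assert (omp_target_is_present (x, dev));
  #pragma omp target exit data device(dev) map(delete: x)
  return 0;
}
#endif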
4377
6c420193
MV
4378static int
4379omp_target_memcpy_check (int dst_device_num, int src_device_num,
4380 struct gomp_device_descr **dst_devicep,
4381 struct gomp_device_descr **src_devicep)
d9a6bd32 4382{
1158fe43
JJ
4383 if (dst_device_num != gomp_get_num_devices ()
4384 /* The gomp_get_num_devices call above has to happen unconditionally. */
4385 && dst_device_num != omp_initial_device)
d9a6bd32 4386 {
1158fe43 4387 *dst_devicep = resolve_device (dst_device_num, false);
6c420193 4388 if (*dst_devicep == NULL)
d9a6bd32
JJ
4389 return EINVAL;
4390
6c420193
MV
4391 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4392 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4393 *dst_devicep = NULL;
d9a6bd32 4394 }
6c420193 4395
1158fe43
JJ
4396 if (src_device_num != num_devices_openmp
4397 && src_device_num != omp_initial_device)
d9a6bd32 4398 {
1158fe43 4399 *src_devicep = resolve_device (src_device_num, false);
6c420193 4400 if (*src_devicep == NULL)
d9a6bd32
JJ
4401 return EINVAL;
4402
6c420193
MV
4403 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4404 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4405 *src_devicep = NULL;
d9a6bd32 4406 }
6c420193
MV
4407
4408 return 0;
4409}
4410
4411static int
4412omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4413 size_t dst_offset, size_t src_offset,
4414 struct gomp_device_descr *dst_devicep,
4415 struct gomp_device_descr *src_devicep)
4416{
4417 bool ret;
d9a6bd32
JJ
4418 if (src_devicep == NULL && dst_devicep == NULL)
4419 {
4420 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4421 return 0;
4422 }
4423 if (src_devicep == NULL)
4424 {
4425 gomp_mutex_lock (&dst_devicep->lock);
6ce13072
CLT
4426 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4427 (char *) dst + dst_offset,
4428 (char *) src + src_offset, length);
d9a6bd32 4429 gomp_mutex_unlock (&dst_devicep->lock);
6ce13072 4430 return (ret ? 0 : EINVAL);
d9a6bd32
JJ
4431 }
4432 if (dst_devicep == NULL)
4433 {
4434 gomp_mutex_lock (&src_devicep->lock);
6ce13072
CLT
4435 ret = src_devicep->dev2host_func (src_devicep->target_id,
4436 (char *) dst + dst_offset,
4437 (char *) src + src_offset, length);
d9a6bd32 4438 gomp_mutex_unlock (&src_devicep->lock);
6ce13072 4439 return (ret ? 0 : EINVAL);
d9a6bd32
JJ
4440 }
4441 if (src_devicep == dst_devicep)
4442 {
4443 gomp_mutex_lock (&src_devicep->lock);
6ce13072
CLT
4444 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4445 (char *) dst + dst_offset,
4446 (char *) src + src_offset, length);
d9a6bd32 4447 gomp_mutex_unlock (&src_devicep->lock);
6ce13072 4448 return (ret ? 0 : EINVAL);
d9a6bd32
JJ
4449 }
4450 return EINVAL;
4451}
4452
6c420193
MV
4453int
4454omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4455 size_t src_offset, int dst_device_num, int src_device_num)
4456{
4457 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4458 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4459 &dst_devicep, &src_devicep);
4460
4461 if (ret)
4462 return ret;
4463
4464 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4465 dst_devicep, src_devicep);
4466
4467 return ret;
4468}
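/* Editorial sketch, not part of libgomp: a host-to-device copy through
   omp_target_memcpy above.  The initial device number stands for the
   host on either end of the transfer.  */
#if 0
#include <omp.h>
#include <stdlib.h>

int
main (void)
{
  int host_dev = omp_get_initial_device ();
  int dev = omp_get_default_device ();
  size_t nbytes = 128 * sizeof (double);
  double *host_buf = malloc (nbytes);
  void *dev_buf = omp_target_alloc (nbytes, dev);
  if (host_buf == NULL || dev_buf == NULL)
    return 1;
  for (int i = 0; i < 128; i++)
    host_buf[i] = i;
  /* dst lives on DEV, src on HOST_DEV; both offsets are zero.  */
  if (omp_target_memcpy (dev_buf, host_buf, nbytes, 0, 0, dev, host_dev))
    return 1;
  omp_target_free (dev_buf, dev);
  free (host_buf);
  return 0;
}
#endif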
4469
4470typedef struct
4471{
4472 void *dst;
4473 const void *src;
4474 size_t length;
4475 size_t dst_offset;
4476 size_t src_offset;
4477 struct gomp_device_descr *dst_devicep;
4478 struct gomp_device_descr *src_devicep;
4479} omp_target_memcpy_data;
4480
4481static void
4482omp_target_memcpy_async_helper (void *args)
4483{
4484 omp_target_memcpy_data *a = args;
4485 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4486 a->src_offset, a->dst_devicep, a->src_devicep))
4487 gomp_fatal ("omp_target_memcpy failed");
4488}
4489
4490int
4491omp_target_memcpy_async (void *dst, const void *src, size_t length,
4492 size_t dst_offset, size_t src_offset,
4493 int dst_device_num, int src_device_num,
4494 int depobj_count, omp_depend_t *depobj_list)
4495{
4496 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4497 unsigned int flags = 0;
4498 void *depend[depobj_count + 5];
4499 int i;
4500 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4501 &dst_devicep, &src_devicep);
4502
4503 omp_target_memcpy_data s = {
4504 .dst = dst,
4505 .src = src,
4506 .length = length,
4507 .dst_offset = dst_offset,
4508 .src_offset = src_offset,
4509 .dst_devicep = dst_devicep,
4510 .src_devicep = src_devicep
4511 };
4512
4513 if (check)
4514 return check;
4515
4516 if (depobj_count > 0 && depobj_list != NULL)
4517 {
4518 flags |= GOMP_TASK_FLAG_DEPEND;
4519 depend[0] = 0;
4520 depend[1] = (void *) (uintptr_t) depobj_count;
4521 depend[2] = depend[3] = depend[4] = 0;
4522 for (i = 0; i < depobj_count; ++i)
4523 depend[i + 5] = &depobj_list[i];
4524 }
4525
4526 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4527 __alignof__ (s), true, flags, depend, 0, NULL);
4528
4529 return 0;
4530}
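/* Editorial sketch, not part of libgomp: the async variant simply wraps
   the copy in a hidden task via GOMP_task, so completion is observed
   with taskwait.  No depend objects here (depobj_count == 0); the
   function name is made up.  */
#if 0
#include <omp.h>

void
start_async_copy (void *dev_buf, const double *host_buf, size_t nbytes,
                  int dev, int host_dev)
{
  omp_target_memcpy_async (dev_buf, host_buf, nbytes, 0, 0, dev, host_dev,
                           0, NULL);
  /* ... independent work ... */
  #pragma omp taskwait
}
#endif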
4531
d9a6bd32 4532static int
28567c40 4533omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
d9a6bd32
JJ
4534 int num_dims, const size_t *volume,
4535 const size_t *dst_offsets,
4536 const size_t *src_offsets,
4537 const size_t *dst_dimensions,
4538 const size_t *src_dimensions,
4539 struct gomp_device_descr *dst_devicep,
25072a47
TB
4540 struct gomp_device_descr *src_devicep,
4541 size_t *tmp_size, void **tmp)
d9a6bd32
JJ
4542{
4543 size_t dst_slice = element_size;
4544 size_t src_slice = element_size;
4545 size_t j, dst_off, src_off, length;
4546 int i, ret;
4547
4548 if (num_dims == 1)
4549 {
4550 if (__builtin_mul_overflow (element_size, volume[0], &length)
4551 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4552 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4553 return EINVAL;
8b9e559f
TB
4554 if (dst_devicep == NULL && src_devicep == NULL)
4555 {
4556 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4557 length);
4558 ret = 1;
4559 }
4560 else if (src_devicep == NULL)
4561 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
6ce13072 4562 (char *) dst + dst_off,
28567c40
JJ
4563 (const char *) src + src_off,
4564 length);
8b9e559f
TB
4565 else if (dst_devicep == NULL)
4566 ret = src_devicep->dev2host_func (src_devicep->target_id,
6ce13072 4567 (char *) dst + dst_off,
28567c40
JJ
4568 (const char *) src + src_off,
4569 length);
d9a6bd32 4570 else if (src_devicep == dst_devicep)
6ce13072
CLT
4571 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4572 (char *) dst + dst_off,
28567c40
JJ
4573 (const char *) src + src_off,
4574 length);
d9a6bd32 4575 else
25072a47
TB
4576 {
4577 if (*tmp_size == 0)
4578 {
4579 *tmp_size = length;
4580 *tmp = malloc (length);
4581 if (*tmp == NULL)
4582 return ENOMEM;
4583 }
4584 else if (*tmp_size < length)
4585 {
4586 *tmp_size = length;
8b9e559f
TB
4587 free (*tmp);
4588 *tmp = malloc (length);
25072a47
TB
4589 if (*tmp == NULL)
4590 return ENOMEM;
4591 }
4592 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4593 (const char *) src + src_off,
4594 length);
4595 if (ret == 1)
4596 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4597 (char *) dst + dst_off, *tmp,
4598 length);
4599 }
6ce13072 4600 return ret ? 0 : EINVAL;
d9a6bd32
JJ
4601 }
4602
8b9e559f 4603 /* host->device, device->host and intra device. */
25072a47
TB
4604 if (num_dims == 2
4605 && ((src_devicep
4606 && src_devicep == dst_devicep
4607 && src_devicep->memcpy2d_func)
4608 || (!src_devicep != !dst_devicep
4609 && ((src_devicep && src_devicep->memcpy2d_func)
4610 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4611 {
4612 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4613 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4614 int src_id = src_devicep ? src_devicep->target_id : -1;
4615 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4616
4617 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4618 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4619 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4620 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4621 || __builtin_mul_overflow (src_offsets[1], element_size,
4622 &src_off_sz1))
4623 return EINVAL;
4624 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4625 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4626 src, src_off_sz1, src_offsets[0], src_sz1);
4627 if (ret != -1)
4628 return ret ? 0 : EINVAL;
4629 }
4630 else if (num_dims == 3
4631 && ((src_devicep
4632 && src_devicep == dst_devicep
4633 && src_devicep->memcpy3d_func)
4634 || (!src_devicep != !dst_devicep
4635 && ((src_devicep && src_devicep->memcpy3d_func)
4636 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4637 {
4638 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4639 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4640 int src_id = src_devicep ? src_devicep->target_id : -1;
4641 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4642
4643 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4644 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4645 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4646 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4647 || __builtin_mul_overflow (src_offsets[2], element_size,
4648 &src_off_sz2))
4649 return EINVAL;
4650 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4651 dst, dst_off_sz2, dst_offsets[1],
4652 dst_offsets[0], dst_sz2, dst_dimensions[1],
4653 src, src_off_sz2, src_offsets[1],
4654 src_offsets[0], src_sz2, src_dimensions[1]);
4655 if (ret != -1)
4656 return ret ? 0 : EINVAL;
4657 }
d9a6bd32
JJ
4658
4659 for (i = 1; i < num_dims; i++)
4660 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4661 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4662 return EINVAL;
4663 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4664 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4665 return EINVAL;
4666 for (j = 0; j < volume[0]; j++)
4667 {
4668 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
28567c40 4669 (const char *) src + src_off,
d9a6bd32
JJ
4670 element_size, num_dims - 1,
4671 volume + 1, dst_offsets + 1,
4672 src_offsets + 1, dst_dimensions + 1,
4673 src_dimensions + 1, dst_devicep,
25072a47 4674 src_devicep, tmp_size, tmp);
d9a6bd32
JJ
4675 if (ret)
4676 return ret;
4677 dst_off += dst_slice;
4678 src_off += src_slice;
4679 }
4680 return 0;
4681}
4682
6c420193
MV
4683static int
4684omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4685 int src_device_num,
4686 struct gomp_device_descr **dst_devicep,
4687 struct gomp_device_descr **src_devicep)
d9a6bd32 4688{
d9a6bd32
JJ
4689 if (!dst && !src)
4690 return INT_MAX;
4691
6c420193
MV
4692 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4693 dst_devicep, src_devicep);
4694 if (ret)
4695 return ret;
d9a6bd32 4696
6c420193
MV
4697 return 0;
4698}
4699
4700static int
4701omp_target_memcpy_rect_copy (void *dst, const void *src,
4702 size_t element_size, int num_dims,
4703 const size_t *volume, const size_t *dst_offsets,
4704 const size_t *src_offsets,
4705 const size_t *dst_dimensions,
4706 const size_t *src_dimensions,
4707 struct gomp_device_descr *dst_devicep,
4708 struct gomp_device_descr *src_devicep)
4709{
25072a47
TB
4710 size_t tmp_size = 0;
4711 void *tmp = NULL;
4712 bool lock_src;
4713 bool lock_dst;
4714
8b9e559f
TB
4715 lock_src = src_devicep != NULL;
4716 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
25072a47 4717 if (lock_src)
d9a6bd32 4718 gomp_mutex_lock (&src_devicep->lock);
25072a47 4719 if (lock_dst)
d9a6bd32
JJ
4720 gomp_mutex_lock (&dst_devicep->lock);
4721 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4722 volume, dst_offsets, src_offsets,
4723 dst_dimensions, src_dimensions,
25072a47
TB
4724 dst_devicep, src_devicep,
4725 &tmp_size, &tmp);
4726 if (lock_src)
d9a6bd32 4727 gomp_mutex_unlock (&src_devicep->lock);
25072a47 4728 if (lock_dst)
d9a6bd32 4729 gomp_mutex_unlock (&dst_devicep->lock);
25072a47
TB
4730 if (tmp)
4731 free (tmp);
6c420193
MV
4732
4733 return ret;
4734}
4735
4736int
4737omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4738 int num_dims, const size_t *volume,
4739 const size_t *dst_offsets,
4740 const size_t *src_offsets,
4741 const size_t *dst_dimensions,
4742 const size_t *src_dimensions,
4743 int dst_device_num, int src_device_num)
4744{
4745 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4746
4747 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4748 src_device_num, &dst_devicep,
4749 &src_devicep);
4750
4751 if (check)
4752 return check;
4753
4754 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4755 volume, dst_offsets, src_offsets,
4756 dst_dimensions, src_dimensions,
4757 dst_devicep, src_devicep);
4758
d9a6bd32
JJ
4759 return ret;
4760}
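/* Editorial sketch, not part of libgomp: a 2-D rectangular copy with
   omp_target_memcpy_rect above.  Both device numbers are the initial
   device here, so only the host memcpy path of the worker is
   exercised.  */
#if 0
#include <omp.h>
#include <string.h>

int
main (void)
{
  double src[4][5], dst[4][5];
  memset (dst, 0, sizeof dst);
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 5; j++)
      src[i][j] = i * 5 + j;
  size_t volume[2] = { 2, 3 };   /* Rows x columns to copy.  */
  size_t offsets[2] = { 1, 1 };  /* Skip the first row and column.  */
  size_t dims[2] = { 4, 5 };     /* Full extents of both matrices.  */
  int host = omp_get_initial_device ();
  return omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                                 offsets, offsets, dims, dims,
                                 host, host) != 0;
}
#endif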
4761
6c420193
MV
4762typedef struct
4763{
4764 void *dst;
4765 const void *src;
4766 size_t element_size;
4767 const size_t *volume;
4768 const size_t *dst_offsets;
4769 const size_t *src_offsets;
4770 const size_t *dst_dimensions;
4771 const size_t *src_dimensions;
4772 struct gomp_device_descr *dst_devicep;
4773 struct gomp_device_descr *src_devicep;
4774 int num_dims;
4775} omp_target_memcpy_rect_data;
4776
4777static void
4778omp_target_memcpy_rect_async_helper (void *args)
4779{
4780 omp_target_memcpy_rect_data *a = args;
4781 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4782 a->num_dims, a->volume, a->dst_offsets,
4783 a->src_offsets, a->dst_dimensions,
4784 a->src_dimensions, a->dst_devicep,
4785 a->src_devicep);
4786 if (ret)
4787 gomp_fatal ("omp_target_memcpy_rect failed");
4788}
4789
4790int
4791omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4792 int num_dims, const size_t *volume,
4793 const size_t *dst_offsets,
4794 const size_t *src_offsets,
4795 const size_t *dst_dimensions,
4796 const size_t *src_dimensions,
4797 int dst_device_num, int src_device_num,
4798 int depobj_count, omp_depend_t *depobj_list)
4799{
4800 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4801 unsigned flags = 0;
4802 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4803 src_device_num, &dst_devicep,
4804 &src_devicep);
4805 void *depend[depobj_count + 5];
4806 int i;
4807
4808 omp_target_memcpy_rect_data s = {
4809 .dst = dst,
4810 .src = src,
4811 .element_size = element_size,
4812 .num_dims = num_dims,
4813 .volume = volume,
4814 .dst_offsets = dst_offsets,
4815 .src_offsets = src_offsets,
4816 .dst_dimensions = dst_dimensions,
4817 .src_dimensions = src_dimensions,
4818 .dst_devicep = dst_devicep,
4819 .src_devicep = src_devicep
4820 };
4821
4822 if (check)
4823 return check;
4824
4825 if (depobj_count > 0 && depobj_list != NULL)
4826 {
4827 flags |= GOMP_TASK_FLAG_DEPEND;
4828 depend[0] = 0;
4829 depend[1] = (void *) (uintptr_t) depobj_count;
4830 depend[2] = depend[3] = depend[4] = 0;
4831 for (i = 0; i < depobj_count; ++i)
4832 depend[i + 5] = &depobj_list[i];
4833 }
4834
4835 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4836 __alignof__ (s), true, flags, depend, 0, NULL);
4837
4838 return 0;
4839}
4840
d9a6bd32 4841int
28567c40
JJ
4842omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4843 size_t size, size_t device_offset, int device_num)
d9a6bd32 4844{
1158fe43
JJ
4845 if (device_num == omp_initial_device
4846 || device_num == gomp_get_num_devices ())
d9a6bd32
JJ
4847 return EINVAL;
4848
1158fe43 4849 struct gomp_device_descr *devicep = resolve_device (device_num, false);
d9a6bd32
JJ
4850 if (devicep == NULL)
4851 return EINVAL;
4852
b2b40051
MJ
4853 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4854 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
d9a6bd32
JJ
4855 return EINVAL;
4856
4857 gomp_mutex_lock (&devicep->lock);
4858
4859 struct splay_tree_s *mem_map = &devicep->mem_map;
4860 struct splay_tree_key_s cur_node;
4861 int ret = EINVAL;
4862
4863 cur_node.host_start = (uintptr_t) host_ptr;
4864 cur_node.host_end = cur_node.host_start + size;
4865 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4866 if (n)
4867 {
4868 if (n->tgt->tgt_start + n->tgt_offset
4869 == (uintptr_t) device_ptr + device_offset
4870 && n->host_start <= cur_node.host_start
4871 && n->host_end >= cur_node.host_end)
4872 ret = 0;
4873 }
4874 else
4875 {
4876 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4877 tgt->array = gomp_malloc (sizeof (*tgt->array));
4878 tgt->refcount = 1;
4879 tgt->tgt_start = 0;
4880 tgt->tgt_end = 0;
4881 tgt->to_free = NULL;
4882 tgt->prev = NULL;
4883 tgt->list_count = 0;
4884 tgt->device_descr = devicep;
4885 splay_tree_node array = tgt->array;
4886 splay_tree_key k = &array->key;
4887 k->host_start = cur_node.host_start;
4888 k->host_end = cur_node.host_end;
4889 k->tgt = tgt;
4890 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4891 k->refcount = REFCOUNT_INFINITY;
6f5b4b64 4892 k->dynamic_refcount = 0;
2a656a93 4893 k->aux = NULL;
d9a6bd32
JJ
4894 array->left = NULL;
4895 array->right = NULL;
4896 splay_tree_insert (&devicep->mem_map, array);
4897 ret = 0;
4898 }
4899 gomp_mutex_unlock (&devicep->lock);
4900 return ret;
4901}

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
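
/* Usage sketch (continuing the omp_target_associate_ptr example above): the
   association must be removed before the device storage is released:

     omp_target_disassociate_ptr (host_buf, dev);
     omp_target_free (dev_buf, dev);
*/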

void *
omp_get_mapped_ptr (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == omp_get_initial_device ())
    return (void *) ptr;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return (void *) ptr;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  void *ret = NULL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);

  if (n)
    {
      uintptr_t offset = cur_node.host_start - n->host_start;
      ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
    }

  gomp_mutex_unlock (&devicep->lock);

  return ret;
}
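
/* Usage sketch (hypothetical caller): query the device address of a mapped
   host object, e.g. to hand it to a native device API; a NULL result means
   the object is not mapped on that device:

     double x[8];
     #pragma omp target enter data map(alloc: x)
     void *dev_x = omp_get_mapped_ptr (x, omp_get_default_device ());
*/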

int
omp_target_is_accessible (const void *ptr, size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return true;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return false;

  /* TODO: Unified shared memory must be handled when available.  */

  return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
}
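
/* Usage sketch (hypothetical caller): test whether host storage can be read
   and written directly from a device, which per the implementation above
   currently holds only for shared-memory devices and the host itself:

     double v[16];
     int direct = omp_target_is_accessible (v, sizeof (v),
                                            omp_get_default_device ());
*/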

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return gomp_pause_host ();

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return -1;

  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
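
/* Usage sketch (hypothetical caller): ask the runtime to release reusable
   resources, e.g. cached threads, at a point where no parallelism is needed
   for a while; a nonzero return means the request could not be honored:

     if (omp_pause_resource_all (omp_pause_soft) != 0)
       abort ();
*/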

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.  The handles of the found functions are stored
   in the corresponding fields of DEVICE.  The function returns TRUE on
   success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)                                                        \
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)                                                 \
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))       \
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  DLSYM_OPT (memcpy2d, memcpy2d);
  DLSYM_OPT (memcpy3d, memcpy3d);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
          || !DLSYM_OPT (openacc.create_thread_data,
                         openacc_create_thread_data)
          || !DLSYM_OPT (openacc.destroy_thread_data,
                         openacc_destroy_thread_data)
          || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
          || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
          || !DLSYM_OPT (openacc.async.test, openacc_async_test)
          || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
          || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
          || !DLSYM_OPT (openacc.async.queue_callback,
                         openacc_async_queue_callback)
          || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
          || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
          || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
          || !DLSYM_OPT (openacc.get_property, openacc_get_property))
        {
          /* Require all the OpenACC handlers if we have
             GOMP_OFFLOAD_CAP_OPENACC_200.  */
          err = "plugin missing OpenACC handler function";
          goto fail;
        }

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
                         openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
                         openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
        {
          /* Make sure all the CUDA functions are there if any of them are.  */
          err = "plugin missing OpenACC CUDA handler function";
          goto fail;
        }
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
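
/* For reference, a plugin satisfying the required DLSYM entries above would
   export C symbols along these lines (a minimal sketch, not a real plugin;
   the exact prototypes come from libgomp-plugin.h):

     unsigned GOMP_OFFLOAD_version (void) { return GOMP_VERSION; }
     const char *GOMP_OFFLOAD_get_name (void) { return "mydev"; }
     unsigned GOMP_OFFLOAD_get_caps (void)
     { return GOMP_OFFLOAD_CAP_OPENMP_400; }

   plus get_type, get_num_devices, init_device, fini_device, load_image,
   unload_image, alloc, free, dev2host and host2dev, and, for OpenMP-capable
   devices, run and dev2dev.  */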

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
        ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
        gomp_fatal ("device finalization failed");
    }
}

/* This function initializes the runtime for offloading.  It parses the list
   of offload plugins and tries to load them.  On return, the variables
   NUM_DEVICES and NUM_DEVICES_OPENMP will be set, and the array DEVICES
   initialized, containing descriptors for the corresponding devices: first
   the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devs;
  int num_devs = 0, num_devs_openmp;
  struct gomp_device_descr *devs = NULL;

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
    return;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
        struct gomp_device_descr current_device;
        size_t prefix_len, suffix_len, cur_len;

        next = strchr (cur, ',');

        prefix_len = strlen (prefix);
        cur_len = next ? next - cur : strlen (cur);
        suffix_len = strlen (suffix);

        plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
        if (!plugin_name)
          {
            num_devs = 0;
            break;
          }

        memcpy (plugin_name, prefix, prefix_len);
        memcpy (plugin_name + prefix_len, cur, cur_len);
        memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

        if (gomp_load_plugin_for_device (&current_device, plugin_name))
          {
            int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
            new_num_devs = current_device.get_num_devices_func (omp_req);
            if (gomp_debug_var > 0 && new_num_devs < 0)
              {
                bool found = false;
                int type = current_device.get_type_func ();
                for (int img = 0; img < num_offload_images; img++)
                  if (type == offload_images[img].type)
                    found = true;
                if (found)
                  {
                    char buf[sizeof ("unified_address, unified_shared_memory, "
                                     "reverse_offload")];
                    gomp_requires_to_name (buf, sizeof (buf), omp_req);
                    char *name = (char *) malloc (cur_len + 1);
                    memcpy (name, cur, cur_len);
                    name[cur_len] = '\0';
                    gomp_debug (1,
                                "%s devices present but 'omp requires %s' "
                                "cannot be fulfilled\n", name, buf);
                    free (name);
                  }
              }
            else if (new_num_devs >= 1)
              {
                /* Augment DEVICES and NUM_DEVICES.  */

                devs = realloc (devs, (num_devs + new_num_devs)
                                      * sizeof (struct gomp_device_descr));
                if (!devs)
                  {
                    num_devs = 0;
                    free (plugin_name);
                    break;
                  }

                current_device.name = current_device.get_name_func ();
                /* current_device.capabilities has already been set.  */
                current_device.type = current_device.get_type_func ();
                current_device.mem_map.root = NULL;
                current_device.mem_map_rev.root = NULL;
                current_device.state = GOMP_DEVICE_UNINITIALIZED;
                for (i = 0; i < new_num_devs; i++)
                  {
                    current_device.target_id = i;
                    devs[num_devs] = current_device;
                    gomp_mutex_init (&devs[num_devs].lock);
                    num_devs++;
                  }
              }
          }

        free (plugin_name);
        cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devs_s
    = malloc (num_devs * sizeof (struct gomp_device_descr));
  if (!devs_s)
    {
      num_devs = 0;
      free (devs);
      devs = NULL;
    }
  num_devs_openmp = 0;
  for (i = 0; i < num_devs; i++)
    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devs_s[num_devs_openmp++] = devs[i];
  int num_devs_after_openmp = num_devs_openmp;
  for (i = 0; i < num_devs; i++)
    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devs_s[num_devs_after_openmp++] = devs[i];
  free (devs);
  devs = devs_s;

  for (i = 0; i < num_devs; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
         found all the plugins, so registering with the OpenACC runtime (which
         takes a copy of the pointer argument) must be delayed until now.  */
      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
        goacc_register (&devs[i]);
    }
  if (gomp_global_icv.default_device_var == INT_MIN)
    {
      /* This implies OMP_TARGET_OFFLOAD=mandatory.  */
      struct gomp_icv_list *none;
      none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
      gomp_global_icv.default_device_var = (num_devs_openmp
                                            ? 0 : omp_invalid_device);
      none->icvs.default_device_var = gomp_global_icv.default_device_var;
    }

  num_devices = num_devs;
  num_devices_openmp = num_devs_openmp;
  devices = devs;
  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}
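
/* For example, if OFFLOAD_PLUGINS is "nvptx,gcn" and SONAME_SUFFIX (1)
   expands to ".so.1" (as on typical ELF targets), the loop above tries to
   dlopen "libgomp-plugin-nvptx.so.1" and then "libgomp-plugin-gcn.so.1",
   appending the devices of each successfully loaded plugin in turn.  */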

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */