/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != GOMP_DEVICE_HOST_FALLBACK
          && device_id != num_devices_openmp)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

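/* Look up KEY in MEM_MAP.  For a zero-length KEY (host_start == host_end,
   e.g. a zero-length array section), also try to match a mapping that
   contains or ends exactly at that address, by temporarily widening the
   key by one byte forwards and then backwards.  */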
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         const void *srcaddr_orig,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " via buffer %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr_orig, srcaddr_orig + size,
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
      else
        gomp_fatal ("Copying of %s object [%p..%p)"
                    " to %s object [%p..%p) failed",
                    src, srcaddr, srcaddr + size,
                    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP (4 * 1024)

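/* For example, with the defaults above, two 1 KiB to-device regions whose
   device offsets are 2 KiB apart end up in a single chunk (gap <= 4 KiB)
   and are transferred with one host-to-device copy, whereas a 64 KiB
   region (> 32 KiB) always bypasses the coalesce buffer and is copied
   directly.  */
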
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

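/* Usage sketch (see gomp_map_vars_internal below): while laying out the
   target block, candidate regions are recorded with gomp_coalesce_buf_add;
   once the device block has been allocated, BUF is malloc'ed to span all
   chunks; gomp_copy_host2dev then memcpy's region data into BUF instead of
   issuing a device copy; and at the end each chunk is flushed to the device
   with a single host-to-device transfer.  */
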
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
        {
          /* We're queueing up an asynchronous copy from data that may
             disappear before the transfer takes place (i.e. because it is a
             stack local in a function that is no longer executing).  Make a
             copy of the data into a temporary buffer in those cases.  */
          h_buf = gomp_malloc (sz);
          memcpy (h_buf, h, sz);
        }
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                               "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
        /* Free temporary buffer once the transfer has completed.  */
        devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
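          /* Binary search the chunks array, which is kept sorted by
             ascending device offset, for the chunk containing DOFF.  */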
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals that
   associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
                         bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
        {
          new_encountered_refcount = false;
          goto end;
        }

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
        set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}

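/* For illustration, with a refcount_set in use and DELETE_P false: a key
   with refcount 1 seen for the first time yields *DO_COPY and *DO_REMOVE
   both true (the count drops to zero); the same key seen again within the
   same construct yields *DO_COPY true but *DO_REMOVE false; and a key with
   refcount 2 yields false for both.  */
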
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, false, cbuf);

  gomp_increment_refcount (oldn, refcount_set);
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

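/* Translate the host pointer value HOST_PTR into the corresponding device
   address and store it at device address TGT->tgt_start + TARGET_OFFSET
   (a NULL pointer is copied through as-is).  BIAS adjusts for array
   sections whose pointer does not point at the start of the mapped
   region.  */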
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset, sizeof (void *),
                          true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *),
                      true, cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, cbuf, refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, cbuf,
                                      refcount_set);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, cbuf, refcount_set);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

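/* Attach a pointer member inside a mapped struct: write, into the device
   copy of the slot at ATTACH_TO, the device address of whatever the host
   pointer at ATTACH_TO points to.  Per-slot attachment counts let nested
   attach/detach pairs balance, so only the 0 -> 1 transition performs the
   actual device write.  */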
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("pointer target not mapped for attach");
        }

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}

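/* Return the device address to use for host address HOSTADDRS[I]: either an
   address inside an existing mapping (key non-NULL), the host address itself
   (OFFSET_INLINED), zero (OFFSET_POINTER), an address computed relative to
   the following struct-member mapping (OFFSET_STRUCT), or an offset into the
   freshly allocated target block.  */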
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        htab_t *refcount_set,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

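  /* First pass over the mappings: resolve regions that are already mapped,
     classify the special kinds, and accumulate the alignment and total size
     of the regions that still need device memory (not_found_cnt).  */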
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (!aq
                      && gomp_to_device_kind_p (get_kind (short_mapkind,
                                                          kinds, i)
                                                & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL, refcount_set);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq)
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier or if a
                 null pointer was there and now a nonnull one has been found;
                 that permits transparent re-mapping for Fortran array
                 descriptors which were previously mapped unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, NULL,
                                  refcount_set);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present, hence, skip entry - including its MAP_POINTER,
                 when existing.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (!aq
              && gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation fail");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
                      - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;
      splay_tree_key field_tgt_structelem_first = NULL;

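      /* Second pass: create splay-tree keys for the regions not found above,
         copy to-device data, and fix up pointers and struct-element
         reference counting.  */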
      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    gomp_increment_refcount (k, refcount_set);
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, false, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily test
                   'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    field_tgt_structelem_first = NULL;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp, refcount_set);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), true, cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, false, cbufp,
                                      refcount_set);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target address
                       of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                k->refcount = 0;
                k->dynamic_refcount = 0;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (openmp_p)
                      {
                        k->refcount = REFCOUNT_STRUCTELEM;
                        if (field_tgt_structelem_first == NULL)
                          {
                            /* Set to first structure element of sequence.  */
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
                            field_tgt_structelem_first = k;
                          }
                        else
                          /* Point to refcount of leading element, but do not
                             increment again.  */
                          k->structelem_refcount_ptr
                            = &field_tgt_structelem_first->structelem_refcount;

                        if (i == field_tgt_clear)
                          {
                            k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
                            field_tgt_structelem_first = NULL;
                          }
                      }
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                /* First increment, from 0 to 1.  gomp_increment_refcount
                   encapsulates the different increment cases, so use this
                   instead of directly setting 1 during initialization.  */
                gomp_increment_refcount (k, refcount_set);

                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start,
                                        false, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            /* For OpenMP, the use of refcount_sets causes
                               errors if we set k->refcount = 1 above but also
                               increment it again here, for decrementing will
                               not properly match, since we decrement only once
                               for each key's refcount.  Therefore avoid this
                               increment for OpenMP constructs.  */
                            if (!openmp_p)
                              gomp_increment_refcount (k, refcount_set);
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t) *(void **) hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), false, cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), true, NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              true, cbufp);
        }
    }

  if (cbufp)
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!aq);

      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start,
                            true, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, htab_t *refcount_set,
               enum gomp_map_vars_kind pragma_kind)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                sizes, kinds, short_mapkind, refcount_set,
                                pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

1669attribute_hidden struct target_mem_desc *
275c736e
CLT
1670goacc_map_vars (struct gomp_device_descr *devicep,
1671 struct goacc_asyncqueue *aq, size_t mapnum,
1672 void **hostaddrs, void **devaddrs, size_t *sizes,
1673 void *kinds, bool short_mapkind,
1674 enum gomp_map_vars_kind pragma_kind)
1f4c5b9b
CLT
1675{
1676 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
275c736e
CLT
1677 sizes, kinds, short_mapkind, NULL,
1678 GOMP_MAP_VARS_OPENACC | pragma_kind);
1f4c5b9b
CLT
1679}
1680
6278b549 1681static void
1df3f842
JJ
1682gomp_unmap_tgt (struct target_mem_desc *tgt)
1683{
1684 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1685 if (tgt->tgt_end)
6ce13072 1686 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1df3f842
JJ
1687
1688 free (tgt->array);
1689 free (tgt);
1690}
1691
1cbd94e8
JB
1692static bool
1693gomp_unref_tgt (void *ptr)
829c6349
CLT
1694{
1695 bool is_tgt_unmapped = false;
1cbd94e8
JB
1696
1697 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1698
1699 if (tgt->refcount > 1)
1700 tgt->refcount--;
829c6349
CLT
1701 else
1702 {
1cbd94e8 1703 gomp_unmap_tgt (tgt);
829c6349 1704 is_tgt_unmapped = true;
829c6349 1705 }
1cbd94e8 1706
829c6349
CLT
1707 return is_tgt_unmapped;
1708}
1709
1f4c5b9b 1710static void
1cbd94e8 1711gomp_unref_tgt_void (void *ptr)
1f4c5b9b 1712{
1cbd94e8
JB
1713 (void) gomp_unref_tgt (ptr);
1714}
1f4c5b9b 1715
275c736e
CLT
1716static void
1717gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1cbd94e8 1718{
275c736e 1719 splay_tree_remove (sp, k);
2a656a93
JB
1720 if (k->aux)
1721 {
1722 if (k->aux->link_key)
275c736e 1723 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
5d5be7bf
JB
1724 if (k->aux->attach_count)
1725 free (k->aux->attach_count);
2a656a93
JB
1726 free (k->aux);
1727 k->aux = NULL;
1728 }
275c736e
CLT
1729}
1730
1731static inline __attribute__((always_inline)) bool
1732gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1733 struct goacc_asyncqueue *aq)
1734{
1735 bool is_tgt_unmapped = false;
1736
1737 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1738 {
1739 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1740 /* Infer the splay_tree_key of the first structelem key using the
1741 pointer to the first structleme_refcount. */
1742 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1743 - offsetof (struct splay_tree_key_s,
1744 structelem_refcount));
1745 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1746
1747 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1748 with the splay_tree_keys embedded inside. */
1749 splay_tree_node node =
1750 (splay_tree_node) ((char *) k
1751 - offsetof (struct splay_tree_node_s, key));
1752 while (true)
1753 {
1754 /* Starting from the _FIRST key, and continue for all following
1755 sibling keys. */
1756 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1757 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1758 break;
1759 else
1760 k = &(++node)->key;
1761 }
1762 }
1763 else
1764 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1765
1cbd94e8
JB
1766 if (aq)
1767 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1768 (void *) k->tgt);
1f4c5b9b 1769 else
1cbd94e8
JB
1770 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1771 return is_tgt_unmapped;
1772}
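
/* Illustrative sketch (not part of libgomp): the offsetof arithmetic above
   is the classic "container_of" idiom.  Given a pointer to a member, one
   recovers the enclosing object by subtracting the member's offset:

     static inline splay_tree_node
     node_of_key (splay_tree_key key)      // hypothetical helper
     {
       return (splay_tree_node) ((char *) key
				 - offsetof (struct splay_tree_node_s, key));
     }

   This is only valid here because gomp_map_vars allocates the keys of one
   structure mapping contiguously, embedded in an array of splay_tree_node_s,
   so '++node' really reaches the next sibling key.  */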

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
					     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

static void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.  */
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  false, NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
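
/* Illustrative sketch (not part of libgomp): user code such as

     #pragma omp target update to(x)     // host -> device
     #pragma omp target update from(x)   // device -> host

   is lowered by the compiler into calls to GOMP_target_update{,_ext} (see
   below), which end up here: the 'to' motion clause yields a kind for which
   GOMP_MAP_COPY_TO_P holds, the 'from' clause one for which
   GOMP_MAP_COPY_FROM_P holds, selecting host2dev resp. dev2host copies of
   the already-mapped region.  */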

/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely on the host and device compilers
   emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Currently the only 'other' entry is 'device_num'.  */
  int num_others = 1;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars
      /* Others (device_num) are included as trailing entries in pair list.  */
      && num_target_entries != num_funcs + num_vars + num_others)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for the on-device 'device_num' variable.  Tolerate case
     where plugin does not return this entry.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
      /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
	 was found in this image.  */
      if (device_num_var->start != 0)
	{
	  /* The index of the devicep within devices[] is regarded as its
	     'device number', which is different from the per-device type
	     devicep->target_id.  */
	  int device_num_val = (int) (devicep - &devices[0]);
	  if (device_num_var->end - device_num_var->start != sizeof (int))
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      if (is_register_lock)
		gomp_mutex_unlock (&register_lock);
	      gomp_fatal ("offload plugin managed 'device_num' not of expected "
			  "format");
	    }

	  /* Copy device_num value to place on device memory, thereby actually
	     putting its device number into effect.  */
	  gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
			      &device_num_val, sizeof (int), false, NULL);
	}
    }

  free (target_table);
}
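
/* Illustrative sketch (not part of libgomp): HOST_TABLE above is merely an
   array of four pointers delimiting two ranges, conceptually

     struct host_table_layout           // hypothetical name, for illustration
     {
       void **funcs_begin, **funcs_end; // one entry per offloaded function
       void **vars_begin, **vars_end;   // address/size pair per variable
     };

   so the variable range holds 2 * num_vars entries: entry 2*i is the host
   address and entry 2*i+1 the size word (possibly carrying the link bit
   handled above).  */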

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
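
/* Illustrative sketch (not part of libgomp): the link-bit encoding shared
   by the load and unload paths packs one flag into the size word.  On a
   64-bit host:

     uintptr_t link_bit = (uintptr_t) 1 << 63;
     uintptr_t encoded = size | link_bit;       // "omp declare target link"
     bool is_link = (encoded & link_bit) != 0;  // -> true
     uintptr_t decoded = encoded & ~link_bit;   // original size again

   This halves the representable size range, which is harmless since no
   single variable approaches 2^63 bytes.  */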

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
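
/* Illustrative sketch: VERSION is a packed pair of half-words.
   gomp-constants.h defines, roughly,

     #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
     #define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff)

   so the check above extracts the libgomp interface version from the high
   half and rejects images emitted by a toolchain newer than this runtime,
   while older images remain acceptable.  */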

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
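
/* Illustrative sketch (not part of libgomp): the round-up expression used
   in both helpers above,

     tgt_size = (tgt_size + align - 1) & ~(align - 1);

   works for any power-of-two ALIGN.  E.g. with tgt_size == 13 and
   align == 8: 13 + 7 == 20, and 20 & ~7 == 16, the next multiple of 8.
   The high byte of each kind stores log2 of the alignment, so
   (size_t) 1 << (kinds[i] >> 8) recovers ALIGN itself.  */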

/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than last available hw
   device), use host fallback.  FN is address of host code, UNUSED is part
   of the current ABI, but we're not actually using it.  HOSTADDRS, SIZES
   and KINDS are arrays with MAPNUM entries, with addresses of the host
   objects, sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs, devicep);

  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     &refcount_set, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  htab_clear (refcount_set);
  gomp_unmap_vars (tgt_vars, true, &refcount_set);
  htab_free (refcount_set);
}

static inline unsigned int
clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
{
  /* If we cannot run asynchronously, simply ignore nowait.  */
  if (devicep != NULL && devicep->async_run_func == NULL)
    flags &= ~GOMP_TARGET_FLAG_NOWAIT;

  return flags;
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, each of which can take
   one or two elements: the first element specifies for which device it is
   intended, the type, and optionally also the value.  If the value is not
   present in the first element, the whole second element is the actual
   value.  The last element of the array is a single NULL.  Among the
   device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs, devicep);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
				true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);
}
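
/* Illustrative sketch (not part of libgomp): for user code such as

     int a[32];
     #pragma omp target map(tofrom: a)
     a[0]++;

   the compiler outlines the region body into a function, here called 'fn0'
   purely for illustration, and emits a call roughly equivalent to

     GOMP_target_ext (GOMP_DEVICE_ICV, fn0, 1, hostaddrs, sizes, kinds,
		      0, NULL, NULL);

   with hostaddrs[0] == a, sizes[0] == sizeof a and kinds[0] encoding
   GOMP_MAP_TOFROM; the exact encoding is produced by the compiler.  */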

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
		"be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}
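
/* Illustrative sketch (not part of libgomp): icv->target_data together with
   tgt->prev forms a per-task stack of active 'target data' regions, so
   lexically nested regions unwind in LIFO order:

     #pragma omp target data map(a)     // push: icv->target_data = tgt_a
     {
       #pragma omp target data map(b)   // push: tgt_b->prev == tgt_a
       { }                              // GOMP_target_end_data pops tgt_b
     }                                  // GOMP_target_end_data pops tgt_a

   'tgt_a'/'tgt_b' are hypothetical names for the two descriptors.  */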

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);

	  /* Structure element lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.

	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
	     not have this problem, since it maintains an additional
	     tgt->refcount = 1 reference to the target_mem_desc to start
	     with.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i += 1;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
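
/* Illustrative sketch (not part of libgomp): user code such as

     #pragma omp target enter data map(to: a)
     // ... kernels using 'a' ...
     #pragma omp target exit data map(from: a)

   is lowered to two GOMP_target_enter_exit_data calls.  The exit variant
   carries GOMP_TARGET_FLAG_EXIT_DATA in FLAGS and is routed to
   gomp_exit_data above, which decrements the reference count and, on the
   final 'from', copies 'a' back to the host.  */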

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	for (i = 0; i < ttask->mapnum; i++)
	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	    {
	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	      i += ttask->sizes[i];
	    }
	  else
	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			   &ttask->sizes[i], &ttask->kinds[i], true,
			   &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
			ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == gomp_get_num_devices ())
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != num_devices_openmp)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
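
/* Illustrative sketch (not part of libgomp): a typical round trip through
   the three routines above:

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double buf[4] = { 1.0, 2.0, 3.0, 4.0 };
     void *d = omp_target_alloc (sizeof buf, dev);
     if (d && omp_target_memcpy (d, buf, sizeof buf, 0, 0, dev, host) == 0)
       {
	 // ... launch kernels reading/writing 'd' ...
	 omp_target_memcpy (buf, d, sizeof buf, 0, 0, host, dev);
       }
     omp_target_free (d, dev);

   omp_get_initial_device () equals gomp_get_num_devices () in this
   implementation, so passing it selects the plain host memcpy paths.  */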

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
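
/* For example, with num_dims == 2, element_size == 8 and
   src_dimensions == {4, 5}, the loop above computes
   src_slice = 8 * 5 = 40 bytes, i.e. one full source row.  src_off
   starts at src_offsets[0] * 40 and advances by one row per
   iteration, while each recursive call handles the innermost
   dimension as a 1-D copy of volume[1] elements.  */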

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != gomp_get_num_devices ())
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != gomp_get_num_devices ())
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
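
/* Usage sketch (illustrative, excluded via #if 0): copying a 2x3 sub-block
   out of a 4x5 host matrix with omp_target_memcpy_rect.  Host-to-host is
   used for simplicity and the array names are hypothetical.  Passing NULL
   for both DST and SRC instead returns the maximum supported number of
   dimensions (INT_MAX above).  */
#if 0
#include <omp.h>

int
main (void)
{
  double src[4][5] = { { 0 } }, dst[2][3];
  const size_t volume[2]   = { 2, 3 };	/* rows, columns to copy */
  const size_t dst_offs[2] = { 0, 0 };
  const size_t src_offs[2] = { 1, 2 };	/* start at row 1, column 2 */
  const size_t dst_dims[2] = { 2, 3 };
  const size_t src_dims[2] = { 4, 5 };
  int host = omp_get_initial_device ();

  /* Returns 0 on success, EINVAL on overflow or bad device numbers.  */
  return omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
				 dst_offs, src_offs, dst_dims, src_dims,
				 host, host);
}
#endif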

int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
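
/* Usage sketch (illustrative, excluded via #if 0): pairing
   omp_target_associate_ptr with omp_target_disassociate_ptr so that later
   mappings of A reuse caller-managed device memory.  The variable names are
   hypothetical and a non-shared-memory offload device is assumed (on a
   shared-memory device both calls return EINVAL, as above).  */
#if 0
#include <omp.h>

int
main (void)
{
  double a[100];
  int dev = omp_get_default_device ();
  void *d = omp_target_alloc (sizeof a, dev);
  if (!d || omp_target_associate_ptr (a, d, sizeof a, 0, dev))
    return 1;

  /* A is now present with an infinite refcount, so this map reuses the
     associated buffer instead of allocating a new one.  */
  #pragma omp target map(tofrom: a)
  a[0] += 1.0;

  /* Remove the association (the refcount checks above must pass), then
     release the device memory ourselves.  */
  if (omp_target_disassociate_ptr (a, dev))
    return 1;
  omp_target_free (d, dev);
  return 0;
}
#endif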

int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == gomp_get_num_devices ())
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= num_devices_openmp)
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
#if OFFLOAD_DEFAULTED
    return 0;
#else
    goto dl_fail;
#endif

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM_OPT (async_run, async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
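
/* To illustrate the DLSYM/DLSYM_OPT macros used above:
   DLSYM (init_device) looks up the symbol "GOMP_OFFLOAD_init_device"
   and stores its handle in device->init_device_func, jumping to
   dl_fail if it is absent.  DLSYM_OPT merely records the missing name
   in last_missing and evaluates to false, letting the caller decide
   whether that is fatal.  */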

/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	ret = gomp_fini_device (devicep);
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
3643
ee332b4a
TS
3644/* This function initializes the runtime for offloading.
3645 It parses the list of offload plugins, and tries to load these.
3646 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
41dbbb37
TS
3647 will be set, and the array DEVICES initialized, containing descriptors for
3648 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3649 by the others. */
1df3f842
JJ
3650
3651static void
3652gomp_target_init (void)
3653{
3654 const char *prefix ="libgomp-plugin-";
b5f7a6ca 3655 const char *suffix = SONAME_SUFFIX (1);
1df3f842
JJ
3656 const char *cur, *next;
3657 char *plugin_name;
35f258f4
JJ
3658 int i, new_num_devs;
3659 int num_devs = 0, num_devs_openmp;
3660 struct gomp_device_descr *devs = NULL;
1df3f842 3661
1bfc07d1
KCY
3662 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3663 return;
3664
ee332b4a 3665 cur = OFFLOAD_PLUGINS;
1df3f842
JJ
3666 if (*cur)
3667 do
3668 {
3669 struct gomp_device_descr current_device;
b13547d8 3670 size_t prefix_len, suffix_len, cur_len;
1df3f842
JJ
3671
3672 next = strchr (cur, ',');
3673
b13547d8
JJ
3674 prefix_len = strlen (prefix);
3675 cur_len = next ? next - cur : strlen (cur);
3676 suffix_len = strlen (suffix);
3677
3678 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
1df3f842
JJ
3679 if (!plugin_name)
3680 {
35f258f4 3681 num_devs = 0;
1df3f842
JJ
3682 break;
3683 }
3684
b13547d8
JJ
3685 memcpy (plugin_name, prefix, prefix_len);
3686 memcpy (plugin_name + prefix_len, cur, cur_len);
3687 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
1df3f842
JJ
3688
3689 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3690 {
35f258f4
JJ
3691 new_num_devs = current_device.get_num_devices_func ();
3692 if (new_num_devs >= 1)
1df3f842 3693 {
41dbbb37
TS
3694 /* Augment DEVICES and NUM_DEVICES. */
3695
35f258f4
JJ
3696 devs = realloc (devs, (num_devs + new_num_devs)
3697 * sizeof (struct gomp_device_descr));
3698 if (!devs)
1df3f842 3699 {
35f258f4 3700 num_devs = 0;
1df3f842
JJ
3701 free (plugin_name);
3702 break;
3703 }
3704
41dbbb37
TS
3705 current_device.name = current_device.get_name_func ();
3706 /* current_device.capabilities has already been set. */
1df3f842 3707 current_device.type = current_device.get_type_func ();
a51df54e 3708 current_device.mem_map.root = NULL;
d84ffc0a 3709 current_device.state = GOMP_DEVICE_UNINITIALIZED;
35f258f4 3710 for (i = 0; i < new_num_devs; i++)
1df3f842 3711 {
1df3f842 3712 current_device.target_id = i;
35f258f4
JJ
3713 devs[num_devs] = current_device;
3714 gomp_mutex_init (&devs[num_devs].lock);
3715 num_devs++;
1df3f842
JJ
3716 }
3717 }
3718 }
3719
3720 free (plugin_name);
3721 cur = next + 1;
3722 }
3723 while (next);
3724
41dbbb37
TS
3725 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3726 NUM_DEVICES_OPENMP. */
35f258f4
JJ
3727 struct gomp_device_descr *devs_s
3728 = malloc (num_devs * sizeof (struct gomp_device_descr));
3729 if (!devs_s)
3730 {
3731 num_devs = 0;
3732 free (devs);
3733 devs = NULL;
3734 }
3735 num_devs_openmp = 0;
3736 for (i = 0; i < num_devs; i++)
3737 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3738 devs_s[num_devs_openmp++] = devs[i];
3739 int num_devs_after_openmp = num_devs_openmp;
3740 for (i = 0; i < num_devs; i++)
3741 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3742 devs_s[num_devs_after_openmp++] = devs[i];
3743 free (devs);
3744 devs = devs_s;
3745
3746 for (i = 0; i < num_devs; i++)
41dbbb37 3747 {
41dbbb37
TS
3748 /* The 'devices' array can be moved (by the realloc call) until we have
3749 found all the plugins, so registering with the OpenACC runtime (which
3750 takes a copy of the pointer argument) must be delayed until now. */
35f258f4
JJ
3751 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3752 goacc_register (&devs[i]);
41dbbb37 3753 }
d84ffc0a 3754
35f258f4
JJ
3755 num_devices = num_devs;
3756 num_devices_openmp = num_devs_openmp;
3757 devices = devs;
d84ffc0a
IV
3758 if (atexit (gomp_target_fini) != 0)
3759 gomp_fatal ("atexit failed");
1df3f842
JJ
3760}
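
/* For example, with a configure-time OFFLOAD_PLUGINS string of
   "nvptx,gcn", the loop above assembles "libgomp-plugin-nvptx.so.1"
   and "libgomp-plugin-gcn.so.1" (the suffix comes from SONAME_SUFFIX
   and varies by target), loading whichever of them dlopen can find
   and appending the devices each plugin reports.  */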

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */