1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 typedef uintptr_t *hash_entry_type;
49 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
50 static inline void htab_free (void *ptr) { free (ptr); }
51 #include "hashtab.h"
52
53 ialias_redirect (GOMP_task)
54
55 static inline hashval_t
56 htab_hash (hash_entry_type element)
57 {
58 return hash_pointer ((void *) element);
59 }
60
61 static inline bool
62 htab_eq (hash_entry_type x, hash_entry_type y)
63 {
64 return x == y;
65 }
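/* Illustrative sketch (not part of the original source): the typedef and the
   two callbacks above instantiate the generic hashtab.h template as a set of
   uintptr_t pointers; this is how REFCOUNT_SET is used further below.  A
   minimal usage pattern (hypothetical helper name) might look like this.  */
#if 0
static bool
example_seen_before (htab_t *set, uintptr_t *ptr)
{
  if (htab_find (*set, ptr))
    return true;                              /* Already recorded.  */
  uintptr_t **slot = htab_find_slot (set, ptr, INSERT);
  *slot = ptr;                                /* Record for next time.  */
  return false;
}
#endif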
66
67 #define FIELD_TGT_EMPTY (~(size_t) 0)
68
69 static void gomp_target_init (void);
70
71 /* The whole initialization code for offloading plugins is only run once.  */
72 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
73
74 /* Mutex for offload image registration. */
75 static gomp_mutex_t register_lock;
76
77 /* This structure describes an offload image.
78 It contains type of the target device, pointer to host table descriptor, and
79 pointer to target data. */
80 struct offload_image_descr {
81 unsigned version;
82 enum offload_target_type type;
83 const void *host_table;
84 const void *target_data;
85 };
86
87 /* Array of descriptors of offload images. */
88 static struct offload_image_descr *offload_images;
89
90 /* Total number of offload images. */
91 static int num_offload_images;
92
93 /* Array of descriptors for all available devices. */
94 static struct gomp_device_descr *devices;
95
96 /* Total number of available devices. */
97 static int num_devices;
98
99 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
100 static int num_devices_openmp;
101
102 /* OpenMP requires mask. */
103 static int omp_requires_mask;
104
105 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
106
107 static void *
108 gomp_realloc_unlock (void *old, size_t size)
109 {
110 void *ret = realloc (old, size);
111 if (ret == NULL)
112 {
113 gomp_mutex_unlock (&register_lock);
114 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
115 }
116 return ret;
117 }
118
119 attribute_hidden void
120 gomp_init_targets_once (void)
121 {
122 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
123 }
124
125 attribute_hidden int
126 gomp_get_num_devices (void)
127 {
128 gomp_init_targets_once ();
129 return num_devices_openmp;
130 }
131
132 static struct gomp_device_descr *
133 resolve_device (int device_id, bool remapped)
134 {
135 if (remapped && device_id == GOMP_DEVICE_ICV)
136 {
137 struct gomp_task_icv *icv = gomp_icv (false);
138 device_id = icv->default_device_var;
139 remapped = false;
140 }
141
142 if (device_id < 0)
143 {
144 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
145 : omp_initial_device))
146 return NULL;
147 if (device_id == omp_invalid_device)
148 gomp_fatal ("omp_invalid_device encountered");
149 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
150 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
151 "but device not found");
152
153 return NULL;
154 }
155 else if (device_id >= gomp_get_num_devices ())
156 {
157 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
158 && device_id != num_devices_openmp)
159 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
160 "but device not found");
161
162 return NULL;
163 }
164
165 gomp_mutex_lock (&devices[device_id].lock);
166 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
167 gomp_init_device (&devices[device_id]);
168 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
169 {
170 gomp_mutex_unlock (&devices[device_id].lock);
171
172 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
173 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
174 "but device is finalized");
175
176 return NULL;
177 }
178 gomp_mutex_unlock (&devices[device_id].lock);
179
180 return &devices[device_id];
181 }
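/* Illustrative sketch (not part of the original source): resolve_device
   implements the device-number semantics that user code observes, e.g. in a
   hypothetical program like the one below.  Whether a missing device leads
   to host fallback or to a fatal error depends on OMP_TARGET_OFFLOAD, as
   handled above.  */
#if 0
#include <omp.h>
void
example_device_selection (void)
{
  if (omp_get_num_devices () > 0)   /* Counts GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
    omp_set_default_device (0);     /* GOMP_DEVICE_ICV then resolves to device 0.  */
  #pragma omp target device (omp_initial_device)
  {
    /* Runs on the initial (host) device.  */
  }
}
#endif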
182
183
184 static inline splay_tree_key
185 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
186 {
187 if (key->host_start != key->host_end)
188 return splay_tree_lookup (mem_map, key);
189
190 key->host_end++;
191 splay_tree_key n = splay_tree_lookup (mem_map, key);
192 key->host_end--;
193 if (n)
194 return n;
195 key->host_start--;
196 n = splay_tree_lookup (mem_map, key);
197 key->host_start++;
198 if (n)
199 return n;
200 return splay_tree_lookup (mem_map, key);
201 }
202
203 static inline splay_tree_key
204 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
205 {
206 if (key->host_start != key->host_end)
207 return splay_tree_lookup (mem_map, key);
208
209 key->host_end++;
210 splay_tree_key n = splay_tree_lookup (mem_map, key);
211 key->host_end--;
212 return n;
213 }
214
215 static inline void
216 gomp_device_copy (struct gomp_device_descr *devicep,
217 bool (*copy_func) (int, void *, const void *, size_t),
218 const char *dst, void *dstaddr,
219 const char *src, const void *srcaddr,
220 size_t size)
221 {
222 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
223 {
224 gomp_mutex_unlock (&devicep->lock);
225 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
226 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
227 }
228 }
229
230 static inline void
231 goacc_device_copy_async (struct gomp_device_descr *devicep,
232 bool (*copy_func) (int, void *, const void *, size_t,
233 struct goacc_asyncqueue *),
234 const char *dst, void *dstaddr,
235 const char *src, const void *srcaddr,
236 const void *srcaddr_orig,
237 size_t size, struct goacc_asyncqueue *aq)
238 {
239 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
240 {
241 gomp_mutex_unlock (&devicep->lock);
242 if (srcaddr_orig && srcaddr_orig != srcaddr)
243 gomp_fatal ("Copying of %s object [%p..%p)"
244 " via buffer %s object [%p..%p)"
245 " to %s object [%p..%p) failed",
246 src, srcaddr_orig, srcaddr_orig + size,
247 src, srcaddr, srcaddr + size,
248 dst, dstaddr, dstaddr + size);
249 else
250 gomp_fatal ("Copying of %s object [%p..%p)"
251 " to %s object [%p..%p) failed",
252 src, srcaddr, srcaddr + size,
253 dst, dstaddr, dstaddr + size);
254 }
255 }
256
257 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
258 host to device memory transfers. */
259
260 struct gomp_coalesce_chunk
261 {
262 /* The starting and ending point of a coalesced chunk of memory. */
263 size_t start, end;
264 };
265
266 struct gomp_coalesce_buf
267 {
268 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
269 it will be copied to the device. */
270 void *buf;
271 struct target_mem_desc *tgt;
272 /* Array with offsets, chunks[i].start is the starting offset and
273 chunks[i].end ending offset relative to tgt->tgt_start device address
274 of chunks which are to be copied to buf and later copied to device. */
275 struct gomp_coalesce_chunk *chunks;
276 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
277 be performed. */
278 long chunk_cnt;
279 /* During construction of chunks array, how many memory regions are within
280 the last chunk. If there is just one memory region for a chunk, we copy
281 it directly to device rather than going through buf. */
282 long use_cnt;
283 };
284
285 /* Maximum size of memory region considered for coalescing. Larger copies
286 are performed directly. */
287 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
288
289 /* Maximum size of a gap in between regions to consider them being copied
290 within the same chunk. All the device offsets considered are within
291 newly allocated device memory, so it isn't fatal if we copy some padding
292 in between from host to device. The gaps come either from alignment
293 padding or from memory regions which are not supposed to be copied from
294 host to device (e.g. map(alloc:), map(from:) etc.). */
295 #define MAX_COALESCE_BUF_GAP (4 * 1024)
296
297 /* Add region with device tgt_start relative offset and length to CBUF.
298
299 This must not be used for asynchronous copies, because the host data might
300 not be computed yet (by an earlier asynchronous compute region, for
301 example).
302 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
303 is it more performant to use libgomp CBUF buffering or individual device
304 asynchronous copying?) */
305
306 static inline void
307 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
308 {
309 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
310 return;
311 if (cbuf->chunk_cnt)
312 {
313 if (cbuf->chunk_cnt < 0)
314 return;
315 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
316 {
317 cbuf->chunk_cnt = -1;
318 return;
319 }
320 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
321 {
322 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
323 cbuf->use_cnt++;
324 return;
325 }
326 /* If the last chunk is only used by one mapping, discard it,
327 as it will be one host to device copy anyway and
328 memcpying it around will only waste cycles. */
329 if (cbuf->use_cnt == 1)
330 cbuf->chunk_cnt--;
331 }
332 cbuf->chunks[cbuf->chunk_cnt].start = start;
333 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
334 cbuf->chunk_cnt++;
335 cbuf->use_cnt = 1;
336 }
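/* Illustrative sketch (not part of the original source): starting from an
   empty cbuf (chunk_cnt == 0), two regions whose device offsets lie within
   MAX_COALESCE_BUF_GAP of each other end up in a single chunk and hence in a
   single host-to-device transfer.  The offsets below are hypothetical.  */
#if 0
static void
example_coalesce (struct gomp_coalesce_buf *cbuf)
{
  gomp_coalesce_buf_add (cbuf, 0, 512);       /* Chunk 0: [0, 512).  */
  gomp_coalesce_buf_add (cbuf, 1024, 256);    /* Gap of 512 <= 4 KiB: chunk 0
                                                 grows to [0, 1280).  */
  gomp_coalesce_buf_add (cbuf, 65536, 128);   /* Gap too large: new chunk 1.  */
  /* Now cbuf->chunk_cnt == 2; use_cnt == 1 refers to the last chunk only.  */
}
#endif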
337
338 /* Return true for mapping kinds which need to copy data from the
339 host to device for regions that weren't previously mapped. */
340
341 static inline bool
342 gomp_to_device_kind_p (int kind)
343 {
344 switch (kind)
345 {
346 case GOMP_MAP_ALLOC:
347 case GOMP_MAP_FROM:
348 case GOMP_MAP_FORCE_ALLOC:
349 case GOMP_MAP_FORCE_FROM:
350 case GOMP_MAP_ALWAYS_FROM:
351 return false;
352 default:
353 return true;
354 }
355 }
356
357 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
358 non-NULL), when the source data is stack or may otherwise be deallocated
359 before the asynchronous copy takes place, EPHEMERAL must be passed as
360 TRUE. */
361
362 attribute_hidden void
363 gomp_copy_host2dev (struct gomp_device_descr *devicep,
364 struct goacc_asyncqueue *aq,
365 void *d, const void *h, size_t sz,
366 bool ephemeral, struct gomp_coalesce_buf *cbuf)
367 {
368 if (__builtin_expect (aq != NULL, 0))
369 {
370 /* See 'gomp_coalesce_buf_add'. */
371 assert (!cbuf);
372
373 void *h_buf = (void *) h;
374 if (ephemeral)
375 {
376 /* We're queueing up an asynchronous copy from data that may
377 disappear before the transfer takes place (i.e. because it is a
378 stack local in a function that is no longer executing). Make a
379 copy of the data into a temporary buffer in those cases. */
380 h_buf = gomp_malloc (sz);
381 memcpy (h_buf, h, sz);
382 }
383 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
384 "dev", d, "host", h_buf, h, sz, aq);
385 if (ephemeral)
386 /* Free temporary buffer once the transfer has completed. */
387 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
388
389 return;
390 }
391
392 if (cbuf)
393 {
394 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
395 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
396 {
397 long first = 0;
398 long last = cbuf->chunk_cnt - 1;
399 while (first <= last)
400 {
401 long middle = (first + last) >> 1;
402 if (cbuf->chunks[middle].end <= doff)
403 first = middle + 1;
404 else if (cbuf->chunks[middle].start <= doff)
405 {
406 if (doff + sz > cbuf->chunks[middle].end)
407 {
408 gomp_mutex_unlock (&devicep->lock);
409 gomp_fatal ("internal libgomp cbuf error");
410 }
411 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
412 h, sz);
413 return;
414 }
415 else
416 last = middle - 1;
417 }
418 }
419 }
420
421 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
422 }
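/* Illustrative sketch (not part of the original source): with an OpenACC
   async queue, stack-local source data must be flagged as EPHEMERAL so that
   gomp_copy_host2dev snapshots it before the caller's frame may disappear.
   The names below are hypothetical.  */
#if 0
static void
example_async_copy (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq, void *devptr)
{
  uintptr_t local = 42;                       /* Lives in this stack frame.  */
  gomp_copy_host2dev (devicep, aq, devptr, &local, sizeof (local),
                      true /* ephemeral */, NULL /* no cbuf with an AQ */);
}
#endif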
423
424 attribute_hidden void
425 gomp_copy_dev2host (struct gomp_device_descr *devicep,
426 struct goacc_asyncqueue *aq,
427 void *h, const void *d, size_t sz)
428 {
429 if (__builtin_expect (aq != NULL, 0))
430 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
431 "host", h, "dev", d, NULL, sz, aq);
432 else
433 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
434 }
435
436 static void
437 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
438 {
439 if (!devicep->free_func (devicep->target_id, devptr))
440 {
441 gomp_mutex_unlock (&devicep->lock);
442 gomp_fatal ("error in freeing device memory block at %p", devptr);
443 }
444 }
445
446 /* Increment reference count of a splay_tree_key region K by 1.
447 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
448 increment the value if refcount is not yet contained in the set (used for
449 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
450 once for each construct). */
451
452 static inline void
453 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
454 {
455 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
456 return;
457
458 uintptr_t *refcount_ptr = &k->refcount;
459
460 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
461 refcount_ptr = &k->structelem_refcount;
462 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
463 refcount_ptr = k->structelem_refcount_ptr;
464
465 if (refcount_set)
466 {
467 if (htab_find (*refcount_set, refcount_ptr))
468 return;
469 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
470 *slot = refcount_ptr;
471 }
472
473 *refcount_ptr += 1;
474 return;
475 }
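/* Illustrative sketch (not part of the original source): with a refcount
   set, the same key is bumped at most once per construct, e.g. when a
   construct maps two sections of the same array.  Hypothetical helper.  */
#if 0
static void
example_increment_once (splay_tree_key k)
{
  htab_t set = htab_create (4);
  gomp_increment_refcount (k, &set);          /* Adjusts the refcount once.  */
  gomp_increment_refcount (k, &set);          /* No-op: already recorded in SET.  */
  htab_free (set);
}
#endif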
476
477 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
478 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
479 track already seen refcounts, and only adjust the value if refcount is not
480 yet contained in the set (like gomp_increment_refcount).
481
482 Out-values: *DO_COPY is set to true if we set the refcount to zero, or
483 it is already zero and we know we decremented it earlier. This signals that
484 associated maps should be copied back to host.
485
486 *DO_REMOVE is set to true when this is the first handling of this refcount
487 and we are setting it to zero. This signals a removal of this key from the
488 splay-tree map.
489
490 Copy and removal are separated due to cases like handling of structure
491 elements, e.g. each map of a structure element representing a possible copy
492 out of a structure field has to be handled individually, but we only signal
493 removal for one (the first encountered) sibling map.  */
494
495 static inline void
496 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
497 bool *do_copy, bool *do_remove)
498 {
499 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
500 {
501 *do_copy = *do_remove = false;
502 return;
503 }
504
505 uintptr_t *refcount_ptr = &k->refcount;
506
507 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
508 refcount_ptr = &k->structelem_refcount;
509 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
510 refcount_ptr = k->structelem_refcount_ptr;
511
512 bool new_encountered_refcount;
513 bool set_to_zero = false;
514 bool is_zero = false;
515
516 uintptr_t orig_refcount = *refcount_ptr;
517
518 if (refcount_set)
519 {
520 if (htab_find (*refcount_set, refcount_ptr))
521 {
522 new_encountered_refcount = false;
523 goto end;
524 }
525
526 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
527 *slot = refcount_ptr;
528 new_encountered_refcount = true;
529 }
530 else
531 /* If no refcount_set being used, assume all keys are being decremented
532 for the first time. */
533 new_encountered_refcount = true;
534
535 if (delete_p)
536 *refcount_ptr = 0;
537 else if (*refcount_ptr > 0)
538 *refcount_ptr -= 1;
539
540 end:
541 if (*refcount_ptr == 0)
542 {
543 if (orig_refcount > 0)
544 set_to_zero = true;
545
546 is_zero = true;
547 }
548
549 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
550 *do_remove = (new_encountered_refcount && set_to_zero);
551 }
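/* Illustrative sketch (not part of the original source): how the two
   out-values are typically consumed by an unmapping path.  The helper name
   is hypothetical.  */
#if 0
static void
example_decrement (splay_tree_key k, htab_t *refcount_set, bool delete_p)
{
  bool do_copy, do_remove;
  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy, &do_remove);
  if (do_copy)
    ;  /* Copy map(from:)/map(tofrom:) data back to the host.  */
  if (do_remove)
    ;  /* Remove the key from the device's splay tree.  */
}
#endif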
552
553 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
554 gomp_map_0len_lookup found oldn for newn.
555 Helper function of gomp_map_vars. */
556
557 static inline void
558 gomp_map_vars_existing (struct gomp_device_descr *devicep,
559 struct goacc_asyncqueue *aq, splay_tree_key oldn,
560 splay_tree_key newn, struct target_var_desc *tgt_var,
561 unsigned char kind, bool always_to_flag, bool implicit,
562 struct gomp_coalesce_buf *cbuf,
563 htab_t *refcount_set)
564 {
565 assert (kind != GOMP_MAP_ATTACH
566 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
567
568 tgt_var->key = oldn;
569 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
570 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
571 tgt_var->is_attach = false;
572 tgt_var->offset = newn->host_start - oldn->host_start;
573
574 /* For implicit maps, old contained in new is valid. */
575 bool implicit_subset = (implicit
576 && newn->host_start <= oldn->host_start
577 && oldn->host_end <= newn->host_end);
578 if (implicit_subset)
579 tgt_var->length = oldn->host_end - oldn->host_start;
580 else
581 tgt_var->length = newn->host_end - newn->host_start;
582
583 if ((kind & GOMP_MAP_FLAG_FORCE)
584 /* For implicit maps, old contained in new is valid. */
585 || !(implicit_subset
586 /* Otherwise, new contained inside old is considered valid. */
587 || (oldn->host_start <= newn->host_start
588 && newn->host_end <= oldn->host_end)))
589 {
590 gomp_mutex_unlock (&devicep->lock);
591 gomp_fatal ("Trying to map into device [%p..%p) object when "
592 "[%p..%p) is already mapped",
593 (void *) newn->host_start, (void *) newn->host_end,
594 (void *) oldn->host_start, (void *) oldn->host_end);
595 }
596
597 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
598 {
599 /* Implicit + always should not happen.  If this does occur, the
600 address/length adjustment below is a TODO.  */
601 assert (!implicit_subset);
602
603 if (oldn->aux && oldn->aux->attach_count)
604 {
605 /* We have to be careful not to overwrite still attached pointers
606 during the copyback to host. */
607 uintptr_t addr = newn->host_start;
608 while (addr < newn->host_end)
609 {
610 size_t i = (addr - oldn->host_start) / sizeof (void *);
611 if (oldn->aux->attach_count[i] == 0)
612 gomp_copy_host2dev (devicep, aq,
613 (void *) (oldn->tgt->tgt_start
614 + oldn->tgt_offset
615 + addr - oldn->host_start),
616 (void *) addr,
617 sizeof (void *), false, cbuf);
618 addr += sizeof (void *);
619 }
620 }
621 else
622 gomp_copy_host2dev (devicep, aq,
623 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
624 + newn->host_start - oldn->host_start),
625 (void *) newn->host_start,
626 newn->host_end - newn->host_start, false, cbuf);
627 }
628
629 gomp_increment_refcount (oldn, refcount_set);
630 }
631
632 static int
633 get_kind (bool short_mapkind, void *kinds, int idx)
634 {
635 if (!short_mapkind)
636 return ((unsigned char *) kinds)[idx];
637
638 int val = ((unsigned short *) kinds)[idx];
639 if (GOMP_MAP_IMPLICIT_P (val))
640 val &= ~GOMP_MAP_IMPLICIT;
641 return val;
642 }
643
644
645 static bool
646 get_implicit (bool short_mapkind, void *kinds, int idx)
647 {
648 if (!short_mapkind)
649 return false;
650
651 int val = ((unsigned short *) kinds)[idx];
652 return GOMP_MAP_IMPLICIT_P (val);
653 }
654
655 static void
656 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
657 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
658 struct gomp_coalesce_buf *cbuf,
659 bool allow_zero_length_array_sections)
660 {
661 struct gomp_device_descr *devicep = tgt->device_descr;
662 struct splay_tree_s *mem_map = &devicep->mem_map;
663 struct splay_tree_key_s cur_node;
664
665 cur_node.host_start = host_ptr;
666 if (cur_node.host_start == (uintptr_t) NULL)
667 {
668 cur_node.tgt_offset = (uintptr_t) NULL;
669 gomp_copy_host2dev (devicep, aq,
670 (void *) (tgt->tgt_start + target_offset),
671 (void *) &cur_node.tgt_offset, sizeof (void *),
672 true, cbuf);
673 return;
674 }
675 /* Add bias to the pointer value. */
676 cur_node.host_start += bias;
677 cur_node.host_end = cur_node.host_start;
678 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
679 if (n == NULL)
680 {
681 if (allow_zero_length_array_sections)
682 cur_node.tgt_offset = 0;
683 else
684 {
685 gomp_mutex_unlock (&devicep->lock);
686 gomp_fatal ("Pointer target of array section wasn't mapped");
687 }
688 }
689 else
690 {
691 cur_node.host_start -= n->host_start;
692 cur_node.tgt_offset
693 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
694 /* At this point tgt_offset is target address of the
695 array section. Now subtract bias to get what we want
696 to initialize the pointer with. */
697 cur_node.tgt_offset -= bias;
698 }
699 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
700 (void *) &cur_node.tgt_offset, sizeof (void *),
701 true, cbuf);
702 }
703
704 static void
705 gomp_map_fields_existing (struct target_mem_desc *tgt,
706 struct goacc_asyncqueue *aq, splay_tree_key n,
707 size_t first, size_t i, void **hostaddrs,
708 size_t *sizes, void *kinds,
709 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
710 {
711 struct gomp_device_descr *devicep = tgt->device_descr;
712 struct splay_tree_s *mem_map = &devicep->mem_map;
713 struct splay_tree_key_s cur_node;
714 int kind;
715 bool implicit;
716 const bool short_mapkind = true;
717 const int typemask = short_mapkind ? 0xff : 0x7;
718
719 cur_node.host_start = (uintptr_t) hostaddrs[i];
720 cur_node.host_end = cur_node.host_start + sizes[i];
721 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
722 kind = get_kind (short_mapkind, kinds, i);
723 implicit = get_implicit (short_mapkind, kinds, i);
724 if (n2
725 && n2->tgt == n->tgt
726 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
727 {
728 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
729 kind & typemask, false, implicit, cbuf,
730 refcount_set);
731 return;
732 }
733 if (sizes[i] == 0)
734 {
735 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
736 {
737 cur_node.host_start--;
738 n2 = splay_tree_lookup (mem_map, &cur_node);
739 cur_node.host_start++;
740 if (n2
741 && n2->tgt == n->tgt
742 && n2->host_start - n->host_start
743 == n2->tgt_offset - n->tgt_offset)
744 {
745 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
746 kind & typemask, false, implicit, cbuf,
747 refcount_set);
748 return;
749 }
750 }
751 cur_node.host_end++;
752 n2 = splay_tree_lookup (mem_map, &cur_node);
753 cur_node.host_end--;
754 if (n2
755 && n2->tgt == n->tgt
756 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
757 {
758 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
759 kind & typemask, false, implicit, cbuf,
760 refcount_set);
761 return;
762 }
763 }
764 gomp_mutex_unlock (&devicep->lock);
765 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
766 "other mapped elements from the same structure weren't mapped "
767 "together with it", (void *) cur_node.host_start,
768 (void *) cur_node.host_end);
769 }
770
771 attribute_hidden void
772 gomp_attach_pointer (struct gomp_device_descr *devicep,
773 struct goacc_asyncqueue *aq, splay_tree mem_map,
774 splay_tree_key n, uintptr_t attach_to, size_t bias,
775 struct gomp_coalesce_buf *cbufp,
776 bool allow_zero_length_array_sections)
777 {
778 struct splay_tree_key_s s;
779 size_t size, idx;
780
781 if (n == NULL)
782 {
783 gomp_mutex_unlock (&devicep->lock);
784 gomp_fatal ("enclosing struct not mapped for attach");
785 }
786
787 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
788 /* We might have a pointer in a packed struct: however we cannot have more
789 than one such pointer in each pointer-sized portion of the struct, so
790 this is safe. */
791 idx = (attach_to - n->host_start) / sizeof (void *);
792
793 if (!n->aux)
794 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
795
796 if (!n->aux->attach_count)
797 n->aux->attach_count
798 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
799
800 if (n->aux->attach_count[idx] < UINTPTR_MAX)
801 n->aux->attach_count[idx]++;
802 else
803 {
804 gomp_mutex_unlock (&devicep->lock);
805 gomp_fatal ("attach count overflow");
806 }
807
808 if (n->aux->attach_count[idx] == 1)
809 {
810 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
811 - n->host_start;
812 uintptr_t target = (uintptr_t) *(void **) attach_to;
813 splay_tree_key tn;
814 uintptr_t data;
815
816 if ((void *) target == NULL)
817 {
818 gomp_mutex_unlock (&devicep->lock);
819 gomp_fatal ("attempt to attach null pointer");
820 }
821
822 s.host_start = target + bias;
823 s.host_end = s.host_start + 1;
824 tn = splay_tree_lookup (mem_map, &s);
825
826 if (!tn)
827 {
828 if (allow_zero_length_array_sections)
829 /* When allowing attachment to zero-length array sections, we
830 allow attaching to NULL pointers when the target region is not
831 mapped. */
832 data = 0;
833 else
834 {
835 gomp_mutex_unlock (&devicep->lock);
836 gomp_fatal ("pointer target not mapped for attach");
837 }
838 }
839 else
840 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
841
842 gomp_debug (1,
843 "%s: attaching host %p, target %p (struct base %p) to %p\n",
844 __FUNCTION__, (void *) attach_to, (void *) devptr,
845 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
846
847 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
848 sizeof (void *), true, cbufp);
849 }
850 else
851 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
852 (void *) attach_to, (int) n->aux->attach_count[idx]);
853 }
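/* Illustrative sketch (not part of the original source): attaching rewrites
   the device copy of a host pointer field so that it holds the device
   address of the pointer's target, using the same arithmetic as above.  The
   helper below is hypothetical.  */
#if 0
static void
example_attach_translation (splay_tree_key n, uintptr_t attach_to,
                            splay_tree_key tn, uintptr_t target,
                            uintptr_t *devptr, uintptr_t *data)
{
  /* Device location of the pointer field itself.  */
  *devptr = n->tgt->tgt_start + n->tgt_offset + attach_to - n->host_start;
  /* Device address the field should now contain.  */
  *data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
}
#endif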
854
855 attribute_hidden void
856 gomp_detach_pointer (struct gomp_device_descr *devicep,
857 struct goacc_asyncqueue *aq, splay_tree_key n,
858 uintptr_t detach_from, bool finalize,
859 struct gomp_coalesce_buf *cbufp)
860 {
861 size_t idx;
862
863 if (n == NULL)
864 {
865 gomp_mutex_unlock (&devicep->lock);
866 gomp_fatal ("enclosing struct not mapped for detach");
867 }
868
869 idx = (detach_from - n->host_start) / sizeof (void *);
870
871 if (!n->aux || !n->aux->attach_count)
872 {
873 gomp_mutex_unlock (&devicep->lock);
874 gomp_fatal ("no attachment counters for struct");
875 }
876
877 if (finalize)
878 n->aux->attach_count[idx] = 1;
879
880 if (n->aux->attach_count[idx] == 0)
881 {
882 gomp_mutex_unlock (&devicep->lock);
883 gomp_fatal ("attach count underflow");
884 }
885 else
886 n->aux->attach_count[idx]--;
887
888 if (n->aux->attach_count[idx] == 0)
889 {
890 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
891 - n->host_start;
892 uintptr_t target = (uintptr_t) *(void **) detach_from;
893
894 gomp_debug (1,
895 "%s: detaching host %p, target %p (struct base %p) to %p\n",
896 __FUNCTION__, (void *) detach_from, (void *) devptr,
897 (void *) (n->tgt->tgt_start + n->tgt_offset),
898 (void *) target);
899
900 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
901 sizeof (void *), true, cbufp);
902 }
903 else
904 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
905 (void *) detach_from, (int) n->aux->attach_count[idx]);
906 }
907
908 attribute_hidden uintptr_t
909 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
910 {
911 if (tgt->list[i].key != NULL)
912 return tgt->list[i].key->tgt->tgt_start
913 + tgt->list[i].key->tgt_offset
914 + tgt->list[i].offset;
915
916 switch (tgt->list[i].offset)
917 {
918 case OFFSET_INLINED:
919 return (uintptr_t) hostaddrs[i];
920
921 case OFFSET_POINTER:
922 return 0;
923
924 case OFFSET_STRUCT:
925 return tgt->list[i + 1].key->tgt->tgt_start
926 + tgt->list[i + 1].key->tgt_offset
927 + tgt->list[i + 1].offset
928 + (uintptr_t) hostaddrs[i]
929 - (uintptr_t) hostaddrs[i + 1];
930
931 default:
932 return tgt->tgt_start + tgt->list[i].offset;
933 }
934 }
935
936 static inline __attribute__((always_inline)) struct target_mem_desc *
937 gomp_map_vars_internal (struct gomp_device_descr *devicep,
938 struct goacc_asyncqueue *aq, size_t mapnum,
939 void **hostaddrs, void **devaddrs, size_t *sizes,
940 void *kinds, bool short_mapkind,
941 htab_t *refcount_set,
942 enum gomp_map_vars_kind pragma_kind)
943 {
944 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
945 bool has_firstprivate = false;
946 bool has_always_ptrset = false;
947 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
948 const int rshift = short_mapkind ? 8 : 3;
949 const int typemask = short_mapkind ? 0xff : 0x7;
950 struct splay_tree_s *mem_map = &devicep->mem_map;
951 struct splay_tree_key_s cur_node;
952 struct target_mem_desc *tgt
953 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
954 tgt->list_count = mapnum;
955 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
956 tgt->device_descr = devicep;
957 tgt->prev = NULL;
958 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
959
960 if (mapnum == 0)
961 {
962 tgt->tgt_start = 0;
963 tgt->tgt_end = 0;
964 return tgt;
965 }
966
967 tgt_align = sizeof (void *);
968 tgt_size = 0;
969 cbuf.chunks = NULL;
970 cbuf.chunk_cnt = -1;
971 cbuf.use_cnt = 0;
972 cbuf.buf = NULL;
973 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
974 {
975 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
976 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
977 cbuf.chunk_cnt = 0;
978 }
979 if (pragma_kind == GOMP_MAP_VARS_TARGET)
980 {
981 size_t align = 4 * sizeof (void *);
982 tgt_align = align;
983 tgt_size = mapnum * sizeof (void *);
984 cbuf.chunk_cnt = 1;
985 cbuf.use_cnt = 1 + (mapnum > 1);
986 cbuf.chunks[0].start = 0;
987 cbuf.chunks[0].end = tgt_size;
988 }
989
990 gomp_mutex_lock (&devicep->lock);
991 if (devicep->state == GOMP_DEVICE_FINALIZED)
992 {
993 gomp_mutex_unlock (&devicep->lock);
994 free (tgt);
995 return NULL;
996 }
997
998 for (i = 0; i < mapnum; i++)
999 {
1000 int kind = get_kind (short_mapkind, kinds, i);
1001 bool implicit = get_implicit (short_mapkind, kinds, i);
1002 if (hostaddrs[i] == NULL
1003 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1004 {
1005 tgt->list[i].key = NULL;
1006 tgt->list[i].offset = OFFSET_INLINED;
1007 continue;
1008 }
1009 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1010 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1011 {
1012 tgt->list[i].key = NULL;
1013 if (!not_found_cnt)
1014 {
1015 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1016 on a separate construct prior to using use_device_{addr,ptr}.
1017 In OpenMP 5.0, map directives need to be ordered by the
1018 middle-end before the use_device_* clauses. If
1019 !not_found_cnt, all mappings requested (if any) are already
1020 mapped, so use_device_{addr,ptr} can be resolved right away.
1021 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1022 now but would succeed after performing the mappings in the
1023 following loop. We can't defer this always to the second
1024 loop, because it is not even invoked when !not_found_cnt
1025 after the first loop. */
1026 cur_node.host_start = (uintptr_t) hostaddrs[i];
1027 cur_node.host_end = cur_node.host_start;
1028 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1029 if (n != NULL)
1030 {
1031 cur_node.host_start -= n->host_start;
1032 hostaddrs[i]
1033 = (void *) (n->tgt->tgt_start + n->tgt_offset
1034 + cur_node.host_start);
1035 }
1036 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1037 {
1038 gomp_mutex_unlock (&devicep->lock);
1039 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1040 }
1041 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1042 /* If not present, continue using the host address. */
1043 ;
1044 else
1045 __builtin_unreachable ();
1046 tgt->list[i].offset = OFFSET_INLINED;
1047 }
1048 else
1049 tgt->list[i].offset = 0;
1050 continue;
1051 }
1052 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1053 {
1054 size_t first = i + 1;
1055 size_t last = i + sizes[i];
1056 cur_node.host_start = (uintptr_t) hostaddrs[i];
1057 cur_node.host_end = (uintptr_t) hostaddrs[last]
1058 + sizes[last];
1059 tgt->list[i].key = NULL;
1060 tgt->list[i].offset = OFFSET_STRUCT;
1061 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1062 if (n == NULL)
1063 {
1064 size_t align = (size_t) 1 << (kind >> rshift);
1065 if (tgt_align < align)
1066 tgt_align = align;
1067 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1068 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1069 tgt_size += cur_node.host_end - cur_node.host_start;
1070 not_found_cnt += last - i;
1071 for (i = first; i <= last; i++)
1072 {
1073 tgt->list[i].key = NULL;
1074 if (!aq
1075 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1076 & typemask))
1077 gomp_coalesce_buf_add (&cbuf,
1078 tgt_size - cur_node.host_end
1079 + (uintptr_t) hostaddrs[i],
1080 sizes[i]);
1081 }
1082 i--;
1083 continue;
1084 }
1085 for (i = first; i <= last; i++)
1086 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1087 sizes, kinds, NULL, refcount_set);
1088 i--;
1089 continue;
1090 }
1091 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1092 {
1093 tgt->list[i].key = NULL;
1094 tgt->list[i].offset = OFFSET_POINTER;
1095 has_firstprivate = true;
1096 continue;
1097 }
1098 else if ((kind & typemask) == GOMP_MAP_ATTACH
1099 || ((kind & typemask)
1100 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1101 {
1102 tgt->list[i].key = NULL;
1103 has_firstprivate = true;
1104 continue;
1105 }
1106 cur_node.host_start = (uintptr_t) hostaddrs[i];
1107 if (!GOMP_MAP_POINTER_P (kind & typemask))
1108 cur_node.host_end = cur_node.host_start + sizes[i];
1109 else
1110 cur_node.host_end = cur_node.host_start + sizeof (void *);
1111 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1112 {
1113 tgt->list[i].key = NULL;
1114
1115 size_t align = (size_t) 1 << (kind >> rshift);
1116 if (tgt_align < align)
1117 tgt_align = align;
1118 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1119 if (!aq)
1120 gomp_coalesce_buf_add (&cbuf, tgt_size,
1121 cur_node.host_end - cur_node.host_start);
1122 tgt_size += cur_node.host_end - cur_node.host_start;
1123 has_firstprivate = true;
1124 continue;
1125 }
1126 splay_tree_key n;
1127 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1128 {
1129 n = gomp_map_0len_lookup (mem_map, &cur_node);
1130 if (!n)
1131 {
1132 tgt->list[i].key = NULL;
1133 tgt->list[i].offset = OFFSET_POINTER;
1134 continue;
1135 }
1136 }
1137 else
1138 n = splay_tree_lookup (mem_map, &cur_node);
1139 if (n && n->refcount != REFCOUNT_LINK)
1140 {
1141 int always_to_cnt = 0;
1142 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1143 {
1144 bool has_nullptr = false;
1145 size_t j;
1146 for (j = 0; j < n->tgt->list_count; j++)
1147 if (n->tgt->list[j].key == n)
1148 {
1149 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1150 break;
1151 }
1152 if (n->tgt->list_count == 0)
1153 {
1154 /* 'declare target'; assume has_nullptr; it could also be a
1155 statically assigned pointer, but then it should point to
1156 the equivalent variable on the host.  */
1157 assert (n->refcount == REFCOUNT_INFINITY);
1158 has_nullptr = true;
1159 }
1160 else
1161 assert (j < n->tgt->list_count);
1162 /* Re-map the data if there is an 'always' modifier or if a
1163 null pointer was there and now a non-null pointer has been found; that
1164 permits transparent re-mapping for Fortran array descriptors
1165 which were previously mapped unallocated. */
1166 for (j = i + 1; j < mapnum; j++)
1167 {
1168 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1169 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1170 && (!has_nullptr
1171 || !GOMP_MAP_POINTER_P (ptr_kind)
1172 || *(void **) hostaddrs[j] == NULL))
1173 break;
1174 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1175 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1176 > cur_node.host_end))
1177 break;
1178 else
1179 {
1180 has_always_ptrset = true;
1181 ++always_to_cnt;
1182 }
1183 }
1184 }
1185 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1186 kind & typemask, always_to_cnt > 0, implicit,
1187 NULL, refcount_set);
1188 i += always_to_cnt;
1189 }
1190 else
1191 {
1192 tgt->list[i].key = NULL;
1193
1194 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1195 {
1196 /* Not present, hence skip the entry - including its MAP_POINTER,
1197 if it exists.  */
1198 tgt->list[i].offset = OFFSET_POINTER;
1199 if (i + 1 < mapnum
1200 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1201 == GOMP_MAP_POINTER))
1202 {
1203 ++i;
1204 tgt->list[i].key = NULL;
1205 tgt->list[i].offset = 0;
1206 }
1207 continue;
1208 }
1209 size_t align = (size_t) 1 << (kind >> rshift);
1210 not_found_cnt++;
1211 if (tgt_align < align)
1212 tgt_align = align;
1213 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1214 if (!aq
1215 && gomp_to_device_kind_p (kind & typemask))
1216 gomp_coalesce_buf_add (&cbuf, tgt_size,
1217 cur_node.host_end - cur_node.host_start);
1218 tgt_size += cur_node.host_end - cur_node.host_start;
1219 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1220 {
1221 size_t j;
1222 int kind;
1223 for (j = i + 1; j < mapnum; j++)
1224 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1225 kinds, j)) & typemask))
1226 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1227 break;
1228 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1229 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1230 > cur_node.host_end))
1231 break;
1232 else
1233 {
1234 tgt->list[j].key = NULL;
1235 i++;
1236 }
1237 }
1238 }
1239 }
1240
1241 if (devaddrs)
1242 {
1243 if (mapnum != 1)
1244 {
1245 gomp_mutex_unlock (&devicep->lock);
1246 gomp_fatal ("unexpected aggregation");
1247 }
1248 tgt->to_free = devaddrs[0];
1249 tgt->tgt_start = (uintptr_t) tgt->to_free;
1250 tgt->tgt_end = tgt->tgt_start + sizes[0];
1251 }
1252 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1253 {
1254 /* Allocate tgt_align aligned tgt_size block of memory. */
1255 /* FIXME: Perhaps change interface to allocate properly aligned
1256 memory. */
1257 tgt->to_free = devicep->alloc_func (devicep->target_id,
1258 tgt_size + tgt_align - 1);
1259 if (!tgt->to_free)
1260 {
1261 gomp_mutex_unlock (&devicep->lock);
1262 gomp_fatal ("device memory allocation fail");
1263 }
1264
1265 tgt->tgt_start = (uintptr_t) tgt->to_free;
1266 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1267 tgt->tgt_end = tgt->tgt_start + tgt_size;
1268
1269 if (cbuf.use_cnt == 1)
1270 cbuf.chunk_cnt--;
1271 if (cbuf.chunk_cnt > 0)
1272 {
1273 cbuf.buf
1274 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1275 if (cbuf.buf)
1276 {
1277 cbuf.tgt = tgt;
1278 cbufp = &cbuf;
1279 }
1280 }
1281 }
1282 else
1283 {
1284 tgt->to_free = NULL;
1285 tgt->tgt_start = 0;
1286 tgt->tgt_end = 0;
1287 }
1288
1289 tgt_size = 0;
1290 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1291 tgt_size = mapnum * sizeof (void *);
1292
1293 tgt->array = NULL;
1294 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1295 {
1296 if (not_found_cnt)
1297 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1298 splay_tree_node array = tgt->array;
1299 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1300 uintptr_t field_tgt_base = 0;
1301 splay_tree_key field_tgt_structelem_first = NULL;
1302
1303 for (i = 0; i < mapnum; i++)
1304 if (has_always_ptrset
1305 && tgt->list[i].key
1306 && (get_kind (short_mapkind, kinds, i) & typemask)
1307 == GOMP_MAP_TO_PSET)
1308 {
1309 splay_tree_key k = tgt->list[i].key;
1310 bool has_nullptr = false;
1311 size_t j;
1312 for (j = 0; j < k->tgt->list_count; j++)
1313 if (k->tgt->list[j].key == k)
1314 {
1315 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1316 break;
1317 }
1318 if (k->tgt->list_count == 0)
1319 has_nullptr = true;
1320 else
1321 assert (j < k->tgt->list_count);
1322
1323 tgt->list[i].has_null_ptr_assoc = false;
1324 for (j = i + 1; j < mapnum; j++)
1325 {
1326 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1327 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1328 && (!has_nullptr
1329 || !GOMP_MAP_POINTER_P (ptr_kind)
1330 || *(void **) hostaddrs[j] == NULL))
1331 break;
1332 else if ((uintptr_t) hostaddrs[j] < k->host_start
1333 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1334 > k->host_end))
1335 break;
1336 else
1337 {
1338 if (*(void **) hostaddrs[j] == NULL)
1339 tgt->list[i].has_null_ptr_assoc = true;
1340 tgt->list[j].key = k;
1341 tgt->list[j].copy_from = false;
1342 tgt->list[j].always_copy_from = false;
1343 tgt->list[j].is_attach = false;
1344 gomp_increment_refcount (k, refcount_set);
1345 gomp_map_pointer (k->tgt, aq,
1346 (uintptr_t) *(void **) hostaddrs[j],
1347 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1348 - k->host_start),
1349 sizes[j], cbufp, false);
1350 }
1351 }
1352 i = j - 1;
1353 }
1354 else if (tgt->list[i].key == NULL)
1355 {
1356 int kind = get_kind (short_mapkind, kinds, i);
1357 bool implicit = get_implicit (short_mapkind, kinds, i);
1358 if (hostaddrs[i] == NULL)
1359 continue;
1360 switch (kind & typemask)
1361 {
1362 size_t align, len, first, last;
1363 splay_tree_key n;
1364 case GOMP_MAP_FIRSTPRIVATE:
1365 align = (size_t) 1 << (kind >> rshift);
1366 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1367 tgt->list[i].offset = tgt_size;
1368 len = sizes[i];
1369 gomp_copy_host2dev (devicep, aq,
1370 (void *) (tgt->tgt_start + tgt_size),
1371 (void *) hostaddrs[i], len, false, cbufp);
1372 /* Save the device address in hostaddrs to make it available later
1373 when doing a deep firstprivate with pointer attach.  */
1374 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1375 tgt_size += len;
1376
1377 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1378 firstprivate to hostaddrs[i+1], which is assumed to contain a
1379 device address. */
1380 if (i + 1 < mapnum
1381 && (GOMP_MAP_ATTACH
1382 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1383 {
1384 uintptr_t target = (uintptr_t) hostaddrs[i];
1385 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1386 gomp_copy_host2dev (devicep, aq, devptr, &target,
1387 sizeof (void *), false, cbufp);
1388 ++i;
1389 }
1390 continue;
1391 case GOMP_MAP_FIRSTPRIVATE_INT:
1392 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1393 continue;
1394 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1395 /* The OpenACC 'host_data' construct only allows 'use_device'
1396 "mapping" clauses, so in the first loop, 'not_found_cnt'
1397 must always have been zero, so all OpenACC 'use_device'
1398 clauses have already been handled. (We can only easily test
1399 'use_device' with 'if_present' clause here.) */
1400 assert (tgt->list[i].offset == OFFSET_INLINED);
1401 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1402 code conceptually simple, similar to the first loop. */
1403 case GOMP_MAP_USE_DEVICE_PTR:
1404 if (tgt->list[i].offset == 0)
1405 {
1406 cur_node.host_start = (uintptr_t) hostaddrs[i];
1407 cur_node.host_end = cur_node.host_start;
1408 n = gomp_map_lookup (mem_map, &cur_node);
1409 if (n != NULL)
1410 {
1411 cur_node.host_start -= n->host_start;
1412 hostaddrs[i]
1413 = (void *) (n->tgt->tgt_start + n->tgt_offset
1414 + cur_node.host_start);
1415 }
1416 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1417 {
1418 gomp_mutex_unlock (&devicep->lock);
1419 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1420 }
1421 else if ((kind & typemask)
1422 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1423 /* If not present, continue using the host address. */
1424 ;
1425 else
1426 __builtin_unreachable ();
1427 tgt->list[i].offset = OFFSET_INLINED;
1428 }
1429 continue;
1430 case GOMP_MAP_STRUCT:
1431 first = i + 1;
1432 last = i + sizes[i];
1433 cur_node.host_start = (uintptr_t) hostaddrs[i];
1434 cur_node.host_end = (uintptr_t) hostaddrs[last]
1435 + sizes[last];
1436 if (tgt->list[first].key != NULL)
1437 continue;
1438 n = splay_tree_lookup (mem_map, &cur_node);
1439 if (n == NULL)
1440 {
1441 size_t align = (size_t) 1 << (kind >> rshift);
1442 tgt_size -= (uintptr_t) hostaddrs[first]
1443 - (uintptr_t) hostaddrs[i];
1444 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1445 tgt_size += (uintptr_t) hostaddrs[first]
1446 - (uintptr_t) hostaddrs[i];
1447 field_tgt_base = (uintptr_t) hostaddrs[first];
1448 field_tgt_offset = tgt_size;
1449 field_tgt_clear = last;
1450 field_tgt_structelem_first = NULL;
1451 tgt_size += cur_node.host_end
1452 - (uintptr_t) hostaddrs[first];
1453 continue;
1454 }
1455 for (i = first; i <= last; i++)
1456 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1457 sizes, kinds, cbufp, refcount_set);
1458 i--;
1459 continue;
1460 case GOMP_MAP_ALWAYS_POINTER:
1461 cur_node.host_start = (uintptr_t) hostaddrs[i];
1462 cur_node.host_end = cur_node.host_start + sizeof (void *);
1463 n = splay_tree_lookup (mem_map, &cur_node);
1464 if (n == NULL
1465 || n->host_start > cur_node.host_start
1466 || n->host_end < cur_node.host_end)
1467 {
1468 gomp_mutex_unlock (&devicep->lock);
1469 gomp_fatal ("always pointer not mapped");
1470 }
1471 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1472 != GOMP_MAP_ALWAYS_POINTER)
1473 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1474 if (cur_node.tgt_offset)
1475 cur_node.tgt_offset -= sizes[i];
1476 gomp_copy_host2dev (devicep, aq,
1477 (void *) (n->tgt->tgt_start
1478 + n->tgt_offset
1479 + cur_node.host_start
1480 - n->host_start),
1481 (void *) &cur_node.tgt_offset,
1482 sizeof (void *), true, cbufp);
1483 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1484 + cur_node.host_start - n->host_start;
1485 continue;
1486 case GOMP_MAP_IF_PRESENT:
1487 /* Not present - otherwise handled above. Skip over its
1488 MAP_POINTER as well. */
1489 if (i + 1 < mapnum
1490 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1491 == GOMP_MAP_POINTER))
1492 ++i;
1493 continue;
1494 case GOMP_MAP_ATTACH:
1495 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1496 {
1497 cur_node.host_start = (uintptr_t) hostaddrs[i];
1498 cur_node.host_end = cur_node.host_start + sizeof (void *);
1499 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1500 if (n != NULL)
1501 {
1502 tgt->list[i].key = n;
1503 tgt->list[i].offset = cur_node.host_start - n->host_start;
1504 tgt->list[i].length = n->host_end - n->host_start;
1505 tgt->list[i].copy_from = false;
1506 tgt->list[i].always_copy_from = false;
1507 tgt->list[i].is_attach = true;
1508 /* OpenACC 'attach'/'detach' doesn't affect
1509 structured/dynamic reference counts ('n->refcount',
1510 'n->dynamic_refcount'). */
1511
1512 bool zlas
1513 = ((kind & typemask)
1514 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1515 gomp_attach_pointer (devicep, aq, mem_map, n,
1516 (uintptr_t) hostaddrs[i], sizes[i],
1517 cbufp, zlas);
1518 }
1519 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1520 {
1521 gomp_mutex_unlock (&devicep->lock);
1522 gomp_fatal ("outer struct not mapped for attach");
1523 }
1524 continue;
1525 }
1526 default:
1527 break;
1528 }
1529 splay_tree_key k = &array->key;
1530 k->host_start = (uintptr_t) hostaddrs[i];
1531 if (!GOMP_MAP_POINTER_P (kind & typemask))
1532 k->host_end = k->host_start + sizes[i];
1533 else
1534 k->host_end = k->host_start + sizeof (void *);
1535 splay_tree_key n = splay_tree_lookup (mem_map, k);
1536 if (n && n->refcount != REFCOUNT_LINK)
1537 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1538 kind & typemask, false, implicit, cbufp,
1539 refcount_set);
1540 else
1541 {
1542 k->aux = NULL;
1543 if (n && n->refcount == REFCOUNT_LINK)
1544 {
1545 /* Replace target address of the pointer with target address
1546 of mapped object in the splay tree. */
1547 splay_tree_remove (mem_map, n);
1548 k->aux
1549 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1550 k->aux->link_key = n;
1551 }
1552 size_t align = (size_t) 1 << (kind >> rshift);
1553 tgt->list[i].key = k;
1554 k->tgt = tgt;
1555 k->refcount = 0;
1556 k->dynamic_refcount = 0;
1557 if (field_tgt_clear != FIELD_TGT_EMPTY)
1558 {
1559 k->tgt_offset = k->host_start - field_tgt_base
1560 + field_tgt_offset;
1561 if (openmp_p)
1562 {
1563 k->refcount = REFCOUNT_STRUCTELEM;
1564 if (field_tgt_structelem_first == NULL)
1565 {
1566 /* Set to first structure element of sequence. */
1567 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1568 field_tgt_structelem_first = k;
1569 }
1570 else
1571 /* Point to refcount of leading element, but do not
1572 increment again. */
1573 k->structelem_refcount_ptr
1574 = &field_tgt_structelem_first->structelem_refcount;
1575
1576 if (i == field_tgt_clear)
1577 {
1578 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1579 field_tgt_structelem_first = NULL;
1580 }
1581 }
1582 if (i == field_tgt_clear)
1583 field_tgt_clear = FIELD_TGT_EMPTY;
1584 }
1585 else
1586 {
1587 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1588 k->tgt_offset = tgt_size;
1589 tgt_size += k->host_end - k->host_start;
1590 }
1591 /* First increment, from 0 to 1. gomp_increment_refcount
1592 encapsulates the different increment cases, so use this
1593 instead of directly setting 1 during initialization. */
1594 gomp_increment_refcount (k, refcount_set);
1595
1596 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1597 tgt->list[i].always_copy_from
1598 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1599 tgt->list[i].is_attach = false;
1600 tgt->list[i].offset = 0;
1601 tgt->list[i].length = k->host_end - k->host_start;
1602 tgt->refcount++;
1603 array->left = NULL;
1604 array->right = NULL;
1605 splay_tree_insert (mem_map, array);
1606 switch (kind & typemask)
1607 {
1608 case GOMP_MAP_ALLOC:
1609 case GOMP_MAP_FROM:
1610 case GOMP_MAP_FORCE_ALLOC:
1611 case GOMP_MAP_FORCE_FROM:
1612 case GOMP_MAP_ALWAYS_FROM:
1613 break;
1614 case GOMP_MAP_TO:
1615 case GOMP_MAP_TOFROM:
1616 case GOMP_MAP_FORCE_TO:
1617 case GOMP_MAP_FORCE_TOFROM:
1618 case GOMP_MAP_ALWAYS_TO:
1619 case GOMP_MAP_ALWAYS_TOFROM:
1620 gomp_copy_host2dev (devicep, aq,
1621 (void *) (tgt->tgt_start
1622 + k->tgt_offset),
1623 (void *) k->host_start,
1624 k->host_end - k->host_start,
1625 false, cbufp);
1626 break;
1627 case GOMP_MAP_POINTER:
1628 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1629 gomp_map_pointer
1630 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1631 k->tgt_offset, sizes[i], cbufp,
1632 ((kind & typemask)
1633 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1634 break;
1635 case GOMP_MAP_TO_PSET:
1636 gomp_copy_host2dev (devicep, aq,
1637 (void *) (tgt->tgt_start
1638 + k->tgt_offset),
1639 (void *) k->host_start,
1640 k->host_end - k->host_start,
1641 false, cbufp);
1642 tgt->list[i].has_null_ptr_assoc = false;
1643
1644 for (j = i + 1; j < mapnum; j++)
1645 {
1646 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1647 & typemask);
1648 if (!GOMP_MAP_POINTER_P (ptr_kind)
1649 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1650 break;
1651 else if ((uintptr_t) hostaddrs[j] < k->host_start
1652 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1653 > k->host_end))
1654 break;
1655 else
1656 {
1657 tgt->list[j].key = k;
1658 tgt->list[j].copy_from = false;
1659 tgt->list[j].always_copy_from = false;
1660 tgt->list[j].is_attach = false;
1661 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1662 /* For OpenMP, the use of refcount_sets causes
1663 errors if we set k->refcount = 1 above but also
1664 increment it again here, for decrementing will
1665 not properly match, since we decrement only once
1666 for each key's refcount. Therefore avoid this
1667 increment for OpenMP constructs. */
1668 if (!openmp_p)
1669 gomp_increment_refcount (k, refcount_set);
1670 gomp_map_pointer (tgt, aq,
1671 (uintptr_t) *(void **) hostaddrs[j],
1672 k->tgt_offset
1673 + ((uintptr_t) hostaddrs[j]
1674 - k->host_start),
1675 sizes[j], cbufp, false);
1676 }
1677 }
1678 i = j - 1;
1679 break;
1680 case GOMP_MAP_FORCE_PRESENT:
1681 {
1682 /* We already looked up the memory region above and it
1683 was missing. */
1684 size_t size = k->host_end - k->host_start;
1685 gomp_mutex_unlock (&devicep->lock);
1686 #ifdef HAVE_INTTYPES_H
1687 gomp_fatal ("present clause: !acc_is_present (%p, "
1688 "%"PRIu64" (0x%"PRIx64"))",
1689 (void *) k->host_start,
1690 (uint64_t) size, (uint64_t) size);
1691 #else
1692 gomp_fatal ("present clause: !acc_is_present (%p, "
1693 "%lu (0x%lx))", (void *) k->host_start,
1694 (unsigned long) size, (unsigned long) size);
1695 #endif
1696 }
1697 break;
1698 case GOMP_MAP_FORCE_DEVICEPTR:
1699 assert (k->host_end - k->host_start == sizeof (void *));
1700 gomp_copy_host2dev (devicep, aq,
1701 (void *) (tgt->tgt_start
1702 + k->tgt_offset),
1703 (void *) k->host_start,
1704 sizeof (void *), false, cbufp);
1705 break;
1706 default:
1707 gomp_mutex_unlock (&devicep->lock);
1708 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1709 kind);
1710 }
1711
1712 if (k->aux && k->aux->link_key)
1713 {
1714 /* Set link pointer on target to the device address of the
1715 mapped object. */
1716 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1717 /* We intentionally do not use coalescing here, as it's not
1718 data allocated by the current call to this function. */
1719 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1720 &tgt_addr, sizeof (void *), true, NULL);
1721 }
1722 array++;
1723 }
1724 }
1725 }
1726
1727 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1728 {
1729 for (i = 0; i < mapnum; i++)
1730 {
1731 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1732 gomp_copy_host2dev (devicep, aq,
1733 (void *) (tgt->tgt_start + i * sizeof (void *)),
1734 (void *) &cur_node.tgt_offset, sizeof (void *),
1735 true, cbufp);
1736 }
1737 }
1738
1739 if (cbufp)
1740 {
1741 /* See 'gomp_coalesce_buf_add'. */
1742 assert (!aq);
1743
1744 long c = 0;
1745 for (c = 0; c < cbuf.chunk_cnt; ++c)
1746 gomp_copy_host2dev (devicep, aq,
1747 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1748 (char *) cbuf.buf + (cbuf.chunks[c].start
1749 - cbuf.chunks[0].start),
1750 cbuf.chunks[c].end - cbuf.chunks[c].start,
1751 true, NULL);
1752 free (cbuf.buf);
1753 cbuf.buf = NULL;
1754 cbufp = NULL;
1755 }
1756
1757 /* If the variable from "omp target enter data" map-list was already mapped,
1758 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1759 gomp_exit_data. */
1760 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1761 {
1762 free (tgt);
1763 tgt = NULL;
1764 }
1765
1766 gomp_mutex_unlock (&devicep->lock);
1767 return tgt;
1768 }
1769
1770 static struct target_mem_desc *
1771 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1772 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1773 bool short_mapkind, htab_t *refcount_set,
1774 enum gomp_map_vars_kind pragma_kind)
1775 {
1776 /* This management of a local refcount_set is for convenience of callers
1777 who do not share a refcount_set over multiple map/unmap uses. */
1778 htab_t local_refcount_set = NULL;
1779 if (refcount_set == NULL)
1780 {
1781 local_refcount_set = htab_create (mapnum);
1782 refcount_set = &local_refcount_set;
1783 }
1784
1785 struct target_mem_desc *tgt;
1786 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1787 sizes, kinds, short_mapkind, refcount_set,
1788 pragma_kind);
1789 if (local_refcount_set)
1790 htab_free (local_refcount_set);
1791
1792 return tgt;
1793 }
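/* Illustrative sketch (not part of the original source): a caller that maps
   and later unmaps within one logical construct shares a refcount set so
   that each key is adjusted at most once, whereas one-shot callers simply
   pass NULL and let gomp_map_vars create a temporary set as above.  The
   helper below is hypothetical.  */
#if 0
static void
example_shared_refcount_set (struct gomp_device_descr *devicep, size_t mapnum,
                             void **hostaddrs, size_t *sizes, void *kinds)
{
  htab_t refcount_set = htab_create (mapnum);
  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                     true, &refcount_set, GOMP_MAP_VARS_TARGET);
  /* ... run the region, then unmap passing the same set ...  */
  (void) tgt;
  htab_free (refcount_set);
}
#endif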
1794
1795 attribute_hidden struct target_mem_desc *
1796 goacc_map_vars (struct gomp_device_descr *devicep,
1797 struct goacc_asyncqueue *aq, size_t mapnum,
1798 void **hostaddrs, void **devaddrs, size_t *sizes,
1799 void *kinds, bool short_mapkind,
1800 enum gomp_map_vars_kind pragma_kind)
1801 {
1802 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1803 sizes, kinds, short_mapkind, NULL,
1804 GOMP_MAP_VARS_OPENACC | pragma_kind);
1805 }
1806
1807 static void
1808 gomp_unmap_tgt (struct target_mem_desc *tgt)
1809 {
1810 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1811 if (tgt->tgt_end)
1812 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1813
1814 free (tgt->array);
1815 free (tgt);
1816 }
1817
1818 static bool
1819 gomp_unref_tgt (void *ptr)
1820 {
1821 bool is_tgt_unmapped = false;
1822
1823 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1824
1825 if (tgt->refcount > 1)
1826 tgt->refcount--;
1827 else
1828 {
1829 gomp_unmap_tgt (tgt);
1830 is_tgt_unmapped = true;
1831 }
1832
1833 return is_tgt_unmapped;
1834 }
1835
1836 static void
1837 gomp_unref_tgt_void (void *ptr)
1838 {
1839 (void) gomp_unref_tgt (ptr);
1840 }
1841
1842 static void
1843 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1844 {
1845 splay_tree_remove (sp, k);
1846 if (k->aux)
1847 {
1848 if (k->aux->link_key)
1849 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1850 if (k->aux->attach_count)
1851 free (k->aux->attach_count);
1852 free (k->aux);
1853 k->aux = NULL;
1854 }
1855 }
1856
1857 static inline __attribute__((always_inline)) bool
1858 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1859 struct goacc_asyncqueue *aq)
1860 {
1861 bool is_tgt_unmapped = false;
1862
1863 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1864 {
1865 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1866 /* Infer the splay_tree_key of the first structelem key using the
1867 pointer to the first structelem_refcount. */
1868 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1869 - offsetof (struct splay_tree_key_s,
1870 structelem_refcount));
1871 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1872
1873 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1874 with the splay_tree_keys embedded inside. */
1875 splay_tree_node node =
1876 (splay_tree_node) ((char *) k
1877 - offsetof (struct splay_tree_node_s, key));
1878 while (true)
1879 {
1880 /* Start from the _FIRST key, and continue for all following
1881 sibling keys. */
1882 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1883 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1884 break;
1885 else
1886 k = &(++node)->key;
1887 }
1888 }
1889 else
1890 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1891
1892 if (aq)
1893 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1894 (void *) k->tgt);
1895 else
1896 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1897 return is_tgt_unmapped;
1898 }
1899
1900 attribute_hidden bool
1901 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1902 {
1903 return gomp_remove_var_internal (devicep, k, NULL);
1904 }
1905
1906 /* Remove a variable asynchronously. This actually removes the variable
1907 mapping immediately, but retains the linked target_mem_desc until the
1908 asynchronous operation has completed (as it may still refer to target
1909 memory). The device lock must be held before entry, and remains locked on
1910 exit. */
1911
1912 attribute_hidden void
1913 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1914 struct goacc_asyncqueue *aq)
1915 {
1916 (void) gomp_remove_var_internal (devicep, k, aq);
1917 }
1918
1919 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1920 variables back from device to host; if it is false, it is assumed that this
1921 has been done already. */
1922
1923 static inline __attribute__((always_inline)) void
1924 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1925 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1926 {
1927 struct gomp_device_descr *devicep = tgt->device_descr;
1928
1929 if (tgt->list_count == 0)
1930 {
1931 free (tgt);
1932 return;
1933 }
1934
1935 gomp_mutex_lock (&devicep->lock);
1936 if (devicep->state == GOMP_DEVICE_FINALIZED)
1937 {
1938 gomp_mutex_unlock (&devicep->lock);
1939 free (tgt->array);
1940 free (tgt);
1941 return;
1942 }
1943
1944 size_t i;
1945
1946 /* We must perform detachments before any copies back to the host. */
1947 for (i = 0; i < tgt->list_count; i++)
1948 {
1949 splay_tree_key k = tgt->list[i].key;
1950
1951 if (k != NULL && tgt->list[i].is_attach)
1952 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1953 + tgt->list[i].offset,
1954 false, NULL);
1955 }
1956
1957 for (i = 0; i < tgt->list_count; i++)
1958 {
1959 splay_tree_key k = tgt->list[i].key;
1960 if (k == NULL)
1961 continue;
1962
1963 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1964 counts ('n->refcount', 'n->dynamic_refcount'). */
1965 if (tgt->list[i].is_attach)
1966 continue;
1967
1968 bool do_copy, do_remove;
1969 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1970
1971 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1972 || tgt->list[i].always_copy_from)
1973 gomp_copy_dev2host (devicep, aq,
1974 (void *) (k->host_start + tgt->list[i].offset),
1975 (void *) (k->tgt->tgt_start + k->tgt_offset
1976 + tgt->list[i].offset),
1977 tgt->list[i].length);
1978 if (do_remove)
1979 {
1980 struct target_mem_desc *k_tgt = k->tgt;
1981 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1982 /* It would be bad if TGT got unmapped while we're still iterating
1983 over its LIST_COUNT, and also expect to use it in the following
1984 code. */
1985 assert (!is_tgt_unmapped
1986 || k_tgt != tgt);
1987 }
1988 }
1989
1990 if (aq)
1991 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1992 (void *) tgt);
1993 else
1994 gomp_unref_tgt ((void *) tgt);
1995
1996 gomp_mutex_unlock (&devicep->lock);
1997 }
1998
1999 static void
2000 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2001 htab_t *refcount_set)
2002 {
2003 /* This management of a local refcount_set is for convenience of callers
2004 who do not share a refcount_set over multiple map/unmap uses. */
2005 htab_t local_refcount_set = NULL;
2006 if (refcount_set == NULL)
2007 {
2008 local_refcount_set = htab_create (tgt->list_count);
2009 refcount_set = &local_refcount_set;
2010 }
2011
2012 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2013
2014 if (local_refcount_set)
2015 htab_free (local_refcount_set);
2016 }
2017
2018 attribute_hidden void
2019 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2020 struct goacc_asyncqueue *aq)
2021 {
2022 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2023 }
2024
2025 static void
2026 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2027 size_t *sizes, void *kinds, bool short_mapkind)
2028 {
2029 size_t i;
2030 struct splay_tree_key_s cur_node;
2031 const int typemask = short_mapkind ? 0xff : 0x7;
2032
2033 if (!devicep)
2034 return;
2035
2036 if (mapnum == 0)
2037 return;
2038
2039 gomp_mutex_lock (&devicep->lock);
2040 if (devicep->state == GOMP_DEVICE_FINALIZED)
2041 {
2042 gomp_mutex_unlock (&devicep->lock);
2043 return;
2044 }
2045
2046 for (i = 0; i < mapnum; i++)
2047 if (sizes[i])
2048 {
2049 cur_node.host_start = (uintptr_t) hostaddrs[i];
2050 cur_node.host_end = cur_node.host_start + sizes[i];
2051 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2052 if (n)
2053 {
2054 int kind = get_kind (short_mapkind, kinds, i);
2055 if (n->host_start > cur_node.host_start
2056 || n->host_end < cur_node.host_end)
2057 {
2058 gomp_mutex_unlock (&devicep->lock);
2059 gomp_fatal ("Trying to update [%p..%p) object when "
2060 "only [%p..%p) is mapped",
2061 (void *) cur_node.host_start,
2062 (void *) cur_node.host_end,
2063 (void *) n->host_start,
2064 (void *) n->host_end);
2065 }
2066
2067 if (n->aux && n->aux->attach_count)
2068 {
2069 uintptr_t addr = cur_node.host_start;
2070 while (addr < cur_node.host_end)
2071 {
2072 /* We have to be careful not to overwrite still attached
2073 pointers during host<->device updates. */
2074 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2075 if (n->aux->attach_count[i] == 0)
2076 {
2077 void *devaddr = (void *) (n->tgt->tgt_start
2078 + n->tgt_offset
2079 + addr - n->host_start);
2080 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2081 gomp_copy_host2dev (devicep, NULL,
2082 devaddr, (void *) addr,
2083 sizeof (void *), false, NULL);
2084 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2085 gomp_copy_dev2host (devicep, NULL,
2086 (void *) addr, devaddr,
2087 sizeof (void *));
2088 }
2089 addr += sizeof (void *);
2090 }
2091 }
2092 else
2093 {
2094 void *hostaddr = (void *) cur_node.host_start;
2095 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2096 + cur_node.host_start
2097 - n->host_start);
2098 size_t size = cur_node.host_end - cur_node.host_start;
2099
2100 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2101 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2102 false, NULL);
2103 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2104 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2105 }
2106 }
2107 }
2108 gomp_mutex_unlock (&devicep->lock);
2109 }
2110
2111 /* Load the image pointed to by TARGET_DATA to the device specified by DEVICEP,
2112 and insert into the splay tree the mapping between the addresses from HOST_TABLE
2113 and those from the loaded target image. We rely on the host and device compilers
2114 emitting variables and functions in the same order. */
2115
2116 static void
2117 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2118 const void *host_table, const void *target_data,
2119 bool is_register_lock)
2120 {
2121 void **host_func_table = ((void ***) host_table)[0];
2122 void **host_funcs_end = ((void ***) host_table)[1];
2123 void **host_var_table = ((void ***) host_table)[2];
2124 void **host_vars_end = ((void ***) host_table)[3];
2125
2126 /* The func table contains only addresses, the var table contains addresses
2127 and corresponding sizes. */
2128 int num_funcs = host_funcs_end - host_func_table;
2129 int num_vars = (host_vars_end - host_var_table) / 2;
2130
2131 /* Others currently consist only of 'device_num'. */
2132 int num_others = 1;
2133
2134 /* Load image to device and get target addresses for the image. */
2135 struct addr_pair *target_table = NULL;
2136 int i, num_target_entries;
2137
2138 num_target_entries
2139 = devicep->load_image_func (devicep->target_id, version,
2140 target_data, &target_table);
2141
2142 if (num_target_entries != num_funcs + num_vars
2143 /* Others (device_num) are included as trailing entries in pair list. */
2144 && num_target_entries != num_funcs + num_vars + num_others)
2145 {
2146 gomp_mutex_unlock (&devicep->lock);
2147 if (is_register_lock)
2148 gomp_mutex_unlock (&register_lock);
2149 gomp_fatal ("Cannot map target functions or variables"
2150 " (expected %u, have %u)", num_funcs + num_vars,
2151 num_target_entries);
2152 }
2153
2154 /* Insert host-target address mapping into splay tree. */
2155 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2156 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2157 tgt->refcount = REFCOUNT_INFINITY;
2158 tgt->tgt_start = 0;
2159 tgt->tgt_end = 0;
2160 tgt->to_free = NULL;
2161 tgt->prev = NULL;
2162 tgt->list_count = 0;
2163 tgt->device_descr = devicep;
2164 splay_tree_node array = tgt->array;
2165
2166 for (i = 0; i < num_funcs; i++)
2167 {
2168 splay_tree_key k = &array->key;
2169 k->host_start = (uintptr_t) host_func_table[i];
2170 k->host_end = k->host_start + 1;
2171 k->tgt = tgt;
2172 k->tgt_offset = target_table[i].start;
2173 k->refcount = REFCOUNT_INFINITY;
2174 k->dynamic_refcount = 0;
2175 k->aux = NULL;
2176 array->left = NULL;
2177 array->right = NULL;
2178 splay_tree_insert (&devicep->mem_map, array);
2179 array++;
2180 }
2181
2182 /* Most significant bit of the size in host and target tables marks
2183 "omp declare target link" variables. */
2184 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2185 const uintptr_t size_mask = ~link_bit;
2186
2187 for (i = 0; i < num_vars; i++)
2188 {
2189 struct addr_pair *target_var = &target_table[num_funcs + i];
2190 uintptr_t target_size = target_var->end - target_var->start;
2191 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2192
2193 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2194 {
2195 gomp_mutex_unlock (&devicep->lock);
2196 if (is_register_lock)
2197 gomp_mutex_unlock (&register_lock);
2198 gomp_fatal ("Cannot map target variables (size mismatch)");
2199 }
2200
2201 splay_tree_key k = &array->key;
2202 k->host_start = (uintptr_t) host_var_table[i * 2];
2203 k->host_end
2204 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2205 k->tgt = tgt;
2206 k->tgt_offset = target_var->start;
2207 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2208 k->dynamic_refcount = 0;
2209 k->aux = NULL;
2210 array->left = NULL;
2211 array->right = NULL;
2212 splay_tree_insert (&devicep->mem_map, array);
2213 array++;
2214 }
2215
2216 /* The last entry is for the on-device 'device_num' variable. Tolerate the case
2217 where the plugin does not return this entry. */
2218 if (num_funcs + num_vars < num_target_entries)
2219 {
2220 struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2221 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2222 was found in this image. */
2223 if (device_num_var->start != 0)
2224 {
2225 /* The index of the devicep within devices[] is regarded as its
2226 'device number', which is different from the per-device type
2227 devicep->target_id. */
2228 int device_num_val = (int) (devicep - &devices[0]);
2229 if (device_num_var->end - device_num_var->start != sizeof (int))
2230 {
2231 gomp_mutex_unlock (&devicep->lock);
2232 if (is_register_lock)
2233 gomp_mutex_unlock (&register_lock);
2234 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2235 "format");
2236 }
2237
2238 /* Copy the device_num value to its place in device memory, thereby
2239 actually putting its device number into effect. */
2240 gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2241 &device_num_val, sizeof (int), false, NULL);
2242 }
2243 }
2244
2245 free (target_table);
2246 }
2247
2248 /* Unload the mappings described by target_data from device DEVICE_P.
2249 The device must be locked. */
2250
2251 static void
2252 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2253 unsigned version,
2254 const void *host_table, const void *target_data)
2255 {
2256 void **host_func_table = ((void ***) host_table)[0];
2257 void **host_funcs_end = ((void ***) host_table)[1];
2258 void **host_var_table = ((void ***) host_table)[2];
2259 void **host_vars_end = ((void ***) host_table)[3];
2260
2261 /* The func table contains only addresses, the var table contains addresses
2262 and corresponding sizes. */
2263 int num_funcs = host_funcs_end - host_func_table;
2264 int num_vars = (host_vars_end - host_var_table) / 2;
2265
2266 struct splay_tree_key_s k;
2267 splay_tree_key node = NULL;
2268
2269 /* Find the mapping at the start of the node array. */
2270 if (num_funcs || num_vars)
2271 {
2272 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2273 : (uintptr_t) host_var_table[0]);
2274 k.host_end = k.host_start + 1;
2275 node = splay_tree_lookup (&devicep->mem_map, &k);
2276 }
2277
2278 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2279 {
2280 gomp_mutex_unlock (&devicep->lock);
2281 gomp_fatal ("image unload fail");
2282 }
2283
2284 /* Remove mappings from splay tree. */
2285 int i;
2286 for (i = 0; i < num_funcs; i++)
2287 {
2288 k.host_start = (uintptr_t) host_func_table[i];
2289 k.host_end = k.host_start + 1;
2290 splay_tree_remove (&devicep->mem_map, &k);
2291 }
2292
2293 /* Most significant bit of the size in host and target tables marks
2294 "omp declare target link" variables. */
2295 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2296 const uintptr_t size_mask = ~link_bit;
2297 bool is_tgt_unmapped = false;
2298
2299 for (i = 0; i < num_vars; i++)
2300 {
2301 k.host_start = (uintptr_t) host_var_table[i * 2];
2302 k.host_end
2303 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2304
2305 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2306 splay_tree_remove (&devicep->mem_map, &k);
2307 else
2308 {
2309 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2310 is_tgt_unmapped = gomp_remove_var (devicep, n);
2311 }
2312 }
2313
2314 if (node && !is_tgt_unmapped)
2315 {
2316 free (node->tgt);
2317 free (node);
2318 }
2319 }
2320
2321 static void
2322 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2323 {
2324 char *end = buf + size, *p = buf;
2325 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2326 p += snprintf (p, end - p, "unified_address");
2327 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2328 p += snprintf (p, end - p, "%sunified_shared_memory",
2329 (p == buf ? "" : ", "));
2330 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2331 p += snprintf (p, end - p, "%sreverse_offload",
2332 (p == buf ? "" : ", "));
2333 }
2334
2335 /* This function should be called from every offload image when it is loaded.
2336 It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE of
2337 the target, and the TARGET_DATA needed by the target plugin. */
2338
2339 void
2340 GOMP_offload_register_ver (unsigned version, const void *host_table,
2341 int target_type, const void *target_data)
2342 {
2343 int i;
2344 int omp_req = 0;
2345
2346 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2347 gomp_fatal ("Library too old for offload (version %u < %u)",
2348 GOMP_VERSION, GOMP_VERSION_LIB (version));
2349
2350 if (GOMP_VERSION_LIB (version) > 1)
2351 {
2352 omp_req = (int) (size_t) ((void **) target_data)[0];
2353 target_data = &((void **) target_data)[1];
2354 }
2355
2356 gomp_mutex_lock (&register_lock);
2357
2358 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2359 {
2360 char buf1[sizeof ("unified_address, unified_shared_memory, "
2361 "reverse_offload")];
2362 char buf2[sizeof ("unified_address, unified_shared_memory, "
2363 "reverse_offload")];
2364 gomp_requires_to_name (buf2, sizeof (buf2),
2365 omp_req != GOMP_REQUIRES_TARGET_USED
2366 ? omp_req : omp_requires_mask);
2367 if (omp_req != GOMP_REQUIRES_TARGET_USED
2368 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2369 {
2370 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2371 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2372 "in multiple compilation units: '%s' vs. '%s'",
2373 buf1, buf2);
2374 }
2375 else
2376 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2377 "some compilation units", buf2);
2378 }
2379 omp_requires_mask = omp_req;
2380
2381 /* Load image to all initialized devices. */
2382 for (i = 0; i < num_devices; i++)
2383 {
2384 struct gomp_device_descr *devicep = &devices[i];
2385 gomp_mutex_lock (&devicep->lock);
2386 if (devicep->type == target_type
2387 && devicep->state == GOMP_DEVICE_INITIALIZED)
2388 gomp_load_image_to_device (devicep, version,
2389 host_table, target_data, true);
2390 gomp_mutex_unlock (&devicep->lock);
2391 }
2392
2393 /* Insert image to array of pending images. */
2394 offload_images
2395 = gomp_realloc_unlock (offload_images,
2396 (num_offload_images + 1)
2397 * sizeof (struct offload_image_descr));
2398 offload_images[num_offload_images].version = version;
2399 offload_images[num_offload_images].type = target_type;
2400 offload_images[num_offload_images].host_table = host_table;
2401 offload_images[num_offload_images].target_data = target_data;
2402
2403 num_offload_images++;
2404 gomp_mutex_unlock (&register_lock);
2405 }
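
/* Usage sketch (editorial, not part of libgomp): the requires check above
   rejects programs whose compilation units were built with non-matching
   'omp requires' clauses.  A minimal illustration, assuming both files are
   compiled with offloading configured and linked into one program (file
   names are made up):

     // a.c
     #pragma omp requires unified_shared_memory
     int a_val (void)
     {
       int v = 0;
     #pragma omp target map(tofrom: v)
       v = 1;
       return v;
     }

     // b.c -- uses target, but has no 'requires' directive
     int b_val (void)
     {
       int v = 0;
     #pragma omp target map(tofrom: v)
       v = 2;
       return v;
     }

   At image registration time the two masks differ, so libgomp reports that
   'unified_shared_memory' was specified only in some compilation units.  */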
2406
2407 void
2408 GOMP_offload_register (const void *host_table, int target_type,
2409 const void *target_data)
2410 {
2411 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2412 }
2413
2414 /* This function should be called from every offload image when it is unloaded.
2415 It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE of
2416 the target, and the TARGET_DATA needed by the target plugin. */
2417
2418 void
2419 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2420 int target_type, const void *target_data)
2421 {
2422 int i;
2423
2424 gomp_mutex_lock (&register_lock);
2425
2426 /* Unload image from all initialized devices. */
2427 for (i = 0; i < num_devices; i++)
2428 {
2429 struct gomp_device_descr *devicep = &devices[i];
2430 gomp_mutex_lock (&devicep->lock);
2431 if (devicep->type == target_type
2432 && devicep->state == GOMP_DEVICE_INITIALIZED)
2433 gomp_unload_image_from_device (devicep, version,
2434 host_table, target_data);
2435 gomp_mutex_unlock (&devicep->lock);
2436 }
2437
2438 /* Remove image from array of pending images. */
2439 for (i = 0; i < num_offload_images; i++)
2440 if (offload_images[i].target_data == target_data)
2441 {
2442 offload_images[i] = offload_images[--num_offload_images];
2443 break;
2444 }
2445
2446 gomp_mutex_unlock (&register_lock);
2447 }
2448
2449 void
2450 GOMP_offload_unregister (const void *host_table, int target_type,
2451 const void *target_data)
2452 {
2453 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2454 }
2455
2456 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2457 must be locked on entry, and remains locked on return. */
2458
2459 attribute_hidden void
2460 gomp_init_device (struct gomp_device_descr *devicep)
2461 {
2462 int i;
2463 if (!devicep->init_device_func (devicep->target_id))
2464 {
2465 gomp_mutex_unlock (&devicep->lock);
2466 gomp_fatal ("device initialization failed");
2467 }
2468
2469 /* Load to the device all images registered so far. */
2470 for (i = 0; i < num_offload_images; i++)
2471 {
2472 struct offload_image_descr *image = &offload_images[i];
2473 if (image->type == devicep->type)
2474 gomp_load_image_to_device (devicep, image->version,
2475 image->host_table, image->target_data,
2476 false);
2477 }
2478
2479 /* Initialize OpenACC asynchronous queues. */
2480 goacc_init_asyncqueues (devicep);
2481
2482 devicep->state = GOMP_DEVICE_INITIALIZED;
2483 }
2484
2485 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2486 must be locked on entry, and remains locked on return. */
2487
2488 attribute_hidden bool
2489 gomp_fini_device (struct gomp_device_descr *devicep)
2490 {
2491 bool ret = goacc_fini_asyncqueues (devicep);
2492 ret &= devicep->fini_device_func (devicep->target_id);
2493 devicep->state = GOMP_DEVICE_FINALIZED;
2494 return ret;
2495 }
2496
2497 attribute_hidden void
2498 gomp_unload_device (struct gomp_device_descr *devicep)
2499 {
2500 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2501 {
2502 unsigned i;
2503
2504 /* Unload from the device all images currently registered. */
2505 for (i = 0; i < num_offload_images; i++)
2506 {
2507 struct offload_image_descr *image = &offload_images[i];
2508 if (image->type == devicep->type)
2509 gomp_unload_image_from_device (devicep, image->version,
2510 image->host_table,
2511 image->target_data);
2512 }
2513 }
2514 }
2515
2516 /* Host fallback for GOMP_target{,_ext} routines. */
2517
2518 static void
2519 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2520 struct gomp_device_descr *devicep, void **args)
2521 {
2522 struct gomp_thread old_thr, *thr = gomp_thread ();
2523
2524 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2525 && devicep != NULL)
2526 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2527 "be used for offloading");
2528
2529 old_thr = *thr;
2530 memset (thr, '\0', sizeof (*thr));
2531 if (gomp_places_list)
2532 {
2533 thr->place = old_thr.place;
2534 thr->ts.place_partition_len = gomp_places_list_len;
2535 }
2536 if (args)
2537 while (*args)
2538 {
2539 intptr_t id = (intptr_t) *args++, val;
2540 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2541 val = (intptr_t) *args++;
2542 else
2543 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2544 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2545 continue;
2546 id &= GOMP_TARGET_ARG_ID_MASK;
2547 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2548 continue;
2549 val = val > INT_MAX ? INT_MAX : val;
2550 if (val)
2551 gomp_icv (true)->thread_limit_var = val;
2552 break;
2553 }
2554
2555 fn (hostaddrs);
2556 gomp_free_thread (thr);
2557 *thr = old_thr;
2558 }
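
/* Usage sketch (editorial, not part of libgomp): gomp_target_fallback runs
   the host version of a target region when the device cannot be used.  With
   OMP_TARGET_OFFLOAD=mandatory in the environment the fallback above is
   refused and libgomp aborts instead.  A hypothetical probe:

     #include <stdio.h>
     #include <omp.h>

     int
     main (void)
     {
       int on_host = -1;
     #pragma omp target map(from: on_host)
       on_host = omp_is_initial_device ();
       printf ("ran on %s\n", on_host ? "host (fallback)" : "device");
       return 0;
     }

   Without the environment variable a missing or unusable device silently
   yields "host (fallback)"; with it, offload failures are diagnosed as
   fatal errors.  */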
2559
2560 /* Calculate alignment and size requirements of a private copy of data shared
2561 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2562
2563 static inline void
2564 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2565 unsigned short *kinds, size_t *tgt_align,
2566 size_t *tgt_size)
2567 {
2568 size_t i;
2569 for (i = 0; i < mapnum; i++)
2570 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2571 {
2572 size_t align = (size_t) 1 << (kinds[i] >> 8);
2573 if (*tgt_align < align)
2574 *tgt_align = align;
2575 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2576 *tgt_size += sizes[i];
2577 }
2578 }
2579
2580 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2581
2582 static inline void
2583 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2584 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2585 size_t tgt_size)
2586 {
2587 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2588 if (al)
2589 tgt += tgt_align - al;
2590 tgt_size = 0;
2591 size_t i;
2592 for (i = 0; i < mapnum; i++)
2593 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2594 {
2595 size_t align = (size_t) 1 << (kinds[i] >> 8);
2596 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2597 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2598 hostaddrs[i] = tgt + tgt_size;
2599 tgt_size = tgt_size + sizes[i];
2600 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2601 {
2602 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2603 ++i;
2604 }
2605 }
2606 }
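
/* Usage sketch (editorial, not part of libgomp): the two helpers above pack
   all GOMP_MAP_FIRSTPRIVATE items into one suitably aligned block that is
   passed along with the target region.  At the source level this corresponds
   to firstprivate data on a target construct; the code below is illustrative
   only:

     struct cfg { int n; double scale; };

     double
     scaled_sum (struct cfg c, const double *v, int n)
     {
       double s = 0.0;
     #pragma omp target map(to: v[0:n]) map(tofrom: s) firstprivate(c)
       for (int i = 0; i < c.n && i < n; i++)
         s += c.scale * v[i];
       return s;
     }

   Each firstprivate item adds its size, rounded up to its alignment, to
   tgt_size; copy_firstprivate_data then copies the host values into the block
   and redirects hostaddrs[i] to the packed copy.  */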
2607
2608 /* Helper function of GOMP_target{,_ext} routines. */
2609
2610 static void *
2611 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2612 void (*host_fn) (void *))
2613 {
2614 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2615 return (void *) host_fn;
2616 else
2617 {
2618 gomp_mutex_lock (&devicep->lock);
2619 if (devicep->state == GOMP_DEVICE_FINALIZED)
2620 {
2621 gomp_mutex_unlock (&devicep->lock);
2622 return NULL;
2623 }
2624
2625 struct splay_tree_key_s k;
2626 k.host_start = (uintptr_t) host_fn;
2627 k.host_end = k.host_start + 1;
2628 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2629 gomp_mutex_unlock (&devicep->lock);
2630 if (tgt_fn == NULL)
2631 return NULL;
2632
2633 return (void *) tgt_fn->tgt_offset;
2634 }
2635 }
2636
2637 /* Called when encountering a target directive. If DEVICE
2638 is GOMP_DEVICE_ICV, use the device-var ICV. If it is
2639 GOMP_DEVICE_HOST_FALLBACK (or any value
2640 larger than the last available hw device), use the host fallback.
2641 FN is the address of the host code; UNUSED is part of the current ABI, but
2642 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2643 with MAPNUM entries, holding the addresses of the host objects, the
2644 sizes of the host objects (resp., for pointer kinds, the pointer bias,
2645 with sizeof (void *) assumed as the size), and the map kinds. */
2646
2647 void
2648 GOMP_target (int device, void (*fn) (void *), const void *unused,
2649 size_t mapnum, void **hostaddrs, size_t *sizes,
2650 unsigned char *kinds)
2651 {
2652 struct gomp_device_descr *devicep = resolve_device (device, true);
2653
2654 void *fn_addr;
2655 if (devicep == NULL
2656 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2657 /* All shared memory devices should use the GOMP_target_ext function. */
2658 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2659 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2660 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2661
2662 htab_t refcount_set = htab_create (mapnum);
2663 struct target_mem_desc *tgt_vars
2664 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2665 &refcount_set, GOMP_MAP_VARS_TARGET);
2666 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2667 NULL);
2668 htab_clear (refcount_set);
2669 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2670 htab_free (refcount_set);
2671 }
2672
2673 static inline unsigned int
2674 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2675 {
2676 /* If we cannot run asynchronously, simply ignore nowait. */
2677 if (devicep != NULL && devicep->async_run_func == NULL)
2678 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2679
2680 return flags;
2681 }
2682
2683 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2684 and several arguments have been added:
2685 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2686 DEPEND is an array of dependencies, see GOMP_task for details.
2687
2688 ARGS is a pointer to an array consisting of a variable number of both
2689 device-independent and device-specific arguments; each argument takes one or
2690 two elements, where the first specifies for which device it is intended, the
2691 type and optionally also the value. If the value is not present in the first
2692 element, the whole second element holds the actual value. The last element of
2693 the array is a single NULL. Among the device-independent arguments are, for
2694 example, NUM_TEAMS and THREAD_LIMIT.
2695
2696 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2697 that value, 1 if the teams construct is not present, 0 if the
2698 teams construct does not have a num_teams clause and so the choice is
2699 implementation defined, and -1 if it can't be determined on the host
2700 what value GOMP_teams will have on the device.
2701 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2702 body with that value, 0 if the teams construct does not have a thread_limit
2703 clause or the teams construct is not present, and -1 if it can't be
2704 determined on the host what value GOMP_teams will have on the device. */
2705
2706 void
2707 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2708 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2709 unsigned int flags, void **depend, void **args)
2710 {
2711 struct gomp_device_descr *devicep = resolve_device (device, true);
2712 size_t tgt_align = 0, tgt_size = 0;
2713 bool fpc_done = false;
2714
2715 flags = clear_unsupported_flags (devicep, flags);
2716
2717 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2718 {
2719 struct gomp_thread *thr = gomp_thread ();
2720 /* Create a team if we don't have any around, as nowait
2721 target tasks make sense to run asynchronously even when
2722 outside of any parallel. */
2723 if (__builtin_expect (thr->ts.team == NULL, 0))
2724 {
2725 struct gomp_team *team = gomp_new_team (1);
2726 struct gomp_task *task = thr->task;
2727 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2728 team->prev_ts = thr->ts;
2729 thr->ts.team = team;
2730 thr->ts.team_id = 0;
2731 thr->ts.work_share = &team->work_shares[0];
2732 thr->ts.last_work_share = NULL;
2733 #ifdef HAVE_SYNC_BUILTINS
2734 thr->ts.single_count = 0;
2735 #endif
2736 thr->ts.static_trip = 0;
2737 thr->task = &team->implicit_task[0];
2738 gomp_init_task (thr->task, NULL, icv);
2739 if (task)
2740 {
2741 thr->task = task;
2742 gomp_end_task ();
2743 free (task);
2744 thr->task = &team->implicit_task[0];
2745 }
2746 else
2747 pthread_setspecific (gomp_thread_destructor, thr);
2748 }
2749 if (thr->ts.team
2750 && !thr->task->final_task)
2751 {
2752 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2753 sizes, kinds, flags, depend, args,
2754 GOMP_TARGET_TASK_BEFORE_MAP);
2755 return;
2756 }
2757 }
2758
2759 /* If there are depend clauses, but nowait is not present
2760 (or we are in a final task), block the parent task until the
2761 dependencies are resolved and then just continue with the rest
2762 of the function as if it were a merged task. */
2763 if (depend != NULL)
2764 {
2765 struct gomp_thread *thr = gomp_thread ();
2766 if (thr->task && thr->task->depend_hash)
2767 {
2768 /* If we might need to wait, copy firstprivate now. */
2769 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2770 &tgt_align, &tgt_size);
2771 if (tgt_align)
2772 {
2773 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2774 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2775 tgt_align, tgt_size);
2776 }
2777 fpc_done = true;
2778 gomp_task_maybe_wait_for_dependencies (depend);
2779 }
2780 }
2781
2782 void *fn_addr;
2783 if (devicep == NULL
2784 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2785 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2786 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2787 {
2788 if (!fpc_done)
2789 {
2790 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2791 &tgt_align, &tgt_size);
2792 if (tgt_align)
2793 {
2794 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2795 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2796 tgt_align, tgt_size);
2797 }
2798 }
2799 gomp_target_fallback (fn, hostaddrs, devicep, args);
2800 return;
2801 }
2802
2803 struct target_mem_desc *tgt_vars;
2804 htab_t refcount_set = NULL;
2805
2806 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2807 {
2808 if (!fpc_done)
2809 {
2810 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2811 &tgt_align, &tgt_size);
2812 if (tgt_align)
2813 {
2814 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2815 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2816 tgt_align, tgt_size);
2817 }
2818 }
2819 tgt_vars = NULL;
2820 }
2821 else
2822 {
2823 refcount_set = htab_create (mapnum);
2824 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2825 true, &refcount_set, GOMP_MAP_VARS_TARGET);
2826 }
2827 devicep->run_func (devicep->target_id, fn_addr,
2828 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2829 args);
2830 if (tgt_vars)
2831 {
2832 htab_clear (refcount_set);
2833 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2834 }
2835 if (refcount_set)
2836 htab_free (refcount_set);
2837 }
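
/* Usage sketch (editorial, not part of libgomp): GOMP_target_ext is the entry
   point the compiler emits for '#pragma omp target' with the newer ABI.  The
   nowait/depend handling above corresponds to source along these lines (names
   are made up):

     void
     pipeline (float *a, float *b, int n)
     {
     #pragma omp target map(tofrom: a[0:n]) depend(out: a) nowait
       for (int i = 0; i < n; i++)
         a[i] *= 2.0f;

     #pragma omp target map(to: a[0:n]) map(from: b[0:n]) depend(in: a) nowait
       for (int i = 0; i < n; i++)
         b[i] = a[i] + 1.0f;

     #pragma omp taskwait
     }

   With nowait a deferrable target task is created (gomp_create_target_task
   above); with depend but without nowait the parent task first waits for its
   dependencies and then executes the region as if it were a merged task.  */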
2838
2839 /* Host fallback for GOMP_target_data{,_ext} routines. */
2840
2841 static void
2842 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2843 {
2844 struct gomp_task_icv *icv = gomp_icv (false);
2845
2846 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2847 && devicep != NULL)
2848 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2849 "be used for offloading");
2850
2851 if (icv->target_data)
2852 {
2853 /* Even when doing a host fallback, if there are any active
2854 #pragma omp target data constructs, we need to remember the
2855 new #pragma omp target data, otherwise GOMP_target_end_data
2856 would get out of sync. */
2857 struct target_mem_desc *tgt
2858 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2859 NULL, GOMP_MAP_VARS_DATA);
2860 tgt->prev = icv->target_data;
2861 icv->target_data = tgt;
2862 }
2863 }
2864
2865 void
2866 GOMP_target_data (int device, const void *unused, size_t mapnum,
2867 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2868 {
2869 struct gomp_device_descr *devicep = resolve_device (device, true);
2870
2871 if (devicep == NULL
2872 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2873 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2874 return gomp_target_data_fallback (devicep);
2875
2876 struct target_mem_desc *tgt
2877 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2878 NULL, GOMP_MAP_VARS_DATA);
2879 struct gomp_task_icv *icv = gomp_icv (true);
2880 tgt->prev = icv->target_data;
2881 icv->target_data = tgt;
2882 }
2883
2884 void
2885 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2886 size_t *sizes, unsigned short *kinds)
2887 {
2888 struct gomp_device_descr *devicep = resolve_device (device, true);
2889
2890 if (devicep == NULL
2891 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2892 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2893 return gomp_target_data_fallback (devicep);
2894
2895 struct target_mem_desc *tgt
2896 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2897 NULL, GOMP_MAP_VARS_DATA);
2898 struct gomp_task_icv *icv = gomp_icv (true);
2899 tgt->prev = icv->target_data;
2900 icv->target_data = tgt;
2901 }
2902
2903 void
2904 GOMP_target_end_data (void)
2905 {
2906 struct gomp_task_icv *icv = gomp_icv (false);
2907 if (icv->target_data)
2908 {
2909 struct target_mem_desc *tgt = icv->target_data;
2910 icv->target_data = tgt->prev;
2911 gomp_unmap_vars (tgt, true, NULL);
2912 }
2913 }
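
/* Usage sketch (editorial, not part of libgomp): GOMP_target_data{,_ext} and
   GOMP_target_end_data bracket a structured '#pragma omp target data' region;
   the icv->target_data chain above keeps nested regions (and host fallback)
   balanced.  Illustrative source:

     void
     process (double *x, int n)
     {
     #pragma omp target data map(tofrom: x[0:n])
       {
     #pragma omp target
         for (int i = 0; i < n; i++)
           x[i] *= 2.0;

     #pragma omp target
         for (int i = 0; i < n; i++)
           x[i] += 1.0;
       }  // data region ends: x[0:n] is unmapped and copied back once
     }

   The two inner target regions reuse the mapping created on entry to the data
   region instead of mapping x[0:n] twice.  */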
2914
2915 void
2916 GOMP_target_update (int device, const void *unused, size_t mapnum,
2917 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2918 {
2919 struct gomp_device_descr *devicep = resolve_device (device, true);
2920
2921 if (devicep == NULL
2922 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2923 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2924 return;
2925
2926 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2927 }
2928
2929 void
2930 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2931 size_t *sizes, unsigned short *kinds,
2932 unsigned int flags, void **depend)
2933 {
2934 struct gomp_device_descr *devicep = resolve_device (device, true);
2935
2936 /* If there are depend clauses, but nowait is not present,
2937 block the parent task until the dependencies are resolved
2938 and then just continue with the rest of the function as if it
2939 were a merged task. Until we are able to schedule tasks during
2940 variable mapping or unmapping, ignore nowait if depend clauses
2941 are not present. */
2942 if (depend != NULL)
2943 {
2944 struct gomp_thread *thr = gomp_thread ();
2945 if (thr->task && thr->task->depend_hash)
2946 {
2947 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2948 && thr->ts.team
2949 && !thr->task->final_task)
2950 {
2951 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2952 mapnum, hostaddrs, sizes, kinds,
2953 flags | GOMP_TARGET_FLAG_UPDATE,
2954 depend, NULL, GOMP_TARGET_TASK_DATA))
2955 return;
2956 }
2957 else
2958 {
2959 struct gomp_team *team = thr->ts.team;
2960 /* If parallel or taskgroup has been cancelled, don't start new
2961 tasks. */
2962 if (__builtin_expect (gomp_cancel_var, 0) && team)
2963 {
2964 if (gomp_team_barrier_cancelled (&team->barrier))
2965 return;
2966 if (thr->task->taskgroup)
2967 {
2968 if (thr->task->taskgroup->cancelled)
2969 return;
2970 if (thr->task->taskgroup->workshare
2971 && thr->task->taskgroup->prev
2972 && thr->task->taskgroup->prev->cancelled)
2973 return;
2974 }
2975 }
2976
2977 gomp_task_maybe_wait_for_dependencies (depend);
2978 }
2979 }
2980 }
2981
2982 if (devicep == NULL
2983 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2984 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2985 return;
2986
2987 struct gomp_thread *thr = gomp_thread ();
2988 struct gomp_team *team = thr->ts.team;
2989 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2990 if (__builtin_expect (gomp_cancel_var, 0) && team)
2991 {
2992 if (gomp_team_barrier_cancelled (&team->barrier))
2993 return;
2994 if (thr->task->taskgroup)
2995 {
2996 if (thr->task->taskgroup->cancelled)
2997 return;
2998 if (thr->task->taskgroup->workshare
2999 && thr->task->taskgroup->prev
3000 && thr->task->taskgroup->prev->cancelled)
3001 return;
3002 }
3003 }
3004
3005 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3006 }
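
/* Usage sketch (editorial, not part of libgomp): GOMP_target_update_ext
   implements '#pragma omp target update', optionally deferred as a target
   task when both nowait and depend clauses are present.  For instance:

     void
     refresh (double *x, int n)
     {
     #pragma omp target data map(alloc: x[0:n])
       {
         for (int i = 0; i < n; i++)
           x[i] = i;                        // host writes
     #pragma omp target update to(x[0:n])   // push host values to the device

     #pragma omp target
         for (int i = 0; i < n; i++)
           x[i] *= 2.0;
     #pragma omp target update from(x[0:n]) // pull device values back
       }
     }

   Each update ends up in gomp_update, which only copies the named parts of
   objects that are already mapped on the device.  */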
3007
3008 static void
3009 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3010 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3011 htab_t *refcount_set)
3012 {
3013 const int typemask = 0xff;
3014 size_t i;
3015 gomp_mutex_lock (&devicep->lock);
3016 if (devicep->state == GOMP_DEVICE_FINALIZED)
3017 {
3018 gomp_mutex_unlock (&devicep->lock);
3019 return;
3020 }
3021
3022 for (i = 0; i < mapnum; i++)
3023 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3024 {
3025 struct splay_tree_key_s cur_node;
3026 cur_node.host_start = (uintptr_t) hostaddrs[i];
3027 cur_node.host_end = cur_node.host_start + sizeof (void *);
3028 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3029
3030 if (n)
3031 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3032 false, NULL);
3033 }
3034
3035 int nrmvars = 0;
3036 splay_tree_key remove_vars[mapnum];
3037
3038 for (i = 0; i < mapnum; i++)
3039 {
3040 struct splay_tree_key_s cur_node;
3041 unsigned char kind = kinds[i] & typemask;
3042 switch (kind)
3043 {
3044 case GOMP_MAP_FROM:
3045 case GOMP_MAP_ALWAYS_FROM:
3046 case GOMP_MAP_DELETE:
3047 case GOMP_MAP_RELEASE:
3048 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3049 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3050 cur_node.host_start = (uintptr_t) hostaddrs[i];
3051 cur_node.host_end = cur_node.host_start + sizes[i];
3052 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3053 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3054 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3055 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3056 if (!k)
3057 continue;
3058
3059 bool delete_p = (kind == GOMP_MAP_DELETE
3060 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3061 bool do_copy, do_remove;
3062 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3063 &do_remove);
3064
3065 if ((kind == GOMP_MAP_FROM && do_copy)
3066 || kind == GOMP_MAP_ALWAYS_FROM)
3067 {
3068 if (k->aux && k->aux->attach_count)
3069 {
3070 /* We have to be careful not to overwrite still attached
3071 pointers during the copyback to host. */
3072 uintptr_t addr = k->host_start;
3073 while (addr < k->host_end)
3074 {
3075 size_t i = (addr - k->host_start) / sizeof (void *);
3076 if (k->aux->attach_count[i] == 0)
3077 gomp_copy_dev2host (devicep, NULL, (void *) addr,
3078 (void *) (k->tgt->tgt_start
3079 + k->tgt_offset
3080 + addr - k->host_start),
3081 sizeof (void *));
3082 addr += sizeof (void *);
3083 }
3084 }
3085 else
3086 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3087 (void *) (k->tgt->tgt_start + k->tgt_offset
3088 + cur_node.host_start
3089 - k->host_start),
3090 cur_node.host_end - cur_node.host_start);
3091 }
3092
3093 /* Structure element lists are removed all at once, which
3094 may cause immediate deallocation of the target_mem_desc and hence
3095 errors if we still have following element siblings to copy back.
3096 While we're at it, it also seems more disciplined to simply
3097 queue all removals together for processing below.
3098
3099 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3100 not have this problem, since it maintains an additional
3101 tgt->refcount = 1 reference to the target_mem_desc to start with.
3102 */
3103 if (do_remove)
3104 remove_vars[nrmvars++] = k;
3105 break;
3106
3107 case GOMP_MAP_DETACH:
3108 break;
3109 default:
3110 gomp_mutex_unlock (&devicep->lock);
3111 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3112 kind);
3113 }
3114 }
3115
3116 for (int i = 0; i < nrmvars; i++)
3117 gomp_remove_var (devicep, remove_vars[i]);
3118
3119 gomp_mutex_unlock (&devicep->lock);
3120 }
3121
3122 void
3123 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3124 size_t *sizes, unsigned short *kinds,
3125 unsigned int flags, void **depend)
3126 {
3127 struct gomp_device_descr *devicep = resolve_device (device, true);
3128
3129 /* If there are depend clauses, but nowait is not present,
3130 block the parent task until the dependencies are resolved
3131 and then just continue with the rest of the function as if it
3132 were a merged task. Until we are able to schedule tasks during
3133 variable mapping or unmapping, ignore nowait if depend clauses
3134 are not present. */
3135 if (depend != NULL)
3136 {
3137 struct gomp_thread *thr = gomp_thread ();
3138 if (thr->task && thr->task->depend_hash)
3139 {
3140 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3141 && thr->ts.team
3142 && !thr->task->final_task)
3143 {
3144 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3145 mapnum, hostaddrs, sizes, kinds,
3146 flags, depend, NULL,
3147 GOMP_TARGET_TASK_DATA))
3148 return;
3149 }
3150 else
3151 {
3152 struct gomp_team *team = thr->ts.team;
3153 /* If parallel or taskgroup has been cancelled, don't start new
3154 tasks. */
3155 if (__builtin_expect (gomp_cancel_var, 0) && team)
3156 {
3157 if (gomp_team_barrier_cancelled (&team->barrier))
3158 return;
3159 if (thr->task->taskgroup)
3160 {
3161 if (thr->task->taskgroup->cancelled)
3162 return;
3163 if (thr->task->taskgroup->workshare
3164 && thr->task->taskgroup->prev
3165 && thr->task->taskgroup->prev->cancelled)
3166 return;
3167 }
3168 }
3169
3170 gomp_task_maybe_wait_for_dependencies (depend);
3171 }
3172 }
3173 }
3174
3175 if (devicep == NULL
3176 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3177 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3178 return;
3179
3180 struct gomp_thread *thr = gomp_thread ();
3181 struct gomp_team *team = thr->ts.team;
3182 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3183 if (__builtin_expect (gomp_cancel_var, 0) && team)
3184 {
3185 if (gomp_team_barrier_cancelled (&team->barrier))
3186 return;
3187 if (thr->task->taskgroup)
3188 {
3189 if (thr->task->taskgroup->cancelled)
3190 return;
3191 if (thr->task->taskgroup->workshare
3192 && thr->task->taskgroup->prev
3193 && thr->task->taskgroup->prev->cancelled)
3194 return;
3195 }
3196 }
3197
3198 htab_t refcount_set = htab_create (mapnum);
3199
3200 /* The variables are mapped separately such that they can be released
3201 independently. */
3202 size_t i, j;
3203 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3204 for (i = 0; i < mapnum; i++)
3205 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3206 {
3207 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3208 &kinds[i], true, &refcount_set,
3209 GOMP_MAP_VARS_ENTER_DATA);
3210 i += sizes[i];
3211 }
3212 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3213 {
3214 for (j = i + 1; j < mapnum; j++)
3215 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3216 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3217 break;
3218 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3219 &kinds[i], true, &refcount_set,
3220 GOMP_MAP_VARS_ENTER_DATA);
3221 i += j - i - 1;
3222 }
3223 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3224 {
3225 /* An attach operation must be processed together with the mapped
3226 base-pointer list item. */
3227 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3228 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3229 i += 1;
3230 }
3231 else
3232 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3233 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3234 else
3235 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3236 htab_free (refcount_set);
3237 }
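
/* Usage sketch (editorial, not part of libgomp): GOMP_target_enter_exit_data
   serves both '#pragma omp target enter data' and '... exit data'; the
   GOMP_TARGET_FLAG_EXIT_DATA bit selects between the mapping loop above and
   gomp_exit_data.  Typical unstructured lifetime management (illustrative
   names):

     #include <stdlib.h>

     double *
     device_buf_init (int n)
     {
       double *x = malloc (n * sizeof *x);
     #pragma omp target enter data map(alloc: x[0:n])
       return x;
     }

     void
     device_buf_fini (double *x, int n)
     {
     #pragma omp target exit data map(delete: x[0:n])
       free (x);
     }

   Each map clause is mapped separately (see the per-item loop above) so that
   the matching exit data can release the items independently.  */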
3238
3239 bool
3240 gomp_target_task_fn (void *data)
3241 {
3242 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3243 struct gomp_device_descr *devicep = ttask->devicep;
3244
3245 if (ttask->fn != NULL)
3246 {
3247 void *fn_addr;
3248 if (devicep == NULL
3249 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3250 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3251 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3252 {
3253 ttask->state = GOMP_TARGET_TASK_FALLBACK;
3254 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3255 ttask->args);
3256 return false;
3257 }
3258
3259 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3260 {
3261 if (ttask->tgt)
3262 gomp_unmap_vars (ttask->tgt, true, NULL);
3263 return false;
3264 }
3265
3266 void *actual_arguments;
3267 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3268 {
3269 ttask->tgt = NULL;
3270 actual_arguments = ttask->hostaddrs;
3271 }
3272 else
3273 {
3274 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3275 NULL, ttask->sizes, ttask->kinds, true,
3276 NULL, GOMP_MAP_VARS_TARGET);
3277 actual_arguments = (void *) ttask->tgt->tgt_start;
3278 }
3279 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3280
3281 assert (devicep->async_run_func);
3282 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3283 ttask->args, (void *) ttask);
3284 return true;
3285 }
3286 else if (devicep == NULL
3287 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3288 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3289 return false;
3290
3291 size_t i;
3292 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3293 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3294 ttask->kinds, true);
3295 else
3296 {
3297 htab_t refcount_set = htab_create (ttask->mapnum);
3298 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3299 for (i = 0; i < ttask->mapnum; i++)
3300 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3301 {
3302 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3303 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3304 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3305 i += ttask->sizes[i];
3306 }
3307 else
3308 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3309 &ttask->kinds[i], true, &refcount_set,
3310 GOMP_MAP_VARS_ENTER_DATA);
3311 else
3312 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3313 ttask->kinds, &refcount_set);
3314 htab_free (refcount_set);
3315 }
3316 return false;
3317 }
3318
3319 void
3320 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3321 {
3322 if (thread_limit)
3323 {
3324 struct gomp_task_icv *icv = gomp_icv (true);
3325 icv->thread_limit_var
3326 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3327 }
3328 (void) num_teams;
3329 }
3330
3331 bool
3332 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3333 unsigned int thread_limit, bool first)
3334 {
3335 struct gomp_thread *thr = gomp_thread ();
3336 if (first)
3337 {
3338 if (thread_limit)
3339 {
3340 struct gomp_task_icv *icv = gomp_icv (true);
3341 icv->thread_limit_var
3342 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3343 }
3344 (void) num_teams_high;
3345 if (num_teams_low == 0)
3346 num_teams_low = 1;
3347 thr->num_teams = num_teams_low - 1;
3348 thr->team_num = 0;
3349 }
3350 else if (thr->team_num == thr->num_teams)
3351 return false;
3352 else
3353 ++thr->team_num;
3354 return true;
3355 }
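
/* Usage sketch (editorial, not part of libgomp): GOMP_teams4 supports
   executing a teams league one team at a time: the first call records the
   requested number of teams, and each later call either advances
   thr->team_num or reports that all teams have run.  It backs source such as
   (illustrative only):

     #include <omp.h>

     int
     last_team (void)
     {
       int last = -1;
     #pragma omp target teams num_teams(4) map(tofrom: last)
       if (omp_get_team_num () == omp_get_num_teams () - 1)
         last = omp_get_team_num ();
       return last;   // 3 if four teams were actually created
     }
*/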
3356
3357 void *
3358 omp_target_alloc (size_t size, int device_num)
3359 {
3360 if (device_num == omp_initial_device
3361 || device_num == gomp_get_num_devices ())
3362 return malloc (size);
3363
3364 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3365 if (devicep == NULL)
3366 return NULL;
3367
3368 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3369 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3370 return malloc (size);
3371
3372 gomp_mutex_lock (&devicep->lock);
3373 void *ret = devicep->alloc_func (devicep->target_id, size);
3374 gomp_mutex_unlock (&devicep->lock);
3375 return ret;
3376 }
3377
3378 void
3379 omp_target_free (void *device_ptr, int device_num)
3380 {
3381 if (device_num == omp_initial_device
3382 || device_num == gomp_get_num_devices ())
3383 {
3384 free (device_ptr);
3385 return;
3386 }
3387
3388 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3389 if (devicep == NULL || device_ptr == NULL)
3390 return;
3391
3392 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3393 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3394 {
3395 free (device_ptr);
3396 return;
3397 }
3398
3399 gomp_mutex_lock (&devicep->lock);
3400 gomp_free_device_memory (devicep, device_ptr);
3401 gomp_mutex_unlock (&devicep->lock);
3402 }
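
/* Usage sketch (editorial, not part of libgomp): omp_target_alloc and
   omp_target_free manage raw device memory that never enters the mapping
   tables; such pointers are typically handed to target regions via
   is_device_ptr.  For example:

     #include <omp.h>

     int
     fill_device_buffer (void)
     {
       int dev = omp_get_default_device ();
       size_t n = 1024;
       double *d = (double *) omp_target_alloc (n * sizeof (double), dev);
       if (d == NULL)
         return 1;    // allocation failed (or no device memory available)

     #pragma omp target is_device_ptr(d) device(dev)
       for (size_t i = 0; i < n; i++)
         d[i] = (double) i;

       omp_target_free (d, dev);
       return 0;
     }

   As the code above shows, for the host device and shared-memory devices both
   routines degenerate to plain malloc/free.  */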
3403
3404 int
3405 omp_target_is_present (const void *ptr, int device_num)
3406 {
3407 if (device_num == omp_initial_device
3408 || device_num == gomp_get_num_devices ())
3409 return 1;
3410
3411 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3412 if (devicep == NULL)
3413 return 0;
3414
3415 if (ptr == NULL)
3416 return 1;
3417
3418 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3419 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3420 return 1;
3421
3422 gomp_mutex_lock (&devicep->lock);
3423 struct splay_tree_s *mem_map = &devicep->mem_map;
3424 struct splay_tree_key_s cur_node;
3425
3426 cur_node.host_start = (uintptr_t) ptr;
3427 cur_node.host_end = cur_node.host_start;
3428 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3429 int ret = n != NULL;
3430 gomp_mutex_unlock (&devicep->lock);
3431 return ret;
3432 }
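
/* Usage sketch (editorial, not part of libgomp): omp_target_is_present asks
   whether a host address is currently mapped on the given device; the lookup
   above also matches zero-length array sections.  Illustrative use:

     #include <omp.h>
     #include <assert.h>

     void
     check_presence (double *x, int n)
     {
       int dev = omp_get_default_device ();
     #pragma omp target enter data map(to: x[0:n]) device(dev)
       assert (omp_target_is_present (x, dev));
     #pragma omp target exit data map(release: x[0:n]) device(dev)
     }

   On the host device and on shared-memory devices the function always
   returns 1, as the early returns above show.  */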
3433
3434 static int
3435 omp_target_memcpy_check (int dst_device_num, int src_device_num,
3436 struct gomp_device_descr **dst_devicep,
3437 struct gomp_device_descr **src_devicep)
3438 {
3439 if (dst_device_num != gomp_get_num_devices ()
3440 /* gomp_get_num_devices above has to be called unconditionally. */
3441 && dst_device_num != omp_initial_device)
3442 {
3443 *dst_devicep = resolve_device (dst_device_num, false);
3444 if (*dst_devicep == NULL)
3445 return EINVAL;
3446
3447 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3448 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3449 *dst_devicep = NULL;
3450 }
3451
3452 if (src_device_num != num_devices_openmp
3453 && src_device_num != omp_initial_device)
3454 {
3455 *src_devicep = resolve_device (src_device_num, false);
3456 if (*src_devicep == NULL)
3457 return EINVAL;
3458
3459 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3460 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3461 *src_devicep = NULL;
3462 }
3463
3464 return 0;
3465 }
3466
3467 static int
3468 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
3469 size_t dst_offset, size_t src_offset,
3470 struct gomp_device_descr *dst_devicep,
3471 struct gomp_device_descr *src_devicep)
3472 {
3473 bool ret;
3474 if (src_devicep == NULL && dst_devicep == NULL)
3475 {
3476 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3477 return 0;
3478 }
3479 if (src_devicep == NULL)
3480 {
3481 gomp_mutex_lock (&dst_devicep->lock);
3482 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3483 (char *) dst + dst_offset,
3484 (char *) src + src_offset, length);
3485 gomp_mutex_unlock (&dst_devicep->lock);
3486 return (ret ? 0 : EINVAL);
3487 }
3488 if (dst_devicep == NULL)
3489 {
3490 gomp_mutex_lock (&src_devicep->lock);
3491 ret = src_devicep->dev2host_func (src_devicep->target_id,
3492 (char *) dst + dst_offset,
3493 (char *) src + src_offset, length);
3494 gomp_mutex_unlock (&src_devicep->lock);
3495 return (ret ? 0 : EINVAL);
3496 }
3497 if (src_devicep == dst_devicep)
3498 {
3499 gomp_mutex_lock (&src_devicep->lock);
3500 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3501 (char *) dst + dst_offset,
3502 (char *) src + src_offset, length);
3503 gomp_mutex_unlock (&src_devicep->lock);
3504 return (ret ? 0 : EINVAL);
3505 }
3506 return EINVAL;
3507 }
3508
3509 int
3510 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
3511 size_t src_offset, int dst_device_num, int src_device_num)
3512 {
3513 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3514 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
3515 &dst_devicep, &src_devicep);
3516
3517 if (ret)
3518 return ret;
3519
3520 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
3521 dst_devicep, src_devicep);
3522
3523 return ret;
3524 }
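
/* Usage sketch (editorial, not part of libgomp): omp_target_memcpy copies
   between any combination of host and device memory, selected solely by the
   two device numbers (the host is omp_initial_device or the value returned
   by omp_get_num_devices ()).  For example, filling a device buffer from the
   host:

     #include <omp.h>

     int
     upload (const double *host_src, size_t n)
     {
       int dev = omp_get_default_device ();
       int host = omp_get_initial_device ();
       double *dst = (double *) omp_target_alloc (n * sizeof (double), dev);
       if (dst == NULL)
         return -1;

       int err = omp_target_memcpy (dst, host_src, n * sizeof (double),
                                    0, 0,        // dst_offset, src_offset
                                    dev, host);  // dst and src device numbers
       omp_target_free (dst, dev);
       return err;   // 0 on success, EINVAL otherwise (see above)
     }
*/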
3525
3526 typedef struct
3527 {
3528 void *dst;
3529 const void *src;
3530 size_t length;
3531 size_t dst_offset;
3532 size_t src_offset;
3533 struct gomp_device_descr *dst_devicep;
3534 struct gomp_device_descr *src_devicep;
3535 } omp_target_memcpy_data;
3536
3537 static void
3538 omp_target_memcpy_async_helper (void *args)
3539 {
3540 omp_target_memcpy_data *a = args;
3541 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
3542 a->src_offset, a->dst_devicep, a->src_devicep))
3543 gomp_fatal ("omp_target_memcpy failed");
3544 }
3545
3546 int
3547 omp_target_memcpy_async (void *dst, const void *src, size_t length,
3548 size_t dst_offset, size_t src_offset,
3549 int dst_device_num, int src_device_num,
3550 int depobj_count, omp_depend_t *depobj_list)
3551 {
3552 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3553 unsigned int flags = 0;
3554 void *depend[depobj_count + 5];
3555 int i;
3556 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
3557 &dst_devicep, &src_devicep);
3558
3559 omp_target_memcpy_data s = {
3560 .dst = dst,
3561 .src = src,
3562 .length = length,
3563 .dst_offset = dst_offset,
3564 .src_offset = src_offset,
3565 .dst_devicep = dst_devicep,
3566 .src_devicep = src_devicep
3567 };
3568
3569 if (check)
3570 return check;
3571
3572 if (depobj_count > 0 && depobj_list != NULL)
3573 {
3574 flags |= GOMP_TASK_FLAG_DEPEND;
3575 depend[0] = 0;
3576 depend[1] = (void *) (uintptr_t) depobj_count;
3577 depend[2] = depend[3] = depend[4] = 0;
3578 for (i = 0; i < depobj_count; ++i)
3579 depend[i + 5] = &depobj_list[i];
3580 }
3581
3582 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
3583 __alignof__ (s), true, flags, depend, 0, NULL);
3584
3585 return 0;
3586 }
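
/* Usage sketch (hypothetical, not part of libgomp): the routine above merely
   wraps omp_target_memcpy_copy in a task via GOMP_task, so the copy runs
   asynchronously and must be awaited by the caller, e.g. with a plain
   taskwait when no depend objects are passed:

     omp_target_memcpy_async (dev_buf, host_buf, sizeof (host_buf), 0, 0,
			      dev, host, 0, NULL);
     #pragma omp taskwait  */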
3587
3588 static int
3589 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3590 int num_dims, const size_t *volume,
3591 const size_t *dst_offsets,
3592 const size_t *src_offsets,
3593 const size_t *dst_dimensions,
3594 const size_t *src_dimensions,
3595 struct gomp_device_descr *dst_devicep,
3596 struct gomp_device_descr *src_devicep)
3597 {
3598 size_t dst_slice = element_size;
3599 size_t src_slice = element_size;
3600 size_t j, dst_off, src_off, length;
3601 int i, ret;
3602
3603 if (num_dims == 1)
3604 {
3605 if (__builtin_mul_overflow (element_size, volume[0], &length)
3606 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3607 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3608 return EINVAL;
3609 if (dst_devicep == NULL && src_devicep == NULL)
3610 {
3611 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3612 length);
3613 ret = 1;
3614 }
3615 else if (src_devicep == NULL)
3616 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3617 (char *) dst + dst_off,
3618 (const char *) src + src_off,
3619 length);
3620 else if (dst_devicep == NULL)
3621 ret = src_devicep->dev2host_func (src_devicep->target_id,
3622 (char *) dst + dst_off,
3623 (const char *) src + src_off,
3624 length);
3625 else if (src_devicep == dst_devicep)
3626 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3627 (char *) dst + dst_off,
3628 (const char *) src + src_off,
3629 length);
3630 else
3631 ret = 0;
3632 return ret ? 0 : EINVAL;
3633 }
3634
3635 /* FIXME: it would be nice to have some plugin function to handle
3636 num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
3637 counts can be handled by the generic recursion below, and for host-to-host
3638 copies the generic recursion should be used even for num_dims >= 2. */
3639
3640 for (i = 1; i < num_dims; i++)
3641 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3642 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3643 return EINVAL;
3644 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3645 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3646 return EINVAL;
3647 for (j = 0; j < volume[0]; j++)
3648 {
3649 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3650 (const char *) src + src_off,
3651 element_size, num_dims - 1,
3652 volume + 1, dst_offsets + 1,
3653 src_offsets + 1, dst_dimensions + 1,
3654 src_dimensions + 1, dst_devicep,
3655 src_devicep);
3656 if (ret)
3657 return ret;
3658 dst_off += dst_slice;
3659 src_off += src_slice;
3660 }
3661 return 0;
3662 }
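
/* Worked example of the recursion above (illustrative numbers only): with
   element_size == 4, num_dims == 2, volume == {4, 5}, dst_offsets == {1, 0},
   src_offsets == {2, 3}, dst_dimensions == {6, 8} and
   src_dimensions == {10, 10}, the loop computes dst_slice == 32 and
   src_slice == 40 bytes per row and initial dst_off == 32, src_off == 80.
   It then performs four 1-D copies of 20 bytes each, the first from SRC
   bytes 92..111 to DST bytes 32..51, advancing SRC by 40 and DST by 32 bytes
   per row.  In other words, a 4x5 block starting at element [2][3] of a
   10x10 source array is copied to element [1][0] of a 6x8 destination.  */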
3663
3664 static int
3665 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
3666 int src_device_num,
3667 struct gomp_device_descr **dst_devicep,
3668 struct gomp_device_descr **src_devicep)
3669 {
3670 if (!dst && !src)
3671 return INT_MAX;
3672
3673 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
3674 dst_devicep, src_devicep);
3675 if (ret)
3676 return ret;
3677
3678 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
3679 return EINVAL;
3680
3681 return 0;
3682 }
3683
3684 static int
3685 omp_target_memcpy_rect_copy (void *dst, const void *src,
3686 size_t element_size, int num_dims,
3687 const size_t *volume, const size_t *dst_offsets,
3688 const size_t *src_offsets,
3689 const size_t *dst_dimensions,
3690 const size_t *src_dimensions,
3691 struct gomp_device_descr *dst_devicep,
3692 struct gomp_device_descr *src_devicep)
3693 {
3694 if (src_devicep)
3695 gomp_mutex_lock (&src_devicep->lock);
3696 else if (dst_devicep)
3697 gomp_mutex_lock (&dst_devicep->lock);
3698 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3699 volume, dst_offsets, src_offsets,
3700 dst_dimensions, src_dimensions,
3701 dst_devicep, src_devicep);
3702 if (src_devicep)
3703 gomp_mutex_unlock (&src_devicep->lock);
3704 else if (dst_devicep)
3705 gomp_mutex_unlock (&dst_devicep->lock);
3706
3707 return ret;
3708 }
3709
3710 int
3711 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3712 int num_dims, const size_t *volume,
3713 const size_t *dst_offsets,
3714 const size_t *src_offsets,
3715 const size_t *dst_dimensions,
3716 const size_t *src_dimensions,
3717 int dst_device_num, int src_device_num)
3718 {
3719 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3720
3721 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
3722 src_device_num, &dst_devicep,
3723 &src_devicep);
3724
3725 if (check)
3726 return check;
3727
3728 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
3729 volume, dst_offsets, src_offsets,
3730 dst_dimensions, src_dimensions,
3731 dst_devicep, src_devicep);
3732
3733 return ret;
3734 }
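
/* Usage sketch (hypothetical, not part of libgomp): copying the interior
   (N-2)x(N-2) block of an NxN host matrix, skipping the one-element
   boundary, into the same position of a same-shaped device allocation:

     #include <omp.h>

     enum { N = 10 };
     double a[N][N];
     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double *b = omp_target_alloc (sizeof (a), dev);
     size_t volume[2] = { N - 2, N - 2 };
     size_t offsets[2] = { 1, 1 };
     size_t dims[2] = { N, N };
     omp_target_memcpy_rect (b, a, sizeof (double), 2, volume,
			     offsets, offsets, dims, dims, dev, host);  */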
3735
3736 typedef struct
3737 {
3738 void *dst;
3739 const void *src;
3740 size_t element_size;
3741 const size_t *volume;
3742 const size_t *dst_offsets;
3743 const size_t *src_offsets;
3744 const size_t *dst_dimensions;
3745 const size_t *src_dimensions;
3746 struct gomp_device_descr *dst_devicep;
3747 struct gomp_device_descr *src_devicep;
3748 int num_dims;
3749 } omp_target_memcpy_rect_data;
3750
3751 static void
3752 omp_target_memcpy_rect_async_helper (void *args)
3753 {
3754 omp_target_memcpy_rect_data *a = args;
3755 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
3756 a->num_dims, a->volume, a->dst_offsets,
3757 a->src_offsets, a->dst_dimensions,
3758 a->src_dimensions, a->dst_devicep,
3759 a->src_devicep);
3760 if (ret)
3761 gomp_fatal ("omp_target_memcpy_rect failed");
3762 }
3763
3764 int
3765 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
3766 int num_dims, const size_t *volume,
3767 const size_t *dst_offsets,
3768 const size_t *src_offsets,
3769 const size_t *dst_dimensions,
3770 const size_t *src_dimensions,
3771 int dst_device_num, int src_device_num,
3772 int depobj_count, omp_depend_t *depobj_list)
3773 {
3774 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3775 unsigned flags = 0;
3776 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
3777 src_device_num, &dst_devicep,
3778 &src_devicep);
3779 void *depend[depobj_count + 5];
3780 int i;
3781
3782 omp_target_memcpy_rect_data s = {
3783 .dst = dst,
3784 .src = src,
3785 .element_size = element_size,
3786 .num_dims = num_dims,
3787 .volume = volume,
3788 .dst_offsets = dst_offsets,
3789 .src_offsets = src_offsets,
3790 .dst_dimensions = dst_dimensions,
3791 .src_dimensions = src_dimensions,
3792 .dst_devicep = dst_devicep,
3793 .src_devicep = src_devicep
3794 };
3795
3796 if (check)
3797 return check;
3798
3799 if (depobj_count > 0 && depobj_list != NULL)
3800 {
3801 flags |= GOMP_TASK_FLAG_DEPEND;
3802 depend[0] = 0;
3803 depend[1] = (void *) (uintptr_t) depobj_count;
3804 depend[2] = depend[3] = depend[4] = 0;
3805 for (i = 0; i < depobj_count; ++i)
3806 depend[i + 5] = &depobj_list[i];
3807 }
3808
3809 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
3810 __alignof__ (s), true, flags, depend, 0, NULL);
3811
3812 return 0;
3813 }
3814
3815 int
3816 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3817 size_t size, size_t device_offset, int device_num)
3818 {
3819 if (device_num == omp_initial_device
3820 || device_num == gomp_get_num_devices ())
3821 return EINVAL;
3822
3823 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3824 if (devicep == NULL)
3825 return EINVAL;
3826
3827 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3828 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3829 return EINVAL;
3830
3831 gomp_mutex_lock (&devicep->lock);
3832
3833 struct splay_tree_s *mem_map = &devicep->mem_map;
3834 struct splay_tree_key_s cur_node;
3835 int ret = EINVAL;
3836
3837 cur_node.host_start = (uintptr_t) host_ptr;
3838 cur_node.host_end = cur_node.host_start + size;
3839 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3840 if (n)
3841 {
3842 if (n->tgt->tgt_start + n->tgt_offset
3843 == (uintptr_t) device_ptr + device_offset
3844 && n->host_start <= cur_node.host_start
3845 && n->host_end >= cur_node.host_end)
3846 ret = 0;
3847 }
3848 else
3849 {
3850 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3851 tgt->array = gomp_malloc (sizeof (*tgt->array));
3852 tgt->refcount = 1;
3853 tgt->tgt_start = 0;
3854 tgt->tgt_end = 0;
3855 tgt->to_free = NULL;
3856 tgt->prev = NULL;
3857 tgt->list_count = 0;
3858 tgt->device_descr = devicep;
3859 splay_tree_node array = tgt->array;
3860 splay_tree_key k = &array->key;
3861 k->host_start = cur_node.host_start;
3862 k->host_end = cur_node.host_end;
3863 k->tgt = tgt;
3864 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3865 k->refcount = REFCOUNT_INFINITY;
3866 k->dynamic_refcount = 0;
3867 k->aux = NULL;
3868 array->left = NULL;
3869 array->right = NULL;
3870 splay_tree_insert (&devicep->mem_map, array);
3871 ret = 0;
3872 }
3873 gomp_mutex_unlock (&devicep->lock);
3874 return ret;
3875 }
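
/* Usage sketch (hypothetical, not part of libgomp): associating a separately
   allocated device buffer with a host array, so that subsequent map clauses
   find the data already present and transfers are done explicitly with
   target update:

     #include <omp.h>

     int h[1024];
     int dev = omp_get_default_device ();
     void *d = omp_target_alloc (sizeof (h), dev);
     if (omp_target_associate_ptr (h, d, sizeof (h), 0, dev) == 0)
       {
	 #pragma omp target update to (h) device (dev)
	 #pragma omp target device (dev) map (tofrom: h)
	 { ... }
	 omp_target_disassociate_ptr (h, dev);
       }
     omp_target_free (d, dev);  */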
3876
3877 int
3878 omp_target_disassociate_ptr (const void *ptr, int device_num)
3879 {
3880 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3881 if (devicep == NULL)
3882 return EINVAL;
3883
3884 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3885 return EINVAL;
3886
3887 gomp_mutex_lock (&devicep->lock);
3888
3889 struct splay_tree_s *mem_map = &devicep->mem_map;
3890 struct splay_tree_key_s cur_node;
3891 int ret = EINVAL;
3892
3893 cur_node.host_start = (uintptr_t) ptr;
3894 cur_node.host_end = cur_node.host_start;
3895 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3896 if (n
3897 && n->host_start == cur_node.host_start
3898 && n->refcount == REFCOUNT_INFINITY
3899 && n->tgt->tgt_start == 0
3900 && n->tgt->to_free == NULL
3901 && n->tgt->refcount == 1
3902 && n->tgt->list_count == 0)
3903 {
3904 splay_tree_remove (&devicep->mem_map, n);
3905 gomp_unmap_tgt (n->tgt);
3906 ret = 0;
3907 }
3908
3909 gomp_mutex_unlock (&devicep->lock);
3910 return ret;
3911 }
3912
3913 void *
3914 omp_get_mapped_ptr (const void *ptr, int device_num)
3915 {
3916 if (device_num == omp_initial_device
3917 || device_num == omp_get_initial_device ())
3918 return (void *) ptr;
3919
3920 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3921 if (devicep == NULL)
3922 return NULL;
3923
3924 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3925 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3926 return (void *) ptr;
3927
3928 gomp_mutex_lock (&devicep->lock);
3929
3930 struct splay_tree_s *mem_map = &devicep->mem_map;
3931 struct splay_tree_key_s cur_node;
3932 void *ret = NULL;
3933
3934 cur_node.host_start = (uintptr_t) ptr;
3935 cur_node.host_end = cur_node.host_start;
3936 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3937
3938 if (n)
3939 {
3940 uintptr_t offset = cur_node.host_start - n->host_start;
3941 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
3942 }
3943
3944 gomp_mutex_unlock (&devicep->lock);
3945
3946 return ret;
3947 }
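
/* Usage sketch (hypothetical, not part of libgomp): after mapping a host
   array, the corresponding device address can be retrieved and handed to a
   target region through is_device_ptr:

     #pragma omp target enter data map (to: h[0:1024]) device (dev)
     void *d = omp_get_mapped_ptr (h, dev);
     #pragma omp target is_device_ptr (d) device (dev)
     { ... }  */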
3948
3949 int
3950 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
3951 {
3952 if (device_num == omp_initial_device
3953 || device_num == gomp_get_num_devices ())
3954 return true;
3955
3956 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3957 if (devicep == NULL)
3958 return false;
3959
3960 /* TODO: Unified shared memory must be handled when available. */
3961
3962 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
3963 }
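
/* Note (added for clarity): with the current implementation the result
   depends only on DEVICE_NUM — true for the host and for devices with the
   GOMP_OFFLOAD_CAP_SHARED_MEM capability — and not on the particular
   PTR/SIZE range passed in; see the TODO above regarding unified shared
   memory.  */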
3964
3965 int
3966 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3967 {
3968 (void) kind;
3969 if (device_num == omp_initial_device
3970 || device_num == gomp_get_num_devices ())
3971 return gomp_pause_host ();
3972
3973 struct gomp_device_descr *devicep = resolve_device (device_num, false);
3974 if (devicep == NULL)
3975 return -1;
3976
3977 /* Do nothing for target devices for now. */
3978 return 0;
3979 }
3980
3981 int
3982 omp_pause_resource_all (omp_pause_resource_t kind)
3983 {
3984 (void) kind;
3985 if (gomp_pause_host ())
3986 return -1;
3987 /* Do nothing for target devices for now. */
3988 return 0;
3989 }
3990
3991 ialias (omp_pause_resource)
3992 ialias (omp_pause_resource_all)
3993
3994 #ifdef PLUGIN_SUPPORT
3995
3996 /* This function tries to load a plugin for DEVICE.  The name of the
3997 plugin is passed in PLUGIN_NAME.
3998 The handles of the functions found in the plugin are stored in the
3999 corresponding fields of DEVICE.  Returns TRUE on success, FALSE otherwise. */
4000
4001 static bool
4002 gomp_load_plugin_for_device (struct gomp_device_descr *device,
4003 const char *plugin_name)
4004 {
4005 const char *err = NULL, *last_missing = NULL;
4006
4007 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
4008 if (!plugin_handle)
4009 #if OFFLOAD_DEFAULTED
4010 return 0;
4011 #else
4012 goto dl_fail;
4013 #endif
4014
4015 /* Check if all required functions are available in the plugin and store
4016 their handles.  None of the symbols can legitimately be NULL,
4017 so we don't need to check dlerror all the time. */
4018 #define DLSYM(f) \
4019 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4020 goto dl_fail
4021 /* Similar, but missing functions are not an error.  The expression
4022 evaluates to false if the lookup failed, true otherwise. */
4023 #define DLSYM_OPT(f, n) \
4024 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4025 || (last_missing = #n, 0))
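
/* For example, the use "DLSYM (alloc);" below expands (after string-literal
   concatenation) to

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   while DLSYM_OPT is an expression that evaluates to true or false and, on
   failure, records the missing symbol name in last_missing.  */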
4026
4027 DLSYM (version);
4028 if (device->version_func () != GOMP_VERSION)
4029 {
4030 err = "plugin version mismatch";
4031 goto fail;
4032 }
4033
4034 DLSYM (get_name);
4035 DLSYM (get_caps);
4036 DLSYM (get_type);
4037 DLSYM (get_num_devices);
4038 DLSYM (init_device);
4039 DLSYM (fini_device);
4040 DLSYM (load_image);
4041 DLSYM (unload_image);
4042 DLSYM (alloc);
4043 DLSYM (free);
4044 DLSYM (dev2host);
4045 DLSYM (host2dev);
4046 device->capabilities = device->get_caps_func ();
4047 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4048 {
4049 DLSYM (run);
4050 DLSYM_OPT (async_run, async_run);
4051 DLSYM_OPT (can_run, can_run);
4052 DLSYM (dev2dev);
4053 }
4054 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4055 {
4056 if (!DLSYM_OPT (openacc.exec, openacc_exec)
4057 || !DLSYM_OPT (openacc.create_thread_data,
4058 openacc_create_thread_data)
4059 || !DLSYM_OPT (openacc.destroy_thread_data,
4060 openacc_destroy_thread_data)
4061 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
4062 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
4063 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
4064 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
4065 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
4066 || !DLSYM_OPT (openacc.async.queue_callback,
4067 openacc_async_queue_callback)
4068 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
4069 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
4070 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
4071 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
4072 {
4073 /* Require all the OpenACC handlers if we have
4074 GOMP_OFFLOAD_CAP_OPENACC_200. */
4075 err = "plugin missing OpenACC handler function";
4076 goto fail;
4077 }
4078
4079 unsigned cuda = 0;
4080 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
4081 openacc_cuda_get_current_device);
4082 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
4083 openacc_cuda_get_current_context);
4084 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
4085 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
4086 if (cuda && cuda != 4)
4087 {
4088 /* Make sure all the CUDA functions are there if any of them are. */
4089 err = "plugin missing OpenACC CUDA handler function";
4090 goto fail;
4091 }
4092 }
4093 #undef DLSYM
4094 #undef DLSYM_OPT
4095
4096 return 1;
4097
4098 dl_fail:
4099 err = dlerror ();
4100 fail:
4101 gomp_error ("while loading %s: %s", plugin_name, err);
4102 if (last_missing)
4103 gomp_error ("missing function was %s", last_missing);
4104 if (plugin_handle)
4105 dlclose (plugin_handle);
4106
4107 return 0;
4108 }
4109
4110 /* This function finalizes all initialized devices. */
4111
4112 static void
4113 gomp_target_fini (void)
4114 {
4115 int i;
4116 for (i = 0; i < num_devices; i++)
4117 {
4118 bool ret = true;
4119 struct gomp_device_descr *devicep = &devices[i];
4120 gomp_mutex_lock (&devicep->lock);
4121 if (devicep->state == GOMP_DEVICE_INITIALIZED)
4122 ret = gomp_fini_device (devicep);
4123 gomp_mutex_unlock (&devicep->lock);
4124 if (!ret)
4125 gomp_fatal ("device finalization failed");
4126 }
4127 }
4128
4129 /* This function initializes the runtime for offloading.
4130 It parses the list of offload plugins and tries to load them.
4131 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4132 will be set, and the array DEVICES initialized, containing descriptors
4133 for the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400
4134 ones, followed by the others. */
4135
4136 static void
4137 gomp_target_init (void)
4138 {
4139 const char *prefix = "libgomp-plugin-";
4140 const char *suffix = SONAME_SUFFIX (1);
4141 const char *cur, *next;
4142 char *plugin_name;
4143 int i, new_num_devs;
4144 int num_devs = 0, num_devs_openmp;
4145 struct gomp_device_descr *devs = NULL;
4146
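/* Offloading can be disabled altogether with the OMP_TARGET_OFFLOAD=DISABLED
   environment variable, which libgomp's environment handling turns into
   GOMP_TARGET_OFFLOAD_DISABLED checked here.  */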
4147 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
4148 return;
4149
4150 cur = OFFLOAD_PLUGINS;
4151 if (*cur)
4152 do
4153 {
4154 struct gomp_device_descr current_device;
4155 size_t prefix_len, suffix_len, cur_len;
4156
4157 next = strchr (cur, ',');
4158
4159 prefix_len = strlen (prefix);
4160 cur_len = next ? next - cur : strlen (cur);
4161 suffix_len = strlen (suffix);
4162
4163 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
4164 if (!plugin_name)
4165 {
4166 num_devs = 0;
4167 break;
4168 }
4169
4170 memcpy (plugin_name, prefix, prefix_len);
4171 memcpy (plugin_name + prefix_len, cur, cur_len);
4172 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
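/* For example, for the OFFLOAD_PLUGINS entry "nvptx" this typically yields
   "libgomp-plugin-nvptx.so.1" on Linux hosts; the exact suffix comes from
   the configure-generated SONAME_SUFFIX.  */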
4173
4174 if (gomp_load_plugin_for_device (&current_device, plugin_name))
4175 {
4176 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
4177 new_num_devs = current_device.get_num_devices_func (omp_req);
4178 if (gomp_debug_var > 0 && new_num_devs < 0)
4179 {
4180 bool found = false;
4181 int type = current_device.get_type_func ();
4182 for (int img = 0; img < num_offload_images; img++)
4183 if (type == offload_images[img].type)
4184 found = true;
4185 if (found)
4186 {
4187 char buf[sizeof ("unified_address, unified_shared_memory, "
4188 "reverse_offload")];
4189 gomp_requires_to_name (buf, sizeof (buf), omp_req);
4190 char *name = (char *) gomp_malloc (cur_len + 1);
4191 memcpy (name, cur, cur_len);
4192 name[cur_len] = '\0';
4193 gomp_debug (1,
4194 "%s devices present but 'omp requires %s' "
4195 "cannot be fulfilled", name, buf);
4196 free (name);
4197 }
4198 }
4199 else if (new_num_devs >= 1)
4200 {
4201 /* Augment DEVICES and NUM_DEVICES. */
4202
4203 devs = realloc (devs, (num_devs + new_num_devs)
4204 * sizeof (struct gomp_device_descr));
4205 if (!devs)
4206 {
4207 num_devs = 0;
4208 free (plugin_name);
4209 break;
4210 }
4211
4212 current_device.name = current_device.get_name_func ();
4213 /* current_device.capabilities has already been set. */
4214 current_device.type = current_device.get_type_func ();
4215 current_device.mem_map.root = NULL;
4216 current_device.state = GOMP_DEVICE_UNINITIALIZED;
4217 for (i = 0; i < new_num_devs; i++)
4218 {
4219 current_device.target_id = i;
4220 devs[num_devs] = current_device;
4221 gomp_mutex_init (&devs[num_devs].lock);
4222 num_devs++;
4223 }
4224 }
4225 }
4226
4227 free (plugin_name);
4228 cur = next + 1;
4229 }
4230 while (next);
4231
4232 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
4233 NUM_DEVICES_OPENMP. */
4234 struct gomp_device_descr *devs_s
4235 = malloc (num_devs * sizeof (struct gomp_device_descr));
4236 if (!devs_s)
4237 {
4238 num_devs = 0;
4239 free (devs);
4240 devs = NULL;
4241 }
4242 num_devs_openmp = 0;
4243 for (i = 0; i < num_devs; i++)
4244 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4245 devs_s[num_devs_openmp++] = devs[i];
4246 int num_devs_after_openmp = num_devs_openmp;
4247 for (i = 0; i < num_devs; i++)
4248 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4249 devs_s[num_devs_after_openmp++] = devs[i];
4250 free (devs);
4251 devs = devs_s;
4252
4253 for (i = 0; i < num_devs; i++)
4254 {
4255 /* The 'devices' array can be moved (by the realloc call) until we have
4256 found all the plugins, so registering with the OpenACC runtime (which
4257 takes a copy of the pointer argument) must be delayed until now. */
4258 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4259 goacc_register (&devs[i]);
4260 }
4261
4262 num_devices = num_devs;
4263 num_devices_openmp = num_devs_openmp;
4264 devices = devs;
4265 if (atexit (gomp_target_fini) != 0)
4266 gomp_fatal ("atexit failed");
4267 }
4268
4269 #else /* PLUGIN_SUPPORT */
4270 /* If dlfcn.h is unavailable we always fall back to host execution.
4271 The GOMP_target* routines are just stubs in this case. */
4272 static void
4273 gomp_target_init (void)
4274 {
4275 }
4276 #endif /* PLUGIN_SUPPORT */