1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_c
51 #include "splay-tree.h"
52
53
54 typedef uintptr_t *hash_entry_type;
55 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
56 static inline void htab_free (void *ptr) { free (ptr); }
57 #include "hashtab.h"
58
59 ialias_redirect (GOMP_task)
60
61 static inline hashval_t
62 htab_hash (hash_entry_type element)
63 {
64 return hash_pointer ((void *) element);
65 }
66
67 static inline bool
68 htab_eq (hash_entry_type x, hash_entry_type y)
69 {
70 return x == y;
71 }
72
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
74
75 static void gomp_target_init (void);
76
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
79
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock;
82
83 /* This structure describes an offload image.
84 It contains the type of the target device, a pointer to the host table
85 descriptor, and a pointer to the target data. */
86 struct offload_image_descr {
87 unsigned version;
88 enum offload_target_type type;
89 const void *host_table;
90 const void *target_data;
91 };
92
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr *offload_images;
95
96 /* Total number of offload images. */
97 static int num_offload_images;
98
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr *devices;
101
102 /* Total number of available devices. */
103 static int num_devices;
104
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp;
107
108 /* Mask of OpenMP 'requires' directive clauses (GOMP_REQUIRES_*). */
109 static int omp_requires_mask;
110
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
112
113 static void *
114 gomp_realloc_unlock (void *old, size_t size)
115 {
116 void *ret = realloc (old, size);
117 if (ret == NULL)
118 {
119 gomp_mutex_unlock (&register_lock);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
121 }
122 return ret;
123 }
124
125 attribute_hidden void
126 gomp_init_targets_once (void)
127 {
128 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
129 }
130
131 attribute_hidden int
132 gomp_get_num_devices (void)
133 {
134 gomp_init_targets_once ();
135 return num_devices_openmp;
136 }
137
138 static struct gomp_device_descr *
139 resolve_device (int device_id, bool remapped)
140 {
141 if (remapped && device_id == GOMP_DEVICE_ICV)
142 {
143 struct gomp_task_icv *icv = gomp_icv (false);
144 device_id = icv->default_device_var;
145 remapped = false;
146 }
147
148 if (device_id < 0)
149 {
150 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device))
152 return NULL;
153 if (device_id == omp_invalid_device)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
158
159 return NULL;
160 }
161 else if (device_id >= gomp_get_num_devices ())
162 {
163 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id != num_devices_openmp)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
167
168 return NULL;
169 }
170
171 gomp_mutex_lock (&devices[device_id].lock);
172 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
173 gomp_init_device (&devices[device_id]);
174 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
175 {
176 gomp_mutex_unlock (&devices[device_id].lock);
177
178 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
181
182 return NULL;
183 }
184 gomp_mutex_unlock (&devices[device_id].lock);
185
186 return &devices[device_id];
187 }
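/* Illustrative sketch (not part of the build): how the device-number
   semantics above play out for a hypothetical caller; run_on_host and
   run_on_device are made-up names.

     struct gomp_device_descr *devicep = resolve_device (device_num, true);
     if (devicep == NULL)
       // Host fallback: omp_initial_device / GOMP_DEVICE_HOST_FALLBACK,
       // an out-of-range id, or an already finalized device (the latter
       // two only when OMP_TARGET_OFFLOAD is not MANDATORY).
       run_on_host ();
     else
       run_on_device (devicep);  */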
188
189
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
192 {
193 if (key->host_start != key->host_end)
194 return splay_tree_lookup (mem_map, key);
195
196 key->host_end++;
197 splay_tree_key n = splay_tree_lookup (mem_map, key);
198 key->host_end--;
199 if (n)
200 return n;
201 key->host_start--;
202 n = splay_tree_lookup (mem_map, key);
203 key->host_start++;
204 if (n)
205 return n;
206 return splay_tree_lookup (mem_map, key);
207 }
208
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
211 {
212 return reverse_splay_tree_lookup (mem_map_rev, key);
213 }
214
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
217 {
218 if (key->host_start != key->host_end)
219 return splay_tree_lookup (mem_map, key);
220
221 key->host_end++;
222 splay_tree_key n = splay_tree_lookup (mem_map, key);
223 key->host_end--;
224 return n;
225 }
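/* Illustrative sketch (not part of the build), assuming a region
   [0x1000, 0x1100) is already present in MEM_MAP:

     struct splay_tree_key_s k;
     k.host_start = k.host_end = 0x1100;   // zero-length key at the end
     splay_tree_key n = gomp_map_lookup (mem_map, &k);

   The first probe [0x1100, 0x1101) misses, but the second probe
   [0x10ff, 0x1100) overlaps the mapping, so a pointer one past the end of
   a mapped region still resolves.  gomp_map_0len_lookup performs only the
   first probe and would return NULL here.  */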
226
227 static inline void
228 gomp_device_copy (struct gomp_device_descr *devicep,
229 bool (*copy_func) (int, void *, const void *, size_t),
230 const char *dst, void *dstaddr,
231 const char *src, const void *srcaddr,
232 size_t size)
233 {
234 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
235 {
236 gomp_mutex_unlock (&devicep->lock);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
239 }
240 }
241
242 static inline void
243 goacc_device_copy_async (struct gomp_device_descr *devicep,
244 bool (*copy_func) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue *),
246 const char *dst, void *dstaddr,
247 const char *src, const void *srcaddr,
248 const void *srcaddr_orig,
249 size_t size, struct goacc_asyncqueue *aq)
250 {
251 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
252 {
253 gomp_mutex_unlock (&devicep->lock);
254 if (srcaddr_orig && srcaddr_orig != srcaddr)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src, srcaddr_orig, srcaddr_orig + size,
259 src, srcaddr, srcaddr + size,
260 dst, dstaddr, dstaddr + size);
261 else
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src, srcaddr, srcaddr + size,
265 dst, dstaddr, dstaddr + size);
266 }
267 }
268
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
271
272 struct gomp_coalesce_chunk
273 {
274 /* The starting and ending point of a coalesced chunk of memory. */
275 size_t start, end;
276 };
277
278 struct gomp_coalesce_buf
279 {
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
282 void *buf;
283 struct target_mem_desc *tgt;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk *chunks;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
289 be performed. */
290 long chunk_cnt;
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
294 long use_cnt;
295 };
296
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
300
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
308
309 /* Add region with device tgt_start relative offset and length to CBUF.
310
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
313 example).
314 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
315 is it more performant to use libgomp CBUF buffering or individual device
316 asynchronous copying?) */
317
318 static inline void
319 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
320 {
321 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
322 return;
323 if (cbuf->chunk_cnt)
324 {
325 if (cbuf->chunk_cnt < 0)
326 return;
327 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
328 {
329 cbuf->chunk_cnt = -1;
330 return;
331 }
332 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
333 {
334 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
335 cbuf->use_cnt++;
336 return;
337 }
338 /* If the last chunk is only used by one mapping, discard it,
339 as it will be one host to device copy anyway and
340 memcpying it around will only waste cycles. */
341 if (cbuf->use_cnt == 1)
342 cbuf->chunk_cnt--;
343 }
344 cbuf->chunks[cbuf->chunk_cnt].start = start;
345 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
346 cbuf->chunk_cnt++;
347 cbuf->use_cnt = 1;
348 }
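/* Illustrative sketch (not part of the build): how nearby regions are
   merged.  Offsets are relative to tgt->tgt_start.

     gomp_coalesce_buf_add (&cbuf, 0, 64);     // chunk 0 becomes [0, 64)
     gomp_coalesce_buf_add (&cbuf, 72, 16);    // gap of 8 bytes is below
                                               // MAX_COALESCE_BUF_GAP, so
                                               // chunk 0 grows to [0, 88)
     gomp_coalesce_buf_add (&cbuf, 8192, 64);  // gap too large; a new
                                               // chunk [8192, 8256) starts

   Both regions of chunk 0 are later memcpy'd into cbuf.buf and reach the
   device in a single host to device transfer.  */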
349
350 /* Return true for mapping kinds which need to copy data from the
351 host to device for regions that weren't previously mapped. */
352
353 static inline bool
354 gomp_to_device_kind_p (int kind)
355 {
356 switch (kind)
357 {
358 case GOMP_MAP_ALLOC:
359 case GOMP_MAP_FROM:
360 case GOMP_MAP_FORCE_ALLOC:
361 case GOMP_MAP_FORCE_FROM:
362 case GOMP_MAP_ALWAYS_FROM:
363 return false;
364 default:
365 return true;
366 }
367 }
368
369 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
370 non-NULL), when the source data is on the stack or may otherwise be
371 deallocated before the asynchronous copy takes place, EPHEMERAL must be
372 passed as TRUE. */
373
374 attribute_hidden void
375 gomp_copy_host2dev (struct gomp_device_descr *devicep,
376 struct goacc_asyncqueue *aq,
377 void *d, const void *h, size_t sz,
378 bool ephemeral, struct gomp_coalesce_buf *cbuf)
379 {
380 if (__builtin_expect (aq != NULL, 0))
381 {
382 /* See 'gomp_coalesce_buf_add'. */
383 assert (!cbuf);
384
385 void *h_buf = (void *) h;
386 if (ephemeral)
387 {
388 /* We're queueing up an asynchronous copy from data that may
389 disappear before the transfer takes place (i.e. because it is a
390 stack local in a function that is no longer executing). Make a
391 copy of the data into a temporary buffer in those cases. */
392 h_buf = gomp_malloc (sz);
393 memcpy (h_buf, h, sz);
394 }
395 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
396 "dev", d, "host", h_buf, h, sz, aq);
397 if (ephemeral)
398 /* Free temporary buffer once the transfer has completed. */
399 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
400
401 return;
402 }
403
404 if (cbuf)
405 {
406 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
407 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
408 {
409 long first = 0;
410 long last = cbuf->chunk_cnt - 1;
411 while (first <= last)
412 {
413 long middle = (first + last) >> 1;
414 if (cbuf->chunks[middle].end <= doff)
415 first = middle + 1;
416 else if (cbuf->chunks[middle].start <= doff)
417 {
418 if (doff + sz > cbuf->chunks[middle].end)
419 {
420 gomp_mutex_unlock (&devicep->lock);
421 gomp_fatal ("internal libgomp cbuf error");
422 }
423 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
424 h, sz);
425 return;
426 }
427 else
428 last = middle - 1;
429 }
430 }
431 }
432
433 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
434 }
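/* Illustrative sketch (not part of the build): queueing an asynchronous
   copy of a stack local, which is why EPHEMERAL exists; enqueue_scalar and
   devptr are made-up names.

     static void
     enqueue_scalar (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, void *devptr)
     {
       int v = 42;   // gone once this frame returns
       gomp_copy_host2dev (devicep, aq, devptr, &v, sizeof v,
                           true, NULL);   // ephemeral=true: the data is
                                          // duplicated into a temporary
                                          // buffer before being queued
     }
*/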
435
436 attribute_hidden void
437 gomp_copy_dev2host (struct gomp_device_descr *devicep,
438 struct goacc_asyncqueue *aq,
439 void *h, const void *d, size_t sz)
440 {
441 if (__builtin_expect (aq != NULL, 0))
442 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
443 "host", h, "dev", d, NULL, sz, aq);
444 else
445 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
446 }
447
448 static void
449 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
450 {
451 if (!devicep->free_func (devicep->target_id, devptr))
452 {
453 gomp_mutex_unlock (&devicep->lock);
454 gomp_fatal ("error in freeing device memory block at %p", devptr);
455 }
456 }
457
458 /* Increment reference count of a splay_tree_key region K by 1.
459 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
460 increment the value if refcount is not yet contained in the set (used for
461 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
462 once for each construct). */
463
464 static inline void
465 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
466 {
467 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
468 return;
469
470 uintptr_t *refcount_ptr = &k->refcount;
471
472 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
473 refcount_ptr = &k->structelem_refcount;
474 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
475 refcount_ptr = k->structelem_refcount_ptr;
476
477 if (refcount_set)
478 {
479 if (htab_find (*refcount_set, refcount_ptr))
480 return;
481 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
482 *slot = refcount_ptr;
483 }
484
485 *refcount_ptr += 1;
486 return;
487 }
488
489 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
490 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
491 track already seen refcounts, and only adjust the value if refcount is not
492 yet contained in the set (like gomp_increment_refcount).
493
494 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
495 it is already zero and we know we decremented it earlier. This signals that
496 associated maps should be copied back to host.
497
498 *DO_REMOVE is set to true when this is the first handling of this refcount
499 and we are setting it to zero. This signals a removal of this key from the
500 splay-tree map.
501
502 Copy and removal are separated due to cases like handling of structure
503 elements, e.g. each map of a structure element representing a possible copy
504 out of a structure field has to be handled individually, but we only signal
505 removal for one (the first encountered) sibling map. */
506
507 static inline void
508 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
509 bool *do_copy, bool *do_remove)
510 {
511 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
512 {
513 *do_copy = *do_remove = false;
514 return;
515 }
516
517 uintptr_t *refcount_ptr = &k->refcount;
518
519 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
520 refcount_ptr = &k->structelem_refcount;
521 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
522 refcount_ptr = k->structelem_refcount_ptr;
523
524 bool new_encountered_refcount;
525 bool set_to_zero = false;
526 bool is_zero = false;
527
528 uintptr_t orig_refcount = *refcount_ptr;
529
530 if (refcount_set)
531 {
532 if (htab_find (*refcount_set, refcount_ptr))
533 {
534 new_encountered_refcount = false;
535 goto end;
536 }
537
538 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
539 *slot = refcount_ptr;
540 new_encountered_refcount = true;
541 }
542 else
543 /* If no refcount_set being used, assume all keys are being decremented
544 for the first time. */
545 new_encountered_refcount = true;
546
547 if (delete_p)
548 *refcount_ptr = 0;
549 else if (*refcount_ptr > 0)
550 *refcount_ptr -= 1;
551
552 end:
553 if (*refcount_ptr == 0)
554 {
555 if (orig_refcount > 0)
556 set_to_zero = true;
557
558 is_zero = true;
559 }
560
561 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
562 *do_remove = (new_encountered_refcount && set_to_zero);
563 }
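/* Worked example (hedged): a key K with refcount 1 is referenced twice in
   the same construct, with a shared REFCOUNT_SET and DELETE_P false.

     bool do_copy, do_remove;
     gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
     // First sibling: refcount goes 1 -> 0; do_copy and do_remove are true.
     gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
     // Second sibling: the refcount was already seen in the set, so it is
     // left at 0; do_copy stays true (copy-back still wanted), but
     // do_remove is false - the key is removed only once.
*/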
564
565 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
566 gomp_map_0len_lookup found oldn for newn.
567 Helper function of gomp_map_vars. */
568
569 static inline void
570 gomp_map_vars_existing (struct gomp_device_descr *devicep,
571 struct goacc_asyncqueue *aq, splay_tree_key oldn,
572 splay_tree_key newn, struct target_var_desc *tgt_var,
573 unsigned char kind, bool always_to_flag, bool implicit,
574 struct gomp_coalesce_buf *cbuf,
575 htab_t *refcount_set)
576 {
577 assert (kind != GOMP_MAP_ATTACH
578 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
579
580 tgt_var->key = oldn;
581 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
582 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
583 tgt_var->is_attach = false;
584 tgt_var->offset = newn->host_start - oldn->host_start;
585
586 /* For implicit maps, old contained in new is valid. */
587 bool implicit_subset = (implicit
588 && newn->host_start <= oldn->host_start
589 && oldn->host_end <= newn->host_end);
590 if (implicit_subset)
591 tgt_var->length = oldn->host_end - oldn->host_start;
592 else
593 tgt_var->length = newn->host_end - newn->host_start;
594
595 if ((kind & GOMP_MAP_FLAG_FORCE)
596 /* For implicit maps, old contained in new is valid. */
597 || !(implicit_subset
598 /* Otherwise, new contained inside old is considered valid. */
599 || (oldn->host_start <= newn->host_start
600 && newn->host_end <= oldn->host_end)))
601 {
602 gomp_mutex_unlock (&devicep->lock);
603 gomp_fatal ("Trying to map into device [%p..%p) object when "
604 "[%p..%p) is already mapped",
605 (void *) newn->host_start, (void *) newn->host_end,
606 (void *) oldn->host_start, (void *) oldn->host_end);
607 }
608
609 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
610 {
611 /* Implicit + always should not happen. If it does occur, the
612 address/length adjustment below is a TODO. */
613 assert (!implicit_subset);
614
615 if (oldn->aux && oldn->aux->attach_count)
616 {
617 /* We have to be careful not to overwrite still attached pointers
618 during the copyback to host. */
619 uintptr_t addr = newn->host_start;
620 while (addr < newn->host_end)
621 {
622 size_t i = (addr - oldn->host_start) / sizeof (void *);
623 if (oldn->aux->attach_count[i] == 0)
624 gomp_copy_host2dev (devicep, aq,
625 (void *) (oldn->tgt->tgt_start
626 + oldn->tgt_offset
627 + addr - oldn->host_start),
628 (void *) addr,
629 sizeof (void *), false, cbuf);
630 addr += sizeof (void *);
631 }
632 }
633 else
634 gomp_copy_host2dev (devicep, aq,
635 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
636 + newn->host_start - oldn->host_start),
637 (void *) newn->host_start,
638 newn->host_end - newn->host_start, false, cbuf);
639 }
640
641 gomp_increment_refcount (oldn, refcount_set);
642 }
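/* Worked example (hedged), for an existing mapping OLDN = [0x1000, 0x1100):
   - NEWN = [0x1040, 0x1080): contained in OLDN, accepted; OLDN is reused
     and its refcount incremented.
   - NEWN = [0x0f00, 0x1200) with the implicit flag: OLDN contained in NEWN
     is also accepted; tgt_var->length then covers only OLDN's extent.
   - NEWN = [0x10c0, 0x1200) (partial overlap), or any kind carrying
     GOMP_MAP_FLAG_FORCE: fatal "already mapped" error.  */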
643
644 static int
645 get_kind (bool short_mapkind, void *kinds, int idx)
646 {
647 if (!short_mapkind)
648 return ((unsigned char *) kinds)[idx];
649
650 int val = ((unsigned short *) kinds)[idx];
651 if (GOMP_MAP_IMPLICIT_P (val))
652 val &= ~GOMP_MAP_IMPLICIT;
653 return val;
654 }
655
656
657 static bool
658 get_implicit (bool short_mapkind, void *kinds, int idx)
659 {
660 if (!short_mapkind)
661 return false;
662
663 int val = ((unsigned short *) kinds)[idx];
664 return GOMP_MAP_IMPLICIT_P (val);
665 }
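/* Worked example (hedged): with SHORT_MAPKIND, each 16-bit kind packs the
   map type in its low byte and, once get_kind has stripped the
   GOMP_MAP_IMPLICIT flag, the log2 of the required alignment in the
   remaining high bits, e.g. for a kind value of 0x0301:

     kind & typemask (0xff)     = 0x01  -> the map type
     (size_t) 1 << (kind >> 8)  = 8     -> alignment in bytes

   get_implicit reports the GOMP_MAP_IMPLICIT flag separately.  */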
666
667 static void
668 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
669 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
670 struct gomp_coalesce_buf *cbuf,
671 bool allow_zero_length_array_sections)
672 {
673 struct gomp_device_descr *devicep = tgt->device_descr;
674 struct splay_tree_s *mem_map = &devicep->mem_map;
675 struct splay_tree_key_s cur_node;
676
677 cur_node.host_start = host_ptr;
678 if (cur_node.host_start == (uintptr_t) NULL)
679 {
680 cur_node.tgt_offset = (uintptr_t) NULL;
681 gomp_copy_host2dev (devicep, aq,
682 (void *) (tgt->tgt_start + target_offset),
683 (void *) &cur_node.tgt_offset, sizeof (void *),
684 true, cbuf);
685 return;
686 }
687 /* Add bias to the pointer value. */
688 cur_node.host_start += bias;
689 cur_node.host_end = cur_node.host_start;
690 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
691 if (n == NULL)
692 {
693 if (allow_zero_length_array_sections)
694 cur_node.tgt_offset = 0;
695 else
696 {
697 gomp_mutex_unlock (&devicep->lock);
698 gomp_fatal ("Pointer target of array section wasn't mapped");
699 }
700 }
701 else
702 {
703 cur_node.host_start -= n->host_start;
704 cur_node.tgt_offset
705 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
706 /* At this point tgt_offset is target address of the
707 array section. Now subtract bias to get what we want
708 to initialize the pointer with. */
709 cur_node.tgt_offset -= bias;
710 }
711 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
712 (void *) &cur_node.tgt_offset, sizeof (void *),
713 true, cbuf);
714 }
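/* Worked example (hedged, made-up addresses): the host pointer's value is
   0x2000, but only the array section starting at host address 0x2040 was
   mapped, so the caller passes BIAS = 0x40.  The lookup is then done at
   0x2000 + 0x40 = 0x2040, inside the mapped section; BIAS is subtracted
   again before the pointer is written to the device, so the device-side
   pointer sits 0x40 bytes before the device copy of the section, mirroring
   the host layout.  */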
715
716 static void
717 gomp_map_fields_existing (struct target_mem_desc *tgt,
718 struct goacc_asyncqueue *aq, splay_tree_key n,
719 size_t first, size_t i, void **hostaddrs,
720 size_t *sizes, void *kinds,
721 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
722 {
723 struct gomp_device_descr *devicep = tgt->device_descr;
724 struct splay_tree_s *mem_map = &devicep->mem_map;
725 struct splay_tree_key_s cur_node;
726 int kind;
727 bool implicit;
728 const bool short_mapkind = true;
729 const int typemask = short_mapkind ? 0xff : 0x7;
730
731 cur_node.host_start = (uintptr_t) hostaddrs[i];
732 cur_node.host_end = cur_node.host_start + sizes[i];
733 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
734 kind = get_kind (short_mapkind, kinds, i);
735 implicit = get_implicit (short_mapkind, kinds, i);
736 if (n2
737 && n2->tgt == n->tgt
738 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
739 {
740 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
741 kind & typemask, false, implicit, cbuf,
742 refcount_set);
743 return;
744 }
745 if (sizes[i] == 0)
746 {
747 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
748 {
749 cur_node.host_start--;
750 n2 = splay_tree_lookup (mem_map, &cur_node);
751 cur_node.host_start++;
752 if (n2
753 && n2->tgt == n->tgt
754 && n2->host_start - n->host_start
755 == n2->tgt_offset - n->tgt_offset)
756 {
757 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
758 kind & typemask, false, implicit, cbuf,
759 refcount_set);
760 return;
761 }
762 }
763 cur_node.host_end++;
764 n2 = splay_tree_lookup (mem_map, &cur_node);
765 cur_node.host_end--;
766 if (n2
767 && n2->tgt == n->tgt
768 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
769 {
770 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
771 kind & typemask, false, implicit, cbuf,
772 refcount_set);
773 return;
774 }
775 }
776 gomp_mutex_unlock (&devicep->lock);
777 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
778 "other mapped elements from the same structure weren't mapped "
779 "together with it", (void *) cur_node.host_start,
780 (void *) cur_node.host_end);
781 }
782
783 attribute_hidden void
784 gomp_attach_pointer (struct gomp_device_descr *devicep,
785 struct goacc_asyncqueue *aq, splay_tree mem_map,
786 splay_tree_key n, uintptr_t attach_to, size_t bias,
787 struct gomp_coalesce_buf *cbufp,
788 bool allow_zero_length_array_sections)
789 {
790 struct splay_tree_key_s s;
791 size_t size, idx;
792
793 if (n == NULL)
794 {
795 gomp_mutex_unlock (&devicep->lock);
796 gomp_fatal ("enclosing struct not mapped for attach");
797 }
798
799 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
800 /* We might have a pointer in a packed struct: however we cannot have more
801 than one such pointer in each pointer-sized portion of the struct, so
802 this is safe. */
803 idx = (attach_to - n->host_start) / sizeof (void *);
804
805 if (!n->aux)
806 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
807
808 if (!n->aux->attach_count)
809 n->aux->attach_count
810 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
811
812 if (n->aux->attach_count[idx] < UINTPTR_MAX)
813 n->aux->attach_count[idx]++;
814 else
815 {
816 gomp_mutex_unlock (&devicep->lock);
817 gomp_fatal ("attach count overflow");
818 }
819
820 if (n->aux->attach_count[idx] == 1)
821 {
822 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
823 - n->host_start;
824 uintptr_t target = (uintptr_t) *(void **) attach_to;
825 splay_tree_key tn;
826 uintptr_t data;
827
828 if ((void *) target == NULL)
829 {
830 gomp_mutex_unlock (&devicep->lock);
831 gomp_fatal ("attempt to attach null pointer");
832 }
833
834 s.host_start = target + bias;
835 s.host_end = s.host_start + 1;
836 tn = splay_tree_lookup (mem_map, &s);
837
838 if (!tn)
839 {
840 if (allow_zero_length_array_sections)
841 /* When allowing attachment to zero-length array sections, we
842 allow attaching to NULL pointers when the target region is not
843 mapped. */
844 data = 0;
845 else
846 {
847 gomp_mutex_unlock (&devicep->lock);
848 gomp_fatal ("pointer target not mapped for attach");
849 }
850 }
851 else
852 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
853
854 gomp_debug (1,
855 "%s: attaching host %p, target %p (struct base %p) to %p\n",
856 __FUNCTION__, (void *) attach_to, (void *) devptr,
857 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
858
859 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
860 sizeof (void *), true, cbufp);
861 }
862 else
863 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
864 (void *) attach_to, (int) n->aux->attach_count[idx]);
865 }
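/* Illustrative sketch (not part of the build): attaching the device copy
   of a structure's pointer field to its separately mapped payload; s,
   n_struct and the payload mapping are assumed to exist already.

     struct S { int *data; size_t n; };
     extern struct S s;
     // n_struct is the splay_tree_key of the mapping covering s; the
     // array s.data[0 .. s.n) is assumed to be mapped as well.
     gomp_attach_pointer (devicep, aq, &devicep->mem_map, n_struct,
                          (uintptr_t) &s.data, 0, NULL, false);
                          // bias 0, no coalesce buffer, no zero-length
                          // array sections

   On the first attach the *device* copy of s.data is overwritten with the
   device address of the payload; the host copy is left untouched, and the
   per-pointer attach count allows nested attach/detach.  */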
866
867 attribute_hidden void
868 gomp_detach_pointer (struct gomp_device_descr *devicep,
869 struct goacc_asyncqueue *aq, splay_tree_key n,
870 uintptr_t detach_from, bool finalize,
871 struct gomp_coalesce_buf *cbufp)
872 {
873 size_t idx;
874
875 if (n == NULL)
876 {
877 gomp_mutex_unlock (&devicep->lock);
878 gomp_fatal ("enclosing struct not mapped for detach");
879 }
880
881 idx = (detach_from - n->host_start) / sizeof (void *);
882
883 if (!n->aux || !n->aux->attach_count)
884 {
885 gomp_mutex_unlock (&devicep->lock);
886 gomp_fatal ("no attachment counters for struct");
887 }
888
889 if (finalize)
890 n->aux->attach_count[idx] = 1;
891
892 if (n->aux->attach_count[idx] == 0)
893 {
894 gomp_mutex_unlock (&devicep->lock);
895 gomp_fatal ("attach count underflow");
896 }
897 else
898 n->aux->attach_count[idx]--;
899
900 if (n->aux->attach_count[idx] == 0)
901 {
902 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
903 - n->host_start;
904 uintptr_t target = (uintptr_t) *(void **) detach_from;
905
906 gomp_debug (1,
907 "%s: detaching host %p, target %p (struct base %p) to %p\n",
908 __FUNCTION__, (void *) detach_from, (void *) devptr,
909 (void *) (n->tgt->tgt_start + n->tgt_offset),
910 (void *) target);
911
912 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
913 sizeof (void *), true, cbufp);
914 }
915 else
916 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
917 (void *) detach_from, (int) n->aux->attach_count[idx]);
918 }
919
920 attribute_hidden uintptr_t
921 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
922 {
923 if (tgt->list[i].key != NULL)
924 return tgt->list[i].key->tgt->tgt_start
925 + tgt->list[i].key->tgt_offset
926 + tgt->list[i].offset;
927
928 switch (tgt->list[i].offset)
929 {
930 case OFFSET_INLINED:
931 return (uintptr_t) hostaddrs[i];
932
933 case OFFSET_POINTER:
934 return 0;
935
936 case OFFSET_STRUCT:
937 return tgt->list[i + 1].key->tgt->tgt_start
938 + tgt->list[i + 1].key->tgt_offset
939 + tgt->list[i + 1].offset
940 + (uintptr_t) hostaddrs[i]
941 - (uintptr_t) hostaddrs[i + 1];
942
943 default:
944 return tgt->tgt_start + tgt->list[i].offset;
945 }
946 }
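/* Worked example (hedged): for a GOMP_MAP_STRUCT entry at index i (offset
   OFFSET_STRUCT), the returned device address is derived from the first
   mapped field at index i + 1:

     dev (struct base) = dev (field)
                         - ((uintptr_t) hostaddrs[i + 1]
                            - (uintptr_t) hostaddrs[i])

   i.e. the field's device address shifted back by the field's offset
   within the host structure.  */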
947
948 static inline __attribute__((always_inline)) struct target_mem_desc *
949 gomp_map_vars_internal (struct gomp_device_descr *devicep,
950 struct goacc_asyncqueue *aq, size_t mapnum,
951 void **hostaddrs, void **devaddrs, size_t *sizes,
952 void *kinds, bool short_mapkind,
953 htab_t *refcount_set,
954 enum gomp_map_vars_kind pragma_kind)
955 {
956 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
957 bool has_firstprivate = false;
958 bool has_always_ptrset = false;
959 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
960 const int rshift = short_mapkind ? 8 : 3;
961 const int typemask = short_mapkind ? 0xff : 0x7;
962 struct splay_tree_s *mem_map = &devicep->mem_map;
963 struct splay_tree_key_s cur_node;
964 struct target_mem_desc *tgt
965 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
966 tgt->list_count = mapnum;
967 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
968 tgt->device_descr = devicep;
969 tgt->prev = NULL;
970 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
971
972 if (mapnum == 0)
973 {
974 tgt->tgt_start = 0;
975 tgt->tgt_end = 0;
976 return tgt;
977 }
978
979 tgt_align = sizeof (void *);
980 tgt_size = 0;
981 cbuf.chunks = NULL;
982 cbuf.chunk_cnt = -1;
983 cbuf.use_cnt = 0;
984 cbuf.buf = NULL;
985 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
986 {
987 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
988 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
989 cbuf.chunk_cnt = 0;
990 }
991 if (pragma_kind == GOMP_MAP_VARS_TARGET)
992 {
993 size_t align = 4 * sizeof (void *);
994 tgt_align = align;
995 tgt_size = mapnum * sizeof (void *);
996 cbuf.chunk_cnt = 1;
997 cbuf.use_cnt = 1 + (mapnum > 1);
998 cbuf.chunks[0].start = 0;
999 cbuf.chunks[0].end = tgt_size;
1000 }
1001
1002 gomp_mutex_lock (&devicep->lock);
1003 if (devicep->state == GOMP_DEVICE_FINALIZED)
1004 {
1005 gomp_mutex_unlock (&devicep->lock);
1006 free (tgt);
1007 return NULL;
1008 }
1009
1010 for (i = 0; i < mapnum; i++)
1011 {
1012 int kind = get_kind (short_mapkind, kinds, i);
1013 bool implicit = get_implicit (short_mapkind, kinds, i);
1014 if (hostaddrs[i] == NULL
1015 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1016 {
1017 tgt->list[i].key = NULL;
1018 tgt->list[i].offset = OFFSET_INLINED;
1019 continue;
1020 }
1021 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1022 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1023 {
1024 tgt->list[i].key = NULL;
1025 if (!not_found_cnt)
1026 {
1027 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1028 on a separate construct prior to using use_device_{addr,ptr}.
1029 In OpenMP 5.0, map directives need to be ordered by the
1030 middle-end before the use_device_* clauses. If
1031 !not_found_cnt, all mappings requested (if any) are already
1032 mapped, so use_device_{addr,ptr} can be resolved right away.
1033 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1034 now but would succeed after performing the mappings in the
1035 following loop. We can't defer this always to the second
1036 loop, because it is not even invoked when !not_found_cnt
1037 after the first loop. */
1038 cur_node.host_start = (uintptr_t) hostaddrs[i];
1039 cur_node.host_end = cur_node.host_start;
1040 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1041 if (n != NULL)
1042 {
1043 cur_node.host_start -= n->host_start;
1044 hostaddrs[i]
1045 = (void *) (n->tgt->tgt_start + n->tgt_offset
1046 + cur_node.host_start);
1047 }
1048 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1049 {
1050 gomp_mutex_unlock (&devicep->lock);
1051 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1052 }
1053 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1054 /* If not present, continue using the host address. */
1055 ;
1056 else
1057 __builtin_unreachable ();
1058 tgt->list[i].offset = OFFSET_INLINED;
1059 }
1060 else
1061 tgt->list[i].offset = 0;
1062 continue;
1063 }
1064 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1065 {
1066 size_t first = i + 1;
1067 size_t last = i + sizes[i];
1068 cur_node.host_start = (uintptr_t) hostaddrs[i];
1069 cur_node.host_end = (uintptr_t) hostaddrs[last]
1070 + sizes[last];
1071 tgt->list[i].key = NULL;
1072 tgt->list[i].offset = OFFSET_STRUCT;
1073 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1074 if (n == NULL)
1075 {
1076 size_t align = (size_t) 1 << (kind >> rshift);
1077 if (tgt_align < align)
1078 tgt_align = align;
1079 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1080 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1081 tgt_size += cur_node.host_end - cur_node.host_start;
1082 not_found_cnt += last - i;
1083 for (i = first; i <= last; i++)
1084 {
1085 tgt->list[i].key = NULL;
1086 if (!aq
1087 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1088 & typemask))
1089 gomp_coalesce_buf_add (&cbuf,
1090 tgt_size - cur_node.host_end
1091 + (uintptr_t) hostaddrs[i],
1092 sizes[i]);
1093 }
1094 i--;
1095 continue;
1096 }
1097 for (i = first; i <= last; i++)
1098 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1099 sizes, kinds, NULL, refcount_set);
1100 i--;
1101 continue;
1102 }
1103 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1104 {
1105 tgt->list[i].key = NULL;
1106 tgt->list[i].offset = OFFSET_POINTER;
1107 has_firstprivate = true;
1108 continue;
1109 }
1110 else if ((kind & typemask) == GOMP_MAP_ATTACH
1111 || ((kind & typemask)
1112 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1113 {
1114 tgt->list[i].key = NULL;
1115 has_firstprivate = true;
1116 continue;
1117 }
1118 cur_node.host_start = (uintptr_t) hostaddrs[i];
1119 if (!GOMP_MAP_POINTER_P (kind & typemask))
1120 cur_node.host_end = cur_node.host_start + sizes[i];
1121 else
1122 cur_node.host_end = cur_node.host_start + sizeof (void *);
1123 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1124 {
1125 tgt->list[i].key = NULL;
1126
1127 size_t align = (size_t) 1 << (kind >> rshift);
1128 if (tgt_align < align)
1129 tgt_align = align;
1130 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1131 if (!aq)
1132 gomp_coalesce_buf_add (&cbuf, tgt_size,
1133 cur_node.host_end - cur_node.host_start);
1134 tgt_size += cur_node.host_end - cur_node.host_start;
1135 has_firstprivate = true;
1136 continue;
1137 }
1138 splay_tree_key n;
1139 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1140 {
1141 n = gomp_map_0len_lookup (mem_map, &cur_node);
1142 if (!n)
1143 {
1144 tgt->list[i].key = NULL;
1145 tgt->list[i].offset = OFFSET_POINTER;
1146 continue;
1147 }
1148 }
1149 else
1150 n = splay_tree_lookup (mem_map, &cur_node);
1151 if (n && n->refcount != REFCOUNT_LINK)
1152 {
1153 int always_to_cnt = 0;
1154 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1155 {
1156 bool has_nullptr = false;
1157 size_t j;
1158 for (j = 0; j < n->tgt->list_count; j++)
1159 if (n->tgt->list[j].key == n)
1160 {
1161 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1162 break;
1163 }
1164 if (n->tgt->list_count == 0)
1165 {
1166 /* 'declare target'; assume has_nullptr; it could also be
1167 a statically assigned pointer, but then it should point to
1168 the equivalent variable on the host. */
1169 assert (n->refcount == REFCOUNT_INFINITY);
1170 has_nullptr = true;
1171 }
1172 else
1173 assert (j < n->tgt->list_count);
1174 /* Re-map the data if there is an 'always' modifier or if a null
1175 pointer was there before and a non-null one has now been found; that
1176 permits transparent re-mapping for Fortran array descriptors
1177 which were previously mapped unallocated. */
1178 for (j = i + 1; j < mapnum; j++)
1179 {
1180 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1181 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1182 && (!has_nullptr
1183 || !GOMP_MAP_POINTER_P (ptr_kind)
1184 || *(void **) hostaddrs[j] == NULL))
1185 break;
1186 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1187 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1188 > cur_node.host_end))
1189 break;
1190 else
1191 {
1192 has_always_ptrset = true;
1193 ++always_to_cnt;
1194 }
1195 }
1196 }
1197 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1198 kind & typemask, always_to_cnt > 0, implicit,
1199 NULL, refcount_set);
1200 i += always_to_cnt;
1201 }
1202 else
1203 {
1204 tgt->list[i].key = NULL;
1205
1206 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1207 {
1208 /* Not present, hence skip the entry - including its MAP_POINTER,
1209 if any. */
1210 tgt->list[i].offset = OFFSET_POINTER;
1211 if (i + 1 < mapnum
1212 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1213 == GOMP_MAP_POINTER))
1214 {
1215 ++i;
1216 tgt->list[i].key = NULL;
1217 tgt->list[i].offset = 0;
1218 }
1219 continue;
1220 }
1221 size_t align = (size_t) 1 << (kind >> rshift);
1222 not_found_cnt++;
1223 if (tgt_align < align)
1224 tgt_align = align;
1225 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1226 if (!aq
1227 && gomp_to_device_kind_p (kind & typemask))
1228 gomp_coalesce_buf_add (&cbuf, tgt_size,
1229 cur_node.host_end - cur_node.host_start);
1230 tgt_size += cur_node.host_end - cur_node.host_start;
1231 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1232 {
1233 size_t j;
1234 int kind;
1235 for (j = i + 1; j < mapnum; j++)
1236 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1237 kinds, j)) & typemask))
1238 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1239 break;
1240 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1241 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1242 > cur_node.host_end))
1243 break;
1244 else
1245 {
1246 tgt->list[j].key = NULL;
1247 i++;
1248 }
1249 }
1250 }
1251 }
1252
1253 if (devaddrs)
1254 {
1255 if (mapnum != 1)
1256 {
1257 gomp_mutex_unlock (&devicep->lock);
1258 gomp_fatal ("unexpected aggregation");
1259 }
1260 tgt->to_free = devaddrs[0];
1261 tgt->tgt_start = (uintptr_t) tgt->to_free;
1262 tgt->tgt_end = tgt->tgt_start + sizes[0];
1263 }
1264 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1265 {
1266 /* Allocate tgt_align aligned tgt_size block of memory. */
1267 /* FIXME: Perhaps change interface to allocate properly aligned
1268 memory. */
1269 tgt->to_free = devicep->alloc_func (devicep->target_id,
1270 tgt_size + tgt_align - 1);
1271 if (!tgt->to_free)
1272 {
1273 gomp_mutex_unlock (&devicep->lock);
1274 gomp_fatal ("device memory allocation fail");
1275 }
1276
1277 tgt->tgt_start = (uintptr_t) tgt->to_free;
1278 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1279 tgt->tgt_end = tgt->tgt_start + tgt_size;
1280
1281 if (cbuf.use_cnt == 1)
1282 cbuf.chunk_cnt--;
1283 if (cbuf.chunk_cnt > 0)
1284 {
1285 cbuf.buf
1286 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1287 if (cbuf.buf)
1288 {
1289 cbuf.tgt = tgt;
1290 cbufp = &cbuf;
1291 }
1292 }
1293 }
1294 else
1295 {
1296 tgt->to_free = NULL;
1297 tgt->tgt_start = 0;
1298 tgt->tgt_end = 0;
1299 }
1300
1301 tgt_size = 0;
1302 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1303 tgt_size = mapnum * sizeof (void *);
1304
1305 tgt->array = NULL;
1306 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1307 {
1308 if (not_found_cnt)
1309 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1310 splay_tree_node array = tgt->array;
1311 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1312 uintptr_t field_tgt_base = 0;
1313 splay_tree_key field_tgt_structelem_first = NULL;
1314
1315 for (i = 0; i < mapnum; i++)
1316 if (has_always_ptrset
1317 && tgt->list[i].key
1318 && (get_kind (short_mapkind, kinds, i) & typemask)
1319 == GOMP_MAP_TO_PSET)
1320 {
1321 splay_tree_key k = tgt->list[i].key;
1322 bool has_nullptr = false;
1323 size_t j;
1324 for (j = 0; j < k->tgt->list_count; j++)
1325 if (k->tgt->list[j].key == k)
1326 {
1327 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1328 break;
1329 }
1330 if (k->tgt->list_count == 0)
1331 has_nullptr = true;
1332 else
1333 assert (j < k->tgt->list_count);
1334
1335 tgt->list[i].has_null_ptr_assoc = false;
1336 for (j = i + 1; j < mapnum; j++)
1337 {
1338 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1339 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1340 && (!has_nullptr
1341 || !GOMP_MAP_POINTER_P (ptr_kind)
1342 || *(void **) hostaddrs[j] == NULL))
1343 break;
1344 else if ((uintptr_t) hostaddrs[j] < k->host_start
1345 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1346 > k->host_end))
1347 break;
1348 else
1349 {
1350 if (*(void **) hostaddrs[j] == NULL)
1351 tgt->list[i].has_null_ptr_assoc = true;
1352 tgt->list[j].key = k;
1353 tgt->list[j].copy_from = false;
1354 tgt->list[j].always_copy_from = false;
1355 tgt->list[j].is_attach = false;
1356 gomp_increment_refcount (k, refcount_set);
1357 gomp_map_pointer (k->tgt, aq,
1358 (uintptr_t) *(void **) hostaddrs[j],
1359 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1360 - k->host_start),
1361 sizes[j], cbufp, false);
1362 }
1363 }
1364 i = j - 1;
1365 }
1366 else if (tgt->list[i].key == NULL)
1367 {
1368 int kind = get_kind (short_mapkind, kinds, i);
1369 bool implicit = get_implicit (short_mapkind, kinds, i);
1370 if (hostaddrs[i] == NULL)
1371 continue;
1372 switch (kind & typemask)
1373 {
1374 size_t align, len, first, last;
1375 splay_tree_key n;
1376 case GOMP_MAP_FIRSTPRIVATE:
1377 align = (size_t) 1 << (kind >> rshift);
1378 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1379 tgt->list[i].offset = tgt_size;
1380 len = sizes[i];
1381 gomp_copy_host2dev (devicep, aq,
1382 (void *) (tgt->tgt_start + tgt_size),
1383 (void *) hostaddrs[i], len, false, cbufp);
1384 /* Save the device address in hostaddrs[i] to permit later availability
1385 when doing a deep-firstprivate with pointer attach. */
1386 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1387 tgt_size += len;
1388
1389 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1390 firstprivate to hostaddrs[i+1], which is assumed to contain a
1391 device address. */
1392 if (i + 1 < mapnum
1393 && (GOMP_MAP_ATTACH
1394 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1395 {
1396 uintptr_t target = (uintptr_t) hostaddrs[i];
1397 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1398 gomp_copy_host2dev (devicep, aq, devptr, &target,
1399 sizeof (void *), false, cbufp);
1400 ++i;
1401 }
1402 continue;
1403 case GOMP_MAP_FIRSTPRIVATE_INT:
1404 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1405 continue;
1406 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1407 /* The OpenACC 'host_data' construct only allows 'use_device'
1408 "mapping" clauses, so in the first loop, 'not_found_cnt'
1409 must always have been zero, so all OpenACC 'use_device'
1410 clauses have already been handled. (We can only easily test
1411 'use_device' with 'if_present' clause here.) */
1412 assert (tgt->list[i].offset == OFFSET_INLINED);
1413 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1414 code conceptually simple, similar to the first loop. */
1415 case GOMP_MAP_USE_DEVICE_PTR:
1416 if (tgt->list[i].offset == 0)
1417 {
1418 cur_node.host_start = (uintptr_t) hostaddrs[i];
1419 cur_node.host_end = cur_node.host_start;
1420 n = gomp_map_lookup (mem_map, &cur_node);
1421 if (n != NULL)
1422 {
1423 cur_node.host_start -= n->host_start;
1424 hostaddrs[i]
1425 = (void *) (n->tgt->tgt_start + n->tgt_offset
1426 + cur_node.host_start);
1427 }
1428 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1429 {
1430 gomp_mutex_unlock (&devicep->lock);
1431 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1432 }
1433 else if ((kind & typemask)
1434 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1435 /* If not present, continue using the host address. */
1436 ;
1437 else
1438 __builtin_unreachable ();
1439 tgt->list[i].offset = OFFSET_INLINED;
1440 }
1441 continue;
1442 case GOMP_MAP_STRUCT:
1443 first = i + 1;
1444 last = i + sizes[i];
1445 cur_node.host_start = (uintptr_t) hostaddrs[i];
1446 cur_node.host_end = (uintptr_t) hostaddrs[last]
1447 + sizes[last];
1448 if (tgt->list[first].key != NULL)
1449 continue;
1450 n = splay_tree_lookup (mem_map, &cur_node);
1451 if (n == NULL)
1452 {
1453 size_t align = (size_t) 1 << (kind >> rshift);
1454 tgt_size -= (uintptr_t) hostaddrs[first]
1455 - (uintptr_t) hostaddrs[i];
1456 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1457 tgt_size += (uintptr_t) hostaddrs[first]
1458 - (uintptr_t) hostaddrs[i];
1459 field_tgt_base = (uintptr_t) hostaddrs[first];
1460 field_tgt_offset = tgt_size;
1461 field_tgt_clear = last;
1462 field_tgt_structelem_first = NULL;
1463 tgt_size += cur_node.host_end
1464 - (uintptr_t) hostaddrs[first];
1465 continue;
1466 }
1467 for (i = first; i <= last; i++)
1468 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1469 sizes, kinds, cbufp, refcount_set);
1470 i--;
1471 continue;
1472 case GOMP_MAP_ALWAYS_POINTER:
1473 cur_node.host_start = (uintptr_t) hostaddrs[i];
1474 cur_node.host_end = cur_node.host_start + sizeof (void *);
1475 n = splay_tree_lookup (mem_map, &cur_node);
1476 if (n == NULL
1477 || n->host_start > cur_node.host_start
1478 || n->host_end < cur_node.host_end)
1479 {
1480 gomp_mutex_unlock (&devicep->lock);
1481 gomp_fatal ("always pointer not mapped");
1482 }
1483 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1484 != GOMP_MAP_ALWAYS_POINTER)
1485 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1486 if (cur_node.tgt_offset)
1487 cur_node.tgt_offset -= sizes[i];
1488 gomp_copy_host2dev (devicep, aq,
1489 (void *) (n->tgt->tgt_start
1490 + n->tgt_offset
1491 + cur_node.host_start
1492 - n->host_start),
1493 (void *) &cur_node.tgt_offset,
1494 sizeof (void *), true, cbufp);
1495 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1496 + cur_node.host_start - n->host_start;
1497 continue;
1498 case GOMP_MAP_IF_PRESENT:
1499 /* Not present - otherwise handled above. Skip over its
1500 MAP_POINTER as well. */
1501 if (i + 1 < mapnum
1502 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1503 == GOMP_MAP_POINTER))
1504 ++i;
1505 continue;
1506 case GOMP_MAP_ATTACH:
1507 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1508 {
1509 cur_node.host_start = (uintptr_t) hostaddrs[i];
1510 cur_node.host_end = cur_node.host_start + sizeof (void *);
1511 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1512 if (n != NULL)
1513 {
1514 tgt->list[i].key = n;
1515 tgt->list[i].offset = cur_node.host_start - n->host_start;
1516 tgt->list[i].length = n->host_end - n->host_start;
1517 tgt->list[i].copy_from = false;
1518 tgt->list[i].always_copy_from = false;
1519 tgt->list[i].is_attach = true;
1520 /* OpenACC 'attach'/'detach' doesn't affect
1521 structured/dynamic reference counts ('n->refcount',
1522 'n->dynamic_refcount'). */
1523
1524 bool zlas
1525 = ((kind & typemask)
1526 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1527 gomp_attach_pointer (devicep, aq, mem_map, n,
1528 (uintptr_t) hostaddrs[i], sizes[i],
1529 cbufp, zlas);
1530 }
1531 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1532 {
1533 gomp_mutex_unlock (&devicep->lock);
1534 gomp_fatal ("outer struct not mapped for attach");
1535 }
1536 continue;
1537 }
1538 default:
1539 break;
1540 }
1541 splay_tree_key k = &array->key;
1542 k->host_start = (uintptr_t) hostaddrs[i];
1543 if (!GOMP_MAP_POINTER_P (kind & typemask))
1544 k->host_end = k->host_start + sizes[i];
1545 else
1546 k->host_end = k->host_start + sizeof (void *);
1547 splay_tree_key n = splay_tree_lookup (mem_map, k);
1548 if (n && n->refcount != REFCOUNT_LINK)
1549 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1550 kind & typemask, false, implicit, cbufp,
1551 refcount_set);
1552 else
1553 {
1554 k->aux = NULL;
1555 if (n && n->refcount == REFCOUNT_LINK)
1556 {
1557 /* Replace target address of the pointer with target address
1558 of mapped object in the splay tree. */
1559 splay_tree_remove (mem_map, n);
1560 k->aux
1561 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1562 k->aux->link_key = n;
1563 }
1564 size_t align = (size_t) 1 << (kind >> rshift);
1565 tgt->list[i].key = k;
1566 k->tgt = tgt;
1567 k->refcount = 0;
1568 k->dynamic_refcount = 0;
1569 if (field_tgt_clear != FIELD_TGT_EMPTY)
1570 {
1571 k->tgt_offset = k->host_start - field_tgt_base
1572 + field_tgt_offset;
1573 if (openmp_p)
1574 {
1575 k->refcount = REFCOUNT_STRUCTELEM;
1576 if (field_tgt_structelem_first == NULL)
1577 {
1578 /* Set to first structure element of sequence. */
1579 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1580 field_tgt_structelem_first = k;
1581 }
1582 else
1583 /* Point to refcount of leading element, but do not
1584 increment again. */
1585 k->structelem_refcount_ptr
1586 = &field_tgt_structelem_first->structelem_refcount;
1587
1588 if (i == field_tgt_clear)
1589 {
1590 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1591 field_tgt_structelem_first = NULL;
1592 }
1593 }
1594 if (i == field_tgt_clear)
1595 field_tgt_clear = FIELD_TGT_EMPTY;
1596 }
1597 else
1598 {
1599 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1600 k->tgt_offset = tgt_size;
1601 tgt_size += k->host_end - k->host_start;
1602 }
1603 /* First increment, from 0 to 1. gomp_increment_refcount
1604 encapsulates the different increment cases, so use this
1605 instead of directly setting 1 during initialization. */
1606 gomp_increment_refcount (k, refcount_set);
1607
1608 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1609 tgt->list[i].always_copy_from
1610 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1611 tgt->list[i].is_attach = false;
1612 tgt->list[i].offset = 0;
1613 tgt->list[i].length = k->host_end - k->host_start;
1614 tgt->refcount++;
1615 array->left = NULL;
1616 array->right = NULL;
1617 splay_tree_insert (mem_map, array);
1618 switch (kind & typemask)
1619 {
1620 case GOMP_MAP_ALLOC:
1621 case GOMP_MAP_FROM:
1622 case GOMP_MAP_FORCE_ALLOC:
1623 case GOMP_MAP_FORCE_FROM:
1624 case GOMP_MAP_ALWAYS_FROM:
1625 break;
1626 case GOMP_MAP_TO:
1627 case GOMP_MAP_TOFROM:
1628 case GOMP_MAP_FORCE_TO:
1629 case GOMP_MAP_FORCE_TOFROM:
1630 case GOMP_MAP_ALWAYS_TO:
1631 case GOMP_MAP_ALWAYS_TOFROM:
1632 gomp_copy_host2dev (devicep, aq,
1633 (void *) (tgt->tgt_start
1634 + k->tgt_offset),
1635 (void *) k->host_start,
1636 k->host_end - k->host_start,
1637 false, cbufp);
1638 break;
1639 case GOMP_MAP_POINTER:
1640 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1641 gomp_map_pointer
1642 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1643 k->tgt_offset, sizes[i], cbufp,
1644 ((kind & typemask)
1645 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1646 break;
1647 case GOMP_MAP_TO_PSET:
1648 gomp_copy_host2dev (devicep, aq,
1649 (void *) (tgt->tgt_start
1650 + k->tgt_offset),
1651 (void *) k->host_start,
1652 k->host_end - k->host_start,
1653 false, cbufp);
1654 tgt->list[i].has_null_ptr_assoc = false;
1655
1656 for (j = i + 1; j < mapnum; j++)
1657 {
1658 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1659 & typemask);
1660 if (!GOMP_MAP_POINTER_P (ptr_kind)
1661 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1662 break;
1663 else if ((uintptr_t) hostaddrs[j] < k->host_start
1664 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1665 > k->host_end))
1666 break;
1667 else
1668 {
1669 tgt->list[j].key = k;
1670 tgt->list[j].copy_from = false;
1671 tgt->list[j].always_copy_from = false;
1672 tgt->list[j].is_attach = false;
1673 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1674 /* For OpenMP, the use of refcount_sets causes
1675 errors if we set k->refcount = 1 above but also
1676 increment it again here, for decrementing will
1677 not properly match, since we decrement only once
1678 for each key's refcount. Therefore avoid this
1679 increment for OpenMP constructs. */
1680 if (!openmp_p)
1681 gomp_increment_refcount (k, refcount_set);
1682 gomp_map_pointer (tgt, aq,
1683 (uintptr_t) *(void **) hostaddrs[j],
1684 k->tgt_offset
1685 + ((uintptr_t) hostaddrs[j]
1686 - k->host_start),
1687 sizes[j], cbufp, false);
1688 }
1689 }
1690 i = j - 1;
1691 break;
1692 case GOMP_MAP_FORCE_PRESENT:
1693 {
1694 /* We already looked up the memory region above and it
1695 was missing. */
1696 size_t size = k->host_end - k->host_start;
1697 gomp_mutex_unlock (&devicep->lock);
1698 #ifdef HAVE_INTTYPES_H
1699 gomp_fatal ("present clause: !acc_is_present (%p, "
1700 "%"PRIu64" (0x%"PRIx64"))",
1701 (void *) k->host_start,
1702 (uint64_t) size, (uint64_t) size);
1703 #else
1704 gomp_fatal ("present clause: !acc_is_present (%p, "
1705 "%lu (0x%lx))", (void *) k->host_start,
1706 (unsigned long) size, (unsigned long) size);
1707 #endif
1708 }
1709 break;
1710 case GOMP_MAP_FORCE_DEVICEPTR:
1711 assert (k->host_end - k->host_start == sizeof (void *));
1712 gomp_copy_host2dev (devicep, aq,
1713 (void *) (tgt->tgt_start
1714 + k->tgt_offset),
1715 (void *) k->host_start,
1716 sizeof (void *), false, cbufp);
1717 break;
1718 default:
1719 gomp_mutex_unlock (&devicep->lock);
1720 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1721 kind);
1722 }
1723
1724 if (k->aux && k->aux->link_key)
1725 {
1726 /* Set link pointer on target to the device address of the
1727 mapped object. */
1728 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1729 /* We intentionally do not use coalescing here, as it's not
1730 data allocated by the current call to this function. */
1731 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1732 &tgt_addr, sizeof (void *), true, NULL);
1733 }
1734 array++;
1735 }
1736 }
1737 }
1738
1739 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1740 {
1741 for (i = 0; i < mapnum; i++)
1742 {
1743 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1744 gomp_copy_host2dev (devicep, aq,
1745 (void *) (tgt->tgt_start + i * sizeof (void *)),
1746 (void *) &cur_node.tgt_offset, sizeof (void *),
1747 true, cbufp);
1748 }
1749 }
1750
1751 if (cbufp)
1752 {
1753 /* See 'gomp_coalesce_buf_add'. */
1754 assert (!aq);
1755
1756 long c = 0;
1757 for (c = 0; c < cbuf.chunk_cnt; ++c)
1758 gomp_copy_host2dev (devicep, aq,
1759 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1760 (char *) cbuf.buf + (cbuf.chunks[c].start
1761 - cbuf.chunks[0].start),
1762 cbuf.chunks[c].end - cbuf.chunks[c].start,
1763 true, NULL);
1764 free (cbuf.buf);
1765 cbuf.buf = NULL;
1766 cbufp = NULL;
1767 }
1768
1769 /* If the variable from "omp target enter data" map-list was already mapped,
1770 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1771 gomp_exit_data. */
1772 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1773 {
1774 free (tgt);
1775 tgt = NULL;
1776 }
1777
1778 gomp_mutex_unlock (&devicep->lock);
1779 return tgt;
1780 }
1781
1782 static struct target_mem_desc *
1783 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1784 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1785 bool short_mapkind, htab_t *refcount_set,
1786 enum gomp_map_vars_kind pragma_kind)
1787 {
1788 /* This management of a local refcount_set is for convenience of callers
1789 who do not share a refcount_set over multiple map/unmap uses. */
1790 htab_t local_refcount_set = NULL;
1791 if (refcount_set == NULL)
1792 {
1793 local_refcount_set = htab_create (mapnum);
1794 refcount_set = &local_refcount_set;
1795 }
1796
1797 struct target_mem_desc *tgt;
1798 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1799 sizes, kinds, short_mapkind, refcount_set,
1800 pragma_kind);
1801 if (local_refcount_set)
1802 htab_free (local_refcount_set);
1803
1804 return tgt;
1805 }
1806
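/* OpenACC variant: map variables on DEVICEP, optionally on asynchronous
queue AQ. OpenACC handles its own reference counting, so no refcount set
is used, and GOMP_MAP_VARS_OPENACC is added to PRAGMA_KIND. */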
1807 attribute_hidden struct target_mem_desc *
1808 goacc_map_vars (struct gomp_device_descr *devicep,
1809 struct goacc_asyncqueue *aq, size_t mapnum,
1810 void **hostaddrs, void **devaddrs, size_t *sizes,
1811 void *kinds, bool short_mapkind,
1812 enum gomp_map_vars_kind pragma_kind)
1813 {
1814 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1815 sizes, kinds, short_mapkind, NULL,
1816 GOMP_MAP_VARS_OPENACC | pragma_kind);
1817 }
1818
1819 static void
1820 gomp_unmap_tgt (struct target_mem_desc *tgt)
1821 {
1822 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1823 if (tgt->tgt_end)
1824 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1825
1826 free (tgt->array);
1827 free (tgt);
1828 }
1829
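/* Drop one reference to TGT; when the last reference goes away, free its
device memory and the descriptor itself. Returns true if TGT was
actually unmapped. */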
1830 static bool
1831 gomp_unref_tgt (void *ptr)
1832 {
1833 bool is_tgt_unmapped = false;
1834
1835 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1836
1837 if (tgt->refcount > 1)
1838 tgt->refcount--;
1839 else
1840 {
1841 gomp_unmap_tgt (tgt);
1842 is_tgt_unmapped = true;
1843 }
1844
1845 return is_tgt_unmapped;
1846 }
1847
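/* Callback variant of gomp_unref_tgt for use with asynchronous queues;
the return value is discarded. */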
1848 static void
1849 gomp_unref_tgt_void (void *ptr)
1850 {
1851 (void) gomp_unref_tgt (ptr);
1852 }
1853
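/* Remove key K from splay tree SP. For an "omp declare target link"
mapping, re-insert the preserved link key; free any auxiliary data
attached to K. */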
1854 static void
1855 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1856 {
1857 splay_tree_remove (sp, k);
1858 if (k->aux)
1859 {
1860 if (k->aux->link_key)
1861 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1862 if (k->aux->attach_count)
1863 free (k->aux->attach_count);
1864 free (k->aux);
1865 k->aux = NULL;
1866 }
1867 }
1868
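/* Remove the mapping K from DEVICEP's memory map. For structure-element
mappings, all sibling keys sharing the same reference count are removed
together. The enclosing target_mem_desc is unreferenced directly, or, if
AQ is non-NULL, from a queue callback once pending operations complete. */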
1869 static inline __attribute__((always_inline)) bool
1870 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1871 struct goacc_asyncqueue *aq)
1872 {
1873 bool is_tgt_unmapped = false;
1874
1875 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1876 {
1877 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1878 /* Infer the splay_tree_key of the first structelem key using the
1879 pointer to the first structelem_refcount. */
1880 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1881 - offsetof (struct splay_tree_key_s,
1882 structelem_refcount));
1883 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1884
1885 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1886 with the splay_tree_keys embedded inside. */
1887 splay_tree_node node =
1888 (splay_tree_node) ((char *) k
1889 - offsetof (struct splay_tree_node_s, key));
1890 while (true)
1891 {
1892 /* Start from the _FIRST key and continue for all following
1893 sibling keys. */
1894 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1895 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1896 break;
1897 else
1898 k = &(++node)->key;
1899 }
1900 }
1901 else
1902 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1903
1904 if (aq)
1905 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1906 (void *) k->tgt);
1907 else
1908 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1909 return is_tgt_unmapped;
1910 }
1911
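/* Synchronously remove the mapping K from DEVICEP. Returns true if the
enclosing target_mem_desc was unmapped as a result. */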
1912 attribute_hidden bool
1913 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1914 {
1915 return gomp_remove_var_internal (devicep, k, NULL);
1916 }
1917
1918 /* Remove a variable asynchronously. This actually removes the variable
1919 mapping immediately, but retains the linked target_mem_desc until the
1920 asynchronous operation has completed (as it may still refer to target
1921 memory). The device lock must be held before entry, and remains locked on
1922 exit. */
1923
1924 attribute_hidden void
1925 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1926 struct goacc_asyncqueue *aq)
1927 {
1928 (void) gomp_remove_var_internal (devicep, k, aq);
1929 }
1930
1931 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1932 variables back from device to host; if it is false, it is assumed that this
1933 has been done already. */
1934
1935 static inline __attribute__((always_inline)) void
1936 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1937 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1938 {
1939 struct gomp_device_descr *devicep = tgt->device_descr;
1940
1941 if (tgt->list_count == 0)
1942 {
1943 free (tgt);
1944 return;
1945 }
1946
1947 gomp_mutex_lock (&devicep->lock);
1948 if (devicep->state == GOMP_DEVICE_FINALIZED)
1949 {
1950 gomp_mutex_unlock (&devicep->lock);
1951 free (tgt->array);
1952 free (tgt);
1953 return;
1954 }
1955
1956 size_t i;
1957
1958 /* We must perform detachments before any copies back to the host. */
1959 for (i = 0; i < tgt->list_count; i++)
1960 {
1961 splay_tree_key k = tgt->list[i].key;
1962
1963 if (k != NULL && tgt->list[i].is_attach)
1964 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1965 + tgt->list[i].offset,
1966 false, NULL);
1967 }
1968
1969 for (i = 0; i < tgt->list_count; i++)
1970 {
1971 splay_tree_key k = tgt->list[i].key;
1972 if (k == NULL)
1973 continue;
1974
1975 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1976 counts ('n->refcount', 'n->dynamic_refcount'). */
1977 if (tgt->list[i].is_attach)
1978 continue;
1979
1980 bool do_copy, do_remove;
1981 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1982
1983 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1984 || tgt->list[i].always_copy_from)
1985 gomp_copy_dev2host (devicep, aq,
1986 (void *) (k->host_start + tgt->list[i].offset),
1987 (void *) (k->tgt->tgt_start + k->tgt_offset
1988 + tgt->list[i].offset),
1989 tgt->list[i].length);
1990 if (do_remove)
1991 {
1992 struct target_mem_desc *k_tgt = k->tgt;
1993 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1994 /* It would be bad if TGT got unmapped while we're still iterating
1995 over its LIST_COUNT, and also expect to use it in the following
1996 code. */
1997 assert (!is_tgt_unmapped
1998 || k_tgt != tgt);
1999 }
2000 }
2001
2002 if (aq)
2003 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2004 (void *) tgt);
2005 else
2006 gomp_unref_tgt ((void *) tgt);
2007
2008 gomp_mutex_unlock (&devicep->lock);
2009 }
2010
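/* Wrapper around gomp_unmap_vars_internal for synchronous unmapping;
creates a temporary REFCOUNT_SET for the duration of the call when the
caller does not supply one. */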
2011 static void
2012 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2013 htab_t *refcount_set)
2014 {
2015 /* This management of a local refcount_set is for convenience of callers
2016 who do not share a refcount_set over multiple map/unmap uses. */
2017 htab_t local_refcount_set = NULL;
2018 if (refcount_set == NULL)
2019 {
2020 local_refcount_set = htab_create (tgt->list_count);
2021 refcount_set = &local_refcount_set;
2022 }
2023
2024 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2025
2026 if (local_refcount_set)
2027 htab_free (local_refcount_set);
2028 }
2029
2030 attribute_hidden void
2031 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2032 struct goacc_asyncqueue *aq)
2033 {
2034 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2035 }
2036
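/* Update already-mapped data between host and DEVICEP. For each non-zero-
sized entry in the map list, copy to and/or from the device according to
its kind, taking care not to overwrite pointers that are still attached. */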
2037 static void
2038 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2039 size_t *sizes, void *kinds, bool short_mapkind)
2040 {
2041 size_t i;
2042 struct splay_tree_key_s cur_node;
2043 const int typemask = short_mapkind ? 0xff : 0x7;
2044
2045 if (!devicep)
2046 return;
2047
2048 if (mapnum == 0)
2049 return;
2050
2051 gomp_mutex_lock (&devicep->lock);
2052 if (devicep->state == GOMP_DEVICE_FINALIZED)
2053 {
2054 gomp_mutex_unlock (&devicep->lock);
2055 return;
2056 }
2057
2058 for (i = 0; i < mapnum; i++)
2059 if (sizes[i])
2060 {
2061 cur_node.host_start = (uintptr_t) hostaddrs[i];
2062 cur_node.host_end = cur_node.host_start + sizes[i];
2063 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2064 if (n)
2065 {
2066 int kind = get_kind (short_mapkind, kinds, i);
2067 if (n->host_start > cur_node.host_start
2068 || n->host_end < cur_node.host_end)
2069 {
2070 gomp_mutex_unlock (&devicep->lock);
2071 gomp_fatal ("Trying to update [%p..%p) object when "
2072 "only [%p..%p) is mapped",
2073 (void *) cur_node.host_start,
2074 (void *) cur_node.host_end,
2075 (void *) n->host_start,
2076 (void *) n->host_end);
2077 }
2078
2079 if (n->aux && n->aux->attach_count)
2080 {
2081 uintptr_t addr = cur_node.host_start;
2082 while (addr < cur_node.host_end)
2083 {
2084 /* We have to be careful not to overwrite still attached
2085 pointers during host<->device updates. */
2086 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2087 if (n->aux->attach_count[i] == 0)
2088 {
2089 void *devaddr = (void *) (n->tgt->tgt_start
2090 + n->tgt_offset
2091 + addr - n->host_start);
2092 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2093 gomp_copy_host2dev (devicep, NULL,
2094 devaddr, (void *) addr,
2095 sizeof (void *), false, NULL);
2096 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2097 gomp_copy_dev2host (devicep, NULL,
2098 (void *) addr, devaddr,
2099 sizeof (void *));
2100 }
2101 addr += sizeof (void *);
2102 }
2103 }
2104 else
2105 {
2106 void *hostaddr = (void *) cur_node.host_start;
2107 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2108 + cur_node.host_start
2109 - n->host_start);
2110 size_t size = cur_node.host_end - cur_node.host_start;
2111
2112 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2113 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2114 false, NULL);
2115 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2116 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2117 }
2118 }
2119 }
2120 gomp_mutex_unlock (&devicep->lock);
2121 }
2122
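/* Return the offload ICV list item for device number DEV_NUM, or NULL if
none has been created yet. */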
2123 static struct gomp_offload_icv_list *
2124 gomp_get_offload_icv_item (int dev_num)
2125 {
2126 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2127 while (l != NULL && l->device_num != dev_num)
2128 l = l->next;
2129
2130 return l;
2131 }
2132
2133 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2134 depending on the device num and the variable hierarchy
2135 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2136 device and thus no item with that device number is contained in
2137 gomp_offload_icv_list, then a new item is created and added to the list. */
2138
2139 static struct gomp_offload_icvs *
2140 get_gomp_offload_icvs (int dev_num)
2141 {
2142 struct gomp_icv_list *dev
2143 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2144 struct gomp_icv_list *all
2145 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2146 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2147 struct gomp_offload_icv_list *offload_icvs
2148 = gomp_get_offload_icv_item (dev_num);
2149
2150 if (offload_icvs != NULL)
2151 return &offload_icvs->icvs;
2152
2153 struct gomp_offload_icv_list *new
2154 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2155
2156 new->device_num = dev_num;
2157 new->icvs.device_num = dev_num;
2158 new->next = gomp_offload_icv_list;
2159
2160 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2161 new->icvs.nteams = dev_x->icvs.nteams_var;
2162 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2163 new->icvs.nteams = dev->icvs.nteams_var;
2164 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2165 new->icvs.nteams = all->icvs.nteams_var;
2166 else
2167 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2168
2169 if (dev_x != NULL
2170 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2171 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2172 else if (dev != NULL
2173 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2174 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2175 else if (all != NULL
2176 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2177 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2178 else
2179 new->icvs.teams_thread_limit
2180 = gomp_default_icv_values.teams_thread_limit_var;
2181
2182 if (dev_x != NULL
2183 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2184 new->icvs.default_device = dev_x->icvs.default_device_var;
2185 else if (dev != NULL
2186 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2187 new->icvs.default_device = dev->icvs.default_device_var;
2188 else if (all != NULL
2189 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2190 new->icvs.default_device = all->icvs.default_device_var;
2191 else
2192 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2193
2194 gomp_offload_icv_list = new;
2195 return &new->icvs;
2196 }
2197
2198 /* Load the image pointed to by TARGET_DATA to the device specified by
2199 DEVICEP, and insert into the splay tree the mapping between addresses from
2200 HOST_TABLE and from the loaded target image. We rely on the host and
2201 device compilers emitting variables and functions in the same order. */
2202
2203 static void
2204 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2205 const void *host_table, const void *target_data,
2206 bool is_register_lock)
2207 {
2208 void **host_func_table = ((void ***) host_table)[0];
2209 void **host_funcs_end = ((void ***) host_table)[1];
2210 void **host_var_table = ((void ***) host_table)[2];
2211 void **host_vars_end = ((void ***) host_table)[3];
2212
2213 /* The func table contains only addresses, the var table contains addresses
2214 and corresponding sizes. */
2215 int num_funcs = host_funcs_end - host_func_table;
2216 int num_vars = (host_vars_end - host_var_table) / 2;
2217
2218 /* Load image to device and get target addresses for the image. */
2219 struct addr_pair *target_table = NULL;
2220 uint64_t *rev_target_fn_table = NULL;
2221 int i, num_target_entries;
2222
2223 /* With reverse offload, insert also target-host addresses. */
2224 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2225
2226 num_target_entries
2227 = devicep->load_image_func (devicep->target_id, version,
2228 target_data, &target_table,
2229 rev_lookup ? &rev_target_fn_table : NULL);
2230
2231 if (num_target_entries != num_funcs + num_vars
2232 /* "+1" due to the additional ICV struct. */
2233 && num_target_entries != num_funcs + num_vars + 1)
2234 {
2235 gomp_mutex_unlock (&devicep->lock);
2236 if (is_register_lock)
2237 gomp_mutex_unlock (&register_lock);
2238 gomp_fatal ("Cannot map target functions or variables"
2239 " (expected %u, have %u)", num_funcs + num_vars,
2240 num_target_entries);
2241 }
2242
2243 /* Insert host-target address mapping into splay tree. */
2244 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2245 /* "+1" due to the additional ICV struct. */
2246 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2247 * sizeof (*tgt->array));
2248 if (rev_target_fn_table)
2249 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2250 else
2251 tgt->rev_array = NULL;
2252 tgt->refcount = REFCOUNT_INFINITY;
2253 tgt->tgt_start = 0;
2254 tgt->tgt_end = 0;
2255 tgt->to_free = NULL;
2256 tgt->prev = NULL;
2257 tgt->list_count = 0;
2258 tgt->device_descr = devicep;
2259 splay_tree_node array = tgt->array;
2260 reverse_splay_tree_node rev_array = tgt->rev_array;
2261
2262 for (i = 0; i < num_funcs; i++)
2263 {
2264 splay_tree_key k = &array->key;
2265 k->host_start = (uintptr_t) host_func_table[i];
2266 k->host_end = k->host_start + 1;
2267 k->tgt = tgt;
2268 k->tgt_offset = target_table[i].start;
2269 k->refcount = REFCOUNT_INFINITY;
2270 k->dynamic_refcount = 0;
2271 k->aux = NULL;
2272 array->left = NULL;
2273 array->right = NULL;
2274 splay_tree_insert (&devicep->mem_map, array);
2275 if (rev_target_fn_table)
2276 {
2277 reverse_splay_tree_key k2 = &rev_array->key;
2278 k2->dev = rev_target_fn_table[i];
2279 k2->k = k;
2280 rev_array->left = NULL;
2281 rev_array->right = NULL;
2282 if (k2->dev != 0)
2283 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2284 rev_array++;
2285 }
2286 array++;
2287 }
2288
2289 /* Most significant bit of the size in host and target tables marks
2290 "omp declare target link" variables. */
2291 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2292 const uintptr_t size_mask = ~link_bit;
2293
2294 for (i = 0; i < num_vars; i++)
2295 {
2296 struct addr_pair *target_var = &target_table[num_funcs + i];
2297 uintptr_t target_size = target_var->end - target_var->start;
2298 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2299
2300 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2301 {
2302 gomp_mutex_unlock (&devicep->lock);
2303 if (is_register_lock)
2304 gomp_mutex_unlock (&register_lock);
2305 gomp_fatal ("Cannot map target variables (size mismatch)");
2306 }
2307
2308 splay_tree_key k = &array->key;
2309 k->host_start = (uintptr_t) host_var_table[i * 2];
2310 k->host_end
2311 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2312 k->tgt = tgt;
2313 k->tgt_offset = target_var->start;
2314 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2315 k->dynamic_refcount = 0;
2316 k->aux = NULL;
2317 array->left = NULL;
2318 array->right = NULL;
2319 splay_tree_insert (&devicep->mem_map, array);
2320 array++;
2321 }
2322
2323 /* The last entry is for the ICVs variable.
2324 Tolerate the case where the plugin does not return this entry. */
2325 if (num_funcs + num_vars < num_target_entries)
2326 {
2327 struct addr_pair *var = &target_table[num_funcs + num_vars];
2328
2329 /* Start address will be non-zero for the ICVs variable if
2330 the variable was found in this image. */
2331 if (var->start != 0)
2332 {
2333 /* The index of the devicep within devices[] is regarded as its
2334 'device number', which is different from the per-device type
2335 devicep->target_id. */
2336 int dev_num = (int) (devicep - &devices[0]);
2337 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2338 size_t var_size = var->end - var->start;
2339 if (var_size != sizeof (struct gomp_offload_icvs))
2340 {
2341 gomp_mutex_unlock (&devicep->lock);
2342 if (is_register_lock)
2343 gomp_mutex_unlock (&register_lock);
2344 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2345 "format");
2346 }
2347 /* Copy the ICVs variable to its place in device memory, thereby
2348 putting its device number into effect. */
2349 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2350 var_size, false, NULL);
2351 splay_tree_key k = &array->key;
2352 k->host_start = (uintptr_t) icvs;
2353 k->host_end =
2354 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2355 k->tgt = tgt;
2356 k->tgt_offset = var->start;
2357 k->refcount = REFCOUNT_INFINITY;
2358 k->dynamic_refcount = 0;
2359 k->aux = NULL;
2360 array->left = NULL;
2361 array->right = NULL;
2362 splay_tree_insert (&devicep->mem_map, array);
2363 array++;
2364 }
2365 }
2366
2367 free (target_table);
2368 }
2369
2370 /* Unload the mappings described by target_data from device DEVICE_P.
2371 The device must be locked. */
2372
2373 static void
2374 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2375 unsigned version,
2376 const void *host_table, const void *target_data)
2377 {
2378 void **host_func_table = ((void ***) host_table)[0];
2379 void **host_funcs_end = ((void ***) host_table)[1];
2380 void **host_var_table = ((void ***) host_table)[2];
2381 void **host_vars_end = ((void ***) host_table)[3];
2382
2383 /* The func table contains only addresses, the var table contains addresses
2384 and corresponding sizes. */
2385 int num_funcs = host_funcs_end - host_func_table;
2386 int num_vars = (host_vars_end - host_var_table) / 2;
2387
2388 struct splay_tree_key_s k;
2389 splay_tree_key node = NULL;
2390
2391 /* Find mapping at start of node array. */
2392 if (num_funcs || num_vars)
2393 {
2394 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2395 : (uintptr_t) host_var_table[0]);
2396 k.host_end = k.host_start + 1;
2397 node = splay_tree_lookup (&devicep->mem_map, &k);
2398 }
2399
2400 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2401 {
2402 gomp_mutex_unlock (&devicep->lock);
2403 gomp_fatal ("image unload fail");
2404 }
2405 if (devicep->mem_map_rev.root)
2406 {
2407 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2408 real allocation. */
2409 assert (node && node->tgt && node->tgt->rev_array);
2410 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2411 free (node->tgt->rev_array);
2412 devicep->mem_map_rev.root = NULL;
2413 }
2414
2415 /* Remove mappings from splay tree. */
2416 int i;
2417 for (i = 0; i < num_funcs; i++)
2418 {
2419 k.host_start = (uintptr_t) host_func_table[i];
2420 k.host_end = k.host_start + 1;
2421 splay_tree_remove (&devicep->mem_map, &k);
2422 }
2423
2424 /* Most significant bit of the size in host and target tables marks
2425 "omp declare target link" variables. */
2426 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2427 const uintptr_t size_mask = ~link_bit;
2428 bool is_tgt_unmapped = false;
2429
2430 for (i = 0; i < num_vars; i++)
2431 {
2432 k.host_start = (uintptr_t) host_var_table[i * 2];
2433 k.host_end
2434 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2435
2436 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2437 splay_tree_remove (&devicep->mem_map, &k);
2438 else
2439 {
2440 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2441 is_tgt_unmapped = gomp_remove_var (devicep, n);
2442 }
2443 }
2444
2445 if (node && !is_tgt_unmapped)
2446 {
2447 free (node->tgt);
2448 free (node);
2449 }
2450 }
2451
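/* Write into BUF (of SIZE bytes) a comma-separated list of the 'omp
requires' clauses set in REQUIRES_MASK, for use in diagnostics. */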
2452 static void
2453 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2454 {
2455 char *end = buf + size, *p = buf;
2456 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2457 p += snprintf (p, end - p, "unified_address");
2458 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2459 p += snprintf (p, end - p, "%sunified_shared_memory",
2460 (p == buf ? "" : ", "));
2461 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2462 p += snprintf (p, end - p, "%sreverse_offload",
2463 (p == buf ? "" : ", "));
2464 }
2465
2466 /* This function should be called from every offload image while loading.
2467 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2468 the target, and DATA. */
2469
2470 void
2471 GOMP_offload_register_ver (unsigned version, const void *host_table,
2472 int target_type, const void *data)
2473 {
2474 int i;
2475
2476 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2477 gomp_fatal ("Library too old for offload (version %u < %u)",
2478 GOMP_VERSION, GOMP_VERSION_LIB (version));
2479
2480 int omp_req;
2481 const void *target_data;
2482 if (GOMP_VERSION_LIB (version) > 1)
2483 {
2484 omp_req = (int) (size_t) ((void **) data)[0];
2485 target_data = &((void **) data)[1];
2486 }
2487 else
2488 {
2489 omp_req = 0;
2490 target_data = data;
2491 }
2492
2493 gomp_mutex_lock (&register_lock);
2494
2495 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2496 {
2497 char buf1[sizeof ("unified_address, unified_shared_memory, "
2498 "reverse_offload")];
2499 char buf2[sizeof ("unified_address, unified_shared_memory, "
2500 "reverse_offload")];
2501 gomp_requires_to_name (buf2, sizeof (buf2),
2502 omp_req != GOMP_REQUIRES_TARGET_USED
2503 ? omp_req : omp_requires_mask);
2504 if (omp_req != GOMP_REQUIRES_TARGET_USED
2505 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2506 {
2507 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2508 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2509 "in multiple compilation units: '%s' vs. '%s'",
2510 buf1, buf2);
2511 }
2512 else
2513 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2514 "some compilation units", buf2);
2515 }
2516 omp_requires_mask = omp_req;
2517
2518 /* Load image to all initialized devices. */
2519 for (i = 0; i < num_devices; i++)
2520 {
2521 struct gomp_device_descr *devicep = &devices[i];
2522 gomp_mutex_lock (&devicep->lock);
2523 if (devicep->type == target_type
2524 && devicep->state == GOMP_DEVICE_INITIALIZED)
2525 gomp_load_image_to_device (devicep, version,
2526 host_table, target_data, true);
2527 gomp_mutex_unlock (&devicep->lock);
2528 }
2529
2530 /* Insert image to array of pending images. */
2531 offload_images
2532 = gomp_realloc_unlock (offload_images,
2533 (num_offload_images + 1)
2534 * sizeof (struct offload_image_descr));
2535 offload_images[num_offload_images].version = version;
2536 offload_images[num_offload_images].type = target_type;
2537 offload_images[num_offload_images].host_table = host_table;
2538 offload_images[num_offload_images].target_data = target_data;
2539
2540 num_offload_images++;
2541 gomp_mutex_unlock (&register_lock);
2542 }
2543
2544 /* Legacy entry point. */
2545
2546 void
2547 GOMP_offload_register (const void *host_table, int target_type,
2548 const void *target_data)
2549 {
2550 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2551 }
2552
2553 /* This function should be called from every offload image while unloading.
2554 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2555 the target, and DATA. */
2556
2557 void
2558 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2559 int target_type, const void *data)
2560 {
2561 int i;
2562
2563 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2564 gomp_fatal ("Library too old for offload (version %u < %u)",
2565 GOMP_VERSION, GOMP_VERSION_LIB (version));
2566
2567 const void *target_data;
2568 if (GOMP_VERSION_LIB (version) > 1)
2569 target_data = &((void **) data)[1];
2570 else
2571 target_data = data;
2572
2573 gomp_mutex_lock (&register_lock);
2574
2575 /* Unload image from all initialized devices. */
2576 for (i = 0; i < num_devices; i++)
2577 {
2578 struct gomp_device_descr *devicep = &devices[i];
2579 gomp_mutex_lock (&devicep->lock);
2580 if (devicep->type == target_type
2581 && devicep->state == GOMP_DEVICE_INITIALIZED)
2582 gomp_unload_image_from_device (devicep, version,
2583 host_table, target_data);
2584 gomp_mutex_unlock (&devicep->lock);
2585 }
2586
2587 /* Remove image from array of pending images. */
2588 for (i = 0; i < num_offload_images; i++)
2589 if (offload_images[i].target_data == target_data)
2590 {
2591 offload_images[i] = offload_images[--num_offload_images];
2592 break;
2593 }
2594
2595 gomp_mutex_unlock (&register_lock);
2596 }
2597
2598 /* Legacy entry point. */
2599
2600 void
2601 GOMP_offload_unregister (const void *host_table, int target_type,
2602 const void *target_data)
2603 {
2604 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2605 }
2606
2607 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2608 must be locked on entry, and remains locked on return. */
2609
2610 attribute_hidden void
2611 gomp_init_device (struct gomp_device_descr *devicep)
2612 {
2613 int i;
2614 if (!devicep->init_device_func (devicep->target_id))
2615 {
2616 gomp_mutex_unlock (&devicep->lock);
2617 gomp_fatal ("device initialization failed");
2618 }
2619
2620 /* Load to device all images registered by the moment. */
2621 for (i = 0; i < num_offload_images; i++)
2622 {
2623 struct offload_image_descr *image = &offload_images[i];
2624 if (image->type == devicep->type)
2625 gomp_load_image_to_device (devicep, image->version,
2626 image->host_table, image->target_data,
2627 false);
2628 }
2629
2630 /* Initialize OpenACC asynchronous queues. */
2631 goacc_init_asyncqueues (devicep);
2632
2633 devicep->state = GOMP_DEVICE_INITIALIZED;
2634 }
2635
2636 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2637 must be locked on entry, and remains locked on return. */
2638
2639 attribute_hidden bool
2640 gomp_fini_device (struct gomp_device_descr *devicep)
2641 {
2642 bool ret = goacc_fini_asyncqueues (devicep);
2643 ret &= devicep->fini_device_func (devicep->target_id);
2644 devicep->state = GOMP_DEVICE_FINALIZED;
2645 return ret;
2646 }
2647
2648 attribute_hidden void
2649 gomp_unload_device (struct gomp_device_descr *devicep)
2650 {
2651 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2652 {
2653 unsigned i;
2654
2655 /* Unload from device all images registered at the moment. */
2656 for (i = 0; i < num_offload_images; i++)
2657 {
2658 struct offload_image_descr *image = &offload_images[i];
2659 if (image->type == devicep->type)
2660 gomp_unload_image_from_device (devicep, image->version,
2661 image->host_table,
2662 image->target_data);
2663 }
2664 }
2665 }
2666
2667 /* Host fallback for GOMP_target{,_ext} routines. */
2668
2669 static void
2670 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2671 struct gomp_device_descr *devicep, void **args)
2672 {
2673 struct gomp_thread old_thr, *thr = gomp_thread ();
2674
2675 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2676 && devicep != NULL)
2677 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2678 "be used for offloading");
2679
2680 old_thr = *thr;
2681 memset (thr, '\0', sizeof (*thr));
2682 if (gomp_places_list)
2683 {
2684 thr->place = old_thr.place;
2685 thr->ts.place_partition_len = gomp_places_list_len;
2686 }
2687 if (args)
2688 while (*args)
2689 {
2690 intptr_t id = (intptr_t) *args++, val;
2691 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2692 val = (intptr_t) *args++;
2693 else
2694 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2695 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2696 continue;
2697 id &= GOMP_TARGET_ARG_ID_MASK;
2698 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2699 continue;
2700 val = val > INT_MAX ? INT_MAX : val;
2701 if (val)
2702 gomp_icv (true)->thread_limit_var = val;
2703 break;
2704 }
2705
2706 fn (hostaddrs);
2707 gomp_free_thread (thr);
2708 *thr = old_thr;
2709 }
2710
2711 /* Calculate alignment and size requirements of a private copy of data shared
2712 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2713
2714 static inline void
2715 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2716 unsigned short *kinds, size_t *tgt_align,
2717 size_t *tgt_size)
2718 {
2719 size_t i;
2720 for (i = 0; i < mapnum; i++)
2721 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2722 {
2723 size_t align = (size_t) 1 << (kinds[i] >> 8);
2724 if (*tgt_align < align)
2725 *tgt_align = align;
2726 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2727 *tgt_size += sizes[i];
2728 }
2729 }
2730
2731 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2732
2733 static inline void
2734 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2735 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2736 size_t tgt_size)
2737 {
2738 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2739 if (al)
2740 tgt += tgt_align - al;
2741 tgt_size = 0;
2742 size_t i;
2743 for (i = 0; i < mapnum; i++)
2744 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2745 {
2746 size_t align = (size_t) 1 << (kinds[i] >> 8);
2747 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2748 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2749 hostaddrs[i] = tgt + tgt_size;
2750 tgt_size = tgt_size + sizes[i];
2751 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2752 {
2753 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2754 ++i;
2755 }
2756 }
2757 }
2758
2759 /* Helper function of GOMP_target{,_ext} routines. */
2760
2761 static void *
2762 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2763 void (*host_fn) (void *))
2764 {
2765 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2766 return (void *) host_fn;
2767 else
2768 {
2769 gomp_mutex_lock (&devicep->lock);
2770 if (devicep->state == GOMP_DEVICE_FINALIZED)
2771 {
2772 gomp_mutex_unlock (&devicep->lock);
2773 return NULL;
2774 }
2775
2776 struct splay_tree_key_s k;
2777 k.host_start = (uintptr_t) host_fn;
2778 k.host_end = k.host_start + 1;
2779 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2780 gomp_mutex_unlock (&devicep->lock);
2781 if (tgt_fn == NULL)
2782 return NULL;
2783
2784 return (void *) tgt_fn->tgt_offset;
2785 }
2786 }
2787
2788 /* Called when encountering a target directive. If DEVICE
2789 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2790 GOMP_DEVICE_HOST_FALLBACK (or any value
2791 larger than last available hw device), use host fallback.
2792 FN is address of host code, UNUSED is part of the current ABI, but
2793 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2794 with MAPNUM entries, with addresses of the host objects,
2795 sizes of the host objects (resp. for pointer kind pointer bias
2796 and assumed sizeof (void *) size) and kinds. */
2797
2798 void
2799 GOMP_target (int device, void (*fn) (void *), const void *unused,
2800 size_t mapnum, void **hostaddrs, size_t *sizes,
2801 unsigned char *kinds)
2802 {
2803 struct gomp_device_descr *devicep = resolve_device (device, true);
2804
2805 void *fn_addr;
2806 if (devicep == NULL
2807 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2808 /* All shared memory devices should use the GOMP_target_ext function. */
2809 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2810 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2811 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2812
2813 htab_t refcount_set = htab_create (mapnum);
2814 struct target_mem_desc *tgt_vars
2815 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2816 &refcount_set, GOMP_MAP_VARS_TARGET);
2817 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2818 NULL);
2819 htab_clear (refcount_set);
2820 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2821 htab_free (refcount_set);
2822 }
2823
2824 static inline unsigned int
2825 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2826 {
2827 /* If we cannot run asynchronously, simply ignore nowait. */
2828 if (devicep != NULL && devicep->async_run_func == NULL)
2829 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2830
2831 return flags;
2832 }
2833
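/* Copy the ICV block for DEVICE back from device memory to its host copy,
if the ICV variable is mapped on that device. */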
2834 static void
2835 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2836 {
2837 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2838 if (item == NULL)
2839 return;
2840
2841 void *host_ptr = &item->icvs;
2842 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2843 if (dev_ptr != NULL)
2844 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2845 sizeof (struct gomp_offload_icvs));
2846 }
2847
2848 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2849 and several arguments have been added:
2850 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2851 DEPEND is array of dependencies, see GOMP_task for details.
2852
2853 ARGS is a pointer to an array consisting of a variable number of both
2854 device-independent and device-specific arguments, each of which can take
2855 one or two elements where the first specifies for which device it is intended,
2856 the type and optionally also the value. If the value is not present in the
2857 first element, the whole second element contains the actual value. The last
2858 element of the array is a single NULL. Among the device-independent ones are for example
2859 NUM_TEAMS and THREAD_LIMIT.
2860
2861 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2862 that value, or 1 if teams construct is not present, or 0, if
2863 teams construct does not have num_teams clause and so the choice is
2864 implementation defined, and -1 if it can't be determined on the host
2865 what value will GOMP_teams have on the device.
2866 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2867 body with that value, or 0, if teams construct does not have thread_limit
2868 clause or the teams construct is not present, or -1 if it can't be
2869 determined on the host what value will GOMP_teams have on the device. */
2870
2871 void
2872 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2873 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2874 unsigned int flags, void **depend, void **args)
2875 {
2876 struct gomp_device_descr *devicep = resolve_device (device, true);
2877 size_t tgt_align = 0, tgt_size = 0;
2878 bool fpc_done = false;
2879
2880 /* Obtain the original TEAMS and THREADS values from ARGS. */
2881 intptr_t orig_teams = 1, orig_threads = 0;
2882 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2883 void **tmpargs = args;
2884 while (*tmpargs)
2885 {
2886 intptr_t id = (intptr_t) *tmpargs++, val;
2887 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2888 {
2889 val = (intptr_t) *tmpargs++;
2890 len = 2;
2891 }
2892 else
2893 {
2894 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2895 len = 1;
2896 }
2897 num_args += len;
2898 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2899 continue;
2900 val = val > INT_MAX ? INT_MAX : val;
2901 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2902 {
2903 orig_teams = val;
2904 teams_len = len;
2905 }
2906 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2907 {
2908 orig_threads = val;
2909 threads_len = len;
2910 }
2911 }
2912
2913 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2914 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2915 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2916 value could not be determined. No change.
2917 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2918 Set device-specific value.
2919 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2920 No change. */
2921 if (orig_teams == -2)
2922 new_teams = 1;
2923 else if (orig_teams == 0)
2924 {
2925 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2926 if (item != NULL)
2927 new_teams = item->icvs.nteams;
2928 }
2929 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2930 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2931 e.g. a THREAD_LIMIT clause. */
2932 if (orig_teams > -2 && orig_threads == 0)
2933 {
2934 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2935 if (item != NULL)
2936 new_threads = item->icvs.teams_thread_limit;
2937 }
2938
2939 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2940 updated. */
2941 void **new_args = args;
2942 if (orig_teams != new_teams || orig_threads != new_threads)
2943 {
2944 size_t tms_len = (orig_teams == new_teams
2945 ? teams_len
2946 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
2947 ? 1 : 2));
2948 size_t ths_len = (orig_threads == new_threads
2949 ? threads_len
2950 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
2951 ? 1 : 2));
2952 /* One additional item after the last arg must be NULL. */
2953 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
2954 + ths_len + 1;
2955 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
2956
2957 tmpargs = args;
2958 void **tmp_new_args = new_args;
2959 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
2960 too if they have not been changed and skipped otherwise. */
2961 while (*tmpargs)
2962 {
2963 intptr_t id = (intptr_t) *tmpargs;
2964 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
2965 && orig_teams != new_teams)
2966 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
2967 && orig_threads != new_threads))
2968 {
2969 tmpargs++;
2970 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2971 tmpargs++;
2972 }
2973 else
2974 {
2975 *tmp_new_args++ = *tmpargs++;
2976 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2977 *tmp_new_args++ = *tmpargs++;
2978 }
2979 }
2980
2981 /* Add the new TEAMS arg to the new args list if it has been changed. */
2982 if (orig_teams != new_teams)
2983 {
2984 intptr_t new_val = new_teams;
2985 if (tms_len == 1)
2986 {
2987 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
2988 | GOMP_TARGET_ARG_NUM_TEAMS;
2989 *tmp_new_args++ = (void *) new_val;
2990 }
2991 else
2992 {
2993 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
2994 | GOMP_TARGET_ARG_NUM_TEAMS);
2995 *tmp_new_args++ = (void *) new_val;
2996 }
2997 }
2998
2999 /* Add the new THREADS arg to the new args list if it has been changed. */
3000 if (orig_threads != new_threads)
3001 {
3002 intptr_t new_val = new_threads;
3003 if (ths_len == 1)
3004 {
3005 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3006 | GOMP_TARGET_ARG_THREAD_LIMIT;
3007 *tmp_new_args++ = (void *) new_val;
3008 }
3009 else
3010 {
3011 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3012 | GOMP_TARGET_ARG_THREAD_LIMIT);
3013 *tmp_new_args++ = (void *) new_val;
3014 }
3015 }
3016
3017 *tmp_new_args = NULL;
3018 }
3019
3020 flags = clear_unsupported_flags (devicep, flags);
3021
3022 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3023 {
3024 struct gomp_thread *thr = gomp_thread ();
3025 /* Create a team if we don't have any around, as nowait
3026 target tasks make sense to run asynchronously even when
3027 outside of any parallel. */
3028 if (__builtin_expect (thr->ts.team == NULL, 0))
3029 {
3030 struct gomp_team *team = gomp_new_team (1);
3031 struct gomp_task *task = thr->task;
3032 struct gomp_task **implicit_task = &task;
3033 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3034 team->prev_ts = thr->ts;
3035 thr->ts.team = team;
3036 thr->ts.team_id = 0;
3037 thr->ts.work_share = &team->work_shares[0];
3038 thr->ts.last_work_share = NULL;
3039 #ifdef HAVE_SYNC_BUILTINS
3040 thr->ts.single_count = 0;
3041 #endif
3042 thr->ts.static_trip = 0;
3043 thr->task = &team->implicit_task[0];
3044 gomp_init_task (thr->task, NULL, icv);
3045 while (*implicit_task
3046 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3047 implicit_task = &(*implicit_task)->parent;
3048 if (*implicit_task)
3049 {
3050 thr->task = *implicit_task;
3051 gomp_end_task ();
3052 free (*implicit_task);
3053 thr->task = &team->implicit_task[0];
3054 }
3055 else
3056 pthread_setspecific (gomp_thread_destructor, thr);
3057 if (implicit_task != &task)
3058 {
3059 *implicit_task = thr->task;
3060 thr->task = task;
3061 }
3062 }
3063 if (thr->ts.team
3064 && !thr->task->final_task)
3065 {
3066 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3067 sizes, kinds, flags, depend, new_args,
3068 GOMP_TARGET_TASK_BEFORE_MAP);
3069 return;
3070 }
3071 }
3072
3073 /* If there are depend clauses, but nowait is not present
3074 (or we are in a final task), block the parent task until the
3075 dependencies are resolved and then just continue with the rest
3076 of the function as if it is a merged task. */
3077 if (depend != NULL)
3078 {
3079 struct gomp_thread *thr = gomp_thread ();
3080 if (thr->task && thr->task->depend_hash)
3081 {
3082 /* If we might need to wait, copy firstprivate now. */
3083 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3084 &tgt_align, &tgt_size);
3085 if (tgt_align)
3086 {
3087 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3088 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3089 tgt_align, tgt_size);
3090 }
3091 fpc_done = true;
3092 gomp_task_maybe_wait_for_dependencies (depend);
3093 }
3094 }
3095
3096 void *fn_addr;
3097 if (devicep == NULL
3098 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3099 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3100 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3101 {
3102 if (!fpc_done)
3103 {
3104 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3105 &tgt_align, &tgt_size);
3106 if (tgt_align)
3107 {
3108 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3109 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3110 tgt_align, tgt_size);
3111 }
3112 }
3113 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3114 return;
3115 }
3116
3117 struct target_mem_desc *tgt_vars;
3118 htab_t refcount_set = NULL;
3119
3120 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3121 {
3122 if (!fpc_done)
3123 {
3124 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3125 &tgt_align, &tgt_size);
3126 if (tgt_align)
3127 {
3128 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3129 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3130 tgt_align, tgt_size);
3131 }
3132 }
3133 tgt_vars = NULL;
3134 }
3135 else
3136 {
3137 refcount_set = htab_create (mapnum);
3138 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3139 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3140 }
3141 devicep->run_func (devicep->target_id, fn_addr,
3142 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3143 new_args);
3144 if (tgt_vars)
3145 {
3146 htab_clear (refcount_set);
3147 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3148 }
3149 if (refcount_set)
3150 htab_free (refcount_set);
3151
3152 /* Copy back ICVs from device to host.
3153 HOST_PTR is expected to exist since it was added in
3154 gomp_load_image_to_device if not already available. */
3155 gomp_copy_back_icvs (devicep, device);
3156
3157 }
3158
3159
3160 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3161 keeping track of all variable handling - assuming that reverse offload occurs
3162 only very rarely. The downside is that the reverse search is slow. */
3163
3164 struct gomp_splay_tree_rev_lookup_data {
3165 uintptr_t tgt_start;
3166 uintptr_t tgt_end;
3167 splay_tree_key key;
3168 };
3169
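/* Callback for splay_tree_foreach_lazy: if KEY's device address range
matches or overlaps the range searched for, record it in DATA and return
1 so the traversal can stop; otherwise return 0. */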
3170 static int
3171 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3172 {
3173 struct gomp_splay_tree_rev_lookup_data *data;
3174 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3175 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3176
3177 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3178 return 0;
3179
3180 size_t j;
3181 for (j = 0; j < key->tgt->list_count; j++)
3182 if (key->tgt->list[j].key == key)
3183 break;
3184 assert (j < key->tgt->list_count);
3185 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3186
3187 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3188 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3189 {
3190 data->key = key;
3191 return 1;
3192 }
3193 return 0;
3194 }
3195
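/* Find the splay tree key whose device range covers [TGT_START, TGT_END)
by walking MEM_MAP. For an empty range, first probe the byte just after
TGT_START and, unless ZERO_LEN, also the byte just before it. */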
3196 static inline splay_tree_key
3197 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3198 bool zero_len)
3199 {
3200 struct gomp_splay_tree_rev_lookup_data data;
3201 data.key = NULL;
3202 data.tgt_start = tgt_start;
3203 data.tgt_end = tgt_end;
3204
3205 if (tgt_start != tgt_end)
3206 {
3207 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3208 return data.key;
3209 }
3210
3211 data.tgt_end++;
3212 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3213 if (data.key != NULL || zero_len)
3214 return data.key;
3215 data.tgt_end--;
3216
3217 data.tgt_start--;
3218 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3219 return data.key;
3220 }
3221
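/* Per-mapping bookkeeping for reverse offload: the original device address,
whether an existing mapping (or earlier map entry) was found for it, and
whether a temporary aligned buffer was allocated on the host. */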
3222 struct cpy_data
3223 {
3224 uint64_t devaddr;
3225 bool present, aligned;
3226 };
3227
3228
3229 /* Search the just-mapped reverse-offload data; returns the index if found,
3230 otherwise >= n. */
3231
3232 static inline int
3233 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3234 unsigned short *kinds, uint64_t *sizes, size_t n,
3235 uint64_t tgt_start, uint64_t tgt_end)
3236 {
3237 const bool short_mapkind = true;
3238 const int typemask = short_mapkind ? 0xff : 0x7;
3239 size_t i;
3240 for (i = 0; i < n; i++)
3241 {
3242 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3243 == GOMP_MAP_STRUCT);
3244 uint64_t dev_end;
3245 if (!is_struct)
3246 dev_end = d[i].devaddr + sizes[i];
3247 else
3248 {
3249 if (i + sizes[i] < n)
3250 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3251 else
3252 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3253 }
3254 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3255 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3256 break;
3257 if (is_struct)
3258 i += sizes[i];
3259 }
3260 return i;
3261 }
3262
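/* Wrapper around gomp_map_cdata_lookup_int: for an empty range also probe
the byte just after and, if that fails and !ZERO_LEN, the byte just before
TGT_START, mirroring gomp_map_rev_lookup. */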
3263 static inline int
3264 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3265 unsigned short *kinds, uint64_t *sizes,
3266 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3267 bool zero_len)
3268 {
3269 size_t i;
3270 if (tgt_start != tgt_end)
3271 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3272 tgt_start, tgt_end);
3273 tgt_end++;
3274 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3275 tgt_start, tgt_end);
3276 if (i < n || zero_len)
3277 return i;
3278 tgt_end--;
3279
3280 tgt_start--;
3281 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3282 tgt_start, tgt_end);
3283 }
3284
3285 /* Handle reverse offload. This is called by the device plugins for a
3286 reverse offload; it is not called if the outer target runs on the host.
3287 The mapping is simplified: device-affecting constructs (except for target
3288 with device(ancestor:1)) must not be encountered; in particular not
3289 target (enter/exit) data. */
3290
3291 void
3292 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3293 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3294 void (*dev_to_host_cpy) (void *, const void *, size_t, void*),
3295 void (*host_to_dev_cpy) (void *, const void *, size_t, void*),
3296 void *token)
3297 {
3298 /* Return early if there is no offload code. */
3299 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3300 return;
3301 /* Currently, this fails because of calculate_firstprivate_requirements
3302 below; it could be fixed but additional code needs to be updated to
3303 handle 32bit hosts - thus, it is not worthwhile. */
3304 if (sizeof (void *) != sizeof (uint64_t))
3305 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3306
3307 struct cpy_data *cdata = NULL;
3308 uint64_t *devaddrs;
3309 uint64_t *sizes;
3310 unsigned short *kinds;
3311 const bool short_mapkind = true;
3312 const int typemask = short_mapkind ? 0xff : 0x7;
3313 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3314
3315 reverse_splay_tree_key n;
3316 struct reverse_splay_tree_key_s k;
3317 k.dev = fn_ptr;
3318
3319 gomp_mutex_lock (&devicep->lock);
3320 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3321 gomp_mutex_unlock (&devicep->lock);
3322
3323 if (n == NULL)
3324 gomp_fatal ("Cannot find reverse-offload function");
3325 void (*host_fn)() = (void (*)()) n->k->host_start;
3326
3327 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3328 {
3329 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3330 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3331 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3332 }
3333 else
3334 {
3335 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3336 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3337 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3338 if (dev_to_host_cpy)
3339 {
3340 dev_to_host_cpy (devaddrs, (const void *) (uintptr_t) devaddrs_ptr,
3341 mapnum * sizeof (uint64_t), token);
3342 dev_to_host_cpy (sizes, (const void *) (uintptr_t) sizes_ptr,
3343 mapnum * sizeof (uint64_t), token);
3344 dev_to_host_cpy (kinds, (const void *) (uintptr_t) kinds_ptr,
3345 mapnum * sizeof (unsigned short), token);
3346 }
3347 else
3348 {
3349 gomp_copy_dev2host (devicep, NULL, devaddrs,
3350 (const void *) (uintptr_t) devaddrs_ptr,
3351 mapnum * sizeof (uint64_t));
3352 gomp_copy_dev2host (devicep, NULL, sizes,
3353 (const void *) (uintptr_t) sizes_ptr,
3354 mapnum * sizeof (uint64_t));
3355 gomp_copy_dev2host (devicep, NULL, kinds, (const void *) (uintptr_t) kinds_ptr,
3356 mapnum * sizeof (unsigned short));
3357 }
3358 }
3359
3360 size_t tgt_align = 0, tgt_size = 0;
3361
3362 /* If actually executed on 32bit systems, the casts lead to wrong code;
3363 but 32bit with offloading is not supported; see top of this function. */
3364 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3365 (void *) (uintptr_t) kinds,
3366 &tgt_align, &tgt_size);
3367
3368 if (tgt_align)
3369 {
3370 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3371 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3372 if (al)
3373 tgt += tgt_align - al;
3374 tgt_size = 0;
3375 for (uint64_t i = 0; i < mapnum; i++)
3376 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3377 && devaddrs[i] != 0)
3378 {
3379 size_t align = (size_t) 1 << (kinds[i] >> 8);
3380 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3381 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3382 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3383 (size_t) sizes[i]);
3384 else if (dev_to_host_cpy)
3385 dev_to_host_cpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3386 (size_t) sizes[i], token);
3387 else
3388 gomp_copy_dev2host (devicep, NULL, tgt + tgt_size,
3389 (void *) (uintptr_t) devaddrs[i],
3390 (size_t) sizes[i]);
3391 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3392 tgt_size = tgt_size + sizes[i];
3393 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3394 && i + 1 < mapnum
3395 && ((get_kind (short_mapkind, kinds, i) & typemask)
3396 == GOMP_MAP_ATTACH))
3397 {
3398 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3399 = (uint64_t) devaddrs[i];
3400 ++i;
3401 }
3402 }
3403 }
3404
3405 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3406 {
3407 size_t j, struct_cpy = 0;
3408 splay_tree_key n2;
3409 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3410 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3411 gomp_mutex_lock (&devicep->lock);
3412 for (uint64_t i = 0; i < mapnum; i++)
3413 {
3414 if (devaddrs[i] == 0)
3415 continue;
3416 n = NULL;
3417 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3418 switch (kind)
3419 {
3420 case GOMP_MAP_FIRSTPRIVATE:
3421 case GOMP_MAP_FIRSTPRIVATE_INT:
3422 continue;
3423
3424 case GOMP_MAP_DELETE:
3425 case GOMP_MAP_RELEASE:
3426 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3427 /* Assume it is present; look it up - but ignore otherwise. */
3428 case GOMP_MAP_ALLOC:
3429 case GOMP_MAP_FROM:
3430 case GOMP_MAP_FORCE_ALLOC:
3431 case GOMP_MAP_FORCE_FROM:
3432 case GOMP_MAP_ALWAYS_FROM:
3433 case GOMP_MAP_TO:
3434 case GOMP_MAP_TOFROM:
3435 case GOMP_MAP_FORCE_TO:
3436 case GOMP_MAP_FORCE_TOFROM:
3437 case GOMP_MAP_ALWAYS_TO:
3438 case GOMP_MAP_ALWAYS_TOFROM:
3439 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3440 cdata[i].devaddr = devaddrs[i];
3441 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3442 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3443 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3444 devaddrs[i],
3445 devaddrs[i] + sizes[i], zero_len);
3446 if (j < i)
3447 {
3448 n2 = NULL;
3449 cdata[i].present = true;
3450 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3451 }
3452 else
3453 {
3454 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3455 devaddrs[i],
3456 devaddrs[i] + sizes[i], zero_len);
3457 cdata[i].present = n2 != NULL;
3458 }
3459 if (!cdata[i].present
3460 && kind != GOMP_MAP_DELETE
3461 && kind != GOMP_MAP_RELEASE
3462 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3463 {
3464 cdata[i].aligned = true;
3465 size_t align = (size_t) 1 << (kinds[i] >> 8);
3466 devaddrs[i]
3467 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3468 sizes[i]);
3469 }
3470 else if (n2 != NULL)
3471 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3472 - (n2->tgt->tgt_start + n2->tgt_offset));
3473 if (((!cdata[i].present || struct_cpy)
3474 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3475 || kind == GOMP_MAP_FORCE_TO
3476 || kind == GOMP_MAP_FORCE_TOFROM
3477 || kind == GOMP_MAP_ALWAYS_TO
3478 || kind == GOMP_MAP_ALWAYS_TOFROM)
3479 {
3480 if (dev_to_host_cpy)
3481 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3482 (void *) (uintptr_t) cdata[i].devaddr,
3483 sizes[i], token);
3484 else
3485 gomp_copy_dev2host (devicep, NULL,
3486 (void *) (uintptr_t) devaddrs[i],
3487 (void *) (uintptr_t) cdata[i].devaddr,
3488 sizes[i]);
3489 }
3490 if (struct_cpy)
3491 struct_cpy--;
3492 break;
3493 case GOMP_MAP_ATTACH:
3494 case GOMP_MAP_POINTER:
3495 case GOMP_MAP_ALWAYS_POINTER:
3496 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3497 devaddrs[i] + sizes[i],
3498 devaddrs[i] + sizes[i]
3499 + sizeof (void*), false);
3500 cdata[i].present = n2 != NULL;
3501 cdata[i].devaddr = devaddrs[i];
3502 if (n2)
3503 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3504 - (n2->tgt->tgt_start + n2->tgt_offset));
3505 else
3506 {
3507 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3508 devaddrs[i] + sizes[i],
3509 devaddrs[i] + sizes[i]
3510 + sizeof (void*), false);
3511 if (j < i)
3512 {
3513 cdata[i].present = true;
3514 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3515 - cdata[j].devaddr);
3516 }
3517 }
3518 if (!cdata[i].present)
3519 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3520 /* Assume that when present, the pointer is already correct. */
3521 if (!n2)
3522 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3523 = devaddrs[i-1];
3524 break;
3525 case GOMP_MAP_TO_PSET:
3526 /* Assume that when present, the pointers are fine and no 'to:'
3527 is required. */
3528 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3529 devaddrs[i], devaddrs[i] + sizes[i],
3530 false);
3531 cdata[i].present = n2 != NULL;
3532 cdata[i].devaddr = devaddrs[i];
3533 if (n2)
3534 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3535 - (n2->tgt->tgt_start + n2->tgt_offset));
3536 else
3537 {
3538 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3539 devaddrs[i],
3540 devaddrs[i] + sizes[i], false);
3541 if (j < i)
3542 {
3543 cdata[i].present = true;
3544 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3545 - cdata[j].devaddr);
3546 }
3547 }
3548 if (!cdata[i].present)
3549 {
3550 cdata[i].aligned = true;
3551 size_t align = (size_t) 1 << (kinds[i] >> 8);
3552 devaddrs[i]
3553 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3554 sizes[i]);
3555 if (dev_to_host_cpy)
3556 dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i],
3557 (void *) (uintptr_t) cdata[i].devaddr,
3558 sizes[i], token);
3559 else
3560 gomp_copy_dev2host (devicep, NULL,
3561 (void *) (uintptr_t) devaddrs[i],
3562 (void *) (uintptr_t) cdata[i].devaddr,
3563 sizes[i]);
3564 }
3565 for (j = i + 1; j < mapnum; j++)
3566 {
3567 kind = get_kind (short_mapkind, kinds, j) & typemask;
3568 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3569 && !GOMP_MAP_POINTER_P (kind))
3570 break;
3571 if (devaddrs[j] < devaddrs[i])
3572 break;
3573 if (cdata[i].present)
3574 continue;
3575 if (devaddrs[j] == 0)
3576 {
3577 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3578 continue;
3579 }
3580 int k;
3581 n2 = NULL;
3582 cdata[i].present = true;
3583 cdata[j].devaddr = devaddrs[j];
3584 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3585 devaddrs[j],
3586 devaddrs[j] + sizeof (void*),
3587 false);
3588 if (k < j)
3589 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3590 - cdata[k].devaddr);
3591 else
3592 {
3593 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3594 devaddrs[j],
3595 devaddrs[j] + sizeof (void*),
3596 false);
3597 if (n2 == NULL)
3598 {
3599 gomp_mutex_unlock (&devicep->lock);
3600 gomp_fatal ("Pointer target wasn't mapped");
3601 }
3602 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3603 - (n2->tgt->tgt_start + n2->tgt_offset));
3604 }
3605 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3606 = (void *) (uintptr_t) devaddrs[j];
3607 }
3608 i = j - 1;
3609 break;
3610 case GOMP_MAP_STRUCT:
3611 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3612 devaddrs[i + sizes[i]]
3613 + sizes[i + sizes[i]], false);
3614 cdata[i].present = n2 != NULL;
3615 cdata[i].devaddr = devaddrs[i];
3616 struct_cpy = cdata[i].present ? 0 : sizes[i];
3617 if (!n2)
3618 {
3619 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3620 - devaddrs[i+1]
3621 + sizes[i + sizes[i]]);
3622 size_t align = (size_t) 1 << (kinds[i] >> 8);
3623 cdata[i].aligned = true;
3624 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3625 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3626 }
3627 else
3628 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3629 - (n2->tgt->tgt_start + n2->tgt_offset));
3630 break;
3631 default:
3632 gomp_mutex_unlock (&devicep->lock);
3633 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3634 }
3635 }
3636 gomp_mutex_unlock (&devicep->lock);
3637 }
3638
3639 host_fn (devaddrs);
3640
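  /* After the host function returns, copy 'from'-style mappings back to the
     device and release any temporary host buffers allocated above.  */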
3641 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3642 {
3643 uint64_t struct_cpy = 0;
3644 bool clean_struct = false;
3645 for (uint64_t i = 0; i < mapnum; i++)
3646 {
3647 if (cdata[i].devaddr == 0)
3648 continue;
3649 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3650 bool copy = !cdata[i].present || struct_cpy;
3651 switch (kind)
3652 {
3653 case GOMP_MAP_FORCE_FROM:
3654 case GOMP_MAP_FORCE_TOFROM:
3655 case GOMP_MAP_ALWAYS_FROM:
3656 case GOMP_MAP_ALWAYS_TOFROM:
3657 copy = true;
3658 /* FALLTHRU */
3659 case GOMP_MAP_FROM:
3660 case GOMP_MAP_TOFROM:
3661 if (copy && host_to_dev_cpy)
3662 host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr,
3663 (void *) (uintptr_t) devaddrs[i],
3664 sizes[i], token);
3665 else if (copy)
3666 gomp_copy_host2dev (devicep, NULL,
3667 (void *) (uintptr_t) cdata[i].devaddr,
3668 (void *) (uintptr_t) devaddrs[i],
3669 sizes[i], false, NULL);
3670 default:
3671 break;
3672 }
3673 if (struct_cpy)
3674 {
3675 struct_cpy--;
3676 continue;
3677 }
3678 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3679 {
3680 clean_struct = true;
3681 struct_cpy = sizes[i];
3682 }
3683 else if (cdata[i].aligned)
3684 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3685 else if (!cdata[i].present)
3686 free ((void *) (uintptr_t) devaddrs[i]);
3687 }
3688 if (clean_struct)
3689 for (uint64_t i = 0; i < mapnum; i++)
3690 if (!cdata[i].present
3691 && ((get_kind (short_mapkind, kinds, i) & typemask)
3692 == GOMP_MAP_STRUCT))
3693 {
3694 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3695 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3696 }
3697
3698 free (devaddrs);
3699 free (sizes);
3700 free (kinds);
3701 }
3702 }
3703
3704 /* Host fallback for GOMP_target_data{,_ext} routines. */
3705
3706 static void
3707 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3708 {
3709 struct gomp_task_icv *icv = gomp_icv (false);
3710
3711 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3712 && devicep != NULL)
3713 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3714 "be used for offloading");
3715
3716 if (icv->target_data)
3717 {
3718 /* Even when doing a host fallback, if there are any active
3719 #pragma omp target data constructs, we need to remember the
3720 new #pragma omp target data, otherwise GOMP_target_end_data
3721 would get out of sync. */
3722 struct target_mem_desc *tgt
3723 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3724 NULL, GOMP_MAP_VARS_DATA);
3725 tgt->prev = icv->target_data;
3726 icv->target_data = tgt;
3727 }
3728 }
3729
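/* Handle '#pragma omp target data'.  This is the older entry point taking
   the map kinds as unsigned chars; GOMP_target_data_ext below is the
   variant taking unsigned shorts (the short mapkind encoding).  The
   resulting target_mem_desc is chained onto the per-task ICV list so that
   GOMP_target_end_data can unmap it later.  */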
3730 void
3731 GOMP_target_data (int device, const void *unused, size_t mapnum,
3732 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3733 {
3734 struct gomp_device_descr *devicep = resolve_device (device, true);
3735
3736 if (devicep == NULL
3737 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3738 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3739 return gomp_target_data_fallback (devicep);
3740
3741 struct target_mem_desc *tgt
3742 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3743 NULL, GOMP_MAP_VARS_DATA);
3744 struct gomp_task_icv *icv = gomp_icv (true);
3745 tgt->prev = icv->target_data;
3746 icv->target_data = tgt;
3747 }
3748
3749 void
3750 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3751 size_t *sizes, unsigned short *kinds)
3752 {
3753 struct gomp_device_descr *devicep = resolve_device (device, true);
3754
3755 if (devicep == NULL
3756 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3757 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3758 return gomp_target_data_fallback (devicep);
3759
3760 struct target_mem_desc *tgt
3761 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3762 NULL, GOMP_MAP_VARS_DATA);
3763 struct gomp_task_icv *icv = gomp_icv (true);
3764 tgt->prev = icv->target_data;
3765 icv->target_data = tgt;
3766 }
3767
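/* Handle the end of a '#pragma omp target data' region: pop the most
   recently mapped descriptor off the ICV list and unmap its variables.  */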
3768 void
3769 GOMP_target_end_data (void)
3770 {
3771 struct gomp_task_icv *icv = gomp_icv (false);
3772 if (icv->target_data)
3773 {
3774 struct target_mem_desc *tgt = icv->target_data;
3775 icv->target_data = tgt->prev;
3776 gomp_unmap_vars (tgt, true, NULL);
3777 }
3778 }
3779
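/* Handle '#pragma omp target update' in its older form: unsigned char map
   kinds, no flags and no depend clauses.  */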
3780 void
3781 GOMP_target_update (int device, const void *unused, size_t mapnum,
3782 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3783 {
3784 struct gomp_device_descr *devicep = resolve_device (device, true);
3785
3786 if (devicep == NULL
3787 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3788 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3789 return;
3790
3791 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3792 }
3793
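/* Handle '#pragma omp target update' in its current form: short map kinds
   plus FLAGS and DEPEND, so that 'nowait' updates can be turned into
   target tasks.  */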
3794 void
3795 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3796 size_t *sizes, unsigned short *kinds,
3797 unsigned int flags, void **depend)
3798 {
3799 struct gomp_device_descr *devicep = resolve_device (device, true);
3800
3801 /* If there are depend clauses, but nowait is not present,
3802 block the parent task until the dependencies are resolved
3803 and then just continue with the rest of the function as if it
3804 were a merged task. Until we are able to schedule tasks during
3805 variable mapping or unmapping, ignore nowait if depend clauses
3806 are not present. */
3807 if (depend != NULL)
3808 {
3809 struct gomp_thread *thr = gomp_thread ();
3810 if (thr->task && thr->task->depend_hash)
3811 {
3812 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3813 && thr->ts.team
3814 && !thr->task->final_task)
3815 {
3816 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3817 mapnum, hostaddrs, sizes, kinds,
3818 flags | GOMP_TARGET_FLAG_UPDATE,
3819 depend, NULL, GOMP_TARGET_TASK_DATA))
3820 return;
3821 }
3822 else
3823 {
3824 struct gomp_team *team = thr->ts.team;
3825 /* If parallel or taskgroup has been cancelled, don't start new
3826 tasks. */
3827 if (__builtin_expect (gomp_cancel_var, 0) && team)
3828 {
3829 if (gomp_team_barrier_cancelled (&team->barrier))
3830 return;
3831 if (thr->task->taskgroup)
3832 {
3833 if (thr->task->taskgroup->cancelled)
3834 return;
3835 if (thr->task->taskgroup->workshare
3836 && thr->task->taskgroup->prev
3837 && thr->task->taskgroup->prev->cancelled)
3838 return;
3839 }
3840 }
3841
3842 gomp_task_maybe_wait_for_dependencies (depend);
3843 }
3844 }
3845 }
3846
3847 if (devicep == NULL
3848 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3849 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3850 return;
3851
3852 struct gomp_thread *thr = gomp_thread ();
3853 struct gomp_team *team = thr->ts.team;
3854 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3855 if (__builtin_expect (gomp_cancel_var, 0) && team)
3856 {
3857 if (gomp_team_barrier_cancelled (&team->barrier))
3858 return;
3859 if (thr->task->taskgroup)
3860 {
3861 if (thr->task->taskgroup->cancelled)
3862 return;
3863 if (thr->task->taskgroup->workshare
3864 && thr->task->taskgroup->prev
3865 && thr->task->taskgroup->prev->cancelled)
3866 return;
3867 }
3868 }
3869
3870 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3871 }
3872
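/* Handle the 'exit data' half of GOMP_target_enter_exit_data: detach
   pointers first, then decrement reference counts, copy back 'from'
   entries and finally remove mappings whose reference count dropped to
   zero.  */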
3873 static void
3874 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3875 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3876 htab_t *refcount_set)
3877 {
3878 const int typemask = 0xff;
3879 size_t i;
3880 gomp_mutex_lock (&devicep->lock);
3881 if (devicep->state == GOMP_DEVICE_FINALIZED)
3882 {
3883 gomp_mutex_unlock (&devicep->lock);
3884 return;
3885 }
3886
3887 for (i = 0; i < mapnum; i++)
3888 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3889 {
3890 struct splay_tree_key_s cur_node;
3891 cur_node.host_start = (uintptr_t) hostaddrs[i];
3892 cur_node.host_end = cur_node.host_start + sizeof (void *);
3893 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3894
3895 if (n)
3896 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3897 false, NULL);
3898 }
3899
3900 int nrmvars = 0;
3901 splay_tree_key remove_vars[mapnum];
3902
3903 for (i = 0; i < mapnum; i++)
3904 {
3905 struct splay_tree_key_s cur_node;
3906 unsigned char kind = kinds[i] & typemask;
3907 switch (kind)
3908 {
3909 case GOMP_MAP_FROM:
3910 case GOMP_MAP_ALWAYS_FROM:
3911 case GOMP_MAP_DELETE:
3912 case GOMP_MAP_RELEASE:
3913 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3914 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3915 cur_node.host_start = (uintptr_t) hostaddrs[i];
3916 cur_node.host_end = cur_node.host_start + sizes[i];
3917 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3918 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3919 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3920 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3921 if (!k)
3922 continue;
3923
3924 bool delete_p = (kind == GOMP_MAP_DELETE
3925 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
3926 bool do_copy, do_remove;
3927 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
3928 &do_remove);
3929
3930 if ((kind == GOMP_MAP_FROM && do_copy)
3931 || kind == GOMP_MAP_ALWAYS_FROM)
3932 {
3933 if (k->aux && k->aux->attach_count)
3934 {
3935 /* We have to be careful not to overwrite still attached
3936 pointers during the copyback to host. */
3937 uintptr_t addr = k->host_start;
3938 while (addr < k->host_end)
3939 {
3940 size_t i = (addr - k->host_start) / sizeof (void *);
3941 if (k->aux->attach_count[i] == 0)
3942 gomp_copy_dev2host (devicep, NULL, (void *) addr,
3943 (void *) (k->tgt->tgt_start
3944 + k->tgt_offset
3945 + addr - k->host_start),
3946 sizeof (void *));
3947 addr += sizeof (void *);
3948 }
3949 }
3950 else
3951 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3952 (void *) (k->tgt->tgt_start + k->tgt_offset
3953 + cur_node.host_start
3954 - k->host_start),
3955 cur_node.host_end - cur_node.host_start);
3956 }
3957
3958 /* Structure element lists are removed altogether at once, which
3959 may cause immediate deallocation of the target_mem_desc, causing
3960 errors if we still have following sibling elements to copy back.
3961 While we're at it, it also seems more disciplined to simply
3962 queue all removals together for processing below.
3963
3964 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3965 not have this problem, since it maintains an additional
3966 tgt->refcount = 1 reference to the target_mem_desc to start with.
3967 */
3968 if (do_remove)
3969 remove_vars[nrmvars++] = k;
3970 break;
3971
3972 case GOMP_MAP_DETACH:
3973 break;
3974 default:
3975 gomp_mutex_unlock (&devicep->lock);
3976 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3977 kind);
3978 }
3979 }
3980
3981 for (int i = 0; i < nrmvars; i++)
3982 gomp_remove_var (devicep, remove_vars[i]);
3983
3984 gomp_mutex_unlock (&devicep->lock);
3985 }
3986
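/* Handle '#pragma omp target enter data' and '#pragma omp target exit
   data'; GOMP_TARGET_FLAG_EXIT_DATA in FLAGS selects between the two.  */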
3987 void
3988 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3989 size_t *sizes, unsigned short *kinds,
3990 unsigned int flags, void **depend)
3991 {
3992 struct gomp_device_descr *devicep = resolve_device (device, true);
3993
3994 /* If there are depend clauses, but nowait is not present,
3995 block the parent task until the dependencies are resolved
3996 and then just continue with the rest of the function as if it
3997 were a merged task. Until we are able to schedule tasks during
3998 variable mapping or unmapping, ignore nowait if depend clauses
3999 are not present. */
4000 if (depend != NULL)
4001 {
4002 struct gomp_thread *thr = gomp_thread ();
4003 if (thr->task && thr->task->depend_hash)
4004 {
4005 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4006 && thr->ts.team
4007 && !thr->task->final_task)
4008 {
4009 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4010 mapnum, hostaddrs, sizes, kinds,
4011 flags, depend, NULL,
4012 GOMP_TARGET_TASK_DATA))
4013 return;
4014 }
4015 else
4016 {
4017 struct gomp_team *team = thr->ts.team;
4018 /* If parallel or taskgroup has been cancelled, don't start new
4019 tasks. */
4020 if (__builtin_expect (gomp_cancel_var, 0) && team)
4021 {
4022 if (gomp_team_barrier_cancelled (&team->barrier))
4023 return;
4024 if (thr->task->taskgroup)
4025 {
4026 if (thr->task->taskgroup->cancelled)
4027 return;
4028 if (thr->task->taskgroup->workshare
4029 && thr->task->taskgroup->prev
4030 && thr->task->taskgroup->prev->cancelled)
4031 return;
4032 }
4033 }
4034
4035 gomp_task_maybe_wait_for_dependencies (depend);
4036 }
4037 }
4038 }
4039
4040 if (devicep == NULL
4041 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4042 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4043 return;
4044
4045 struct gomp_thread *thr = gomp_thread ();
4046 struct gomp_team *team = thr->ts.team;
4047 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4048 if (__builtin_expect (gomp_cancel_var, 0) && team)
4049 {
4050 if (gomp_team_barrier_cancelled (&team->barrier))
4051 return;
4052 if (thr->task->taskgroup)
4053 {
4054 if (thr->task->taskgroup->cancelled)
4055 return;
4056 if (thr->task->taskgroup->workshare
4057 && thr->task->taskgroup->prev
4058 && thr->task->taskgroup->prev->cancelled)
4059 return;
4060 }
4061 }
4062
4063 htab_t refcount_set = htab_create (mapnum);
4064
4065 /* The variables are mapped separately such that they can be released
4066 independently. */
4067 size_t i, j;
4068 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4069 for (i = 0; i < mapnum; i++)
4070 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4071 {
4072 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4073 &kinds[i], true, &refcount_set,
4074 GOMP_MAP_VARS_ENTER_DATA);
4075 i += sizes[i];
4076 }
4077 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4078 {
4079 for (j = i + 1; j < mapnum; j++)
4080 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4081 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4082 break;
4083 gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
4084 &kinds[i], true, &refcount_set,
4085 GOMP_MAP_VARS_ENTER_DATA);
4086 i += j - i - 1;
4087 }
4088 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
4089 {
4090 /* An attach operation must be processed together with the mapped
4091 base-pointer list item. */
4092 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4093 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4094 i += 1;
4095 }
4096 else
4097 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4098 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4099 else
4100 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4101 htab_free (refcount_set);
4102 }
4103
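/* Body of a target task.  Returns true if asynchronous execution has been
   started on the device and the task has to wait for the plugin's
   completion callback; returns false if all work was done synchronously
   (host fallback, data-only tasks, or an already finished task).  */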
4104 bool
4105 gomp_target_task_fn (void *data)
4106 {
4107 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4108 struct gomp_device_descr *devicep = ttask->devicep;
4109
4110 if (ttask->fn != NULL)
4111 {
4112 void *fn_addr;
4113 if (devicep == NULL
4114 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4115 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4116 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4117 {
4118 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4119 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4120 ttask->args);
4121 return false;
4122 }
4123
4124 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4125 {
4126 if (ttask->tgt)
4127 gomp_unmap_vars (ttask->tgt, true, NULL);
4128 return false;
4129 }
4130
4131 void *actual_arguments;
4132 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4133 {
4134 ttask->tgt = NULL;
4135 actual_arguments = ttask->hostaddrs;
4136 }
4137 else
4138 {
4139 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4140 NULL, ttask->sizes, ttask->kinds, true,
4141 NULL, GOMP_MAP_VARS_TARGET);
4142 actual_arguments = (void *) ttask->tgt->tgt_start;
4143 }
4144 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4145
4146 assert (devicep->async_run_func);
4147 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4148 ttask->args, (void *) ttask);
4149 return true;
4150 }
4151 else if (devicep == NULL
4152 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4153 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4154 return false;
4155
4156 size_t i;
4157 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4158 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4159 ttask->kinds, true);
4160 else
4161 {
4162 htab_t refcount_set = htab_create (ttask->mapnum);
4163 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4164 for (i = 0; i < ttask->mapnum; i++)
4165 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4166 {
4167 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4168 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4169 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4170 i += ttask->sizes[i];
4171 }
4172 else
4173 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4174 &ttask->kinds[i], true, &refcount_set,
4175 GOMP_MAP_VARS_ENTER_DATA);
4176 else
4177 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4178 ttask->kinds, &refcount_set);
4179 htab_free (refcount_set);
4180 }
4181 return false;
4182 }
4183
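/* Legacy entry point for '#pragma omp teams' when run by the host: record
   the thread-limit ICV; the requested number of teams is ignored.  */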
4184 void
4185 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4186 {
4187 if (thread_limit)
4188 {
4189 struct gomp_task_icv *icv = gomp_icv (true);
4190 icv->thread_limit_var
4191 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4192 }
4193 (void) num_teams;
4194 }
4195
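/* Newer teams entry point used by the host fallback: the FIRST call
   records the thread limit and the number of teams, subsequent calls
   advance thr->team_num and return false once all teams have run, letting
   the caller loop over the teams region.  */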
4196 bool
4197 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4198 unsigned int thread_limit, bool first)
4199 {
4200 struct gomp_thread *thr = gomp_thread ();
4201 if (first)
4202 {
4203 if (thread_limit)
4204 {
4205 struct gomp_task_icv *icv = gomp_icv (true);
4206 icv->thread_limit_var
4207 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4208 }
4209 (void) num_teams_high;
4210 if (num_teams_low == 0)
4211 num_teams_low = 1;
4212 thr->num_teams = num_teams_low - 1;
4213 thr->team_num = 0;
4214 }
4215 else if (thr->team_num == thr->num_teams)
4216 return false;
4217 else
4218 ++thr->team_num;
4219 return true;
4220 }
4221
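/* OpenMP API device memory routines follow.  omp_target_alloc allocates
   SIZE bytes in DEVICE_NUM's data environment; for the host device and for
   shared-memory devices this degenerates to plain malloc.  */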
4222 void *
4223 omp_target_alloc (size_t size, int device_num)
4224 {
4225 if (device_num == omp_initial_device
4226 || device_num == gomp_get_num_devices ())
4227 return malloc (size);
4228
4229 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4230 if (devicep == NULL)
4231 return NULL;
4232
4233 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4234 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4235 return malloc (size);
4236
4237 gomp_mutex_lock (&devicep->lock);
4238 void *ret = devicep->alloc_func (devicep->target_id, size);
4239 gomp_mutex_unlock (&devicep->lock);
4240 return ret;
4241 }
4242
4243 void
4244 omp_target_free (void *device_ptr, int device_num)
4245 {
4246 if (device_num == omp_initial_device
4247 || device_num == gomp_get_num_devices ())
4248 {
4249 free (device_ptr);
4250 return;
4251 }
4252
4253 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4254 if (devicep == NULL || device_ptr == NULL)
4255 return;
4256
4257 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4258 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4259 {
4260 free (device_ptr);
4261 return;
4262 }
4263
4264 gomp_mutex_lock (&devicep->lock);
4265 gomp_free_device_memory (devicep, device_ptr);
4266 gomp_mutex_unlock (&devicep->lock);
4267 }
4268
4269 int
4270 omp_target_is_present (const void *ptr, int device_num)
4271 {
4272 if (device_num == omp_initial_device
4273 || device_num == gomp_get_num_devices ())
4274 return 1;
4275
4276 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4277 if (devicep == NULL)
4278 return 0;
4279
4280 if (ptr == NULL)
4281 return 1;
4282
4283 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4284 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4285 return 1;
4286
4287 gomp_mutex_lock (&devicep->lock);
4288 struct splay_tree_s *mem_map = &devicep->mem_map;
4289 struct splay_tree_key_s cur_node;
4290
4291 cur_node.host_start = (uintptr_t) ptr;
4292 cur_node.host_end = cur_node.host_start;
4293 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4294 int ret = n != NULL;
4295 gomp_mutex_unlock (&devicep->lock);
4296 return ret;
4297 }
4298
4299 static int
4300 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4301 struct gomp_device_descr **dst_devicep,
4302 struct gomp_device_descr **src_devicep)
4303 {
4304 if (dst_device_num != gomp_get_num_devices ()
4305 /* Above gomp_get_num_devices has to be called unconditionally. */
4306 && dst_device_num != omp_initial_device)
4307 {
4308 *dst_devicep = resolve_device (dst_device_num, false);
4309 if (*dst_devicep == NULL)
4310 return EINVAL;
4311
4312 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4313 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4314 *dst_devicep = NULL;
4315 }
4316
4317 if (src_device_num != num_devices_openmp
4318 && src_device_num != omp_initial_device)
4319 {
4320 *src_devicep = resolve_device (src_device_num, false);
4321 if (*src_devicep == NULL)
4322 return EINVAL;
4323
4324 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4325 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4326 *src_devicep = NULL;
4327 }
4328
4329 return 0;
4330 }
4331
4332 static int
4333 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4334 size_t dst_offset, size_t src_offset,
4335 struct gomp_device_descr *dst_devicep,
4336 struct gomp_device_descr *src_devicep)
4337 {
4338 bool ret;
4339 if (src_devicep == NULL && dst_devicep == NULL)
4340 {
4341 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4342 return 0;
4343 }
4344 if (src_devicep == NULL)
4345 {
4346 gomp_mutex_lock (&dst_devicep->lock);
4347 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4348 (char *) dst + dst_offset,
4349 (char *) src + src_offset, length);
4350 gomp_mutex_unlock (&dst_devicep->lock);
4351 return (ret ? 0 : EINVAL);
4352 }
4353 if (dst_devicep == NULL)
4354 {
4355 gomp_mutex_lock (&src_devicep->lock);
4356 ret = src_devicep->dev2host_func (src_devicep->target_id,
4357 (char *) dst + dst_offset,
4358 (char *) src + src_offset, length);
4359 gomp_mutex_unlock (&src_devicep->lock);
4360 return (ret ? 0 : EINVAL);
4361 }
4362 if (src_devicep == dst_devicep)
4363 {
4364 gomp_mutex_lock (&src_devicep->lock);
4365 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4366 (char *) dst + dst_offset,
4367 (char *) src + src_offset, length);
4368 gomp_mutex_unlock (&src_devicep->lock);
4369 return (ret ? 0 : EINVAL);
4370 }
4371 return EINVAL;
4372 }
4373
4374 int
4375 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4376 size_t src_offset, int dst_device_num, int src_device_num)
4377 {
4378 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4379 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4380 &dst_devicep, &src_devicep);
4381
4382 if (ret)
4383 return ret;
4384
4385 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4386 dst_devicep, src_devicep);
4387
4388 return ret;
4389 }
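
/* A minimal usage sketch (illustrative only, not part of libgomp; host_buf,
   use_on_device, n and dev are assumed to exist): copy N bytes from a host
   buffer into device memory obtained from omp_target_alloc:

     void *d = omp_target_alloc (n, dev);
     if (d != NULL
	 && omp_target_memcpy (d, host_buf, n, 0, 0, dev,
			       omp_get_initial_device ()) == 0)
       use_on_device (d, n, dev);
     omp_target_free (d, dev);

   omp_get_initial_device () names the host as the source device, DEV the
   destination; a non-zero return value indicates failure.  */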
4390
4391 typedef struct
4392 {
4393 void *dst;
4394 const void *src;
4395 size_t length;
4396 size_t dst_offset;
4397 size_t src_offset;
4398 struct gomp_device_descr *dst_devicep;
4399 struct gomp_device_descr *src_devicep;
4400 } omp_target_memcpy_data;
4401
4402 static void
4403 omp_target_memcpy_async_helper (void *args)
4404 {
4405 omp_target_memcpy_data *a = args;
4406 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4407 a->src_offset, a->dst_devicep, a->src_devicep))
4408 gomp_fatal ("omp_target_memcpy failed");
4409 }
4410
4411 int
4412 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4413 size_t dst_offset, size_t src_offset,
4414 int dst_device_num, int src_device_num,
4415 int depobj_count, omp_depend_t *depobj_list)
4416 {
4417 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4418 unsigned int flags = 0;
4419 void *depend[depobj_count + 5];
4420 int i;
4421 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4422 &dst_devicep, &src_devicep);
4423
4424 omp_target_memcpy_data s = {
4425 .dst = dst,
4426 .src = src,
4427 .length = length,
4428 .dst_offset = dst_offset,
4429 .src_offset = src_offset,
4430 .dst_devicep = dst_devicep,
4431 .src_devicep = src_devicep
4432 };
4433
4434 if (check)
4435 return check;
4436
4437 if (depobj_count > 0 && depobj_list != NULL)
4438 {
4439 flags |= GOMP_TASK_FLAG_DEPEND;
4440 depend[0] = 0;
4441 depend[1] = (void *) (uintptr_t) depobj_count;
4442 depend[2] = depend[3] = depend[4] = 0;
4443 for (i = 0; i < depobj_count; ++i)
4444 depend[i + 5] = &depobj_list[i];
4445 }
4446
4447 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4448 __alignof__ (s), true, flags, depend, 0, NULL);
4449
4450 return 0;
4451 }
4452
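/* Recursive worker for omp_target_memcpy_rect{,_async}: peel off the
   outermost dimension and copy VOLUME[0] slices of the remaining
   (NUM_DIMS - 1)-dimensional sub-rectangles, until a contiguous
   one-dimensional copy can be issued.  */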
4453 static int
4454 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4455 int num_dims, const size_t *volume,
4456 const size_t *dst_offsets,
4457 const size_t *src_offsets,
4458 const size_t *dst_dimensions,
4459 const size_t *src_dimensions,
4460 struct gomp_device_descr *dst_devicep,
4461 struct gomp_device_descr *src_devicep)
4462 {
4463 size_t dst_slice = element_size;
4464 size_t src_slice = element_size;
4465 size_t j, dst_off, src_off, length;
4466 int i, ret;
4467
4468 if (num_dims == 1)
4469 {
4470 if (__builtin_mul_overflow (element_size, volume[0], &length)
4471 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4472 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4473 return EINVAL;
4474 if (dst_devicep == NULL && src_devicep == NULL)
4475 {
4476 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4477 length);
4478 ret = 1;
4479 }
4480 else if (src_devicep == NULL)
4481 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4482 (char *) dst + dst_off,
4483 (const char *) src + src_off,
4484 length);
4485 else if (dst_devicep == NULL)
4486 ret = src_devicep->dev2host_func (src_devicep->target_id,
4487 (char *) dst + dst_off,
4488 (const char *) src + src_off,
4489 length);
4490 else if (src_devicep == dst_devicep)
4491 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4492 (char *) dst + dst_off,
4493 (const char *) src + src_off,
4494 length);
4495 else
4496 ret = 0;
4497 return ret ? 0 : EINVAL;
4498 }
4499
4500 /* FIXME: it would be nice to have some plugin function to handle
4501 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4502 be handled in the generic recursion below, and for host-host it
4503 should be used even for any num_dims >= 2. */
4504
4505 for (i = 1; i < num_dims; i++)
4506 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4507 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4508 return EINVAL;
4509 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4510 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4511 return EINVAL;
4512 for (j = 0; j < volume[0]; j++)
4513 {
4514 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4515 (const char *) src + src_off,
4516 element_size, num_dims - 1,
4517 volume + 1, dst_offsets + 1,
4518 src_offsets + 1, dst_dimensions + 1,
4519 src_dimensions + 1, dst_devicep,
4520 src_devicep);
4521 if (ret)
4522 return ret;
4523 dst_off += dst_slice;
4524 src_off += src_slice;
4525 }
4526 return 0;
4527 }
4528
4529 static int
4530 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4531 int src_device_num,
4532 struct gomp_device_descr **dst_devicep,
4533 struct gomp_device_descr **src_devicep)
4534 {
4535 if (!dst && !src)
4536 return INT_MAX;
4537
4538 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4539 dst_devicep, src_devicep);
4540 if (ret)
4541 return ret;
4542
4543 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
4544 return EINVAL;
4545
4546 return 0;
4547 }
4548
4549 static int
4550 omp_target_memcpy_rect_copy (void *dst, const void *src,
4551 size_t element_size, int num_dims,
4552 const size_t *volume, const size_t *dst_offsets,
4553 const size_t *src_offsets,
4554 const size_t *dst_dimensions,
4555 const size_t *src_dimensions,
4556 struct gomp_device_descr *dst_devicep,
4557 struct gomp_device_descr *src_devicep)
4558 {
4559 if (src_devicep)
4560 gomp_mutex_lock (&src_devicep->lock);
4561 else if (dst_devicep)
4562 gomp_mutex_lock (&dst_devicep->lock);
4563 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4564 volume, dst_offsets, src_offsets,
4565 dst_dimensions, src_dimensions,
4566 dst_devicep, src_devicep);
4567 if (src_devicep)
4568 gomp_mutex_unlock (&src_devicep->lock);
4569 else if (dst_devicep)
4570 gomp_mutex_unlock (&dst_devicep->lock);
4571
4572 return ret;
4573 }
4574
4575 int
4576 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4577 int num_dims, const size_t *volume,
4578 const size_t *dst_offsets,
4579 const size_t *src_offsets,
4580 const size_t *dst_dimensions,
4581 const size_t *src_dimensions,
4582 int dst_device_num, int src_device_num)
4583 {
4584 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4585
4586 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4587 src_device_num, &dst_devicep,
4588 &src_devicep);
4589
4590 if (check)
4591 return check;
4592
4593 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4594 volume, dst_offsets, src_offsets,
4595 dst_dimensions, src_dimensions,
4596 dst_devicep, src_devicep);
4597
4598 return ret;
4599 }
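
/* A minimal usage sketch (illustrative only; h, d and dev are assumed to
   exist): copy a 2x3 block of doubles from the top-left corner of a 10x10
   host array 'h' into a 4x4 device array 'd' at offset (1,1):

     size_t volume[2]   = { 2, 3 };
     size_t dst_off[2]  = { 1, 1 };
     size_t src_off[2]  = { 0, 0 };
     size_t dst_dims[2] = { 4, 4 };
     size_t src_dims[2] = { 10, 10 };
     omp_target_memcpy_rect (d, h, sizeof (double), 2, volume, dst_off,
			     src_off, dst_dims, src_dims, dev,
			     omp_get_initial_device ());  */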
4600
4601 typedef struct
4602 {
4603 void *dst;
4604 const void *src;
4605 size_t element_size;
4606 const size_t *volume;
4607 const size_t *dst_offsets;
4608 const size_t *src_offsets;
4609 const size_t *dst_dimensions;
4610 const size_t *src_dimensions;
4611 struct gomp_device_descr *dst_devicep;
4612 struct gomp_device_descr *src_devicep;
4613 int num_dims;
4614 } omp_target_memcpy_rect_data;
4615
4616 static void
4617 omp_target_memcpy_rect_async_helper (void *args)
4618 {
4619 omp_target_memcpy_rect_data *a = args;
4620 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4621 a->num_dims, a->volume, a->dst_offsets,
4622 a->src_offsets, a->dst_dimensions,
4623 a->src_dimensions, a->dst_devicep,
4624 a->src_devicep);
4625 if (ret)
4626 gomp_fatal ("omp_target_memcpy_rect failed");
4627 }
4628
4629 int
4630 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4631 int num_dims, const size_t *volume,
4632 const size_t *dst_offsets,
4633 const size_t *src_offsets,
4634 const size_t *dst_dimensions,
4635 const size_t *src_dimensions,
4636 int dst_device_num, int src_device_num,
4637 int depobj_count, omp_depend_t *depobj_list)
4638 {
4639 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4640 unsigned flags = 0;
4641 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4642 src_device_num, &dst_devicep,
4643 &src_devicep);
4644 void *depend[depobj_count + 5];
4645 int i;
4646
4647 omp_target_memcpy_rect_data s = {
4648 .dst = dst,
4649 .src = src,
4650 .element_size = element_size,
4651 .num_dims = num_dims,
4652 .volume = volume,
4653 .dst_offsets = dst_offsets,
4654 .src_offsets = src_offsets,
4655 .dst_dimensions = dst_dimensions,
4656 .src_dimensions = src_dimensions,
4657 .dst_devicep = dst_devicep,
4658 .src_devicep = src_devicep
4659 };
4660
4661 if (check)
4662 return check;
4663
4664 if (depobj_count > 0 && depobj_list != NULL)
4665 {
4666 flags |= GOMP_TASK_FLAG_DEPEND;
4667 depend[0] = 0;
4668 depend[1] = (void *) (uintptr_t) depobj_count;
4669 depend[2] = depend[3] = depend[4] = 0;
4670 for (i = 0; i < depobj_count; ++i)
4671 depend[i + 5] = &depobj_list[i];
4672 }
4673
4674 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4675 __alignof__ (s), true, flags, depend, 0, NULL);
4676
4677 return 0;
4678 }
4679
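/* Associate the existing device allocation DEVICE_PTR + DEVICE_OFFSET with
   the host range [HOST_PTR, HOST_PTR + SIZE) by inserting a mapping with an
   infinite reference count, so that subsequent map clauses reuse it instead
   of allocating new device memory.  */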
4680 int
4681 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4682 size_t size, size_t device_offset, int device_num)
4683 {
4684 if (device_num == omp_initial_device
4685 || device_num == gomp_get_num_devices ())
4686 return EINVAL;
4687
4688 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4689 if (devicep == NULL)
4690 return EINVAL;
4691
4692 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4693 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4694 return EINVAL;
4695
4696 gomp_mutex_lock (&devicep->lock);
4697
4698 struct splay_tree_s *mem_map = &devicep->mem_map;
4699 struct splay_tree_key_s cur_node;
4700 int ret = EINVAL;
4701
4702 cur_node.host_start = (uintptr_t) host_ptr;
4703 cur_node.host_end = cur_node.host_start + size;
4704 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4705 if (n)
4706 {
4707 if (n->tgt->tgt_start + n->tgt_offset
4708 == (uintptr_t) device_ptr + device_offset
4709 && n->host_start <= cur_node.host_start
4710 && n->host_end >= cur_node.host_end)
4711 ret = 0;
4712 }
4713 else
4714 {
4715 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4716 tgt->array = gomp_malloc (sizeof (*tgt->array));
4717 tgt->refcount = 1;
4718 tgt->tgt_start = 0;
4719 tgt->tgt_end = 0;
4720 tgt->to_free = NULL;
4721 tgt->prev = NULL;
4722 tgt->list_count = 0;
4723 tgt->device_descr = devicep;
4724 splay_tree_node array = tgt->array;
4725 splay_tree_key k = &array->key;
4726 k->host_start = cur_node.host_start;
4727 k->host_end = cur_node.host_end;
4728 k->tgt = tgt;
4729 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4730 k->refcount = REFCOUNT_INFINITY;
4731 k->dynamic_refcount = 0;
4732 k->aux = NULL;
4733 array->left = NULL;
4734 array->right = NULL;
4735 splay_tree_insert (&devicep->mem_map, array);
4736 ret = 0;
4737 }
4738 gomp_mutex_unlock (&devicep->lock);
4739 return ret;
4740 }
4741
4742 int
4743 omp_target_disassociate_ptr (const void *ptr, int device_num)
4744 {
4745 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4746 if (devicep == NULL)
4747 return EINVAL;
4748
4749 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4750 return EINVAL;
4751
4752 gomp_mutex_lock (&devicep->lock);
4753
4754 struct splay_tree_s *mem_map = &devicep->mem_map;
4755 struct splay_tree_key_s cur_node;
4756 int ret = EINVAL;
4757
4758 cur_node.host_start = (uintptr_t) ptr;
4759 cur_node.host_end = cur_node.host_start;
4760 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4761 if (n
4762 && n->host_start == cur_node.host_start
4763 && n->refcount == REFCOUNT_INFINITY
4764 && n->tgt->tgt_start == 0
4765 && n->tgt->to_free == NULL
4766 && n->tgt->refcount == 1
4767 && n->tgt->list_count == 0)
4768 {
4769 splay_tree_remove (&devicep->mem_map, n);
4770 gomp_unmap_tgt (n->tgt);
4771 ret = 0;
4772 }
4773
4774 gomp_mutex_unlock (&devicep->lock);
4775 return ret;
4776 }
4777
4778 void *
4779 omp_get_mapped_ptr (const void *ptr, int device_num)
4780 {
4781 if (device_num == omp_initial_device
4782 || device_num == omp_get_initial_device ())
4783 return (void *) ptr;
4784
4785 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4786 if (devicep == NULL)
4787 return NULL;
4788
4789 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4790 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4791 return (void *) ptr;
4792
4793 gomp_mutex_lock (&devicep->lock);
4794
4795 struct splay_tree_s *mem_map = &devicep->mem_map;
4796 struct splay_tree_key_s cur_node;
4797 void *ret = NULL;
4798
4799 cur_node.host_start = (uintptr_t) ptr;
4800 cur_node.host_end = cur_node.host_start;
4801 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4802
4803 if (n)
4804 {
4805 uintptr_t offset = cur_node.host_start - n->host_start;
4806 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4807 }
4808
4809 gomp_mutex_unlock (&devicep->lock);
4810
4811 return ret;
4812 }
4813
4814 int
4815 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4816 {
4817 if (device_num == omp_initial_device
4818 || device_num == gomp_get_num_devices ())
4819 return true;
4820
4821 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4822 if (devicep == NULL)
4823 return false;
4824
4825 /* TODO: Unified shared memory must be handled when available. */
4826
4827 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4828 }
4829
4830 int
4831 omp_pause_resource (omp_pause_resource_t kind, int device_num)
4832 {
4833 (void) kind;
4834 if (device_num == omp_initial_device
4835 || device_num == gomp_get_num_devices ())
4836 return gomp_pause_host ();
4837
4838 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4839 if (devicep == NULL)
4840 return -1;
4841
4842 /* Do nothing for target devices for now. */
4843 return 0;
4844 }
4845
4846 int
4847 omp_pause_resource_all (omp_pause_resource_t kind)
4848 {
4849 (void) kind;
4850 if (gomp_pause_host ())
4851 return -1;
4852 /* Do nothing for target devices for now. */
4853 return 0;
4854 }
4855
4856 ialias (omp_pause_resource)
4857 ialias (omp_pause_resource_all)
4858
4859 #ifdef PLUGIN_SUPPORT
4860
4861 /* This function tries to load a plugin for DEVICE. The name of the plugin
4862 is passed in PLUGIN_NAME.
4863 The handles of the found functions are stored in the corresponding fields
4864 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4865
4866 static bool
4867 gomp_load_plugin_for_device (struct gomp_device_descr *device,
4868 const char *plugin_name)
4869 {
4870 const char *err = NULL, *last_missing = NULL;
4871
4872 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
4873 if (!plugin_handle)
4874 #if OFFLOAD_DEFAULTED
4875 return 0;
4876 #else
4877 goto dl_fail;
4878 #endif
4879
4880 /* Check if all required functions are available in the plugin and store
4881 their handles. None of the symbols can legitimately be NULL,
4882 so we don't need to check dlerror all the time. */
4883 #define DLSYM(f) \
4884 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4885 goto dl_fail
4886 /* Similar, but missing functions are not an error. Return false if
4887 failed, true otherwise. */
4888 #define DLSYM_OPT(f, n) \
4889 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4890 || (last_missing = #n, 0))
4891
4892 DLSYM (version);
4893 if (device->version_func () != GOMP_VERSION)
4894 {
4895 err = "plugin version mismatch";
4896 goto fail;
4897 }
4898
4899 DLSYM (get_name);
4900 DLSYM (get_caps);
4901 DLSYM (get_type);
4902 DLSYM (get_num_devices);
4903 DLSYM (init_device);
4904 DLSYM (fini_device);
4905 DLSYM (load_image);
4906 DLSYM (unload_image);
4907 DLSYM (alloc);
4908 DLSYM (free);
4909 DLSYM (dev2host);
4910 DLSYM (host2dev);
4911 device->capabilities = device->get_caps_func ();
4912 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4913 {
4914 DLSYM (run);
4915 DLSYM_OPT (async_run, async_run);
4916 DLSYM_OPT (can_run, can_run);
4917 DLSYM (dev2dev);
4918 }
4919 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4920 {
4921 if (!DLSYM_OPT (openacc.exec, openacc_exec)
4922 || !DLSYM_OPT (openacc.create_thread_data,
4923 openacc_create_thread_data)
4924 || !DLSYM_OPT (openacc.destroy_thread_data,
4925 openacc_destroy_thread_data)
4926 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
4927 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
4928 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
4929 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
4930 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
4931 || !DLSYM_OPT (openacc.async.queue_callback,
4932 openacc_async_queue_callback)
4933 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
4934 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
4935 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
4936 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
4937 {
4938 /* Require all the OpenACC handlers if we have
4939 GOMP_OFFLOAD_CAP_OPENACC_200. */
4940 err = "plugin missing OpenACC handler function";
4941 goto fail;
4942 }
4943
4944 unsigned cuda = 0;
4945 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
4946 openacc_cuda_get_current_device);
4947 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
4948 openacc_cuda_get_current_context);
4949 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
4950 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
4951 if (cuda && cuda != 4)
4952 {
4953 /* Make sure all the CUDA functions are there if any of them are. */
4954 err = "plugin missing OpenACC CUDA handler function";
4955 goto fail;
4956 }
4957 }
4958 #undef DLSYM
4959 #undef DLSYM_OPT
4960
4961 return 1;
4962
4963 dl_fail:
4964 err = dlerror ();
4965 fail:
4966 gomp_error ("while loading %s: %s", plugin_name, err);
4967 if (last_missing)
4968 gomp_error ("missing function was %s", last_missing);
4969 if (plugin_handle)
4970 dlclose (plugin_handle);
4971
4972 return 0;
4973 }
4974
4975 /* This function finalizes all initialized devices. */
4976
4977 static void
4978 gomp_target_fini (void)
4979 {
4980 int i;
4981 for (i = 0; i < num_devices; i++)
4982 {
4983 bool ret = true;
4984 struct gomp_device_descr *devicep = &devices[i];
4985 gomp_mutex_lock (&devicep->lock);
4986 if (devicep->state == GOMP_DEVICE_INITIALIZED)
4987 ret = gomp_fini_device (devicep);
4988 gomp_mutex_unlock (&devicep->lock);
4989 if (!ret)
4990 gomp_fatal ("device finalization failed");
4991 }
4992 }
4993
4994 /* This function initializes the runtime for offloading.
4995 It parses the list of offload plugins, and tries to load them.
4996 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4997 will be set, and the array DEVICES initialized, containing descriptors for
4998 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
4999 by the others. */
5000
5001 static void
5002 gomp_target_init (void)
5003 {
5004 const char *prefix = "libgomp-plugin-";
5005 const char *suffix = SONAME_SUFFIX (1);
5006 const char *cur, *next;
5007 char *plugin_name;
5008 int i, new_num_devs;
5009 int num_devs = 0, num_devs_openmp;
5010 struct gomp_device_descr *devs = NULL;
5011
5012 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5013 return;
5014
5015 cur = OFFLOAD_PLUGINS;
5016 if (*cur)
5017 do
5018 {
5019 struct gomp_device_descr current_device;
5020 size_t prefix_len, suffix_len, cur_len;
5021
5022 next = strchr (cur, ',');
5023
5024 prefix_len = strlen (prefix);
5025 cur_len = next ? next - cur : strlen (cur);
5026 suffix_len = strlen (suffix);
5027
5028 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5029 if (!plugin_name)
5030 {
5031 num_devs = 0;
5032 break;
5033 }
5034
5035 memcpy (plugin_name, prefix, prefix_len);
5036 memcpy (plugin_name + prefix_len, cur, cur_len);
5037 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5038
5039 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5040 {
5041 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5042 new_num_devs = current_device.get_num_devices_func (omp_req);
5043 if (gomp_debug_var > 0 && new_num_devs < 0)
5044 {
5045 bool found = false;
5046 int type = current_device.get_type_func ();
5047 for (int img = 0; img < num_offload_images; img++)
5048 if (type == offload_images[img].type)
5049 found = true;
5050 if (found)
5051 {
5052 char buf[sizeof ("unified_address, unified_shared_memory, "
5053 "reverse_offload")];
5054 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5055 char *name = (char *) malloc (cur_len + 1);
5056 memcpy (name, cur, cur_len);
5057 name[cur_len] = '\0';
5058 gomp_debug (1,
5059 "%s devices present but 'omp requires %s' "
5060 "cannot be fulfilled\n", name, buf);
5061 free (name);
5062 }
5063 }
5064 else if (new_num_devs >= 1)
5065 {
5066 /* Augment DEVICES and NUM_DEVICES. */
5067
5068 devs = realloc (devs, (num_devs + new_num_devs)
5069 * sizeof (struct gomp_device_descr));
5070 if (!devs)
5071 {
5072 num_devs = 0;
5073 free (plugin_name);
5074 break;
5075 }
5076
5077 current_device.name = current_device.get_name_func ();
5078 /* current_device.capabilities has already been set. */
5079 current_device.type = current_device.get_type_func ();
5080 current_device.mem_map.root = NULL;
5081 current_device.mem_map_rev.root = NULL;
5082 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5083 for (i = 0; i < new_num_devs; i++)
5084 {
5085 current_device.target_id = i;
5086 devs[num_devs] = current_device;
5087 gomp_mutex_init (&devs[num_devs].lock);
5088 num_devs++;
5089 }
5090 }
5091 }
5092
5093 free (plugin_name);
5094 cur = next + 1;
5095 }
5096 while (next);
5097
5098 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5099 NUM_DEVICES_OPENMP. */
5100 struct gomp_device_descr *devs_s
5101 = malloc (num_devs * sizeof (struct gomp_device_descr));
5102 if (!devs_s)
5103 {
5104 num_devs = 0;
5105 free (devs);
5106 devs = NULL;
5107 }
5108 num_devs_openmp = 0;
5109 for (i = 0; i < num_devs; i++)
5110 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5111 devs_s[num_devs_openmp++] = devs[i];
5112 int num_devs_after_openmp = num_devs_openmp;
5113 for (i = 0; i < num_devs; i++)
5114 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5115 devs_s[num_devs_after_openmp++] = devs[i];
5116 free (devs);
5117 devs = devs_s;
5118
5119 for (i = 0; i < num_devs; i++)
5120 {
5121 /* The 'devices' array can be moved (by the realloc call) until we have
5122 found all the plugins, so registering with the OpenACC runtime (which
5123 takes a copy of the pointer argument) must be delayed until now. */
5124 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5125 goacc_register (&devs[i]);
5126 }
5127
5128 num_devices = num_devs;
5129 num_devices_openmp = num_devs_openmp;
5130 devices = devs;
5131 if (atexit (gomp_target_fini) != 0)
5132 gomp_fatal ("atexit failed");
5133 }
5134
5135 #else /* PLUGIN_SUPPORT */
5136 /* If dlfcn.h is unavailable we always fall back to host execution.
5137 GOMP_target* routines are just stubs for this case. */
5138 static void
5139 gomp_target_init (void)
5140 {
5141 }
5142 #endif /* PLUGIN_SUPPORT */