1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support for offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_c
51 #include "splay-tree.h"
52
53
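/* Instantiate hashtab.h with plain pointer keys. The resulting hash tables
are used as refcount sets, recording which reference counts have already
been adjusted within a single construct (see gomp_increment_refcount and
gomp_decrement_refcount). */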
54 typedef uintptr_t *hash_entry_type;
55 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
56 static inline void htab_free (void *ptr) { free (ptr); }
57 #include "hashtab.h"
58
59 ialias_redirect (GOMP_task)
60
61 static inline hashval_t
62 htab_hash (hash_entry_type element)
63 {
64 return hash_pointer ((void *) element);
65 }
66
67 static inline bool
68 htab_eq (hash_entry_type x, hash_entry_type y)
69 {
70 return x == y;
71 }
72
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
74
75 static void gomp_target_init (void);
76
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
79
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock;
82
83 /* This structure describes an offload image.
84 It contains the type of the target device, a pointer to the host table
85 descriptor, and a pointer to the target data. */
86 struct offload_image_descr {
87 unsigned version;
88 enum offload_target_type type;
89 const void *host_table;
90 const void *target_data;
91 };
92
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr *offload_images;
95
96 /* Total number of offload images. */
97 static int num_offload_images;
98
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr *devices;
101
102 /* Total number of available devices. */
103 static int num_devices;
104
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp;
107
108 /* Mask of OpenMP 'requires' directive clauses in use. */
109 static int omp_requires_mask;
110
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
112
113 static void *
114 gomp_realloc_unlock (void *old, size_t size)
115 {
116 void *ret = realloc (old, size);
117 if (ret == NULL)
118 {
119 gomp_mutex_unlock (&register_lock);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
121 }
122 return ret;
123 }
124
125 attribute_hidden void
126 gomp_init_targets_once (void)
127 {
128 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
129 }
130
131 attribute_hidden int
132 gomp_get_num_devices (void)
133 {
134 gomp_init_targets_once ();
135 return num_devices_openmp;
136 }
137
138 static struct gomp_device_descr *
139 resolve_device (int device_id, bool remapped)
140 {
141 /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
142 called, which must be done before using default_device_var. */
143 int num_devices = gomp_get_num_devices ();
144
145 if (remapped && device_id == GOMP_DEVICE_ICV)
146 {
147 struct gomp_task_icv *icv = gomp_icv (false);
148 device_id = icv->default_device_var;
149 remapped = false;
150 }
151
152 if (device_id < 0)
153 {
154 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
155 : omp_initial_device))
156 return NULL;
157 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
158 && num_devices == 0)
159 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
160 "but only the host device is available");
161 else if (device_id == omp_invalid_device)
162 gomp_fatal ("omp_invalid_device encountered");
163 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
164 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
165 "but device not found");
166
167 return NULL;
168 }
169 else if (device_id >= num_devices)
170 {
171 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
172 && device_id != num_devices)
173 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
174 "but device not found");
175
176 return NULL;
177 }
178
179 gomp_mutex_lock (&devices[device_id].lock);
180 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
181 gomp_init_device (&devices[device_id]);
182 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
183 {
184 gomp_mutex_unlock (&devices[device_id].lock);
185
186 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
187 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
188 "but device is finalized");
189
190 return NULL;
191 }
192 gomp_mutex_unlock (&devices[device_id].lock);
193
194 return &devices[device_id];
195 }
196
197
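/* Look up KEY in MEM_MAP. For a zero-length key (host_start == host_end),
additionally probe with the end extended and with the start reduced by one
byte, so that a zero-length array section also matches an existing mapping
it merely touches. */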
198 static inline splay_tree_key
199 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
200 {
201 if (key->host_start != key->host_end)
202 return splay_tree_lookup (mem_map, key);
203
204 key->host_end++;
205 splay_tree_key n = splay_tree_lookup (mem_map, key);
206 key->host_end--;
207 if (n)
208 return n;
209 key->host_start--;
210 n = splay_tree_lookup (mem_map, key);
211 key->host_start++;
212 if (n)
213 return n;
214 return splay_tree_lookup (mem_map, key);
215 }
216
217 static inline reverse_splay_tree_key
218 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
219 {
220 return reverse_splay_tree_lookup (mem_map_rev, key);
221 }
222
223 static inline splay_tree_key
224 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
225 {
226 if (key->host_start != key->host_end)
227 return splay_tree_lookup (mem_map, key);
228
229 key->host_end++;
230 splay_tree_key n = splay_tree_lookup (mem_map, key);
231 key->host_end--;
232 return n;
233 }
234
235 static inline void
236 gomp_device_copy (struct gomp_device_descr *devicep,
237 bool (*copy_func) (int, void *, const void *, size_t),
238 const char *dst, void *dstaddr,
239 const char *src, const void *srcaddr,
240 size_t size)
241 {
242 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
243 {
244 gomp_mutex_unlock (&devicep->lock);
245 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
246 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
247 }
248 }
249
250 static inline void
251 goacc_device_copy_async (struct gomp_device_descr *devicep,
252 bool (*copy_func) (int, void *, const void *, size_t,
253 struct goacc_asyncqueue *),
254 const char *dst, void *dstaddr,
255 const char *src, const void *srcaddr,
256 const void *srcaddr_orig,
257 size_t size, struct goacc_asyncqueue *aq)
258 {
259 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
260 {
261 gomp_mutex_unlock (&devicep->lock);
262 if (srcaddr_orig && srcaddr_orig != srcaddr)
263 gomp_fatal ("Copying of %s object [%p..%p)"
264 " via buffer %s object [%p..%p)"
265 " to %s object [%p..%p) failed",
266 src, srcaddr_orig, srcaddr_orig + size,
267 src, srcaddr, srcaddr + size,
268 dst, dstaddr, dstaddr + size);
269 else
270 gomp_fatal ("Copying of %s object [%p..%p)"
271 " to %s object [%p..%p) failed",
272 src, srcaddr, srcaddr + size,
273 dst, dstaddr, dstaddr + size);
274 }
275 }
276
277 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
278 host to device memory transfers. */
279
280 struct gomp_coalesce_chunk
281 {
282 /* The starting and ending point of a coalesced chunk of memory. */
283 size_t start, end;
284 };
285
286 struct gomp_coalesce_buf
287 {
288 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
289 it will be copied to the device. */
290 void *buf;
291 struct target_mem_desc *tgt;
292 /* Array with offsets, chunks[i].start is the starting offset and
293 chunks[i].end ending offset relative to tgt->tgt_start device address
294 of chunks which are to be copied to buf and later copied to device. */
295 struct gomp_coalesce_chunk *chunks;
296 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
297 be performed. */
298 long chunk_cnt;
299 /* During construction of chunks array, how many memory regions are within
300 the last chunk. If there is just one memory region for a chunk, we copy
301 it directly to device rather than going through buf. */
302 long use_cnt;
303 };
304
305 /* Maximum size of memory region considered for coalescing. Larger copies
306 are performed directly. */
307 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
308
309 /* Maximum size of a gap in between regions to consider them being copied
310 within the same chunk. All the device offsets considered are within
311 newly allocated device memory, so it isn't fatal if we copy some padding
312 in between from host to device. The gaps come either from alignment
313 padding or from memory regions which are not supposed to be copied from
314 host to device (e.g. map(alloc:), map(from:) etc.). */
315 #define MAX_COALESCE_BUF_GAP (4 * 1024)
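/* For example, with the limits above, two to-be-copied regions at device
offsets [0, 100) and [2048, 3000) end up in a single chunk [0, 3000): the
1948-byte gap is below MAX_COALESCE_BUF_GAP and its (uninitialized) bytes
are simply transferred along with the chunk, which is harmless as noted
above. A subsequent region starting at offset 8192 opens a new chunk. */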
316
317 /* Add region with device tgt_start relative offset and length to CBUF.
318
319 This must not be used for asynchronous copies, because the host data might
320 not be computed yet (by an earlier asynchronous compute region, for
321 example). The exception is for EPHEMERAL data, which we know is available
322 already "by construction". */
323
324 static inline void
325 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
326 {
327 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
328 return;
329 if (cbuf->chunk_cnt)
330 {
331 if (cbuf->chunk_cnt < 0)
332 return;
333 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
334 {
335 cbuf->chunk_cnt = -1;
336 return;
337 }
338 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
339 {
340 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
341 cbuf->use_cnt++;
342 return;
343 }
344 /* If the last chunk is only used by one mapping, discard it,
345 as it will be one host to device copy anyway and
346 memcpying it around will only waste cycles. */
347 if (cbuf->use_cnt == 1)
348 cbuf->chunk_cnt--;
349 }
350 cbuf->chunks[cbuf->chunk_cnt].start = start;
351 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
352 cbuf->chunk_cnt++;
353 cbuf->use_cnt = 1;
354 }
355
356 /* Return true for mapping kinds which need to copy data from the
357 host to device for regions that weren't previously mapped. */
358
359 static inline bool
360 gomp_to_device_kind_p (int kind)
361 {
362 switch (kind)
363 {
364 case GOMP_MAP_ALLOC:
365 case GOMP_MAP_FROM:
366 case GOMP_MAP_FORCE_ALLOC:
367 case GOMP_MAP_FORCE_FROM:
368 case GOMP_MAP_ALWAYS_FROM:
369 case GOMP_MAP_ALWAYS_PRESENT_FROM:
370 case GOMP_MAP_FORCE_PRESENT:
371 return false;
372 default:
373 return true;
374 }
375 }
376
377 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
378 non-NULL), when the source data is stack or may otherwise be deallocated
379 before the asynchronous copy takes place, EPHEMERAL must be passed as
380 TRUE. */
381
382 attribute_hidden void
383 gomp_copy_host2dev (struct gomp_device_descr *devicep,
384 struct goacc_asyncqueue *aq,
385 void *d, const void *h, size_t sz,
386 bool ephemeral, struct gomp_coalesce_buf *cbuf)
387 {
388 if (cbuf)
389 {
390 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
391 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
392 {
393 long first = 0;
394 long last = cbuf->chunk_cnt - 1;
395 while (first <= last)
396 {
397 long middle = (first + last) >> 1;
398 if (cbuf->chunks[middle].end <= doff)
399 first = middle + 1;
400 else if (cbuf->chunks[middle].start <= doff)
401 {
402 if (doff + sz > cbuf->chunks[middle].end)
403 {
404 gomp_mutex_unlock (&devicep->lock);
405 gomp_fatal ("internal libgomp cbuf error");
406 }
407
408 /* In an asynchronous context, verify that CBUF isn't used
409 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
410 if (__builtin_expect (aq != NULL, 0))
411 assert (ephemeral);
412
413 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
414 h, sz);
415 return;
416 }
417 else
418 last = middle - 1;
419 }
420 }
421 }
422
423 if (__builtin_expect (aq != NULL, 0))
424 {
425 void *h_buf = (void *) h;
426 if (ephemeral)
427 {
428 /* We're queueing up an asynchronous copy from data that may
429 disappear before the transfer takes place (i.e. because it is a
430 stack local in a function that is no longer executing). As we've
431 not been able to use CBUF, make a copy of the data into a
432 temporary buffer. */
433 h_buf = gomp_malloc (sz);
434 memcpy (h_buf, h, sz);
435 }
436 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
437 "dev", d, "host", h_buf, h, sz, aq);
438 if (ephemeral)
439 /* Free once the transfer has completed. */
440 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
441 }
442 else
443 gomp_device_copy (devicep, devicep->host2dev_func,
444 "dev", d, "host", h, sz);
445 }
446
447 attribute_hidden void
448 gomp_copy_dev2host (struct gomp_device_descr *devicep,
449 struct goacc_asyncqueue *aq,
450 void *h, const void *d, size_t sz)
451 {
452 if (__builtin_expect (aq != NULL, 0))
453 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
454 "host", h, "dev", d, NULL, sz, aq);
455 else
456 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
457 }
458
459 static void
460 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
461 {
462 if (!devicep->free_func (devicep->target_id, devptr))
463 {
464 gomp_mutex_unlock (&devicep->lock);
465 gomp_fatal ("error in freeing device memory block at %p", devptr);
466 }
467 }
468
469 /* Increment reference count of a splay_tree_key region K by 1.
470 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
471 increment the value if refcount is not yet contained in the set (used for
472 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
473 once for each construct). */
474
475 static inline void
476 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
477 {
478 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
479 return;
480
481 uintptr_t *refcount_ptr = &k->refcount;
482
483 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
484 refcount_ptr = &k->structelem_refcount;
485 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
486 refcount_ptr = k->structelem_refcount_ptr;
487
488 if (refcount_set)
489 {
490 if (htab_find (*refcount_set, refcount_ptr))
491 return;
492 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
493 *slot = refcount_ptr;
494 }
495
496 *refcount_ptr += 1;
497 return;
498 }
499
500 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
501 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
502 track already seen refcounts, and only adjust the value if refcount is not
503 yet contained in the set (like gomp_increment_refcount).
504
505 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
506 it is already zero and we know we decremented it earlier. This signals that
507 associated maps should be copied back to host.
508
509 *DO_REMOVE is set to true when this is the first handling of this refcount
510 and we are setting it to zero. This signals a removal of this key from the
511 splay-tree map.
512
513 Copy and removal are separated due to cases like handling of structure
514 elements, e.g. each map of a structure element representing a possible copy
515 out of a structure field has to be handled individually, but we only signal
516 removal for one (the first encountered) sibling map. */
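/* For example, if the refcount was 1 and it has not been seen in REFCOUNT_SET
yet, both *DO_COPY and *DO_REMOVE are set; if a sibling map already brought
the same refcount to zero earlier within this construct, only *DO_COPY is
set. */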
517
518 static inline void
519 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
520 bool *do_copy, bool *do_remove)
521 {
522 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
523 {
524 *do_copy = *do_remove = false;
525 return;
526 }
527
528 uintptr_t *refcount_ptr = &k->refcount;
529
530 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
531 refcount_ptr = &k->structelem_refcount;
532 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
533 refcount_ptr = k->structelem_refcount_ptr;
534
535 bool new_encountered_refcount;
536 bool set_to_zero = false;
537 bool is_zero = false;
538
539 uintptr_t orig_refcount = *refcount_ptr;
540
541 if (refcount_set)
542 {
543 if (htab_find (*refcount_set, refcount_ptr))
544 {
545 new_encountered_refcount = false;
546 goto end;
547 }
548
549 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
550 *slot = refcount_ptr;
551 new_encountered_refcount = true;
552 }
553 else
554 /* If no refcount_set is being used, assume all keys are being decremented
555 for the first time. */
556 new_encountered_refcount = true;
557
558 if (delete_p)
559 *refcount_ptr = 0;
560 else if (*refcount_ptr > 0)
561 *refcount_ptr -= 1;
562
563 end:
564 if (*refcount_ptr == 0)
565 {
566 if (orig_refcount > 0)
567 set_to_zero = true;
568
569 is_zero = true;
570 }
571
572 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
573 *do_remove = (new_encountered_refcount && set_to_zero);
574 }
575
576 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
577 gomp_map_0len_lookup found oldn for newn.
578 Helper function of gomp_map_vars. */
579
580 static inline void
581 gomp_map_vars_existing (struct gomp_device_descr *devicep,
582 struct goacc_asyncqueue *aq, splay_tree_key oldn,
583 splay_tree_key newn, struct target_var_desc *tgt_var,
584 unsigned char kind, bool always_to_flag, bool implicit,
585 struct gomp_coalesce_buf *cbuf,
586 htab_t *refcount_set)
587 {
588 assert (kind != GOMP_MAP_ATTACH
589 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
590
591 tgt_var->key = oldn;
592 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
593 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
594 tgt_var->is_attach = false;
595 tgt_var->offset = newn->host_start - oldn->host_start;
596
597 /* For implicit maps, old contained in new is valid. */
598 bool implicit_subset = (implicit
599 && newn->host_start <= oldn->host_start
600 && oldn->host_end <= newn->host_end);
601 if (implicit_subset)
602 tgt_var->length = oldn->host_end - oldn->host_start;
603 else
604 tgt_var->length = newn->host_end - newn->host_start;
605
606 if (GOMP_MAP_FORCE_P (kind)
607 /* For implicit maps, old contained in new is valid. */
608 || !(implicit_subset
609 /* Otherwise, new contained inside old is considered valid. */
610 || (oldn->host_start <= newn->host_start
611 && newn->host_end <= oldn->host_end)))
612 {
613 gomp_mutex_unlock (&devicep->lock);
614 gomp_fatal ("Trying to map into device [%p..%p) object when "
615 "[%p..%p) is already mapped",
616 (void *) newn->host_start, (void *) newn->host_end,
617 (void *) oldn->host_start, (void *) oldn->host_end);
618 }
619
620 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
621 {
622 /* Implicit + always should not happen. If this does occur, the
623 address/length adjustment below is a TODO. */
624 assert (!implicit_subset);
625
626 if (oldn->aux && oldn->aux->attach_count)
627 {
628 /* We have to be careful not to overwrite still attached pointers
629 during the host to device copy. */
630 uintptr_t addr = newn->host_start;
631 while (addr < newn->host_end)
632 {
633 size_t i = (addr - oldn->host_start) / sizeof (void *);
634 if (oldn->aux->attach_count[i] == 0)
635 gomp_copy_host2dev (devicep, aq,
636 (void *) (oldn->tgt->tgt_start
637 + oldn->tgt_offset
638 + addr - oldn->host_start),
639 (void *) addr,
640 sizeof (void *), false, cbuf);
641 addr += sizeof (void *);
642 }
643 }
644 else
645 gomp_copy_host2dev (devicep, aq,
646 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
647 + newn->host_start - oldn->host_start),
648 (void *) newn->host_start,
649 newn->host_end - newn->host_start, false, cbuf);
650 }
651
652 gomp_increment_refcount (oldn, refcount_set);
653 }
654
655 static int
656 get_kind (bool short_mapkind, void *kinds, int idx)
657 {
658 if (!short_mapkind)
659 return ((unsigned char *) kinds)[idx];
660
661 int val = ((unsigned short *) kinds)[idx];
662 if (GOMP_MAP_IMPLICIT_P (val))
663 val &= ~GOMP_MAP_IMPLICIT;
664 return val;
665 }
666
667
668 static bool
669 get_implicit (bool short_mapkind, void *kinds, int idx)
670 {
671 if (!short_mapkind)
672 return false;
673
674 int val = ((unsigned short *) kinds)[idx];
675 return GOMP_MAP_IMPLICIT_P (val);
676 }
677
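/* Initialize a pointer on the device: store at TGT->tgt_start + TARGET_OFFSET
the device address corresponding to the host pointer HOST_PTR. The lookup
in the device's memory map is done with BIAS added to the pointer value, and
BIAS is subtracted again from the resulting device address before it is
written. A NULL host pointer yields a NULL device pointer; an unmapped
pointer target is fatal unless ALLOW_ZERO_LENGTH_ARRAY_SECTIONS is set, in
which case zero is stored instead. */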
678 static void
679 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
680 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
681 struct gomp_coalesce_buf *cbuf,
682 bool allow_zero_length_array_sections)
683 {
684 struct gomp_device_descr *devicep = tgt->device_descr;
685 struct splay_tree_s *mem_map = &devicep->mem_map;
686 struct splay_tree_key_s cur_node;
687
688 cur_node.host_start = host_ptr;
689 if (cur_node.host_start == (uintptr_t) NULL)
690 {
691 cur_node.tgt_offset = (uintptr_t) NULL;
692 gomp_copy_host2dev (devicep, aq,
693 (void *) (tgt->tgt_start + target_offset),
694 (void *) &cur_node.tgt_offset, sizeof (void *),
695 true, cbuf);
696 return;
697 }
698 /* Add bias to the pointer value. */
699 cur_node.host_start += bias;
700 cur_node.host_end = cur_node.host_start;
701 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
702 if (n == NULL)
703 {
704 if (allow_zero_length_array_sections)
705 cur_node.tgt_offset = 0;
706 else
707 {
708 gomp_mutex_unlock (&devicep->lock);
709 gomp_fatal ("Pointer target of array section wasn't mapped");
710 }
711 }
712 else
713 {
714 cur_node.host_start -= n->host_start;
715 cur_node.tgt_offset
716 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
717 /* At this point tgt_offset is target address of the
718 array section. Now subtract bias to get what we want
719 to initialize the pointer with. */
720 cur_node.tgt_offset -= bias;
721 }
722 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
723 (void *) &cur_node.tgt_offset, sizeof (void *),
724 true, cbuf);
725 }
726
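/* Handle structure element I of the current GOMP_MAP_STRUCT sequence (whose
first element is FIRST) when the containing structure is already mapped as N.
The element must belong to the same existing mapping; zero-sized elements are
additionally looked up one byte below and one byte beyond their address.
Otherwise this fails with gomp_fatal. */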
727 static void
728 gomp_map_fields_existing (struct target_mem_desc *tgt,
729 struct goacc_asyncqueue *aq, splay_tree_key n,
730 size_t first, size_t i, void **hostaddrs,
731 size_t *sizes, void *kinds,
732 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
733 {
734 struct gomp_device_descr *devicep = tgt->device_descr;
735 struct splay_tree_s *mem_map = &devicep->mem_map;
736 struct splay_tree_key_s cur_node;
737 int kind;
738 bool implicit;
739 const bool short_mapkind = true;
740 const int typemask = short_mapkind ? 0xff : 0x7;
741
742 cur_node.host_start = (uintptr_t) hostaddrs[i];
743 cur_node.host_end = cur_node.host_start + sizes[i];
744 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
745 kind = get_kind (short_mapkind, kinds, i);
746 implicit = get_implicit (short_mapkind, kinds, i);
747 if (n2
748 && n2->tgt == n->tgt
749 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
750 {
751 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
752 kind & typemask, false, implicit, cbuf,
753 refcount_set);
754 return;
755 }
756 if (sizes[i] == 0)
757 {
758 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
759 {
760 cur_node.host_start--;
761 n2 = splay_tree_lookup (mem_map, &cur_node);
762 cur_node.host_start++;
763 if (n2
764 && n2->tgt == n->tgt
765 && n2->host_start - n->host_start
766 == n2->tgt_offset - n->tgt_offset)
767 {
768 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
769 kind & typemask, false, implicit, cbuf,
770 refcount_set);
771 return;
772 }
773 }
774 cur_node.host_end++;
775 n2 = splay_tree_lookup (mem_map, &cur_node);
776 cur_node.host_end--;
777 if (n2
778 && n2->tgt == n->tgt
779 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
780 {
781 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
782 kind & typemask, false, implicit, cbuf,
783 refcount_set);
784 return;
785 }
786 }
787 gomp_mutex_unlock (&devicep->lock);
788 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
789 "other mapped elements from the same structure weren't mapped "
790 "together with it", (void *) cur_node.host_start,
791 (void *) cur_node.host_end);
792 }
793
794 attribute_hidden void
795 gomp_attach_pointer (struct gomp_device_descr *devicep,
796 struct goacc_asyncqueue *aq, splay_tree mem_map,
797 splay_tree_key n, uintptr_t attach_to, size_t bias,
798 struct gomp_coalesce_buf *cbufp,
799 bool allow_zero_length_array_sections)
800 {
801 struct splay_tree_key_s s;
802 size_t size, idx;
803
804 if (n == NULL)
805 {
806 gomp_mutex_unlock (&devicep->lock);
807 gomp_fatal ("enclosing struct not mapped for attach");
808 }
809
810 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
811 /* We might have a pointer in a packed struct: however we cannot have more
812 than one such pointer in each pointer-sized portion of the struct, so
813 this is safe. */
814 idx = (attach_to - n->host_start) / sizeof (void *);
815
816 if (!n->aux)
817 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
818
819 if (!n->aux->attach_count)
820 n->aux->attach_count
821 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
822
823 if (n->aux->attach_count[idx] < UINTPTR_MAX)
824 n->aux->attach_count[idx]++;
825 else
826 {
827 gomp_mutex_unlock (&devicep->lock);
828 gomp_fatal ("attach count overflow");
829 }
830
831 if (n->aux->attach_count[idx] == 1)
832 {
833 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
834 - n->host_start;
835 uintptr_t target = (uintptr_t) *(void **) attach_to;
836 splay_tree_key tn;
837 uintptr_t data;
838
839 if ((void *) target == NULL)
840 {
841 gomp_mutex_unlock (&devicep->lock);
842 gomp_fatal ("attempt to attach null pointer");
843 }
844
845 s.host_start = target + bias;
846 s.host_end = s.host_start + 1;
847 tn = splay_tree_lookup (mem_map, &s);
848
849 if (!tn)
850 {
851 if (allow_zero_length_array_sections)
852 /* When allowing attachment to zero-length array sections, we
853 allow attaching to NULL pointers when the target region is not
854 mapped. */
855 data = 0;
856 else
857 {
858 gomp_mutex_unlock (&devicep->lock);
859 gomp_fatal ("pointer target not mapped for attach");
860 }
861 }
862 else
863 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
864
865 gomp_debug (1,
866 "%s: attaching host %p, target %p (struct base %p) to %p\n",
867 __FUNCTION__, (void *) attach_to, (void *) devptr,
868 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
869
870 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
871 sizeof (void *), true, cbufp);
872 }
873 else
874 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
875 (void *) attach_to, (int) n->aux->attach_count[idx]);
876 }
877
878 attribute_hidden void
879 gomp_detach_pointer (struct gomp_device_descr *devicep,
880 struct goacc_asyncqueue *aq, splay_tree_key n,
881 uintptr_t detach_from, bool finalize,
882 struct gomp_coalesce_buf *cbufp)
883 {
884 size_t idx;
885
886 if (n == NULL)
887 {
888 gomp_mutex_unlock (&devicep->lock);
889 gomp_fatal ("enclosing struct not mapped for detach");
890 }
891
892 idx = (detach_from - n->host_start) / sizeof (void *);
893
894 if (!n->aux || !n->aux->attach_count)
895 {
896 gomp_mutex_unlock (&devicep->lock);
897 gomp_fatal ("no attachment counters for struct");
898 }
899
900 if (finalize)
901 n->aux->attach_count[idx] = 1;
902
903 if (n->aux->attach_count[idx] == 0)
904 {
905 gomp_mutex_unlock (&devicep->lock);
906 gomp_fatal ("attach count underflow");
907 }
908 else
909 n->aux->attach_count[idx]--;
910
911 if (n->aux->attach_count[idx] == 0)
912 {
913 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
914 - n->host_start;
915 uintptr_t target = (uintptr_t) *(void **) detach_from;
916
917 gomp_debug (1,
918 "%s: detaching host %p, target %p (struct base %p) to %p\n",
919 __FUNCTION__, (void *) detach_from, (void *) devptr,
920 (void *) (n->tgt->tgt_start + n->tgt_offset),
921 (void *) target);
922
923 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
924 sizeof (void *), true, cbufp);
925 }
926 else
927 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
928 (void *) detach_from, (int) n->aux->attach_count[idx]);
929 }
930
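/* Return the device address to be used for mapping I of TGT: the address
inside an already mapped key if there is one, otherwise the host address
itself (OFFSET_INLINED), zero (OFFSET_POINTER), an address relative to the
following struct entry (OFFSET_STRUCT), or an offset into the newly allocated
device block. */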
931 attribute_hidden uintptr_t
932 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
933 {
934 if (tgt->list[i].key != NULL)
935 return tgt->list[i].key->tgt->tgt_start
936 + tgt->list[i].key->tgt_offset
937 + tgt->list[i].offset;
938
939 switch (tgt->list[i].offset)
940 {
941 case OFFSET_INLINED:
942 return (uintptr_t) hostaddrs[i];
943
944 case OFFSET_POINTER:
945 return 0;
946
947 case OFFSET_STRUCT:
948 return tgt->list[i + 1].key->tgt->tgt_start
949 + tgt->list[i + 1].key->tgt_offset
950 + tgt->list[i + 1].offset
951 + (uintptr_t) hostaddrs[i]
952 - (uintptr_t) hostaddrs[i + 1];
953
954 default:
955 return tgt->tgt_start + tgt->list[i].offset;
956 }
957 }
958
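/* Map MAPNUM host address ranges described by HOSTADDRS, SIZES and KINDS onto
DEVICEP, reusing existing mappings where possible and allocating a single
contiguous block of device memory for the rest. If AQ is non-NULL, the copies
are queued asynchronously (OpenACC). Returns the new target_mem_desc; NULL is
returned if the device has already been finalized, or if an 'enter data' style
mapping turned out not to need a descriptor of its own. */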
959 static inline __attribute__((always_inline)) struct target_mem_desc *
960 gomp_map_vars_internal (struct gomp_device_descr *devicep,
961 struct goacc_asyncqueue *aq, size_t mapnum,
962 void **hostaddrs, void **devaddrs, size_t *sizes,
963 void *kinds, bool short_mapkind,
964 htab_t *refcount_set,
965 enum gomp_map_vars_kind pragma_kind)
966 {
967 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
968 bool has_firstprivate = false;
969 bool has_always_ptrset = false;
970 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
971 const int rshift = short_mapkind ? 8 : 3;
972 const int typemask = short_mapkind ? 0xff : 0x7;
973 struct splay_tree_s *mem_map = &devicep->mem_map;
974 struct splay_tree_key_s cur_node;
975 struct target_mem_desc *tgt
976 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
977 tgt->list_count = mapnum;
978 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
979 tgt->device_descr = devicep;
980 tgt->prev = NULL;
981 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
982
983 if (mapnum == 0)
984 {
985 tgt->tgt_start = 0;
986 tgt->tgt_end = 0;
987 return tgt;
988 }
989
990 tgt_align = sizeof (void *);
991 tgt_size = 0;
992 cbuf.chunks = NULL;
993 cbuf.chunk_cnt = -1;
994 cbuf.use_cnt = 0;
995 cbuf.buf = NULL;
996 if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
997 {
998 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
999 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
1000 cbuf.chunk_cnt = 0;
1001 }
1002 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1003 {
1004 size_t align = 4 * sizeof (void *);
1005 tgt_align = align;
1006 tgt_size = mapnum * sizeof (void *);
1007 cbuf.chunk_cnt = 1;
1008 cbuf.use_cnt = 1 + (mapnum > 1);
1009 cbuf.chunks[0].start = 0;
1010 cbuf.chunks[0].end = tgt_size;
1011 }
1012
1013 gomp_mutex_lock (&devicep->lock);
1014 if (devicep->state == GOMP_DEVICE_FINALIZED)
1015 {
1016 gomp_mutex_unlock (&devicep->lock);
1017 free (tgt);
1018 return NULL;
1019 }
1020
1021 for (i = 0; i < mapnum; i++)
1022 {
1023 int kind = get_kind (short_mapkind, kinds, i);
1024 bool implicit = get_implicit (short_mapkind, kinds, i);
1025 if (hostaddrs[i] == NULL
1026 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1027 {
1028 tgt->list[i].key = NULL;
1029 tgt->list[i].offset = OFFSET_INLINED;
1030 continue;
1031 }
1032 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1033 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1034 {
1035 tgt->list[i].key = NULL;
1036 if (!not_found_cnt)
1037 {
1038 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1039 on a separate construct prior to using use_device_{addr,ptr}.
1040 In OpenMP 5.0, map directives need to be ordered by the
1041 middle-end before the use_device_* clauses. If
1042 !not_found_cnt, all mappings requested (if any) are already
1043 mapped, so use_device_{addr,ptr} can be resolved right away.
1044 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1045 now but would succeed after performing the mappings in the
1046 following loop. We can't defer this always to the second
1047 loop, because it is not even invoked when !not_found_cnt
1048 after the first loop. */
1049 cur_node.host_start = (uintptr_t) hostaddrs[i];
1050 cur_node.host_end = cur_node.host_start;
1051 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1052 if (n != NULL)
1053 {
1054 cur_node.host_start -= n->host_start;
1055 hostaddrs[i]
1056 = (void *) (n->tgt->tgt_start + n->tgt_offset
1057 + cur_node.host_start);
1058 }
1059 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1060 {
1061 gomp_mutex_unlock (&devicep->lock);
1062 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1063 }
1064 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1065 /* If not present, continue using the host address. */
1066 ;
1067 else
1068 __builtin_unreachable ();
1069 tgt->list[i].offset = OFFSET_INLINED;
1070 }
1071 else
1072 tgt->list[i].offset = 0;
1073 continue;
1074 }
1075 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1076 {
1077 size_t first = i + 1;
1078 size_t last = i + sizes[i];
1079 cur_node.host_start = (uintptr_t) hostaddrs[i];
1080 cur_node.host_end = (uintptr_t) hostaddrs[last]
1081 + sizes[last];
1082 tgt->list[i].key = NULL;
1083 tgt->list[i].offset = OFFSET_STRUCT;
1084 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1085 if (n == NULL)
1086 {
1087 size_t align = (size_t) 1 << (kind >> rshift);
1088 if (tgt_align < align)
1089 tgt_align = align;
1090 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1091 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1092 tgt_size += cur_node.host_end - cur_node.host_start;
1093 not_found_cnt += last - i;
1094 for (i = first; i <= last; i++)
1095 {
1096 tgt->list[i].key = NULL;
1097 if (!aq
1098 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1099 & typemask))
1100 gomp_coalesce_buf_add (&cbuf,
1101 tgt_size - cur_node.host_end
1102 + (uintptr_t) hostaddrs[i],
1103 sizes[i]);
1104 }
1105 i--;
1106 continue;
1107 }
1108 for (i = first; i <= last; i++)
1109 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1110 sizes, kinds, NULL, refcount_set);
1111 i--;
1112 continue;
1113 }
1114 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1115 {
1116 tgt->list[i].key = NULL;
1117 tgt->list[i].offset = OFFSET_POINTER;
1118 has_firstprivate = true;
1119 continue;
1120 }
1121 else if ((kind & typemask) == GOMP_MAP_ATTACH
1122 || ((kind & typemask)
1123 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1124 {
1125 tgt->list[i].key = NULL;
1126 has_firstprivate = true;
1127 continue;
1128 }
1129 cur_node.host_start = (uintptr_t) hostaddrs[i];
1130 if (!GOMP_MAP_POINTER_P (kind & typemask))
1131 cur_node.host_end = cur_node.host_start + sizes[i];
1132 else
1133 cur_node.host_end = cur_node.host_start + sizeof (void *);
1134 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1135 {
1136 tgt->list[i].key = NULL;
1137
1138 size_t align = (size_t) 1 << (kind >> rshift);
1139 if (tgt_align < align)
1140 tgt_align = align;
1141 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1142 if (!aq)
1143 gomp_coalesce_buf_add (&cbuf, tgt_size,
1144 cur_node.host_end - cur_node.host_start);
1145 tgt_size += cur_node.host_end - cur_node.host_start;
1146 has_firstprivate = true;
1147 continue;
1148 }
1149 splay_tree_key n;
1150 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1151 {
1152 n = gomp_map_0len_lookup (mem_map, &cur_node);
1153 if (!n)
1154 {
1155 tgt->list[i].key = NULL;
1156 tgt->list[i].offset = OFFSET_INLINED;
1157 continue;
1158 }
1159 }
1160 else
1161 n = splay_tree_lookup (mem_map, &cur_node);
1162 if (n && n->refcount != REFCOUNT_LINK)
1163 {
1164 int always_to_cnt = 0;
1165 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1166 {
1167 bool has_nullptr = false;
1168 size_t j;
1169 for (j = 0; j < n->tgt->list_count; j++)
1170 if (n->tgt->list[j].key == n)
1171 {
1172 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1173 break;
1174 }
1175 if (n->tgt->list_count == 0)
1176 {
1177 /* 'declare target'; assume has_nullptr; it could also be
1178 a statically assigned pointer, but then it should point to
1179 the equivalent variable on the host. */
1180 assert (n->refcount == REFCOUNT_INFINITY);
1181 has_nullptr = true;
1182 }
1183 else
1184 assert (j < n->tgt->list_count);
1185 /* Re-map the data if there is an 'always' modifier or if a
1186 null pointer was there and now a nonnull one has been found; that
1187 permits transparent re-mapping for Fortran array descriptors
1188 which were previously mapped unallocated. */
1189 for (j = i + 1; j < mapnum; j++)
1190 {
1191 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1192 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1193 && (!has_nullptr
1194 || !GOMP_MAP_POINTER_P (ptr_kind)
1195 || *(void **) hostaddrs[j] == NULL))
1196 break;
1197 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1198 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1199 > cur_node.host_end))
1200 break;
1201 else
1202 {
1203 has_always_ptrset = true;
1204 ++always_to_cnt;
1205 }
1206 }
1207 }
1208 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1209 kind & typemask, always_to_cnt > 0, implicit,
1210 NULL, refcount_set);
1211 i += always_to_cnt;
1212 }
1213 else
1214 {
1215 tgt->list[i].key = NULL;
1216
1217 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1218 {
1219 /* Not present, hence skip the entry - including its MAP_POINTER,
1220 if it exists. */
1221 tgt->list[i].offset = OFFSET_INLINED;
1222 if (i + 1 < mapnum
1223 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1224 == GOMP_MAP_POINTER))
1225 {
1226 ++i;
1227 tgt->list[i].key = NULL;
1228 tgt->list[i].offset = 0;
1229 }
1230 continue;
1231 }
1232 size_t align = (size_t) 1 << (kind >> rshift);
1233 not_found_cnt++;
1234 if (tgt_align < align)
1235 tgt_align = align;
1236 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1237 if (!aq
1238 && gomp_to_device_kind_p (kind & typemask))
1239 gomp_coalesce_buf_add (&cbuf, tgt_size,
1240 cur_node.host_end - cur_node.host_start);
1241 tgt_size += cur_node.host_end - cur_node.host_start;
1242 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1243 {
1244 size_t j;
1245 int kind;
1246 for (j = i + 1; j < mapnum; j++)
1247 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1248 kinds, j)) & typemask))
1249 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1250 break;
1251 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1252 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1253 > cur_node.host_end))
1254 break;
1255 else
1256 {
1257 tgt->list[j].key = NULL;
1258 i++;
1259 }
1260 }
1261 }
1262 }
1263
1264 if (devaddrs)
1265 {
1266 if (mapnum != 1)
1267 {
1268 gomp_mutex_unlock (&devicep->lock);
1269 gomp_fatal ("unexpected aggregation");
1270 }
1271 tgt->to_free = devaddrs[0];
1272 tgt->tgt_start = (uintptr_t) tgt->to_free;
1273 tgt->tgt_end = tgt->tgt_start + sizes[0];
1274 }
1275 else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
1276 {
1277 /* Allocate tgt_align aligned tgt_size block of memory. */
1278 /* FIXME: Perhaps change interface to allocate properly aligned
1279 memory. */
1280 tgt->to_free = devicep->alloc_func (devicep->target_id,
1281 tgt_size + tgt_align - 1);
1282 if (!tgt->to_free)
1283 {
1284 gomp_mutex_unlock (&devicep->lock);
1285 gomp_fatal ("device memory allocation fail");
1286 }
1287
1288 tgt->tgt_start = (uintptr_t) tgt->to_free;
1289 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1290 tgt->tgt_end = tgt->tgt_start + tgt_size;
1291
1292 if (cbuf.use_cnt == 1)
1293 cbuf.chunk_cnt--;
1294 if (cbuf.chunk_cnt > 0)
1295 {
1296 cbuf.buf
1297 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1298 if (cbuf.buf)
1299 {
1300 cbuf.tgt = tgt;
1301 cbufp = &cbuf;
1302 }
1303 }
1304 }
1305 else
1306 {
1307 tgt->to_free = NULL;
1308 tgt->tgt_start = 0;
1309 tgt->tgt_end = 0;
1310 }
1311
1312 tgt_size = 0;
1313 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1314 tgt_size = mapnum * sizeof (void *);
1315
1316 tgt->array = NULL;
1317 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1318 {
1319 if (not_found_cnt)
1320 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1321 splay_tree_node array = tgt->array;
1322 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1323 uintptr_t field_tgt_base = 0;
1324 splay_tree_key field_tgt_structelem_first = NULL;
1325
1326 for (i = 0; i < mapnum; i++)
1327 if (has_always_ptrset
1328 && tgt->list[i].key
1329 && (get_kind (short_mapkind, kinds, i) & typemask)
1330 == GOMP_MAP_TO_PSET)
1331 {
1332 splay_tree_key k = tgt->list[i].key;
1333 bool has_nullptr = false;
1334 size_t j;
1335 for (j = 0; j < k->tgt->list_count; j++)
1336 if (k->tgt->list[j].key == k)
1337 {
1338 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1339 break;
1340 }
1341 if (k->tgt->list_count == 0)
1342 has_nullptr = true;
1343 else
1344 assert (j < k->tgt->list_count);
1345
1346 tgt->list[i].has_null_ptr_assoc = false;
1347 for (j = i + 1; j < mapnum; j++)
1348 {
1349 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1350 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1351 && (!has_nullptr
1352 || !GOMP_MAP_POINTER_P (ptr_kind)
1353 || *(void **) hostaddrs[j] == NULL))
1354 break;
1355 else if ((uintptr_t) hostaddrs[j] < k->host_start
1356 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1357 > k->host_end))
1358 break;
1359 else
1360 {
1361 if (*(void **) hostaddrs[j] == NULL)
1362 tgt->list[i].has_null_ptr_assoc = true;
1363 tgt->list[j].key = k;
1364 tgt->list[j].copy_from = false;
1365 tgt->list[j].always_copy_from = false;
1366 tgt->list[j].is_attach = false;
1367 gomp_increment_refcount (k, refcount_set);
1368 gomp_map_pointer (k->tgt, aq,
1369 (uintptr_t) *(void **) hostaddrs[j],
1370 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1371 - k->host_start),
1372 sizes[j], cbufp, false);
1373 }
1374 }
1375 i = j - 1;
1376 }
1377 else if (tgt->list[i].key == NULL)
1378 {
1379 int kind = get_kind (short_mapkind, kinds, i);
1380 bool implicit = get_implicit (short_mapkind, kinds, i);
1381 if (hostaddrs[i] == NULL)
1382 continue;
1383 switch (kind & typemask)
1384 {
1385 size_t align, len, first, last;
1386 splay_tree_key n;
1387 case GOMP_MAP_FIRSTPRIVATE:
1388 align = (size_t) 1 << (kind >> rshift);
1389 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1390 tgt->list[i].offset = tgt_size;
1391 len = sizes[i];
1392 gomp_copy_host2dev (devicep, aq,
1393 (void *) (tgt->tgt_start + tgt_size),
1394 (void *) hostaddrs[i], len, false, cbufp);
1395 /* Save the device address in hostaddrs[i] to permit later availability
1396 when doing a deep firstprivate with pointer attach. */
1397 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1398 tgt_size += len;
1399
1400 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1401 firstprivate to hostaddrs[i+1], which is assumed to contain a
1402 device address. */
1403 if (i + 1 < mapnum
1404 && (GOMP_MAP_ATTACH
1405 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1406 {
1407 uintptr_t target = (uintptr_t) hostaddrs[i];
1408 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1409 /* Per
1410 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1411 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1412 this probably needs revision for 'aq' usage. */
1413 assert (!aq);
1414 gomp_copy_host2dev (devicep, aq, devptr, &target,
1415 sizeof (void *), false, cbufp);
1416 ++i;
1417 }
1418 continue;
1419 case GOMP_MAP_FIRSTPRIVATE_INT:
1420 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1421 continue;
1422 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1423 /* The OpenACC 'host_data' construct only allows 'use_device'
1424 "mapping" clauses, so in the first loop, 'not_found_cnt'
1425 must always have been zero, so all OpenACC 'use_device'
1426 clauses have already been handled. (We can only easily test
1427 'use_device' with 'if_present' clause here.) */
1428 assert (tgt->list[i].offset == OFFSET_INLINED);
1429 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1430 code conceptually simple, similar to the first loop. */
1431 case GOMP_MAP_USE_DEVICE_PTR:
1432 if (tgt->list[i].offset == 0)
1433 {
1434 cur_node.host_start = (uintptr_t) hostaddrs[i];
1435 cur_node.host_end = cur_node.host_start;
1436 n = gomp_map_lookup (mem_map, &cur_node);
1437 if (n != NULL)
1438 {
1439 cur_node.host_start -= n->host_start;
1440 hostaddrs[i]
1441 = (void *) (n->tgt->tgt_start + n->tgt_offset
1442 + cur_node.host_start);
1443 }
1444 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1445 {
1446 gomp_mutex_unlock (&devicep->lock);
1447 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1448 }
1449 else if ((kind & typemask)
1450 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1451 /* If not present, continue using the host address. */
1452 ;
1453 else
1454 __builtin_unreachable ();
1455 tgt->list[i].offset = OFFSET_INLINED;
1456 }
1457 continue;
1458 case GOMP_MAP_STRUCT:
1459 first = i + 1;
1460 last = i + sizes[i];
1461 cur_node.host_start = (uintptr_t) hostaddrs[i];
1462 cur_node.host_end = (uintptr_t) hostaddrs[last]
1463 + sizes[last];
1464 if (tgt->list[first].key != NULL)
1465 continue;
1466 n = splay_tree_lookup (mem_map, &cur_node);
1467 if (n == NULL)
1468 {
1469 size_t align = (size_t) 1 << (kind >> rshift);
1470 tgt_size -= (uintptr_t) hostaddrs[first]
1471 - (uintptr_t) hostaddrs[i];
1472 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1473 tgt_size += (uintptr_t) hostaddrs[first]
1474 - (uintptr_t) hostaddrs[i];
1475 field_tgt_base = (uintptr_t) hostaddrs[first];
1476 field_tgt_offset = tgt_size;
1477 field_tgt_clear = last;
1478 field_tgt_structelem_first = NULL;
1479 tgt_size += cur_node.host_end
1480 - (uintptr_t) hostaddrs[first];
1481 continue;
1482 }
1483 for (i = first; i <= last; i++)
1484 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1485 sizes, kinds, cbufp, refcount_set);
1486 i--;
1487 continue;
1488 case GOMP_MAP_ALWAYS_POINTER:
1489 cur_node.host_start = (uintptr_t) hostaddrs[i];
1490 cur_node.host_end = cur_node.host_start + sizeof (void *);
1491 n = splay_tree_lookup (mem_map, &cur_node);
1492 if (n == NULL
1493 || n->host_start > cur_node.host_start
1494 || n->host_end < cur_node.host_end)
1495 {
1496 gomp_mutex_unlock (&devicep->lock);
1497 gomp_fatal ("always pointer not mapped");
1498 }
1499 if (i > 0
1500 && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1501 != GOMP_MAP_ALWAYS_POINTER))
1502 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1503 if (cur_node.tgt_offset)
1504 cur_node.tgt_offset -= sizes[i];
1505 gomp_copy_host2dev (devicep, aq,
1506 (void *) (n->tgt->tgt_start
1507 + n->tgt_offset
1508 + cur_node.host_start
1509 - n->host_start),
1510 (void *) &cur_node.tgt_offset,
1511 sizeof (void *), true, cbufp);
1512 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1513 + cur_node.host_start - n->host_start;
1514 continue;
1515 case GOMP_MAP_IF_PRESENT:
1516 /* Not present - otherwise handled above. Skip over its
1517 MAP_POINTER as well. */
1518 if (i + 1 < mapnum
1519 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1520 == GOMP_MAP_POINTER))
1521 ++i;
1522 continue;
1523 case GOMP_MAP_ATTACH:
1524 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1525 {
1526 cur_node.host_start = (uintptr_t) hostaddrs[i];
1527 cur_node.host_end = cur_node.host_start + sizeof (void *);
1528 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1529 if (n != NULL)
1530 {
1531 tgt->list[i].key = n;
1532 tgt->list[i].offset = cur_node.host_start - n->host_start;
1533 tgt->list[i].length = n->host_end - n->host_start;
1534 tgt->list[i].copy_from = false;
1535 tgt->list[i].always_copy_from = false;
1536 tgt->list[i].is_attach = true;
1537 /* OpenACC 'attach'/'detach' doesn't affect
1538 structured/dynamic reference counts ('n->refcount',
1539 'n->dynamic_refcount'). */
1540
1541 bool zlas
1542 = ((kind & typemask)
1543 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1544 gomp_attach_pointer (devicep, aq, mem_map, n,
1545 (uintptr_t) hostaddrs[i], sizes[i],
1546 cbufp, zlas);
1547 }
1548 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1549 {
1550 gomp_mutex_unlock (&devicep->lock);
1551 gomp_fatal ("outer struct not mapped for attach");
1552 }
1553 continue;
1554 }
1555 default:
1556 break;
1557 }
1558 splay_tree_key k = &array->key;
1559 k->host_start = (uintptr_t) hostaddrs[i];
1560 if (!GOMP_MAP_POINTER_P (kind & typemask))
1561 k->host_end = k->host_start + sizes[i];
1562 else
1563 k->host_end = k->host_start + sizeof (void *);
1564 splay_tree_key n = splay_tree_lookup (mem_map, k);
1565 if (n && n->refcount != REFCOUNT_LINK)
1566 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1567 kind & typemask, false, implicit, cbufp,
1568 refcount_set);
1569 else
1570 {
1571 k->aux = NULL;
1572 if (n && n->refcount == REFCOUNT_LINK)
1573 {
1574 /* Replace target address of the pointer with target address
1575 of mapped object in the splay tree. */
1576 splay_tree_remove (mem_map, n);
1577 k->aux
1578 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1579 k->aux->link_key = n;
1580 }
1581 size_t align = (size_t) 1 << (kind >> rshift);
1582 tgt->list[i].key = k;
1583 k->tgt = tgt;
1584 k->refcount = 0;
1585 k->dynamic_refcount = 0;
1586 if (field_tgt_clear != FIELD_TGT_EMPTY)
1587 {
1588 k->tgt_offset = k->host_start - field_tgt_base
1589 + field_tgt_offset;
1590 if (openmp_p)
1591 {
1592 k->refcount = REFCOUNT_STRUCTELEM;
1593 if (field_tgt_structelem_first == NULL)
1594 {
1595 /* Set to first structure element of sequence. */
1596 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1597 field_tgt_structelem_first = k;
1598 }
1599 else
1600 /* Point to refcount of leading element, but do not
1601 increment again. */
1602 k->structelem_refcount_ptr
1603 = &field_tgt_structelem_first->structelem_refcount;
1604
1605 if (i == field_tgt_clear)
1606 {
1607 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1608 field_tgt_structelem_first = NULL;
1609 }
1610 }
1611 if (i == field_tgt_clear)
1612 field_tgt_clear = FIELD_TGT_EMPTY;
1613 }
1614 else
1615 {
1616 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1617 k->tgt_offset = tgt_size;
1618 tgt_size += k->host_end - k->host_start;
1619 }
1620 /* First increment, from 0 to 1. gomp_increment_refcount
1621 encapsulates the different increment cases, so use this
1622 instead of directly setting 1 during initialization. */
1623 gomp_increment_refcount (k, refcount_set);
1624
1625 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1626 tgt->list[i].always_copy_from
1627 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1628 tgt->list[i].is_attach = false;
1629 tgt->list[i].offset = 0;
1630 tgt->list[i].length = k->host_end - k->host_start;
1631 tgt->refcount++;
1632 array->left = NULL;
1633 array->right = NULL;
1634 splay_tree_insert (mem_map, array);
1635 switch (kind & typemask)
1636 {
1637 case GOMP_MAP_ALLOC:
1638 case GOMP_MAP_FROM:
1639 case GOMP_MAP_FORCE_ALLOC:
1640 case GOMP_MAP_FORCE_FROM:
1641 case GOMP_MAP_ALWAYS_FROM:
1642 break;
1643 case GOMP_MAP_TO:
1644 case GOMP_MAP_TOFROM:
1645 case GOMP_MAP_FORCE_TO:
1646 case GOMP_MAP_FORCE_TOFROM:
1647 case GOMP_MAP_ALWAYS_TO:
1648 case GOMP_MAP_ALWAYS_TOFROM:
1649 gomp_copy_host2dev (devicep, aq,
1650 (void *) (tgt->tgt_start
1651 + k->tgt_offset),
1652 (void *) k->host_start,
1653 k->host_end - k->host_start,
1654 false, cbufp);
1655 break;
1656 case GOMP_MAP_POINTER:
1657 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1658 gomp_map_pointer
1659 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1660 k->tgt_offset, sizes[i], cbufp,
1661 ((kind & typemask)
1662 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1663 break;
1664 case GOMP_MAP_TO_PSET:
1665 gomp_copy_host2dev (devicep, aq,
1666 (void *) (tgt->tgt_start
1667 + k->tgt_offset),
1668 (void *) k->host_start,
1669 k->host_end - k->host_start,
1670 false, cbufp);
1671 tgt->list[i].has_null_ptr_assoc = false;
1672
1673 for (j = i + 1; j < mapnum; j++)
1674 {
1675 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1676 & typemask);
1677 if (!GOMP_MAP_POINTER_P (ptr_kind)
1678 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1679 break;
1680 else if ((uintptr_t) hostaddrs[j] < k->host_start
1681 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1682 > k->host_end))
1683 break;
1684 else
1685 {
1686 tgt->list[j].key = k;
1687 tgt->list[j].copy_from = false;
1688 tgt->list[j].always_copy_from = false;
1689 tgt->list[j].is_attach = false;
1690 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1691 /* For OpenMP, the use of refcount_sets causes
1692 errors if we set k->refcount = 1 above but also
1693 increment it again here, for decrementing will
1694 not properly match, since we decrement only once
1695 for each key's refcount. Therefore avoid this
1696 increment for OpenMP constructs. */
1697 if (!openmp_p)
1698 gomp_increment_refcount (k, refcount_set);
1699 gomp_map_pointer (tgt, aq,
1700 (uintptr_t) *(void **) hostaddrs[j],
1701 k->tgt_offset
1702 + ((uintptr_t) hostaddrs[j]
1703 - k->host_start),
1704 sizes[j], cbufp, false);
1705 }
1706 }
1707 i = j - 1;
1708 break;
1709 case GOMP_MAP_FORCE_PRESENT:
1710 case GOMP_MAP_ALWAYS_PRESENT_TO:
1711 case GOMP_MAP_ALWAYS_PRESENT_FROM:
1712 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
1713 {
1714 /* We already looked up the memory region above and it
1715 was missing. */
1716 size_t size = k->host_end - k->host_start;
1717 gomp_mutex_unlock (&devicep->lock);
1718 #ifdef HAVE_INTTYPES_H
1719 gomp_fatal ("present clause: not present on the device "
1720 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
1721 "dev: %d)", (void *) k->host_start,
1722 (uint64_t) size, (uint64_t) size,
1723 devicep->target_id);
1724 #else
1725 gomp_fatal ("present clause: not present on the device "
1726 "(addr: %p, size: %lu (0x%lx), dev: %d)",
1727 (void *) k->host_start,
1728 (unsigned long) size, (unsigned long) size,
1729 devicep->target_id);
1730 #endif
1731 }
1732 break;
1733 case GOMP_MAP_FORCE_DEVICEPTR:
1734 assert (k->host_end - k->host_start == sizeof (void *));
1735 gomp_copy_host2dev (devicep, aq,
1736 (void *) (tgt->tgt_start
1737 + k->tgt_offset),
1738 (void *) k->host_start,
1739 sizeof (void *), false, cbufp);
1740 break;
1741 default:
1742 gomp_mutex_unlock (&devicep->lock);
1743 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1744 kind);
1745 }
1746
1747 if (k->aux && k->aux->link_key)
1748 {
1749 /* Set link pointer on target to the device address of the
1750 mapped object. */
1751 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1752 /* We intentionally do not use coalescing here, as it's not
1753 data allocated by the current call to this function. */
1754 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1755 &tgt_addr, sizeof (void *), true, NULL);
1756 }
1757 array++;
1758 }
1759 }
1760 }
1761
1762 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1763 {
1764 for (i = 0; i < mapnum; i++)
1765 {
1766 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1767 gomp_copy_host2dev (devicep, aq,
1768 (void *) (tgt->tgt_start + i * sizeof (void *)),
1769 (void *) &cur_node.tgt_offset, sizeof (void *),
1770 true, cbufp);
1771 }
1772 }
1773
1774 if (cbufp)
1775 {
1776 long c = 0;
1777 for (c = 0; c < cbuf.chunk_cnt; ++c)
1778 gomp_copy_host2dev (devicep, aq,
1779 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1780 (char *) cbuf.buf + (cbuf.chunks[c].start
1781 - cbuf.chunks[0].start),
1782 cbuf.chunks[c].end - cbuf.chunks[c].start,
1783 false, NULL);
1784 if (aq)
1785 /* Free once the transfer has completed. */
1786 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1787 else
1788 free (cbuf.buf);
1789 cbuf.buf = NULL;
1790 cbufp = NULL;
1791 }
1792
1793 /* If the variable from "omp target enter data" map-list was already mapped,
1794 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1795 gomp_exit_data. */
1796 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1797 {
1798 free (tgt);
1799 tgt = NULL;
1800 }
1801
1802 gomp_mutex_unlock (&devicep->lock);
1803 return tgt;
1804 }
1805
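/* Map MAPNUM host variables for OpenMP, wrapping gomp_map_vars_internal.
   If the caller does not pass a REFCOUNT_SET that is shared across multiple
   map/unmap operations, a temporary one is created for this call only.  */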
1806 static struct target_mem_desc *
1807 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1808 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1809 bool short_mapkind, htab_t *refcount_set,
1810 enum gomp_map_vars_kind pragma_kind)
1811 {
1812 /* This management of a local refcount_set is for convenience of callers
1813 who do not share a refcount_set over multiple map/unmap uses. */
1814 htab_t local_refcount_set = NULL;
1815 if (refcount_set == NULL)
1816 {
1817 local_refcount_set = htab_create (mapnum);
1818 refcount_set = &local_refcount_set;
1819 }
1820
1821 struct target_mem_desc *tgt;
1822 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1823 sizes, kinds, short_mapkind, refcount_set,
1824 pragma_kind);
1825 if (local_refcount_set)
1826 htab_free (local_refcount_set);
1827
1828 return tgt;
1829 }
1830
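/* OpenACC variant of gomp_map_vars: the mapping operations are queued on the
   asynchronous queue AQ, no refcount set is used, and GOMP_MAP_VARS_OPENACC
   is OR'ed into PRAGMA_KIND so the internal routine can tell the APIs
   apart.  */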
1831 attribute_hidden struct target_mem_desc *
1832 goacc_map_vars (struct gomp_device_descr *devicep,
1833 struct goacc_asyncqueue *aq, size_t mapnum,
1834 void **hostaddrs, void **devaddrs, size_t *sizes,
1835 void *kinds, bool short_mapkind,
1836 enum gomp_map_vars_kind pragma_kind)
1837 {
1838 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1839 sizes, kinds, short_mapkind, NULL,
1840 GOMP_MAP_VARS_OPENACC | pragma_kind);
1841 }
1842
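/* Release all resources of the target memory descriptor TGT: the device
   memory block it owns (if any) and the host-side node array.  */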
1843 static void
1844 gomp_unmap_tgt (struct target_mem_desc *tgt)
1845 {
1846 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1847 if (tgt->tgt_end)
1848 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1849
1850 free (tgt->array);
1851 free (tgt);
1852 }
1853
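/* Decrement the reference count of the target_mem_desc pointed to by PTR and
   unmap it once the count drops to zero.  Returns true if it was actually
   unmapped.  */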
1854 static bool
1855 gomp_unref_tgt (void *ptr)
1856 {
1857 bool is_tgt_unmapped = false;
1858
1859 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1860
1861 if (tgt->refcount > 1)
1862 tgt->refcount--;
1863 else
1864 {
1865 gomp_unmap_tgt (tgt);
1866 is_tgt_unmapped = true;
1867 }
1868
1869 return is_tgt_unmapped;
1870 }
1871
1872 static void
1873 gomp_unref_tgt_void (void *ptr)
1874 {
1875 (void) gomp_unref_tgt (ptr);
1876 }
1877
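/* Remove key K from splay tree SP.  If K carries a 'declare target link'
   key, re-insert that link key, and free K's auxiliary data.  */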
1878 static void
1879 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1880 {
1881 splay_tree_remove (sp, k);
1882 if (k->aux)
1883 {
1884 if (k->aux->link_key)
1885 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1886 if (k->aux->attach_count)
1887 free (k->aux->attach_count);
1888 free (k->aux);
1889 k->aux = NULL;
1890 }
1891 }
1892
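/* Remove the mapping K from DEVICEP's memory map and drop the reference on
   its target_mem_desc, either immediately or via the asynchronous queue AQ.
   For structure-element mappings, all sibling element keys of the same
   struct are removed together.  Returns true if the descriptor was actually
   unmapped (always false in the asynchronous case).  */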
1893 static inline __attribute__((always_inline)) bool
1894 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1895 struct goacc_asyncqueue *aq)
1896 {
1897 bool is_tgt_unmapped = false;
1898
1899 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1900 {
1901 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1902 /* Infer the splay_tree_key of the first structelem key using the
1903 pointer to the first structelem_refcount. */
1904 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1905 - offsetof (struct splay_tree_key_s,
1906 structelem_refcount));
1907 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1908
1909 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1910 with the splay_tree_keys embedded inside. */
1911 splay_tree_node node =
1912 (splay_tree_node) ((char *) k
1913 - offsetof (struct splay_tree_node_s, key));
1914 while (true)
1915 {
1916 /* Start from the _FIRST key and continue for all following
1917 sibling keys. */
1918 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1919 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1920 break;
1921 else
1922 k = &(++node)->key;
1923 }
1924 }
1925 else
1926 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1927
1928 if (aq)
1929 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1930 (void *) k->tgt);
1931 else
1932 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1933 return is_tgt_unmapped;
1934 }
1935
1936 attribute_hidden bool
1937 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1938 {
1939 return gomp_remove_var_internal (devicep, k, NULL);
1940 }
1941
1942 /* Remove a variable asynchronously. This actually removes the variable
1943 mapping immediately, but retains the linked target_mem_desc until the
1944 asynchronous operation has completed (as it may still refer to target
1945 memory). The device lock must be held before entry, and remains locked on
1946 exit. */
1947
1948 attribute_hidden void
1949 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1950 struct goacc_asyncqueue *aq)
1951 {
1952 (void) gomp_remove_var_internal (devicep, k, aq);
1953 }
1954
1955 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1956 variables back from device to host; if it is false, it is assumed that this
1957 has been done already. */
1958
1959 static inline __attribute__((always_inline)) void
1960 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1961 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1962 {
1963 struct gomp_device_descr *devicep = tgt->device_descr;
1964
1965 if (tgt->list_count == 0)
1966 {
1967 free (tgt);
1968 return;
1969 }
1970
1971 gomp_mutex_lock (&devicep->lock);
1972 if (devicep->state == GOMP_DEVICE_FINALIZED)
1973 {
1974 gomp_mutex_unlock (&devicep->lock);
1975 free (tgt->array);
1976 free (tgt);
1977 return;
1978 }
1979
1980 size_t i;
1981
1982 /* We must perform detachments before any copies back to the host. */
1983 for (i = 0; i < tgt->list_count; i++)
1984 {
1985 splay_tree_key k = tgt->list[i].key;
1986
1987 if (k != NULL && tgt->list[i].is_attach)
1988 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1989 + tgt->list[i].offset,
1990 false, NULL);
1991 }
1992
1993 for (i = 0; i < tgt->list_count; i++)
1994 {
1995 splay_tree_key k = tgt->list[i].key;
1996 if (k == NULL)
1997 continue;
1998
1999 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2000 counts ('n->refcount', 'n->dynamic_refcount'). */
2001 if (tgt->list[i].is_attach)
2002 continue;
2003
2004 bool do_copy, do_remove;
2005 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2006
2007 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2008 || tgt->list[i].always_copy_from)
2009 gomp_copy_dev2host (devicep, aq,
2010 (void *) (k->host_start + tgt->list[i].offset),
2011 (void *) (k->tgt->tgt_start + k->tgt_offset
2012 + tgt->list[i].offset),
2013 tgt->list[i].length);
2014 if (do_remove)
2015 {
2016 struct target_mem_desc *k_tgt = k->tgt;
2017 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2018 /* It would be bad if TGT got unmapped while we're still iterating
2019 over its LIST_COUNT, as we also expect to use it in the following
2020 code. */
2021 assert (!is_tgt_unmapped
2022 || k_tgt != tgt);
2023 }
2024 }
2025
2026 if (aq)
2027 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2028 (void *) tgt);
2029 else
2030 gomp_unref_tgt ((void *) tgt);
2031
2032 gomp_mutex_unlock (&devicep->lock);
2033 }
2034
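/* OpenMP wrapper around gomp_unmap_vars_internal that, like gomp_map_vars,
   creates a temporary refcount set when the caller does not provide one.  */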
2035 static void
2036 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2037 htab_t *refcount_set)
2038 {
2039 /* This management of a local refcount_set is for convenience of callers
2040 who do not share a refcount_set over multiple map/unmap uses. */
2041 htab_t local_refcount_set = NULL;
2042 if (refcount_set == NULL)
2043 {
2044 local_refcount_set = htab_create (tgt->list_count);
2045 refcount_set = &local_refcount_set;
2046 }
2047
2048 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2049
2050 if (local_refcount_set)
2051 htab_free (local_refcount_set);
2052 }
2053
2054 attribute_hidden void
2055 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2056 struct goacc_asyncqueue *aq)
2057 {
2058 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2059 }
2060
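/* Implement 'target update' for DEVICEP: for each of the MAPNUM non-empty
   host address ranges, copy data to and/or from the device according to
   KINDS, skipping pointers that are still attached and reporting an error
   for 'present' map kinds that are not mapped.  */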
2061 static void
2062 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2063 size_t *sizes, void *kinds, bool short_mapkind)
2064 {
2065 size_t i;
2066 struct splay_tree_key_s cur_node;
2067 const int typemask = short_mapkind ? 0xff : 0x7;
2068
2069 if (!devicep)
2070 return;
2071
2072 if (mapnum == 0)
2073 return;
2074
2075 gomp_mutex_lock (&devicep->lock);
2076 if (devicep->state == GOMP_DEVICE_FINALIZED)
2077 {
2078 gomp_mutex_unlock (&devicep->lock);
2079 return;
2080 }
2081
2082 for (i = 0; i < mapnum; i++)
2083 if (sizes[i])
2084 {
2085 cur_node.host_start = (uintptr_t) hostaddrs[i];
2086 cur_node.host_end = cur_node.host_start + sizes[i];
2087 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2088 if (n)
2089 {
2090 int kind = get_kind (short_mapkind, kinds, i);
2091 if (n->host_start > cur_node.host_start
2092 || n->host_end < cur_node.host_end)
2093 {
2094 gomp_mutex_unlock (&devicep->lock);
2095 gomp_fatal ("Trying to update [%p..%p) object when "
2096 "only [%p..%p) is mapped",
2097 (void *) cur_node.host_start,
2098 (void *) cur_node.host_end,
2099 (void *) n->host_start,
2100 (void *) n->host_end);
2101 }
2102
2103 if (n->aux && n->aux->attach_count)
2104 {
2105 uintptr_t addr = cur_node.host_start;
2106 while (addr < cur_node.host_end)
2107 {
2108 /* We have to be careful not to overwrite still attached
2109 pointers during host<->device updates. */
2110 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2111 if (n->aux->attach_count[i] == 0)
2112 {
2113 void *devaddr = (void *) (n->tgt->tgt_start
2114 + n->tgt_offset
2115 + addr - n->host_start);
2116 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2117 gomp_copy_host2dev (devicep, NULL,
2118 devaddr, (void *) addr,
2119 sizeof (void *), false, NULL);
2120 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2121 gomp_copy_dev2host (devicep, NULL,
2122 (void *) addr, devaddr,
2123 sizeof (void *));
2124 }
2125 addr += sizeof (void *);
2126 }
2127 }
2128 else
2129 {
2130 void *hostaddr = (void *) cur_node.host_start;
2131 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2132 + cur_node.host_start
2133 - n->host_start);
2134 size_t size = cur_node.host_end - cur_node.host_start;
2135
2136 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2137 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2138 false, NULL);
2139 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2140 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2141 }
2142 }
2143 else
2144 {
2145 int kind = get_kind (short_mapkind, kinds, i);
2146
2147 if (GOMP_MAP_PRESENT_P (kind))
2148 {
2149 /* We already looked up the memory region above and it
2150 was missing. */
2151 gomp_mutex_unlock (&devicep->lock);
2152 #ifdef HAVE_INTTYPES_H
2153 gomp_fatal ("present clause: not present on the device "
2154 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2155 "dev: %d)", (void *) hostaddrs[i],
2156 (uint64_t) sizes[i], (uint64_t) sizes[i],
2157 devicep->target_id);
2158 #else
2159 gomp_fatal ("present clause: not present on the device "
2160 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2161 (void *) hostaddrs[i], (unsigned long) sizes[i],
2162 (unsigned long) sizes[i], devicep->target_id);
2163 #endif
2164 }
2165 }
2166 }
2167 gomp_mutex_unlock (&devicep->lock);
2168 }
2169
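/* Return the offload ICV list item for device number DEV_NUM, or NULL if no
   item for that device has been created yet.  */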
2170 static struct gomp_offload_icv_list *
2171 gomp_get_offload_icv_item (int dev_num)
2172 {
2173 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2174 while (l != NULL && l->device_num != dev_num)
2175 l = l->next;
2176
2177 return l;
2178 }
2179
2180 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2181 depending on the device num and the variable hierarchy
2182 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2183 device and thus no item with that device number is contained in
2184 gomp_offload_icv_list, then a new item is created and added to the list. */
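/* For example, for device 42 the nteams ICV is taken from a _DEV_42 setting
   if present, otherwise from _DEV, otherwise from _ALL, and finally from the
   default ICV values.  */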
2185
2186 static struct gomp_offload_icvs *
2187 get_gomp_offload_icvs (int dev_num)
2188 {
2189 struct gomp_icv_list *dev
2190 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2191 struct gomp_icv_list *all
2192 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2193 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2194 struct gomp_offload_icv_list *offload_icvs
2195 = gomp_get_offload_icv_item (dev_num);
2196
2197 if (offload_icvs != NULL)
2198 return &offload_icvs->icvs;
2199
2200 struct gomp_offload_icv_list *new
2201 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2202
2203 new->device_num = dev_num;
2204 new->icvs.device_num = dev_num;
2205 new->next = gomp_offload_icv_list;
2206
2207 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2208 new->icvs.nteams = dev_x->icvs.nteams_var;
2209 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2210 new->icvs.nteams = dev->icvs.nteams_var;
2211 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2212 new->icvs.nteams = all->icvs.nteams_var;
2213 else
2214 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2215
2216 if (dev_x != NULL
2217 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2218 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2219 else if (dev != NULL
2220 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2221 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2222 else if (all != NULL
2223 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2224 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2225 else
2226 new->icvs.teams_thread_limit
2227 = gomp_default_icv_values.teams_thread_limit_var;
2228
2229 if (dev_x != NULL
2230 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2231 new->icvs.default_device = dev_x->icvs.default_device_var;
2232 else if (dev != NULL
2233 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2234 new->icvs.default_device = dev->icvs.default_device_var;
2235 else if (all != NULL
2236 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2237 new->icvs.default_device = all->icvs.default_device_var;
2238 else
2239 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2240
2241 gomp_offload_icv_list = new;
2242 return &new->icvs;
2243 }
2244
2245 /* Load the image pointed to by TARGET_DATA to the device specified by DEVICEP,
2246 and insert into the splay tree the mapping between addresses from HOST_TABLE
2247 and from the loaded target image. We rely on the host and device compilers
2248 emitting variables and functions in the same order. */
2249
2250 static void
2251 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2252 const void *host_table, const void *target_data,
2253 bool is_register_lock)
2254 {
2255 void **host_func_table = ((void ***) host_table)[0];
2256 void **host_funcs_end = ((void ***) host_table)[1];
2257 void **host_var_table = ((void ***) host_table)[2];
2258 void **host_vars_end = ((void ***) host_table)[3];
2259 void **host_ind_func_table = NULL;
2260 void **host_ind_funcs_end = NULL;
2261
2262 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2263 {
2264 host_ind_func_table = ((void ***) host_table)[4];
2265 host_ind_funcs_end = ((void ***) host_table)[5];
2266 }
2267
2268 /* The func and ind_func tables contain only addresses, the var table
2269 contains addresses and corresponding sizes. */
2270 int num_funcs = host_funcs_end - host_func_table;
2271 int num_vars = (host_vars_end - host_var_table) / 2;
2272 int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2273
2274 /* Load image to device and get target addresses for the image. */
2275 struct addr_pair *target_table = NULL;
2276 uint64_t *rev_target_fn_table = NULL;
2277 int i, num_target_entries;
2278
2279 /* With reverse offload, also insert target-host addresses. */
2280 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2281
2282 num_target_entries
2283 = devicep->load_image_func (devicep->target_id, version,
2284 target_data, &target_table,
2285 rev_lookup ? &rev_target_fn_table : NULL,
2286 num_ind_funcs
2287 ? (uint64_t *) host_ind_func_table : NULL);
2288
2289 if (num_target_entries != num_funcs + num_vars
2290 /* "+1" due to the additional ICV struct. */
2291 && num_target_entries != num_funcs + num_vars + 1)
2292 {
2293 gomp_mutex_unlock (&devicep->lock);
2294 if (is_register_lock)
2295 gomp_mutex_unlock (&register_lock);
2296 gomp_fatal ("Cannot map target functions or variables"
2297 " (expected %u, have %u)", num_funcs + num_vars,
2298 num_target_entries);
2299 }
2300
2301 /* Insert host-target address mapping into splay tree. */
2302 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2303 /* "+1" due to the additional ICV struct. */
2304 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2305 * sizeof (*tgt->array));
2306 if (rev_target_fn_table)
2307 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2308 else
2309 tgt->rev_array = NULL;
2310 tgt->refcount = REFCOUNT_INFINITY;
2311 tgt->tgt_start = 0;
2312 tgt->tgt_end = 0;
2313 tgt->to_free = NULL;
2314 tgt->prev = NULL;
2315 tgt->list_count = 0;
2316 tgt->device_descr = devicep;
2317 splay_tree_node array = tgt->array;
2318 reverse_splay_tree_node rev_array = tgt->rev_array;
2319
2320 for (i = 0; i < num_funcs; i++)
2321 {
2322 splay_tree_key k = &array->key;
2323 k->host_start = (uintptr_t) host_func_table[i];
2324 k->host_end = k->host_start + 1;
2325 k->tgt = tgt;
2326 k->tgt_offset = target_table[i].start;
2327 k->refcount = REFCOUNT_INFINITY;
2328 k->dynamic_refcount = 0;
2329 k->aux = NULL;
2330 array->left = NULL;
2331 array->right = NULL;
2332 splay_tree_insert (&devicep->mem_map, array);
2333 if (rev_target_fn_table)
2334 {
2335 reverse_splay_tree_key k2 = &rev_array->key;
2336 k2->dev = rev_target_fn_table[i];
2337 k2->k = k;
2338 rev_array->left = NULL;
2339 rev_array->right = NULL;
2340 if (k2->dev != 0)
2341 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2342 rev_array++;
2343 }
2344 array++;
2345 }
2346
2347 /* Most significant bit of the size in host and target tables marks
2348 "omp declare target link" variables. */
2349 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2350 const uintptr_t size_mask = ~link_bit;
2351
2352 for (i = 0; i < num_vars; i++)
2353 {
2354 struct addr_pair *target_var = &target_table[num_funcs + i];
2355 uintptr_t target_size = target_var->end - target_var->start;
2356 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2357
2358 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2359 {
2360 gomp_mutex_unlock (&devicep->lock);
2361 if (is_register_lock)
2362 gomp_mutex_unlock (&register_lock);
2363 gomp_fatal ("Cannot map target variables (size mismatch)");
2364 }
2365
2366 splay_tree_key k = &array->key;
2367 k->host_start = (uintptr_t) host_var_table[i * 2];
2368 k->host_end
2369 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2370 k->tgt = tgt;
2371 k->tgt_offset = target_var->start;
2372 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2373 k->dynamic_refcount = 0;
2374 k->aux = NULL;
2375 array->left = NULL;
2376 array->right = NULL;
2377 splay_tree_insert (&devicep->mem_map, array);
2378 array++;
2379 }
2380
2381 /* Last entry is for an ICVs variable.
2382 Tolerate the case where the plugin does not return those entries. */
2383 if (num_funcs + num_vars < num_target_entries)
2384 {
2385 struct addr_pair *var = &target_table[num_funcs + num_vars];
2386
2387 /* Start address will be non-zero for the ICVs variable if
2388 the variable was found in this image. */
2389 if (var->start != 0)
2390 {
2391 /* The index of the devicep within devices[] is regarded as its
2392 'device number', which is different from the per-device type
2393 devicep->target_id. */
2394 int dev_num = (int) (devicep - &devices[0]);
2395 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2396 size_t var_size = var->end - var->start;
2397 if (var_size != sizeof (struct gomp_offload_icvs))
2398 {
2399 gomp_mutex_unlock (&devicep->lock);
2400 if (is_register_lock)
2401 gomp_mutex_unlock (&register_lock);
2402 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2403 "format");
2404 }
2405 /* Copy the ICVs variable to its place in device memory, thereby
2406 actually putting its device number into effect. */
2407 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2408 var_size, false, NULL);
2409 splay_tree_key k = &array->key;
2410 k->host_start = (uintptr_t) icvs;
2411 k->host_end =
2412 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2413 k->tgt = tgt;
2414 k->tgt_offset = var->start;
2415 k->refcount = REFCOUNT_INFINITY;
2416 k->dynamic_refcount = 0;
2417 k->aux = NULL;
2418 array->left = NULL;
2419 array->right = NULL;
2420 splay_tree_insert (&devicep->mem_map, array);
2421 array++;
2422 }
2423 }
2424
2425 free (target_table);
2426 }
2427
2428 /* Unload the mappings described by target_data from device DEVICE_P.
2429 The device must be locked. */
2430
2431 static void
2432 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2433 unsigned version,
2434 const void *host_table, const void *target_data)
2435 {
2436 void **host_func_table = ((void ***) host_table)[0];
2437 void **host_funcs_end = ((void ***) host_table)[1];
2438 void **host_var_table = ((void ***) host_table)[2];
2439 void **host_vars_end = ((void ***) host_table)[3];
2440
2441 /* The func table contains only addresses, the var table contains addresses
2442 and corresponding sizes. */
2443 int num_funcs = host_funcs_end - host_func_table;
2444 int num_vars = (host_vars_end - host_var_table) / 2;
2445
2446 struct splay_tree_key_s k;
2447 splay_tree_key node = NULL;
2448
2449 /* Find mapping at start of node array. */
2450 if (num_funcs || num_vars)
2451 {
2452 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2453 : (uintptr_t) host_var_table[0]);
2454 k.host_end = k.host_start + 1;
2455 node = splay_tree_lookup (&devicep->mem_map, &k);
2456 }
2457
2458 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2459 {
2460 gomp_mutex_unlock (&devicep->lock);
2461 gomp_fatal ("image unload fail");
2462 }
2463 if (devicep->mem_map_rev.root)
2464 {
2465 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2466 real allocation. */
2467 assert (node && node->tgt && node->tgt->rev_array);
2468 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2469 free (node->tgt->rev_array);
2470 devicep->mem_map_rev.root = NULL;
2471 }
2472
2473 /* Remove mappings from splay tree. */
2474 int i;
2475 for (i = 0; i < num_funcs; i++)
2476 {
2477 k.host_start = (uintptr_t) host_func_table[i];
2478 k.host_end = k.host_start + 1;
2479 splay_tree_remove (&devicep->mem_map, &k);
2480 }
2481
2482 /* Most significant bit of the size in host and target tables marks
2483 "omp declare target link" variables. */
2484 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2485 const uintptr_t size_mask = ~link_bit;
2486 bool is_tgt_unmapped = false;
2487
2488 for (i = 0; i < num_vars; i++)
2489 {
2490 k.host_start = (uintptr_t) host_var_table[i * 2];
2491 k.host_end
2492 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2493
2494 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2495 splay_tree_remove (&devicep->mem_map, &k);
2496 else
2497 {
2498 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2499 is_tgt_unmapped = gomp_remove_var (devicep, n);
2500 }
2501 }
2502
2503 if (node && !is_tgt_unmapped)
2504 {
2505 free (node->tgt);
2506 free (node);
2507 }
2508 }
2509
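/* Write into BUF (of SIZE bytes) a human-readable, comma-separated list of
   the OpenMP 'requires' clauses that are set in REQUIRES_MASK.  */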
2510 static void
2511 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2512 {
2513 char *end = buf + size, *p = buf;
2514 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2515 p += snprintf (p, end - p, "unified_address");
2516 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2517 p += snprintf (p, end - p, "%sunified_shared_memory",
2518 (p == buf ? "" : ", "));
2519 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2520 p += snprintf (p, end - p, "%sreverse_offload",
2521 (p == buf ? "" : ", "));
2522 }
2523
2524 /* This function should be called from every offload image while loading.
2525 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2526 the target, and DATA. */
2527
2528 void
2529 GOMP_offload_register_ver (unsigned version, const void *host_table,
2530 int target_type, const void *data)
2531 {
2532 int i;
2533
2534 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2535 gomp_fatal ("Library too old for offload (version %u < %u)",
2536 GOMP_VERSION, GOMP_VERSION_LIB (version));
2537
2538 int omp_req;
2539 const void *target_data;
2540 if (GOMP_VERSION_LIB (version) > 1)
2541 {
2542 omp_req = (int) (size_t) ((void **) data)[0];
2543 target_data = &((void **) data)[1];
2544 }
2545 else
2546 {
2547 omp_req = 0;
2548 target_data = data;
2549 }
2550
2551 gomp_mutex_lock (&register_lock);
2552
2553 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2554 {
2555 char buf1[sizeof ("unified_address, unified_shared_memory, "
2556 "reverse_offload")];
2557 char buf2[sizeof ("unified_address, unified_shared_memory, "
2558 "reverse_offload")];
2559 gomp_requires_to_name (buf2, sizeof (buf2),
2560 omp_req != GOMP_REQUIRES_TARGET_USED
2561 ? omp_req : omp_requires_mask);
2562 if (omp_req != GOMP_REQUIRES_TARGET_USED
2563 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2564 {
2565 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2566 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2567 "in multiple compilation units: '%s' vs. '%s'",
2568 buf1, buf2);
2569 }
2570 else
2571 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2572 "some compilation units", buf2);
2573 }
2574 omp_requires_mask = omp_req;
2575
2576 /* Load image to all initialized devices. */
2577 for (i = 0; i < num_devices; i++)
2578 {
2579 struct gomp_device_descr *devicep = &devices[i];
2580 gomp_mutex_lock (&devicep->lock);
2581 if (devicep->type == target_type
2582 && devicep->state == GOMP_DEVICE_INITIALIZED)
2583 gomp_load_image_to_device (devicep, version,
2584 host_table, target_data, true);
2585 gomp_mutex_unlock (&devicep->lock);
2586 }
2587
2588 /* Insert image to array of pending images. */
2589 offload_images
2590 = gomp_realloc_unlock (offload_images,
2591 (num_offload_images + 1)
2592 * sizeof (struct offload_image_descr));
2593 offload_images[num_offload_images].version = version;
2594 offload_images[num_offload_images].type = target_type;
2595 offload_images[num_offload_images].host_table = host_table;
2596 offload_images[num_offload_images].target_data = target_data;
2597
2598 num_offload_images++;
2599 gomp_mutex_unlock (&register_lock);
2600 }
2601
2602 /* Legacy entry point. */
2603
2604 void
2605 GOMP_offload_register (const void *host_table, int target_type,
2606 const void *target_data)
2607 {
2608 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2609 }
2610
2611 /* This function should be called from every offload image while unloading.
2612 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2613 the target, and DATA. */
2614
2615 void
2616 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2617 int target_type, const void *data)
2618 {
2619 int i;
2620
2621 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2622 gomp_fatal ("Library too old for offload (version %u < %u)",
2623 GOMP_VERSION, GOMP_VERSION_LIB (version));
2624
2625 const void *target_data;
2626 if (GOMP_VERSION_LIB (version) > 1)
2627 target_data = &((void **) data)[1];
2628 else
2629 target_data = data;
2630
2631 gomp_mutex_lock (&register_lock);
2632
2633 /* Unload image from all initialized devices. */
2634 for (i = 0; i < num_devices; i++)
2635 {
2636 struct gomp_device_descr *devicep = &devices[i];
2637 gomp_mutex_lock (&devicep->lock);
2638 if (devicep->type == target_type
2639 && devicep->state == GOMP_DEVICE_INITIALIZED)
2640 gomp_unload_image_from_device (devicep, version,
2641 host_table, target_data);
2642 gomp_mutex_unlock (&devicep->lock);
2643 }
2644
2645 /* Remove image from array of pending images. */
2646 for (i = 0; i < num_offload_images; i++)
2647 if (offload_images[i].target_data == target_data)
2648 {
2649 offload_images[i] = offload_images[--num_offload_images];
2650 break;
2651 }
2652
2653 gomp_mutex_unlock (&register_lock);
2654 }
2655
2656 /* Legacy entry point. */
2657
2658 void
2659 GOMP_offload_unregister (const void *host_table, int target_type,
2660 const void *target_data)
2661 {
2662 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2663 }
2664
2665 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2666 must be locked on entry, and remains locked on return. */
2667
2668 attribute_hidden void
2669 gomp_init_device (struct gomp_device_descr *devicep)
2670 {
2671 int i;
2672 if (!devicep->init_device_func (devicep->target_id))
2673 {
2674 gomp_mutex_unlock (&devicep->lock);
2675 gomp_fatal ("device initialization failed");
2676 }
2677
2678 /* Load to the device all images registered so far. */
2679 for (i = 0; i < num_offload_images; i++)
2680 {
2681 struct offload_image_descr *image = &offload_images[i];
2682 if (image->type == devicep->type)
2683 gomp_load_image_to_device (devicep, image->version,
2684 image->host_table, image->target_data,
2685 false);
2686 }
2687
2688 /* Initialize OpenACC asynchronous queues. */
2689 goacc_init_asyncqueues (devicep);
2690
2691 devicep->state = GOMP_DEVICE_INITIALIZED;
2692 }
2693
2694 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2695 must be locked on entry, and remains locked on return. */
2696
2697 attribute_hidden bool
2698 gomp_fini_device (struct gomp_device_descr *devicep)
2699 {
2700 bool ret = goacc_fini_asyncqueues (devicep);
2701 ret &= devicep->fini_device_func (devicep->target_id);
2702 devicep->state = GOMP_DEVICE_FINALIZED;
2703 return ret;
2704 }
2705
2706 attribute_hidden void
2707 gomp_unload_device (struct gomp_device_descr *devicep)
2708 {
2709 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2710 {
2711 unsigned i;
2712
2713 /* Unload from the device all currently registered images. */
2714 for (i = 0; i < num_offload_images; i++)
2715 {
2716 struct offload_image_descr *image = &offload_images[i];
2717 if (image->type == devicep->type)
2718 gomp_unload_image_from_device (devicep, image->version,
2719 image->host_table,
2720 image->target_data);
2721 }
2722 }
2723 }
2724
2725 /* Host fallback for GOMP_target{,_ext} routines. */
2726
2727 static void
2728 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2729 struct gomp_device_descr *devicep, void **args)
2730 {
2731 struct gomp_thread old_thr, *thr = gomp_thread ();
2732
2733 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2734 && devicep != NULL)
2735 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2736 "be used for offloading");
2737
2738 old_thr = *thr;
2739 memset (thr, '\0', sizeof (*thr));
2740 if (gomp_places_list)
2741 {
2742 thr->place = old_thr.place;
2743 thr->ts.place_partition_len = gomp_places_list_len;
2744 }
2745 if (args)
2746 while (*args)
2747 {
2748 intptr_t id = (intptr_t) *args++, val;
2749 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2750 val = (intptr_t) *args++;
2751 else
2752 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2753 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2754 continue;
2755 id &= GOMP_TARGET_ARG_ID_MASK;
2756 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2757 continue;
2758 val = val > INT_MAX ? INT_MAX : val;
2759 if (val)
2760 gomp_icv (true)->thread_limit_var = val;
2761 break;
2762 }
2763
2764 fn (hostaddrs);
2765 gomp_free_thread (thr);
2766 *thr = old_thr;
2767 }
2768
2769 /* Calculate alignment and size requirements of a private copy of data shared
2770 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2771
2772 static inline void
2773 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2774 unsigned short *kinds, size_t *tgt_align,
2775 size_t *tgt_size)
2776 {
2777 size_t i;
2778 for (i = 0; i < mapnum; i++)
2779 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2780 {
2781 size_t align = (size_t) 1 << (kinds[i] >> 8);
2782 if (*tgt_align < align)
2783 *tgt_align = align;
2784 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2785 *tgt_size += sizes[i];
2786 }
2787 }
2788
2789 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2790
2791 static inline void
2792 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2793 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2794 size_t tgt_size)
2795 {
2796 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2797 if (al)
2798 tgt += tgt_align - al;
2799 tgt_size = 0;
2800 size_t i;
2801 for (i = 0; i < mapnum; i++)
2802 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2803 {
2804 size_t align = (size_t) 1 << (kinds[i] >> 8);
2805 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2806 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2807 hostaddrs[i] = tgt + tgt_size;
2808 tgt_size = tgt_size + sizes[i];
2809 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2810 {
2811 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2812 ++i;
2813 }
2814 }
2815 }
2816
2817 /* Helper function of GOMP_target{,_ext} routines. */
2818
2819 static void *
2820 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2821 void (*host_fn) (void *))
2822 {
2823 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2824 return (void *) host_fn;
2825 else
2826 {
2827 gomp_mutex_lock (&devicep->lock);
2828 if (devicep->state == GOMP_DEVICE_FINALIZED)
2829 {
2830 gomp_mutex_unlock (&devicep->lock);
2831 return NULL;
2832 }
2833
2834 struct splay_tree_key_s k;
2835 k.host_start = (uintptr_t) host_fn;
2836 k.host_end = k.host_start + 1;
2837 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2838 gomp_mutex_unlock (&devicep->lock);
2839 if (tgt_fn == NULL)
2840 return NULL;
2841
2842 return (void *) tgt_fn->tgt_offset;
2843 }
2844 }
2845
2846 /* Called when encountering a target directive. If DEVICE
2847 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2848 GOMP_DEVICE_HOST_FALLBACK (or any value
2849 larger than last available hw device), use host fallback.
2850 FN is address of host code, UNUSED is part of the current ABI, but
2851 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2852 with MAPNUM entries, with addresses of the host objects,
2853 sizes of the host objects (resp. for pointer kind pointer bias
2854 and assumed sizeof (void *) size) and kinds. */
2855
2856 void
2857 GOMP_target (int device, void (*fn) (void *), const void *unused,
2858 size_t mapnum, void **hostaddrs, size_t *sizes,
2859 unsigned char *kinds)
2860 {
2861 struct gomp_device_descr *devicep = resolve_device (device, true);
2862
2863 void *fn_addr;
2864 if (devicep == NULL
2865 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2866 /* All shared memory devices should use the GOMP_target_ext function. */
2867 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2868 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2869 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2870
2871 htab_t refcount_set = htab_create (mapnum);
2872 struct target_mem_desc *tgt_vars
2873 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2874 &refcount_set, GOMP_MAP_VARS_TARGET);
2875 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2876 NULL);
2877 htab_clear (refcount_set);
2878 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2879 htab_free (refcount_set);
2880 }
2881
2882 static inline unsigned int
2883 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2884 {
2885 /* If we cannot run asynchronously, simply ignore nowait. */
2886 if (devicep != NULL && devicep->async_run_func == NULL)
2887 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2888
2889 return flags;
2890 }
2891
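/* Copy the ICV block of DEVICE back from device memory to its host copy, so
   that ICV changes made during target execution are reflected on the
   host.  */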
2892 static void
2893 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2894 {
2895 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2896 if (item == NULL)
2897 return;
2898
2899 void *host_ptr = &item->icvs;
2900 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2901 if (dev_ptr != NULL)
2902 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2903 sizeof (struct gomp_offload_icvs));
2904 }
2905
2906 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2907 and several arguments have been added:
2908 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2909 DEPEND is array of dependencies, see GOMP_task for details.
2910
2911 ARGS is a pointer to an array consisting of a variable number of both
2912 device-independent and device-specific arguments; each argument can take
2913 one or two elements, where the first specifies for which device it is
2914 intended, the type and optionally also the value. If the value is not
2915 present in the first element, the whole second element holds the actual
2916 value. The last element of the array is a single NULL. Among the
2917 device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
2918
2919 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2920 that value, or 1 if teams construct is not present, or 0, if
2921 teams construct does not have num_teams clause and so the choice is
2922 implementation defined, and -1 if it can't be determined on the host
2923 what value GOMP_teams will have on the device.
2924 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2925 body with that value, or 0, if teams construct does not have thread_limit
2926 clause or the teams construct is not present, or -1 if it can't be
2927 determined on the host what value GOMP_teams will have on the device. */
2928
2929 void
2930 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2931 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2932 unsigned int flags, void **depend, void **args)
2933 {
2934 struct gomp_device_descr *devicep = resolve_device (device, true);
2935 size_t tgt_align = 0, tgt_size = 0;
2936 bool fpc_done = false;
2937
2938 /* Obtain the original TEAMS and THREADS values from ARGS. */
2939 intptr_t orig_teams = 1, orig_threads = 0;
2940 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2941 void **tmpargs = args;
2942 while (*tmpargs)
2943 {
2944 intptr_t id = (intptr_t) *tmpargs++, val;
2945 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2946 {
2947 val = (intptr_t) *tmpargs++;
2948 len = 2;
2949 }
2950 else
2951 {
2952 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2953 len = 1;
2954 }
2955 num_args += len;
2956 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2957 continue;
2958 val = val > INT_MAX ? INT_MAX : val;
2959 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2960 {
2961 orig_teams = val;
2962 teams_len = len;
2963 }
2964 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2965 {
2966 orig_threads = val;
2967 threads_len = len;
2968 }
2969 }
2970
2971 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2972 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2973 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2974 value could not be determined. No change.
2975 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2976 Set device-specific value.
2977 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
2978 No change. */
2979 if (orig_teams == -2)
2980 new_teams = 1;
2981 else if (orig_teams == 0)
2982 {
2983 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2984 if (item != NULL)
2985 new_teams = item->icvs.nteams;
2986 }
2987 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
2988 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
2989 e.g. a THREAD_LIMIT clause. */
2990 if (orig_teams > -2 && orig_threads == 0)
2991 {
2992 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2993 if (item != NULL)
2994 new_threads = item->icvs.teams_thread_limit;
2995 }
2996
2997 /* Copy and change the arguments list only if TEAMS or THREADS need to be
2998 updated. */
2999 void **new_args = args;
3000 if (orig_teams != new_teams || orig_threads != new_threads)
3001 {
3002 size_t tms_len = (orig_teams == new_teams
3003 ? teams_len
3004 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3005 ? 1 : 2));
3006 size_t ths_len = (orig_threads == new_threads
3007 ? threads_len
3008 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3009 ? 1 : 2));
3010 /* One additional item after the last arg must be NULL. */
3011 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3012 + ths_len + 1;
3013 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3014
3015 tmpargs = args;
3016 void **tmp_new_args = new_args;
3017 /* Copy all args, except that TEAMS and THREADS are copied only if they
3018 have not been changed, and are skipped otherwise. */
3019 while (*tmpargs)
3020 {
3021 intptr_t id = (intptr_t) *tmpargs;
3022 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3023 && orig_teams != new_teams)
3024 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3025 && orig_threads != new_threads))
3026 {
3027 tmpargs++;
3028 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3029 tmpargs++;
3030 }
3031 else
3032 {
3033 *tmp_new_args++ = *tmpargs++;
3034 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3035 *tmp_new_args++ = *tmpargs++;
3036 }
3037 }
3038
3039 /* Add the new TEAMS arg to the new args list if it has been changed. */
3040 if (orig_teams != new_teams)
3041 {
3042 intptr_t new_val = new_teams;
3043 if (tms_len == 1)
3044 {
3045 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3046 | GOMP_TARGET_ARG_NUM_TEAMS;
3047 *tmp_new_args++ = (void *) new_val;
3048 }
3049 else
3050 {
3051 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3052 | GOMP_TARGET_ARG_NUM_TEAMS);
3053 *tmp_new_args++ = (void *) new_val;
3054 }
3055 }
3056
3057 /* Add the new THREADS arg to the new args list if it has been changed. */
3058 if (orig_threads != new_threads)
3059 {
3060 intptr_t new_val = new_threads;
3061 if (ths_len == 1)
3062 {
3063 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3064 | GOMP_TARGET_ARG_THREAD_LIMIT;
3065 *tmp_new_args++ = (void *) new_val;
3066 }
3067 else
3068 {
3069 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3070 | GOMP_TARGET_ARG_THREAD_LIMIT);
3071 *tmp_new_args++ = (void *) new_val;
3072 }
3073 }
3074
3075 *tmp_new_args = NULL;
3076 }
3077
3078 flags = clear_unsupported_flags (devicep, flags);
3079
3080 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3081 {
3082 struct gomp_thread *thr = gomp_thread ();
3083 /* Create a team if we don't have any around, as nowait
3084 target tasks make sense to run asynchronously even when
3085 outside of any parallel. */
3086 if (__builtin_expect (thr->ts.team == NULL, 0))
3087 {
3088 struct gomp_team *team = gomp_new_team (1);
3089 struct gomp_task *task = thr->task;
3090 struct gomp_task **implicit_task = &task;
3091 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3092 team->prev_ts = thr->ts;
3093 thr->ts.team = team;
3094 thr->ts.team_id = 0;
3095 thr->ts.work_share = &team->work_shares[0];
3096 thr->ts.last_work_share = NULL;
3097 #ifdef HAVE_SYNC_BUILTINS
3098 thr->ts.single_count = 0;
3099 #endif
3100 thr->ts.static_trip = 0;
3101 thr->task = &team->implicit_task[0];
3102 gomp_init_task (thr->task, NULL, icv);
3103 while (*implicit_task
3104 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3105 implicit_task = &(*implicit_task)->parent;
3106 if (*implicit_task)
3107 {
3108 thr->task = *implicit_task;
3109 gomp_end_task ();
3110 free (*implicit_task);
3111 thr->task = &team->implicit_task[0];
3112 }
3113 else
3114 pthread_setspecific (gomp_thread_destructor, thr);
3115 if (implicit_task != &task)
3116 {
3117 *implicit_task = thr->task;
3118 thr->task = task;
3119 }
3120 }
3121 if (thr->ts.team
3122 && !thr->task->final_task)
3123 {
3124 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3125 sizes, kinds, flags, depend, new_args,
3126 GOMP_TARGET_TASK_BEFORE_MAP);
3127 return;
3128 }
3129 }
3130
3131 /* If there are depend clauses, but nowait is not present
3132 (or we are in a final task), block the parent task until the
3133 dependencies are resolved and then just continue with the rest
3134 of the function as if it is a merged task. */
3135 if (depend != NULL)
3136 {
3137 struct gomp_thread *thr = gomp_thread ();
3138 if (thr->task && thr->task->depend_hash)
3139 {
3140 /* If we might need to wait, copy firstprivate now. */
3141 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3142 &tgt_align, &tgt_size);
3143 if (tgt_align)
3144 {
3145 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3146 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3147 tgt_align, tgt_size);
3148 }
3149 fpc_done = true;
3150 gomp_task_maybe_wait_for_dependencies (depend);
3151 }
3152 }
3153
3154 void *fn_addr;
3155 if (devicep == NULL
3156 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3157 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3158 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3159 {
3160 if (!fpc_done)
3161 {
3162 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3163 &tgt_align, &tgt_size);
3164 if (tgt_align)
3165 {
3166 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3167 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3168 tgt_align, tgt_size);
3169 }
3170 }
3171 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3172 return;
3173 }
3174
3175 struct target_mem_desc *tgt_vars;
3176 htab_t refcount_set = NULL;
3177
3178 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3179 {
3180 if (!fpc_done)
3181 {
3182 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3183 &tgt_align, &tgt_size);
3184 if (tgt_align)
3185 {
3186 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3187 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3188 tgt_align, tgt_size);
3189 }
3190 }
3191 tgt_vars = NULL;
3192 }
3193 else
3194 {
3195 refcount_set = htab_create (mapnum);
3196 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3197 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3198 }
3199 devicep->run_func (devicep->target_id, fn_addr,
3200 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3201 new_args);
3202 if (tgt_vars)
3203 {
3204 htab_clear (refcount_set);
3205 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3206 }
3207 if (refcount_set)
3208 htab_free (refcount_set);
3209
3210 /* Copy back ICVs from device to host.
3211 HOST_PTR is expected to exist since it was added in
3212 gomp_load_image_to_device if not already available. */
3213 gomp_copy_back_icvs (devicep, device);
3214
3215 }
3216
3217
3218 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3219 keeping track of all variable handling, assuming that reverse offload occurs
3220 only very rarely. The downside is that the reverse search is slow. */
3221
3222 struct gomp_splay_tree_rev_lookup_data {
3223 uintptr_t tgt_start;
3224 uintptr_t tgt_end;
3225 splay_tree_key key;
3226 };
3227
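/* Callback for splay_tree_foreach_lazy: if the device address range of KEY
   overlaps the range recorded in D, store KEY there and return 1 to stop the
   traversal; otherwise return 0 to continue.  */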
3228 static int
3229 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3230 {
3231 struct gomp_splay_tree_rev_lookup_data *data;
3232 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3233 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3234
3235 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3236 return 0;
3237
3238 size_t j;
3239 for (j = 0; j < key->tgt->list_count; j++)
3240 if (key->tgt->list[j].key == key)
3241 break;
3242 assert (j < key->tgt->list_count);
3243 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3244
3245 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3246 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3247 {
3248 data->key = key;
3249 return 1;
3250 }
3251 return 0;
3252 }
3253
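/* Find the splay tree key whose device address range overlaps
   [TGT_START, TGT_END), walking MEM_MAP linearly.  For an empty range, try a
   one-byte range starting at TGT_START and, unless ZERO_LEN, also the byte
   just before it.  */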
3254 static inline splay_tree_key
3255 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3256 bool zero_len)
3257 {
3258 struct gomp_splay_tree_rev_lookup_data data;
3259 data.key = NULL;
3260 data.tgt_start = tgt_start;
3261 data.tgt_end = tgt_end;
3262
3263 if (tgt_start != tgt_end)
3264 {
3265 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3266 return data.key;
3267 }
3268
3269 data.tgt_end++;
3270 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3271 if (data.key != NULL || zero_len)
3272 return data.key;
3273 data.tgt_end--;
3274
3275 data.tgt_start--;
3276 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3277 return data.key;
3278 }
3279
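/* Per-mapping state used while preparing a reverse-offload call: the
   original device address, whether corresponding data was already present,
   and whether the host-side buffer was obtained via gomp_aligned_alloc.  */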
3280 struct cpy_data
3281 {
3282 uint64_t devaddr;
3283 bool present, aligned;
3284 };
3285
3286
3287 /* Search just mapped reverse-offload data; returns index if found,
3288 otherwise >= n. */
3289
3290 static inline int
3291 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3292 unsigned short *kinds, uint64_t *sizes, size_t n,
3293 uint64_t tgt_start, uint64_t tgt_end)
3294 {
3295 const bool short_mapkind = true;
3296 const int typemask = short_mapkind ? 0xff : 0x7;
3297 size_t i;
3298 for (i = 0; i < n; i++)
3299 {
3300 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3301 == GOMP_MAP_STRUCT);
3302 uint64_t dev_end;
3303 if (!is_struct)
3304 dev_end = d[i].devaddr + sizes[i];
3305 else
3306 {
3307 if (i + sizes[i] < n)
3308 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3309 else
3310 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3311 }
3312 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3313 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3314 break;
3315 if (is_struct)
3316 i += sizes[i];
3317 }
3318 return i;
3319 }
3320
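/* Like gomp_map_cdata_lookup_int, but for an empty range also try a one-byte
   range starting at TGT_START and, unless ZERO_LEN, the byte just before
   it.  */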
3321 static inline int
3322 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3323 unsigned short *kinds, uint64_t *sizes,
3324 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3325 bool zero_len)
3326 {
3327 size_t i;
3328 if (tgt_start != tgt_end)
3329 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3330 tgt_start, tgt_end);
3331 tgt_end++;
3332 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3333 tgt_start, tgt_end);
3334 if (i < n || zero_len)
3335 return i;
3336 tgt_end--;
3337
3338 tgt_start--;
3339 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3340 tgt_start, tgt_end);
3341 }
3342
3343 /* Handle reverse offload. This is called by the device plugins for a
3344 reverse offload; it is not called if the outer target runs on the host.
3345 The mapping is simplified: device-affecting constructs (except for target
3346 with device(ancestor:1)) must not be encountered; in particular not
3347 target (enter/exit) data. */
3348
3349 void
3350 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3351 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3352 struct goacc_asyncqueue *aq)
3353 {
3354 /* Return early if there is no offload code. */
3355 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3356 return;
3357 /* Currently, this fails because of calculate_firstprivate_requirements
3358 below; it could be fixed but additional code needs to be updated to
3359 handle 32bit hosts - thus, it is not worthwhile. */
3360 if (sizeof (void *) != sizeof (uint64_t))
3361 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3362
3363 struct cpy_data *cdata = NULL;
3364 uint64_t *devaddrs;
3365 uint64_t *sizes;
3366 unsigned short *kinds;
3367 const bool short_mapkind = true;
3368 const int typemask = short_mapkind ? 0xff : 0x7;
3369 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3370
3371 reverse_splay_tree_key n;
3372 struct reverse_splay_tree_key_s k;
3373 k.dev = fn_ptr;
3374
3375 gomp_mutex_lock (&devicep->lock);
3376 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3377 gomp_mutex_unlock (&devicep->lock);
3378
3379 if (n == NULL)
3380 gomp_fatal ("Cannot find reverse-offload function");
3381 void (*host_fn)() = (void (*)()) n->k->host_start;
3382
3383 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3384 {
3385 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3386 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3387 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3388 }
3389 else
3390 {
3391 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3392 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3393 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3394 gomp_copy_dev2host (devicep, aq, devaddrs,
3395 (const void *) (uintptr_t) devaddrs_ptr,
3396 mapnum * sizeof (uint64_t));
3397 gomp_copy_dev2host (devicep, aq, sizes,
3398 (const void *) (uintptr_t) sizes_ptr,
3399 mapnum * sizeof (uint64_t));
3400 gomp_copy_dev2host (devicep, aq, kinds,
3401 (const void *) (uintptr_t) kinds_ptr,
3402 mapnum * sizeof (unsigned short));
3403 if (aq && !devicep->openacc.async.synchronize_func (aq))
3404 exit (EXIT_FAILURE);
3405 }
3406
3407 size_t tgt_align = 0, tgt_size = 0;
3408
3409 /* If actually executed on 32bit systems, the casts lead to wrong code;
3410 but 32bit with offloading is not supported; see top of this function. */
3411 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3412 (void *) (uintptr_t) kinds,
3413 &tgt_align, &tgt_size);
3414
3415 if (tgt_align)
3416 {
3417 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3418 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3419 if (al)
3420 tgt += tgt_align - al;
3421 tgt_size = 0;
3422 for (uint64_t i = 0; i < mapnum; i++)
3423 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3424 && devaddrs[i] != 0)
3425 {
3426 size_t align = (size_t) 1 << (kinds[i] >> 8);
3427 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3428 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3429 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3430 (size_t) sizes[i]);
3431 else
3432 {
3433 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3434 (void *) (uintptr_t) devaddrs[i],
3435 (size_t) sizes[i]);
3436 if (aq && !devicep->openacc.async.synchronize_func (aq))
3437 exit (EXIT_FAILURE);
3438 }
3439 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3440 tgt_size = tgt_size + sizes[i];
3441 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3442 && i + 1 < mapnum
3443 && ((get_kind (short_mapkind, kinds, i) & typemask)
3444 == GOMP_MAP_ATTACH))
3445 {
3446 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3447 = (uint64_t) devaddrs[i];
3448 ++i;
3449 }
3450 }
3451 }
3452
3453 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3454 {
3455 size_t j, struct_cpy = 0;
3456 splay_tree_key n2;
3457 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3458 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3459 gomp_mutex_lock (&devicep->lock);
3460 for (uint64_t i = 0; i < mapnum; i++)
3461 {
3462 if (devaddrs[i] == 0)
3463 continue;
3464 n = NULL;
3465 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3466 switch (kind)
3467 {
3468 case GOMP_MAP_FIRSTPRIVATE:
3469 case GOMP_MAP_FIRSTPRIVATE_INT:
3470 continue;
3471
3472 case GOMP_MAP_DELETE:
3473 case GOMP_MAP_RELEASE:
3474 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3475 /* Assume it is present; look it up - but ignore unless the
3476 present clause is there. */
3477 case GOMP_MAP_ALLOC:
3478 case GOMP_MAP_FROM:
3479 case GOMP_MAP_FORCE_ALLOC:
3480 case GOMP_MAP_FORCE_FROM:
3481 case GOMP_MAP_ALWAYS_FROM:
3482 case GOMP_MAP_TO:
3483 case GOMP_MAP_TOFROM:
3484 case GOMP_MAP_FORCE_TO:
3485 case GOMP_MAP_FORCE_TOFROM:
3486 case GOMP_MAP_ALWAYS_TO:
3487 case GOMP_MAP_ALWAYS_TOFROM:
3488 case GOMP_MAP_FORCE_PRESENT:
3489 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3490 case GOMP_MAP_ALWAYS_PRESENT_TO:
3491 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3492 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3493 cdata[i].devaddr = devaddrs[i];
3494 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3495 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3496 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3497 devaddrs[i],
3498 devaddrs[i] + sizes[i], zero_len);
3499 if (j < i)
3500 {
3501 n2 = NULL;
3502 cdata[i].present = true;
3503 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3504 }
3505 else
3506 {
3507 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3508 devaddrs[i],
3509 devaddrs[i] + sizes[i], zero_len);
3510 cdata[i].present = n2 != NULL;
3511 }
3512 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3513 {
3514 gomp_mutex_unlock (&devicep->lock);
3515 #ifdef HAVE_INTTYPES_H
3516 gomp_fatal ("present clause: no corresponding data on "
3517 "parent device at %p with size %"PRIu64,
3518 (void *) (uintptr_t) devaddrs[i],
3519 (uint64_t) sizes[i]);
3520 #else
3521 gomp_fatal ("present clause: no corresponding data on "
3522 "parent device at %p with size %lu",
3523 (void *) (uintptr_t) devaddrs[i],
3524 (unsigned long) sizes[i]);
3525 #endif
3526 break;
3527 }
3528 else if (!cdata[i].present
3529 && kind != GOMP_MAP_DELETE
3530 && kind != GOMP_MAP_RELEASE
3531 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3532 {
3533 cdata[i].aligned = true;
3534 size_t align = (size_t) 1 << (kinds[i] >> 8);
3535 devaddrs[i]
3536 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3537 sizes[i]);
3538 }
3539 else if (n2 != NULL)
3540 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3541 - (n2->tgt->tgt_start + n2->tgt_offset));
3542 if (((!cdata[i].present || struct_cpy)
3543 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3544 || kind == GOMP_MAP_FORCE_TO
3545 || kind == GOMP_MAP_FORCE_TOFROM
3546 || GOMP_MAP_ALWAYS_TO_P (kind))
3547 {
3548 gomp_copy_dev2host (devicep, aq,
3549 (void *) (uintptr_t) devaddrs[i],
3550 (void *) (uintptr_t) cdata[i].devaddr,
3551 sizes[i]);
3552 if (aq && !devicep->openacc.async.synchronize_func (aq))
3553 {
3554 gomp_mutex_unlock (&devicep->lock);
3555 exit (EXIT_FAILURE);
3556 }
3557 }
3558 if (struct_cpy)
3559 struct_cpy--;
3560 break;
3561 case GOMP_MAP_ATTACH:
3562 case GOMP_MAP_POINTER:
3563 case GOMP_MAP_ALWAYS_POINTER:
3564 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3565 devaddrs[i] + sizes[i],
3566 devaddrs[i] + sizes[i]
3567 + sizeof (void*), false);
3568 cdata[i].present = n2 != NULL;
3569 cdata[i].devaddr = devaddrs[i];
3570 if (n2)
3571 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3572 - (n2->tgt->tgt_start + n2->tgt_offset));
3573 else
3574 {
3575 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3576 devaddrs[i] + sizes[i],
3577 devaddrs[i] + sizes[i]
3578 + sizeof (void*), false);
3579 if (j < i)
3580 {
3581 cdata[i].present = true;
3582 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3583 - cdata[j].devaddr);
3584 }
3585 }
3586 if (!cdata[i].present)
3587 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3588 /* Assume that when present, the pointer is already correct. */
3589 if (!n2)
3590 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3591 = devaddrs[i-1];
3592 break;
3593 case GOMP_MAP_TO_PSET:
3594 /* Assume that when present, the pointers are fine and no 'to:'
3595 is required. */
3596 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3597 devaddrs[i], devaddrs[i] + sizes[i],
3598 false);
3599 cdata[i].present = n2 != NULL;
3600 cdata[i].devaddr = devaddrs[i];
3601 if (n2)
3602 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3603 - (n2->tgt->tgt_start + n2->tgt_offset));
3604 else
3605 {
3606 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3607 devaddrs[i],
3608 devaddrs[i] + sizes[i], false);
3609 if (j < i)
3610 {
3611 cdata[i].present = true;
3612 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3613 - cdata[j].devaddr);
3614 }
3615 }
3616 if (!cdata[i].present)
3617 {
3618 cdata[i].aligned = true;
3619 size_t align = (size_t) 1 << (kinds[i] >> 8);
3620 devaddrs[i]
3621 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3622 sizes[i]);
3623 gomp_copy_dev2host (devicep, aq,
3624 (void *) (uintptr_t) devaddrs[i],
3625 (void *) (uintptr_t) cdata[i].devaddr,
3626 sizes[i]);
3627 if (aq && !devicep->openacc.async.synchronize_func (aq))
3628 {
3629 gomp_mutex_unlock (&devicep->lock);
3630 exit (EXIT_FAILURE);
3631 }
3632 }
3633 for (j = i + 1; j < mapnum; j++)
3634 {
3635 kind = get_kind (short_mapkind, kinds, j) & typemask;
3636 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3637 && !GOMP_MAP_POINTER_P (kind))
3638 break;
3639 if (devaddrs[j] < devaddrs[i])
3640 break;
3641 if (cdata[i].present)
3642 continue;
3643 if (devaddrs[j] == 0)
3644 {
3645 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3646 continue;
3647 }
3648 int k;
3649 n2 = NULL;
3650 /* Dereference devaddrs[j] to get the device addr. */
3651 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3652 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3653 + sizes[j]);
3654 cdata[j].present = true;
3655 cdata[j].devaddr = devaddrs[j];
3656 if (devaddrs[j] == 0)
3657 continue;
3658 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3659 devaddrs[j],
3660 devaddrs[j] + sizeof (void*),
3661 false);
3662 if (k < j)
3663 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3664 - cdata[k].devaddr);
3665 else
3666 {
3667 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3668 devaddrs[j],
3669 devaddrs[j] + sizeof (void*),
3670 false);
3671 if (n2 == NULL)
3672 {
3673 gomp_mutex_unlock (&devicep->lock);
3674 gomp_fatal ("Pointer target wasn't mapped");
3675 }
3676 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3677 - (n2->tgt->tgt_start + n2->tgt_offset));
3678 }
3679 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3680 = (void *) (uintptr_t) devaddrs[j];
3681 }
3682 i = j - 1;
3683 break;
3684 case GOMP_MAP_STRUCT:
3685 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3686 devaddrs[i + sizes[i]]
3687 + sizes[i + sizes[i]], false);
3688 cdata[i].present = n2 != NULL;
3689 cdata[i].devaddr = devaddrs[i];
3690 struct_cpy = cdata[i].present ? 0 : sizes[i];
3691 if (!n2)
3692 {
3693 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3694 - devaddrs[i+1]
3695 + sizes[i + sizes[i]]);
3696 size_t align = (size_t) 1 << (kinds[i] >> 8);
3697 cdata[i].aligned = true;
3698 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3699 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3700 }
3701 else
3702 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3703 - (n2->tgt->tgt_start + n2->tgt_offset));
3704 break;
3705 default:
3706 gomp_mutex_unlock (&devicep->lock);
3707 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3708 }
3709 }
3710 gomp_mutex_unlock (&devicep->lock);
3711 }
3712
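/* Run the reverse-offload host function with the translated addresses.  */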
3713 host_fn (devaddrs);
3714
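/* Copy any 'from'-mapped data back to the device and release the
   temporary host allocations made above.  */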
3715 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3716 {
3717 uint64_t struct_cpy = 0;
3718 bool clean_struct = false;
3719 for (uint64_t i = 0; i < mapnum; i++)
3720 {
3721 if (cdata[i].devaddr == 0)
3722 continue;
3723 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3724 bool copy = !cdata[i].present || struct_cpy;
3725 switch (kind)
3726 {
3727 case GOMP_MAP_FORCE_FROM:
3728 case GOMP_MAP_FORCE_TOFROM:
3729 case GOMP_MAP_ALWAYS_FROM:
3730 case GOMP_MAP_ALWAYS_TOFROM:
3731 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3732 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3733 copy = true;
3734 /* FALLTHRU */
3735 case GOMP_MAP_FROM:
3736 case GOMP_MAP_TOFROM:
3737 if (copy)
3738 {
3739 gomp_copy_host2dev (devicep, aq,
3740 (void *) (uintptr_t) cdata[i].devaddr,
3741 (void *) (uintptr_t) devaddrs[i],
3742 sizes[i], false, NULL);
3743 if (aq && !devicep->openacc.async.synchronize_func (aq))
3744 exit (EXIT_FAILURE);
3745 }
3746 default:
3747 break;
3748 }
3749 if (struct_cpy)
3750 {
3751 struct_cpy--;
3752 continue;
3753 }
3754 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3755 {
3756 clean_struct = true;
3757 struct_cpy = sizes[i];
3758 }
3759 else if (!cdata[i].present && cdata[i].aligned)
3760 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3761 else if (!cdata[i].present)
3762 free ((void *) (uintptr_t) devaddrs[i]);
3763 }
3764 if (clean_struct)
3765 for (uint64_t i = 0; i < mapnum; i++)
3766 if (!cdata[i].present
3767 && ((get_kind (short_mapkind, kinds, i) & typemask)
3768 == GOMP_MAP_STRUCT))
3769 {
3770 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3771 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3772 }
3773
3774 free (devaddrs);
3775 free (sizes);
3776 free (kinds);
3777 }
3778 }
3779
3780 /* Host fallback for GOMP_target_data{,_ext} routines. */
3781
3782 static void
3783 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3784 {
3785 struct gomp_task_icv *icv = gomp_icv (false);
3786
3787 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3788 && devicep != NULL)
3789 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3790 "be used for offloading");
3791
3792 if (icv->target_data)
3793 {
3794 /* Even when doing a host fallback, if there are any active
3795 #pragma omp target data constructs, we need to remember the
3796 new #pragma omp target data; otherwise GOMP_target_end_data
3797 would get out of sync. */
3798 struct target_mem_desc *tgt
3799 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3800 NULL, GOMP_MAP_VARS_DATA);
3801 tgt->prev = icv->target_data;
3802 icv->target_data = tgt;
3803 }
3804 }
3805
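/* Handle '#pragma omp target data': map the MAPNUM variables described by
   HOSTADDRS, SIZES and KINDS on DEVICE and chain the resulting descriptor
   into the task ICV so that GOMP_target_end_data can unmap it later.
   GOMP_target_data_ext below is the variant taking 16-bit map kinds.  */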
3806 void
3807 GOMP_target_data (int device, const void *unused, size_t mapnum,
3808 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3809 {
3810 struct gomp_device_descr *devicep = resolve_device (device, true);
3811
3812 if (devicep == NULL
3813 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3814 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3815 return gomp_target_data_fallback (devicep);
3816
3817 struct target_mem_desc *tgt
3818 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3819 NULL, GOMP_MAP_VARS_DATA);
3820 struct gomp_task_icv *icv = gomp_icv (true);
3821 tgt->prev = icv->target_data;
3822 icv->target_data = tgt;
3823 }
3824
3825 void
3826 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3827 size_t *sizes, unsigned short *kinds)
3828 {
3829 struct gomp_device_descr *devicep = resolve_device (device, true);
3830
3831 if (devicep == NULL
3832 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3833 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3834 return gomp_target_data_fallback (devicep);
3835
3836 struct target_mem_desc *tgt
3837 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3838 NULL, GOMP_MAP_VARS_DATA);
3839 struct gomp_task_icv *icv = gomp_icv (true);
3840 tgt->prev = icv->target_data;
3841 icv->target_data = tgt;
3842 }
3843
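/* End the innermost '#pragma omp target data' region: pop the most recent
   descriptor off the ICV chain and unmap its variables.  */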
3844 void
3845 GOMP_target_end_data (void)
3846 {
3847 struct gomp_task_icv *icv = gomp_icv (false);
3848 if (icv->target_data)
3849 {
3850 struct target_mem_desc *tgt = icv->target_data;
3851 icv->target_data = tgt->prev;
3852 gomp_unmap_vars (tgt, true, NULL);
3853 }
3854 }
3855
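/* Handle '#pragma omp target update'.  GOMP_target_update_ext below is the
   variant taking 16-bit map kinds that also supports the depend and nowait
   clauses.  */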
3856 void
3857 GOMP_target_update (int device, const void *unused, size_t mapnum,
3858 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3859 {
3860 struct gomp_device_descr *devicep = resolve_device (device, true);
3861
3862 if (devicep == NULL
3863 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3864 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3865 return;
3866
3867 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3868 }
3869
3870 void
3871 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3872 size_t *sizes, unsigned short *kinds,
3873 unsigned int flags, void **depend)
3874 {
3875 struct gomp_device_descr *devicep = resolve_device (device, true);
3876
3877 /* If there are depend clauses, but nowait is not present,
3878 block the parent task until the dependencies are resolved
3879 and then just continue with the rest of the function as if it
3880 were a merged task. Until we are able to schedule tasks during
3881 variable mapping or unmapping, ignore nowait if depend clauses
3882 are not present. */
3883 if (depend != NULL)
3884 {
3885 struct gomp_thread *thr = gomp_thread ();
3886 if (thr->task && thr->task->depend_hash)
3887 {
3888 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3889 && thr->ts.team
3890 && !thr->task->final_task)
3891 {
3892 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3893 mapnum, hostaddrs, sizes, kinds,
3894 flags | GOMP_TARGET_FLAG_UPDATE,
3895 depend, NULL, GOMP_TARGET_TASK_DATA))
3896 return;
3897 }
3898 else
3899 {
3900 struct gomp_team *team = thr->ts.team;
3901 /* If parallel or taskgroup has been cancelled, don't start new
3902 tasks. */
3903 if (__builtin_expect (gomp_cancel_var, 0) && team)
3904 {
3905 if (gomp_team_barrier_cancelled (&team->barrier))
3906 return;
3907 if (thr->task->taskgroup)
3908 {
3909 if (thr->task->taskgroup->cancelled)
3910 return;
3911 if (thr->task->taskgroup->workshare
3912 && thr->task->taskgroup->prev
3913 && thr->task->taskgroup->prev->cancelled)
3914 return;
3915 }
3916 }
3917
3918 gomp_task_maybe_wait_for_dependencies (depend);
3919 }
3920 }
3921 }
3922
3923 if (devicep == NULL
3924 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3925 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3926 return;
3927
3928 struct gomp_thread *thr = gomp_thread ();
3929 struct gomp_team *team = thr->ts.team;
3930 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3931 if (__builtin_expect (gomp_cancel_var, 0) && team)
3932 {
3933 if (gomp_team_barrier_cancelled (&team->barrier))
3934 return;
3935 if (thr->task->taskgroup)
3936 {
3937 if (thr->task->taskgroup->cancelled)
3938 return;
3939 if (thr->task->taskgroup->workshare
3940 && thr->task->taskgroup->prev
3941 && thr->task->taskgroup->prev->cancelled)
3942 return;
3943 }
3944 }
3945
3946 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3947 }
3948
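/* Handle the unmapping part of '#pragma omp target exit data': perform any
   required device-to-host copies, decrement reference counts and remove
   mappings whose reference count drops to zero.  */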
3949 static void
3950 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3951 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3952 htab_t *refcount_set)
3953 {
3954 const int typemask = 0xff;
3955 size_t i;
3956 gomp_mutex_lock (&devicep->lock);
3957 if (devicep->state == GOMP_DEVICE_FINALIZED)
3958 {
3959 gomp_mutex_unlock (&devicep->lock);
3960 return;
3961 }
3962
3963 for (i = 0; i < mapnum; i++)
3964 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3965 {
3966 struct splay_tree_key_s cur_node;
3967 cur_node.host_start = (uintptr_t) hostaddrs[i];
3968 cur_node.host_end = cur_node.host_start + sizeof (void *);
3969 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3970
3971 if (n)
3972 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3973 false, NULL);
3974 }
3975
3976 int nrmvars = 0;
3977 splay_tree_key remove_vars[mapnum];
3978
3979 for (i = 0; i < mapnum; i++)
3980 {
3981 struct splay_tree_key_s cur_node;
3982 unsigned char kind = kinds[i] & typemask;
3983 switch (kind)
3984 {
3985 case GOMP_MAP_FROM:
3986 case GOMP_MAP_ALWAYS_FROM:
3987 case GOMP_MAP_DELETE:
3988 case GOMP_MAP_RELEASE:
3989 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3990 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3991 cur_node.host_start = (uintptr_t) hostaddrs[i];
3992 cur_node.host_end = cur_node.host_start + sizes[i];
3993 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3994 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
3995 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
3996 : splay_tree_lookup (&devicep->mem_map, &cur_node);
3997 if (!k)
3998 continue;
3999
4000 bool delete_p = (kind == GOMP_MAP_DELETE
4001 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4002 bool do_copy, do_remove;
4003 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4004 &do_remove);
4005
4006 if ((kind == GOMP_MAP_FROM && do_copy)
4007 || kind == GOMP_MAP_ALWAYS_FROM)
4008 {
4009 if (k->aux && k->aux->attach_count)
4010 {
4011 /* We have to be careful not to overwrite still-attached
4012 pointers during the copy back to the host. */
4013 uintptr_t addr = k->host_start;
4014 while (addr < k->host_end)
4015 {
4016 size_t i = (addr - k->host_start) / sizeof (void *);
4017 if (k->aux->attach_count[i] == 0)
4018 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4019 (void *) (k->tgt->tgt_start
4020 + k->tgt_offset
4021 + addr - k->host_start),
4022 sizeof (void *));
4023 addr += sizeof (void *);
4024 }
4025 }
4026 else
4027 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4028 (void *) (k->tgt->tgt_start + k->tgt_offset
4029 + cur_node.host_start
4030 - k->host_start),
4031 cur_node.host_end - cur_node.host_start);
4032 }
4033
4034 /* Structure element lists are removed altogether at once, which
4035 may cause immediate deallocation of the target_mem_desc, causing
4036 errors if we still have subsequent element siblings to copy back.
4037 While we're at it, it also seems more disciplined to simply
4038 queue all removals together for processing below.
4039
4040 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4041 not have this problem, since it maintains an additional
4042 tgt->refcount = 1 reference to the target_mem_desc to start with.
4043 */
4044 if (do_remove)
4045 remove_vars[nrmvars++] = k;
4046 break;
4047
4048 case GOMP_MAP_DETACH:
4049 break;
4050 default:
4051 gomp_mutex_unlock (&devicep->lock);
4052 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4053 kind);
4054 }
4055 }
4056
4057 for (int i = 0; i < nrmvars; i++)
4058 gomp_remove_var (devicep, remove_vars[i]);
4059
4060 gomp_mutex_unlock (&devicep->lock);
4061 }
4062
4063 void
4064 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4065 size_t *sizes, unsigned short *kinds,
4066 unsigned int flags, void **depend)
4067 {
4068 struct gomp_device_descr *devicep = resolve_device (device, true);
4069
4070 /* If there are depend clauses, but nowait is not present,
4071 block the parent task until the dependencies are resolved
4072 and then just continue with the rest of the function as if it
4073 were a merged task. Until we are able to schedule tasks during
4074 variable mapping or unmapping, ignore nowait if depend clauses
4075 are not present. */
4076 if (depend != NULL)
4077 {
4078 struct gomp_thread *thr = gomp_thread ();
4079 if (thr->task && thr->task->depend_hash)
4080 {
4081 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4082 && thr->ts.team
4083 && !thr->task->final_task)
4084 {
4085 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4086 mapnum, hostaddrs, sizes, kinds,
4087 flags, depend, NULL,
4088 GOMP_TARGET_TASK_DATA))
4089 return;
4090 }
4091 else
4092 {
4093 struct gomp_team *team = thr->ts.team;
4094 /* If parallel or taskgroup has been cancelled, don't start new
4095 tasks. */
4096 if (__builtin_expect (gomp_cancel_var, 0) && team)
4097 {
4098 if (gomp_team_barrier_cancelled (&team->barrier))
4099 return;
4100 if (thr->task->taskgroup)
4101 {
4102 if (thr->task->taskgroup->cancelled)
4103 return;
4104 if (thr->task->taskgroup->workshare
4105 && thr->task->taskgroup->prev
4106 && thr->task->taskgroup->prev->cancelled)
4107 return;
4108 }
4109 }
4110
4111 gomp_task_maybe_wait_for_dependencies (depend);
4112 }
4113 }
4114 }
4115
4116 if (devicep == NULL
4117 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4118 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4119 return;
4120
4121 struct gomp_thread *thr = gomp_thread ();
4122 struct gomp_team *team = thr->ts.team;
4123 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4124 if (__builtin_expect (gomp_cancel_var, 0) && team)
4125 {
4126 if (gomp_team_barrier_cancelled (&team->barrier))
4127 return;
4128 if (thr->task->taskgroup)
4129 {
4130 if (thr->task->taskgroup->cancelled)
4131 return;
4132 if (thr->task->taskgroup->workshare
4133 && thr->task->taskgroup->prev
4134 && thr->task->taskgroup->prev->cancelled)
4135 return;
4136 }
4137 }
4138
4139 htab_t refcount_set = htab_create (mapnum);
4140
4141 /* The variables are mapped separately such that they can be released
4142 independently. */
4143 size_t i, j;
4144 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4145 for (i = 0; i < mapnum; i++)
4146 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4147 {
4148 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4149 &kinds[i], true, &refcount_set,
4150 GOMP_MAP_VARS_ENTER_DATA);
4151 i += sizes[i];
4152 }
4153 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4154 {
4155 for (j = i + 1; j < mapnum; j++)
4156 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4157 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4158 break;
4159 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4160 &kinds[i], true, &refcount_set,
4161 GOMP_MAP_VARS_ENTER_DATA);
4162 i += j - i - 1;
4163 }
4164 else if (i + 1 < mapnum
4165 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4166 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4167 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4168 {
4169 /* An attach operation must be processed together with the mapped
4170 base-pointer list item. */
4171 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4172 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4173 i += 1;
4174 }
4175 else
4176 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4177 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4178 else
4179 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4180 htab_free (refcount_set);
4181 }
4182
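/* Run a deferred target task TTASK (a 'target nowait' region or a nowait
   enter/exit data or update construct).  Returns true if the work has been
   queued to run asynchronously on the device, false if it has completed.  */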
4183 bool
4184 gomp_target_task_fn (void *data)
4185 {
4186 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4187 struct gomp_device_descr *devicep = ttask->devicep;
4188
4189 if (ttask->fn != NULL)
4190 {
4191 void *fn_addr;
4192 if (devicep == NULL
4193 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4194 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4195 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4196 {
4197 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4198 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4199 ttask->args);
4200 return false;
4201 }
4202
4203 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4204 {
4205 if (ttask->tgt)
4206 gomp_unmap_vars (ttask->tgt, true, NULL);
4207 return false;
4208 }
4209
4210 void *actual_arguments;
4211 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4212 {
4213 ttask->tgt = NULL;
4214 actual_arguments = ttask->hostaddrs;
4215 }
4216 else
4217 {
4218 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4219 NULL, ttask->sizes, ttask->kinds, true,
4220 NULL, GOMP_MAP_VARS_TARGET);
4221 actual_arguments = (void *) ttask->tgt->tgt_start;
4222 }
4223 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4224
4225 assert (devicep->async_run_func);
4226 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4227 ttask->args, (void *) ttask);
4228 return true;
4229 }
4230 else if (devicep == NULL
4231 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4232 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4233 return false;
4234
4235 size_t i;
4236 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4237 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4238 ttask->kinds, true);
4239 else
4240 {
4241 htab_t refcount_set = htab_create (ttask->mapnum);
4242 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4243 for (i = 0; i < ttask->mapnum; i++)
4244 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4245 {
4246 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4247 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4248 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4249 i += ttask->sizes[i];
4250 }
4251 else
4252 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4253 &ttask->kinds[i], true, &refcount_set,
4254 GOMP_MAP_VARS_ENTER_DATA);
4255 else
4256 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4257 ttask->kinds, &refcount_set);
4258 htab_free (refcount_set);
4259 }
4260 return false;
4261 }
4262
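/* Legacy entry point for '#pragma omp teams'; it only records the
   thread-limit ICV.  GOMP_teams4 below is the newer entry point, which
   additionally returns whether another team should be run.  */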
4263 void
4264 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4265 {
4266 if (thread_limit)
4267 {
4268 struct gomp_task_icv *icv = gomp_icv (true);
4269 icv->thread_limit_var
4270 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4271 }
4272 (void) num_teams;
4273 }
4274
4275 bool
4276 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4277 unsigned int thread_limit, bool first)
4278 {
4279 struct gomp_thread *thr = gomp_thread ();
4280 if (first)
4281 {
4282 if (thread_limit)
4283 {
4284 struct gomp_task_icv *icv = gomp_icv (true);
4285 icv->thread_limit_var
4286 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4287 }
4288 (void) num_teams_high;
4289 if (num_teams_low == 0)
4290 num_teams_low = 1;
4291 thr->num_teams = num_teams_low - 1;
4292 thr->team_num = 0;
4293 }
4294 else if (thr->team_num == thr->num_teams)
4295 return false;
4296 else
4297 ++thr->team_num;
4298 return true;
4299 }
4300
4301 void *
4302 omp_target_alloc (size_t size, int device_num)
4303 {
4304 if (device_num == omp_initial_device
4305 || device_num == gomp_get_num_devices ())
4306 return malloc (size);
4307
4308 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4309 if (devicep == NULL)
4310 return NULL;
4311
4312 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4313 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4314 return malloc (size);
4315
4316 gomp_mutex_lock (&devicep->lock);
4317 void *ret = devicep->alloc_func (devicep->target_id, size);
4318 gomp_mutex_unlock (&devicep->lock);
4319 return ret;
4320 }
4321
4322 void
4323 omp_target_free (void *device_ptr, int device_num)
4324 {
4325 if (device_num == omp_initial_device
4326 || device_num == gomp_get_num_devices ())
4327 {
4328 free (device_ptr);
4329 return;
4330 }
4331
4332 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4333 if (devicep == NULL || device_ptr == NULL)
4334 return;
4335
4336 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4337 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4338 {
4339 free (device_ptr);
4340 return;
4341 }
4342
4343 gomp_mutex_lock (&devicep->lock);
4344 gomp_free_device_memory (devicep, device_ptr);
4345 gomp_mutex_unlock (&devicep->lock);
4346 }
4347
4348 int
4349 omp_target_is_present (const void *ptr, int device_num)
4350 {
4351 if (device_num == omp_initial_device
4352 || device_num == gomp_get_num_devices ())
4353 return 1;
4354
4355 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4356 if (devicep == NULL)
4357 return 0;
4358
4359 if (ptr == NULL)
4360 return 1;
4361
4362 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4363 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4364 return 1;
4365
4366 gomp_mutex_lock (&devicep->lock);
4367 struct splay_tree_s *mem_map = &devicep->mem_map;
4368 struct splay_tree_key_s cur_node;
4369
4370 cur_node.host_start = (uintptr_t) ptr;
4371 cur_node.host_end = cur_node.host_start;
4372 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4373 int ret = n != NULL;
4374 gomp_mutex_unlock (&devicep->lock);
4375 return ret;
4376 }
4377
4378 static int
4379 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4380 struct gomp_device_descr **dst_devicep,
4381 struct gomp_device_descr **src_devicep)
4382 {
4383 if (dst_device_num != gomp_get_num_devices ()
4384 /* The gomp_get_num_devices call above has to be made unconditionally. */
4385 && dst_device_num != omp_initial_device)
4386 {
4387 *dst_devicep = resolve_device (dst_device_num, false);
4388 if (*dst_devicep == NULL)
4389 return EINVAL;
4390
4391 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4392 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4393 *dst_devicep = NULL;
4394 }
4395
4396 if (src_device_num != num_devices_openmp
4397 && src_device_num != omp_initial_device)
4398 {
4399 *src_devicep = resolve_device (src_device_num, false);
4400 if (*src_devicep == NULL)
4401 return EINVAL;
4402
4403 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4404 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4405 *src_devicep = NULL;
4406 }
4407
4408 return 0;
4409 }
4410
4411 static int
4412 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4413 size_t dst_offset, size_t src_offset,
4414 struct gomp_device_descr *dst_devicep,
4415 struct gomp_device_descr *src_devicep)
4416 {
4417 bool ret;
4418 if (src_devicep == NULL && dst_devicep == NULL)
4419 {
4420 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4421 return 0;
4422 }
4423 if (src_devicep == NULL)
4424 {
4425 gomp_mutex_lock (&dst_devicep->lock);
4426 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4427 (char *) dst + dst_offset,
4428 (char *) src + src_offset, length);
4429 gomp_mutex_unlock (&dst_devicep->lock);
4430 return (ret ? 0 : EINVAL);
4431 }
4432 if (dst_devicep == NULL)
4433 {
4434 gomp_mutex_lock (&src_devicep->lock);
4435 ret = src_devicep->dev2host_func (src_devicep->target_id,
4436 (char *) dst + dst_offset,
4437 (char *) src + src_offset, length);
4438 gomp_mutex_unlock (&src_devicep->lock);
4439 return (ret ? 0 : EINVAL);
4440 }
4441 if (src_devicep == dst_devicep)
4442 {
4443 gomp_mutex_lock (&src_devicep->lock);
4444 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4445 (char *) dst + dst_offset,
4446 (char *) src + src_offset, length);
4447 gomp_mutex_unlock (&src_devicep->lock);
4448 return (ret ? 0 : EINVAL);
4449 }
4450 return EINVAL;
4451 }
4452
4453 int
4454 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4455 size_t src_offset, int dst_device_num, int src_device_num)
4456 {
4457 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4458 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4459 &dst_devicep, &src_devicep);
4460
4461 if (ret)
4462 return ret;
4463
4464 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4465 dst_devicep, src_devicep);
4466
4467 return ret;
4468 }
4469
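/* Arguments passed to the task that implements omp_target_memcpy_async.  */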
4470 typedef struct
4471 {
4472 void *dst;
4473 const void *src;
4474 size_t length;
4475 size_t dst_offset;
4476 size_t src_offset;
4477 struct gomp_device_descr *dst_devicep;
4478 struct gomp_device_descr *src_devicep;
4479 } omp_target_memcpy_data;
4480
4481 static void
4482 omp_target_memcpy_async_helper (void *args)
4483 {
4484 omp_target_memcpy_data *a = args;
4485 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4486 a->src_offset, a->dst_devicep, a->src_devicep))
4487 gomp_fatal ("omp_target_memcpy failed");
4488 }
4489
4490 int
4491 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4492 size_t dst_offset, size_t src_offset,
4493 int dst_device_num, int src_device_num,
4494 int depobj_count, omp_depend_t *depobj_list)
4495 {
4496 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4497 unsigned int flags = 0;
4498 void *depend[depobj_count + 5];
4499 int i;
4500 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4501 &dst_devicep, &src_devicep);
4502
4503 omp_target_memcpy_data s = {
4504 .dst = dst,
4505 .src = src,
4506 .length = length,
4507 .dst_offset = dst_offset,
4508 .src_offset = src_offset,
4509 .dst_devicep = dst_devicep,
4510 .src_devicep = src_devicep
4511 };
4512
4513 if (check)
4514 return check;
4515
4516 if (depobj_count > 0 && depobj_list != NULL)
4517 {
4518 flags |= GOMP_TASK_FLAG_DEPEND;
4519 depend[0] = 0;
4520 depend[1] = (void *) (uintptr_t) depobj_count;
4521 depend[2] = depend[3] = depend[4] = 0;
4522 for (i = 0; i < depobj_count; ++i)
4523 depend[i + 5] = &depobj_list[i];
4524 }
4525
4526 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4527 __alignof__ (s), true, flags, depend, 0, NULL);
4528
4529 return 0;
4530 }
4531
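/* Worker for omp_target_memcpy_rect{,_async}: recursively copy a
   NUM_DIMS-dimensional rectangular subvolume described by VOLUME and the
   offset/dimension arrays.  2D and 3D copies are handed to the plugin's
   memcpy2d/memcpy3d hooks when available; otherwise the outermost dimension
   is peeled off and the function recurses.  *TMP is a host bounce buffer
   used for copies between two distinct devices.  Returns 0 or an errno
   value.  */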
4532 static int
4533 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4534 int num_dims, const size_t *volume,
4535 const size_t *dst_offsets,
4536 const size_t *src_offsets,
4537 const size_t *dst_dimensions,
4538 const size_t *src_dimensions,
4539 struct gomp_device_descr *dst_devicep,
4540 struct gomp_device_descr *src_devicep,
4541 size_t *tmp_size, void **tmp)
4542 {
4543 size_t dst_slice = element_size;
4544 size_t src_slice = element_size;
4545 size_t j, dst_off, src_off, length;
4546 int i, ret;
4547
4548 if (num_dims == 1)
4549 {
4550 if (__builtin_mul_overflow (element_size, volume[0], &length)
4551 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4552 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4553 return EINVAL;
4554 if (dst_devicep == NULL && src_devicep == NULL)
4555 {
4556 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4557 length);
4558 ret = 1;
4559 }
4560 else if (src_devicep == NULL)
4561 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4562 (char *) dst + dst_off,
4563 (const char *) src + src_off,
4564 length);
4565 else if (dst_devicep == NULL)
4566 ret = src_devicep->dev2host_func (src_devicep->target_id,
4567 (char *) dst + dst_off,
4568 (const char *) src + src_off,
4569 length);
4570 else if (src_devicep == dst_devicep)
4571 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4572 (char *) dst + dst_off,
4573 (const char *) src + src_off,
4574 length);
4575 else
4576 {
4577 if (*tmp_size == 0)
4578 {
4579 *tmp_size = length;
4580 *tmp = malloc (length);
4581 if (*tmp == NULL)
4582 return ENOMEM;
4583 }
4584 else if (*tmp_size < length)
4585 {
4586 *tmp_size = length;
4587 free (*tmp);
4588 *tmp = malloc (length);
4589 if (*tmp == NULL)
4590 return ENOMEM;
4591 }
4592 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4593 (const char *) src + src_off,
4594 length);
4595 if (ret == 1)
4596 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4597 (char *) dst + dst_off, *tmp,
4598 length);
4599 }
4600 return ret ? 0 : EINVAL;
4601 }
4602
4603 /* host->device, device->host and intra-device copies. */
4604 if (num_dims == 2
4605 && ((src_devicep
4606 && src_devicep == dst_devicep
4607 && src_devicep->memcpy2d_func)
4608 || (!src_devicep != !dst_devicep
4609 && ((src_devicep && src_devicep->memcpy2d_func)
4610 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4611 {
4612 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4613 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4614 int src_id = src_devicep ? src_devicep->target_id : -1;
4615 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4616
4617 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4618 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4619 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4620 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4621 || __builtin_mul_overflow (src_offsets[1], element_size,
4622 &src_off_sz1))
4623 return EINVAL;
4624 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4625 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4626 src, src_off_sz1, src_offsets[0], src_sz1);
4627 if (ret != -1)
4628 return ret ? 0 : EINVAL;
4629 }
4630 else if (num_dims == 3
4631 && ((src_devicep
4632 && src_devicep == dst_devicep
4633 && src_devicep->memcpy3d_func)
4634 || (!src_devicep != !dst_devicep
4635 && ((src_devicep && src_devicep->memcpy3d_func)
4636 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4637 {
4638 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4639 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4640 int src_id = src_devicep ? src_devicep->target_id : -1;
4641 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4642
4643 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4644 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4645 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4646 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4647 || __builtin_mul_overflow (src_offsets[2], element_size,
4648 &src_off_sz2))
4649 return EINVAL;
4650 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4651 dst, dst_off_sz2, dst_offsets[1],
4652 dst_offsets[0], dst_sz2, dst_dimensions[1],
4653 src, src_off_sz2, src_offsets[1],
4654 src_offsets[0], src_sz2, src_dimensions[1]);
4655 if (ret != -1)
4656 return ret ? 0 : EINVAL;
4657 }
4658
4659 for (i = 1; i < num_dims; i++)
4660 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4661 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4662 return EINVAL;
4663 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4664 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4665 return EINVAL;
4666 for (j = 0; j < volume[0]; j++)
4667 {
4668 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4669 (const char *) src + src_off,
4670 element_size, num_dims - 1,
4671 volume + 1, dst_offsets + 1,
4672 src_offsets + 1, dst_dimensions + 1,
4673 src_dimensions + 1, dst_devicep,
4674 src_devicep, tmp_size, tmp);
4675 if (ret)
4676 return ret;
4677 dst_off += dst_slice;
4678 src_off += src_slice;
4679 }
4680 return 0;
4681 }
4682
4683 static int
4684 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4685 int src_device_num,
4686 struct gomp_device_descr **dst_devicep,
4687 struct gomp_device_descr **src_devicep)
4688 {
4689 if (!dst && !src)
4690 return INT_MAX;
4691
4692 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4693 dst_devicep, src_devicep);
4694 if (ret)
4695 return ret;
4696
4697 return 0;
4698 }
4699
4700 static int
4701 omp_target_memcpy_rect_copy (void *dst, const void *src,
4702 size_t element_size, int num_dims,
4703 const size_t *volume, const size_t *dst_offsets,
4704 const size_t *src_offsets,
4705 const size_t *dst_dimensions,
4706 const size_t *src_dimensions,
4707 struct gomp_device_descr *dst_devicep,
4708 struct gomp_device_descr *src_devicep)
4709 {
4710 size_t tmp_size = 0;
4711 void *tmp = NULL;
4712 bool lock_src;
4713 bool lock_dst;
4714
4715 lock_src = src_devicep != NULL;
4716 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4717 if (lock_src)
4718 gomp_mutex_lock (&src_devicep->lock);
4719 if (lock_dst)
4720 gomp_mutex_lock (&dst_devicep->lock);
4721 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4722 volume, dst_offsets, src_offsets,
4723 dst_dimensions, src_dimensions,
4724 dst_devicep, src_devicep,
4725 &tmp_size, &tmp);
4726 if (lock_src)
4727 gomp_mutex_unlock (&src_devicep->lock);
4728 if (lock_dst)
4729 gomp_mutex_unlock (&dst_devicep->lock);
4730 if (tmp)
4731 free (tmp);
4732
4733 return ret;
4734 }
4735
4736 int
4737 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4738 int num_dims, const size_t *volume,
4739 const size_t *dst_offsets,
4740 const size_t *src_offsets,
4741 const size_t *dst_dimensions,
4742 const size_t *src_dimensions,
4743 int dst_device_num, int src_device_num)
4744 {
4745 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4746
4747 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4748 src_device_num, &dst_devicep,
4749 &src_devicep);
4750
4751 if (check)
4752 return check;
4753
4754 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4755 volume, dst_offsets, src_offsets,
4756 dst_dimensions, src_dimensions,
4757 dst_devicep, src_devicep);
4758
4759 return ret;
4760 }
4761
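/* Arguments passed to the task that implements omp_target_memcpy_rect_async.  */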
4762 typedef struct
4763 {
4764 void *dst;
4765 const void *src;
4766 size_t element_size;
4767 const size_t *volume;
4768 const size_t *dst_offsets;
4769 const size_t *src_offsets;
4770 const size_t *dst_dimensions;
4771 const size_t *src_dimensions;
4772 struct gomp_device_descr *dst_devicep;
4773 struct gomp_device_descr *src_devicep;
4774 int num_dims;
4775 } omp_target_memcpy_rect_data;
4776
4777 static void
4778 omp_target_memcpy_rect_async_helper (void *args)
4779 {
4780 omp_target_memcpy_rect_data *a = args;
4781 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4782 a->num_dims, a->volume, a->dst_offsets,
4783 a->src_offsets, a->dst_dimensions,
4784 a->src_dimensions, a->dst_devicep,
4785 a->src_devicep);
4786 if (ret)
4787 gomp_fatal ("omp_target_memcpy_rect failed");
4788 }
4789
4790 int
4791 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4792 int num_dims, const size_t *volume,
4793 const size_t *dst_offsets,
4794 const size_t *src_offsets,
4795 const size_t *dst_dimensions,
4796 const size_t *src_dimensions,
4797 int dst_device_num, int src_device_num,
4798 int depobj_count, omp_depend_t *depobj_list)
4799 {
4800 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4801 unsigned flags = 0;
4802 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4803 src_device_num, &dst_devicep,
4804 &src_devicep);
4805 void *depend[depobj_count + 5];
4806 int i;
4807
4808 omp_target_memcpy_rect_data s = {
4809 .dst = dst,
4810 .src = src,
4811 .element_size = element_size,
4812 .num_dims = num_dims,
4813 .volume = volume,
4814 .dst_offsets = dst_offsets,
4815 .src_offsets = src_offsets,
4816 .dst_dimensions = dst_dimensions,
4817 .src_dimensions = src_dimensions,
4818 .dst_devicep = dst_devicep,
4819 .src_devicep = src_devicep
4820 };
4821
4822 if (check)
4823 return check;
4824
4825 if (depobj_count > 0 && depobj_list != NULL)
4826 {
4827 flags |= GOMP_TASK_FLAG_DEPEND;
4828 depend[0] = 0;
4829 depend[1] = (void *) (uintptr_t) depobj_count;
4830 depend[2] = depend[3] = depend[4] = 0;
4831 for (i = 0; i < depobj_count; ++i)
4832 depend[i + 5] = &depobj_list[i];
4833 }
4834
4835 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4836 __alignof__ (s), true, flags, depend, 0, NULL);
4837
4838 return 0;
4839 }
4840
4841 int
4842 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4843 size_t size, size_t device_offset, int device_num)
4844 {
4845 if (device_num == omp_initial_device
4846 || device_num == gomp_get_num_devices ())
4847 return EINVAL;
4848
4849 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4850 if (devicep == NULL)
4851 return EINVAL;
4852
4853 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4854 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4855 return EINVAL;
4856
4857 gomp_mutex_lock (&devicep->lock);
4858
4859 struct splay_tree_s *mem_map = &devicep->mem_map;
4860 struct splay_tree_key_s cur_node;
4861 int ret = EINVAL;
4862
4863 cur_node.host_start = (uintptr_t) host_ptr;
4864 cur_node.host_end = cur_node.host_start + size;
4865 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4866 if (n)
4867 {
4868 if (n->tgt->tgt_start + n->tgt_offset
4869 == (uintptr_t) device_ptr + device_offset
4870 && n->host_start <= cur_node.host_start
4871 && n->host_end >= cur_node.host_end)
4872 ret = 0;
4873 }
4874 else
4875 {
4876 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4877 tgt->array = gomp_malloc (sizeof (*tgt->array));
4878 tgt->refcount = 1;
4879 tgt->tgt_start = 0;
4880 tgt->tgt_end = 0;
4881 tgt->to_free = NULL;
4882 tgt->prev = NULL;
4883 tgt->list_count = 0;
4884 tgt->device_descr = devicep;
4885 splay_tree_node array = tgt->array;
4886 splay_tree_key k = &array->key;
4887 k->host_start = cur_node.host_start;
4888 k->host_end = cur_node.host_end;
4889 k->tgt = tgt;
4890 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4891 k->refcount = REFCOUNT_INFINITY;
4892 k->dynamic_refcount = 0;
4893 k->aux = NULL;
4894 array->left = NULL;
4895 array->right = NULL;
4896 splay_tree_insert (&devicep->mem_map, array);
4897 ret = 0;
4898 }
4899 gomp_mutex_unlock (&devicep->lock);
4900 return ret;
4901 }
4902
4903 int
4904 omp_target_disassociate_ptr (const void *ptr, int device_num)
4905 {
4906 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4907 if (devicep == NULL)
4908 return EINVAL;
4909
4910 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4911 return EINVAL;
4912
4913 gomp_mutex_lock (&devicep->lock);
4914
4915 struct splay_tree_s *mem_map = &devicep->mem_map;
4916 struct splay_tree_key_s cur_node;
4917 int ret = EINVAL;
4918
4919 cur_node.host_start = (uintptr_t) ptr;
4920 cur_node.host_end = cur_node.host_start;
4921 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4922 if (n
4923 && n->host_start == cur_node.host_start
4924 && n->refcount == REFCOUNT_INFINITY
4925 && n->tgt->tgt_start == 0
4926 && n->tgt->to_free == NULL
4927 && n->tgt->refcount == 1
4928 && n->tgt->list_count == 0)
4929 {
4930 splay_tree_remove (&devicep->mem_map, n);
4931 gomp_unmap_tgt (n->tgt);
4932 ret = 0;
4933 }
4934
4935 gomp_mutex_unlock (&devicep->lock);
4936 return ret;
4937 }
4938
4939 void *
4940 omp_get_mapped_ptr (const void *ptr, int device_num)
4941 {
4942 if (device_num == omp_initial_device
4943 || device_num == omp_get_initial_device ())
4944 return (void *) ptr;
4945
4946 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4947 if (devicep == NULL)
4948 return NULL;
4949
4950 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4951 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4952 return (void *) ptr;
4953
4954 gomp_mutex_lock (&devicep->lock);
4955
4956 struct splay_tree_s *mem_map = &devicep->mem_map;
4957 struct splay_tree_key_s cur_node;
4958 void *ret = NULL;
4959
4960 cur_node.host_start = (uintptr_t) ptr;
4961 cur_node.host_end = cur_node.host_start;
4962 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4963
4964 if (n)
4965 {
4966 uintptr_t offset = cur_node.host_start - n->host_start;
4967 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4968 }
4969
4970 gomp_mutex_unlock (&devicep->lock);
4971
4972 return ret;
4973 }
4974
4975 int
4976 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
4977 {
4978 if (device_num == omp_initial_device
4979 || device_num == gomp_get_num_devices ())
4980 return true;
4981
4982 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4983 if (devicep == NULL)
4984 return false;
4985
4986 /* TODO: Unified shared memory must be handled when available. */
4987
4988 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
4989 }
4990
4991 int
4992 omp_pause_resource (omp_pause_resource_t kind, int device_num)
4993 {
4994 (void) kind;
4995 if (device_num == omp_initial_device
4996 || device_num == gomp_get_num_devices ())
4997 return gomp_pause_host ();
4998
4999 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5000 if (devicep == NULL)
5001 return -1;
5002
5003 /* Do nothing for target devices for now. */
5004 return 0;
5005 }
5006
5007 int
5008 omp_pause_resource_all (omp_pause_resource_t kind)
5009 {
5010 (void) kind;
5011 if (gomp_pause_host ())
5012 return -1;
5013 /* Do nothing for target devices for now. */
5014 return 0;
5015 }
5016
5017 ialias (omp_pause_resource)
5018 ialias (omp_pause_resource_all)
5019
5020 #ifdef PLUGIN_SUPPORT
5021
5022 /* This function tries to load a plugin for DEVICE. The name of the plugin
5023 is passed in PLUGIN_NAME.
5024 The handles of the found functions are stored in the corresponding fields
5025 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5026
5027 static bool
5028 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5029 const char *plugin_name)
5030 {
5031 const char *err = NULL, *last_missing = NULL;
5032
5033 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5034 if (!plugin_handle)
5035 #if OFFLOAD_DEFAULTED
5036 return 0;
5037 #else
5038 goto dl_fail;
5039 #endif
5040
5041 /* Check if all required functions are available in the plugin and store
5042 their handles. None of the symbols can legitimately be NULL,
5043 so we don't need to check dlerror all the time. */
5044 #define DLSYM(f) \
5045 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5046 goto dl_fail
5047 /* Similar, but missing functions are not an error. The expression
5048 evaluates to false if the lookup failed, true otherwise. */
5049 #define DLSYM_OPT(f, n) \
5050 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5051 || (last_missing = #n, 0))
5052
5053 DLSYM (version);
5054 if (device->version_func () != GOMP_VERSION)
5055 {
5056 err = "plugin version mismatch";
5057 goto fail;
5058 }
5059
5060 DLSYM (get_name);
5061 DLSYM (get_caps);
5062 DLSYM (get_type);
5063 DLSYM (get_num_devices);
5064 DLSYM (init_device);
5065 DLSYM (fini_device);
5066 DLSYM (load_image);
5067 DLSYM (unload_image);
5068 DLSYM (alloc);
5069 DLSYM (free);
5070 DLSYM (dev2host);
5071 DLSYM (host2dev);
5072 DLSYM_OPT (memcpy2d, memcpy2d);
5073 DLSYM_OPT (memcpy3d, memcpy3d);
5074 device->capabilities = device->get_caps_func ();
5075 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5076 {
5077 DLSYM (run);
5078 DLSYM_OPT (async_run, async_run);
5079 DLSYM_OPT (can_run, can_run);
5080 DLSYM (dev2dev);
5081 }
5082 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5083 {
5084 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5085 || !DLSYM_OPT (openacc.create_thread_data,
5086 openacc_create_thread_data)
5087 || !DLSYM_OPT (openacc.destroy_thread_data,
5088 openacc_destroy_thread_data)
5089 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5090 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5091 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5092 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5093 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5094 || !DLSYM_OPT (openacc.async.queue_callback,
5095 openacc_async_queue_callback)
5096 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5097 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5098 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5099 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5100 {
5101 /* Require all the OpenACC handlers if we have
5102 GOMP_OFFLOAD_CAP_OPENACC_200. */
5103 err = "plugin missing OpenACC handler function";
5104 goto fail;
5105 }
5106
5107 unsigned cuda = 0;
5108 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5109 openacc_cuda_get_current_device);
5110 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5111 openacc_cuda_get_current_context);
5112 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5113 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5114 if (cuda && cuda != 4)
5115 {
5116 /* Make sure all the CUDA functions are there if any of them are. */
5117 err = "plugin missing OpenACC CUDA handler function";
5118 goto fail;
5119 }
5120 }
5121 #undef DLSYM
5122 #undef DLSYM_OPT
5123
5124 return 1;
5125
5126 dl_fail:
5127 err = dlerror ();
5128 fail:
5129 gomp_error ("while loading %s: %s", plugin_name, err);
5130 if (last_missing)
5131 gomp_error ("missing function was %s", last_missing);
5132 if (plugin_handle)
5133 dlclose (plugin_handle);
5134
5135 return 0;
5136 }
5137
5138 /* This function finalizes all initialized devices. */
5139
5140 static void
5141 gomp_target_fini (void)
5142 {
5143 int i;
5144 for (i = 0; i < num_devices; i++)
5145 {
5146 bool ret = true;
5147 struct gomp_device_descr *devicep = &devices[i];
5148 gomp_mutex_lock (&devicep->lock);
5149 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5150 ret = gomp_fini_device (devicep);
5151 gomp_mutex_unlock (&devicep->lock);
5152 if (!ret)
5153 gomp_fatal ("device finalization failed");
5154 }
5155 }
5156
5157 /* This function initializes the runtime for offloading.
5158 It parses the list of offload plugins, and tries to load them.
5159 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5160 will be set, and the array DEVICES initialized, containing descriptors for
5161 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5162 followed by the others. */
5163
5164 static void
5165 gomp_target_init (void)
5166 {
5167 const char *prefix = "libgomp-plugin-";
5168 const char *suffix = SONAME_SUFFIX (1);
5169 const char *cur, *next;
5170 char *plugin_name;
5171 int i, new_num_devs;
5172 int num_devs = 0, num_devs_openmp;
5173 struct gomp_device_descr *devs = NULL;
5174
5175 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5176 return;
5177
5178 cur = OFFLOAD_PLUGINS;
5179 if (*cur)
5180 do
5181 {
5182 struct gomp_device_descr current_device;
5183 size_t prefix_len, suffix_len, cur_len;
5184
5185 next = strchr (cur, ',');
5186
5187 prefix_len = strlen (prefix);
5188 cur_len = next ? next - cur : strlen (cur);
5189 suffix_len = strlen (suffix);
5190
5191 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5192 if (!plugin_name)
5193 {
5194 num_devs = 0;
5195 break;
5196 }
5197
5198 memcpy (plugin_name, prefix, prefix_len);
5199 memcpy (plugin_name + prefix_len, cur, cur_len);
5200 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5201
5202 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5203 {
5204 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5205 new_num_devs = current_device.get_num_devices_func (omp_req);
5206 if (gomp_debug_var > 0 && new_num_devs < 0)
5207 {
5208 bool found = false;
5209 int type = current_device.get_type_func ();
5210 for (int img = 0; img < num_offload_images; img++)
5211 if (type == offload_images[img].type)
5212 found = true;
5213 if (found)
5214 {
5215 char buf[sizeof ("unified_address, unified_shared_memory, "
5216 "reverse_offload")];
5217 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5218 char *name = (char *) malloc (cur_len + 1);
5219 memcpy (name, cur, cur_len);
5220 name[cur_len] = '\0';
5221 gomp_debug (1,
5222 "%s devices present but 'omp requires %s' "
5223 "cannot be fulfilled\n", name, buf);
5224 free (name);
5225 }
5226 }
5227 else if (new_num_devs >= 1)
5228 {
5229 /* Augment DEVICES and NUM_DEVICES. */
5230
5231 devs = realloc (devs, (num_devs + new_num_devs)
5232 * sizeof (struct gomp_device_descr));
5233 if (!devs)
5234 {
5235 num_devs = 0;
5236 free (plugin_name);
5237 break;
5238 }
5239
5240 current_device.name = current_device.get_name_func ();
5241 /* current_device.capabilities has already been set. */
5242 current_device.type = current_device.get_type_func ();
5243 current_device.mem_map.root = NULL;
5244 current_device.mem_map_rev.root = NULL;
5245 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5246 for (i = 0; i < new_num_devs; i++)
5247 {
5248 current_device.target_id = i;
5249 devs[num_devs] = current_device;
5250 gomp_mutex_init (&devs[num_devs].lock);
5251 num_devs++;
5252 }
5253 }
5254 }
5255
5256 free (plugin_name);
5257 cur = next + 1;
5258 }
5259 while (next);
5260
5261 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5262 NUM_DEVICES_OPENMP. */
5263 struct gomp_device_descr *devs_s
5264 = malloc (num_devs * sizeof (struct gomp_device_descr));
5265 if (!devs_s)
5266 {
5267 num_devs = 0;
5268 free (devs);
5269 devs = NULL;
5270 }
5271 num_devs_openmp = 0;
5272 for (i = 0; i < num_devs; i++)
5273 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5274 devs_s[num_devs_openmp++] = devs[i];
5275 int num_devs_after_openmp = num_devs_openmp;
5276 for (i = 0; i < num_devs; i++)
5277 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5278 devs_s[num_devs_after_openmp++] = devs[i];
5279 free (devs);
5280 devs = devs_s;
5281
5282 for (i = 0; i < num_devs; i++)
5283 {
5284 /* The 'devices' array can be moved (by the realloc call) until we have
5285 found all the plugins, so registering with the OpenACC runtime (which
5286 takes a copy of the pointer argument) must be delayed until now. */
5287 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5288 goacc_register (&devs[i]);
5289 }
5290 if (gomp_global_icv.default_device_var == INT_MIN)
5291 {
5292 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5293 struct gomp_icv_list *none;
5294 none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5295 gomp_global_icv.default_device_var = (num_devs_openmp
5296 ? 0 : omp_invalid_device);
5297 none->icvs.default_device_var = gomp_global_icv.default_device_var;
5298 }
5299
5300 num_devices = num_devs;
5301 num_devices_openmp = num_devs_openmp;
5302 devices = devs;
5303 if (atexit (gomp_target_fini) != 0)
5304 gomp_fatal ("atexit failed");
5305 }
5306
5307 #else /* PLUGIN_SUPPORT */
5308 /* If dlfcn.h is unavailable we always fall back to host execution.
5309 GOMP_target* routines are just stubs for this case. */
5310 static void
5311 gomp_target_init (void)
5312 {
5313 }
5314 #endif /* PLUGIN_SUPPORT */