libgomp/target.c
1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <assert.h>
40 #include <errno.h>
41
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
46
47 typedef uintptr_t *hash_entry_type;
48 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
49 static inline void htab_free (void *ptr) { free (ptr); }
50 #include "hashtab.h"
51
52 ialias_redirect (GOMP_task)
53
54 static inline hashval_t
55 htab_hash (hash_entry_type element)
56 {
57 return hash_pointer ((void *) element);
58 }
59
60 static inline bool
61 htab_eq (hash_entry_type x, hash_entry_type y)
62 {
63 return x == y;
64 }
65
66 #define FIELD_TGT_EMPTY (~(size_t) 0)
67
68 static void gomp_target_init (void);
69
     70 /* The whole initialization code for offloading plugins is only run once.  */
71 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
72
73 /* Mutex for offload image registration. */
74 static gomp_mutex_t register_lock;
75
     76 /* This structure describes an offload image.
     77    It contains the type of the target device, a pointer to the host table
     78    descriptor, and a pointer to the target data.  */
79 struct offload_image_descr {
80 unsigned version;
81 enum offload_target_type type;
82 const void *host_table;
83 const void *target_data;
84 };
85
86 /* Array of descriptors of offload images. */
87 static struct offload_image_descr *offload_images;
88
89 /* Total number of offload images. */
90 static int num_offload_images;
91
92 /* Array of descriptors for all available devices. */
93 static struct gomp_device_descr *devices;
94
95 /* Total number of available devices. */
96 static int num_devices;
97
98 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
99 static int num_devices_openmp;
100
101 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
102
103 static void *
104 gomp_realloc_unlock (void *old, size_t size)
105 {
106 void *ret = realloc (old, size);
107 if (ret == NULL)
108 {
109 gomp_mutex_unlock (&register_lock);
110 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
111 }
112 return ret;
113 }
114
115 attribute_hidden void
116 gomp_init_targets_once (void)
117 {
118 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
119 }
120
121 attribute_hidden int
122 gomp_get_num_devices (void)
123 {
124 gomp_init_targets_once ();
125 return num_devices_openmp;
126 }
127
128 static struct gomp_device_descr *
129 resolve_device (int device_id)
130 {
131 if (device_id == GOMP_DEVICE_ICV)
132 {
133 struct gomp_task_icv *icv = gomp_icv (false);
134 device_id = icv->default_device_var;
135 }
136
137 if (device_id < 0 || device_id >= gomp_get_num_devices ())
138 {
139 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
140 && device_id != GOMP_DEVICE_HOST_FALLBACK
141 && device_id != num_devices_openmp)
142 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
143 "but device not found");
144
145 return NULL;
146 }
147
148 gomp_mutex_lock (&devices[device_id].lock);
149 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
150 gomp_init_device (&devices[device_id]);
151 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
152 {
153 gomp_mutex_unlock (&devices[device_id].lock);
154
155 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device is finalized");
158
159 return NULL;
160 }
161 gomp_mutex_unlock (&devices[device_id].lock);
162
163 return &devices[device_id];
164 }
165
166
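/* Look up KEY in MEM_MAP.  For a zero-length KEY (host_start == host_end),
   also accept a mapping that contains the address, or one that ends exactly
   at it, since zero-length array sections may legitimately point one past
   the end of a mapped object.  */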
167 static inline splay_tree_key
168 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
169 {
170 if (key->host_start != key->host_end)
171 return splay_tree_lookup (mem_map, key);
172
173 key->host_end++;
174 splay_tree_key n = splay_tree_lookup (mem_map, key);
175 key->host_end--;
176 if (n)
177 return n;
178 key->host_start--;
179 n = splay_tree_lookup (mem_map, key);
180 key->host_start++;
181 if (n)
182 return n;
183 return splay_tree_lookup (mem_map, key);
184 }
185
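/* Like gomp_map_lookup, but for a zero-length KEY only accept a mapping that
   contains the address itself; a mapping merely ending at the address does
   not match.  */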
186 static inline splay_tree_key
187 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
188 {
189 if (key->host_start != key->host_end)
190 return splay_tree_lookup (mem_map, key);
191
192 key->host_end++;
193 splay_tree_key n = splay_tree_lookup (mem_map, key);
194 key->host_end--;
195 return n;
196 }
197
198 static inline void
199 gomp_device_copy (struct gomp_device_descr *devicep,
200 bool (*copy_func) (int, void *, const void *, size_t),
201 const char *dst, void *dstaddr,
202 const char *src, const void *srcaddr,
203 size_t size)
204 {
205 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
206 {
207 gomp_mutex_unlock (&devicep->lock);
208 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
209 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
210 }
211 }
212
213 static inline void
214 goacc_device_copy_async (struct gomp_device_descr *devicep,
215 bool (*copy_func) (int, void *, const void *, size_t,
216 struct goacc_asyncqueue *),
217 const char *dst, void *dstaddr,
218 const char *src, const void *srcaddr,
219 const void *srcaddr_orig,
220 size_t size, struct goacc_asyncqueue *aq)
221 {
222 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
223 {
224 gomp_mutex_unlock (&devicep->lock);
225 if (srcaddr_orig && srcaddr_orig != srcaddr)
226 gomp_fatal ("Copying of %s object [%p..%p)"
227 " via buffer %s object [%p..%p)"
228 " to %s object [%p..%p) failed",
229 src, srcaddr_orig, srcaddr_orig + size,
230 src, srcaddr, srcaddr + size,
231 dst, dstaddr, dstaddr + size);
232 else
233 gomp_fatal ("Copying of %s object [%p..%p)"
234 " to %s object [%p..%p) failed",
235 src, srcaddr, srcaddr + size,
236 dst, dstaddr, dstaddr + size);
237 }
238 }
239
240 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
241 host to device memory transfers. */
242
243 struct gomp_coalesce_chunk
244 {
245 /* The starting and ending point of a coalesced chunk of memory. */
246 size_t start, end;
247 };
248
249 struct gomp_coalesce_buf
250 {
251 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
252 it will be copied to the device. */
253 void *buf;
254 struct target_mem_desc *tgt;
    255   /* Array with offsets: chunks[i].start is the starting offset and
    256      chunks[i].end the ending offset, relative to the tgt->tgt_start device
    257      address, of chunks which are to be copied to buf and later to the device.  */
258 struct gomp_coalesce_chunk *chunks;
259 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
260 be performed. */
261 long chunk_cnt;
262 /* During construction of chunks array, how many memory regions are within
263 the last chunk. If there is just one memory region for a chunk, we copy
264 it directly to device rather than going through buf. */
265 long use_cnt;
266 };
267
268 /* Maximum size of memory region considered for coalescing. Larger copies
269 are performed directly. */
270 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
271
272 /* Maximum size of a gap in between regions to consider them being copied
273 within the same chunk. All the device offsets considered are within
274 newly allocated device memory, so it isn't fatal if we copy some padding
275 in between from host to device. The gaps come either from alignment
276 padding or from memory regions which are not supposed to be copied from
277 host to device (e.g. map(alloc:), map(from:) etc.). */
278 #define MAX_COALESCE_BUF_GAP (4 * 1024)
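/* For illustration, with the limits above: a 512-byte region at offset 0
   followed by a 256-byte region at offset 1024 lands in a single chunk
   [0, 1280), since the gap is smaller than MAX_COALESCE_BUF_GAP; a further
   region at offset 8192 starts a new chunk; and a region larger than
   MAX_COALESCE_BUF_SIZE is never added to the buffer and is instead copied
   to the device directly.  */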
279
280 /* Add region with device tgt_start relative offset and length to CBUF.
281
282 This must not be used for asynchronous copies, because the host data might
283 not be computed yet (by an earlier asynchronous compute region, for
284 example).
285 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
286 is it more performant to use libgomp CBUF buffering or individual device
    287    asynchronous copying?) */
288
289 static inline void
290 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
291 {
292 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
293 return;
294 if (cbuf->chunk_cnt)
295 {
296 if (cbuf->chunk_cnt < 0)
297 return;
298 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
299 {
300 cbuf->chunk_cnt = -1;
301 return;
302 }
303 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
304 {
305 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
306 cbuf->use_cnt++;
307 return;
308 }
309 /* If the last chunk is only used by one mapping, discard it,
310 as it will be one host to device copy anyway and
311 memcpying it around will only waste cycles. */
312 if (cbuf->use_cnt == 1)
313 cbuf->chunk_cnt--;
314 }
315 cbuf->chunks[cbuf->chunk_cnt].start = start;
316 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
317 cbuf->chunk_cnt++;
318 cbuf->use_cnt = 1;
319 }
320
321 /* Return true for mapping kinds which need to copy data from the
322 host to device for regions that weren't previously mapped. */
323
324 static inline bool
325 gomp_to_device_kind_p (int kind)
326 {
327 switch (kind)
328 {
329 case GOMP_MAP_ALLOC:
330 case GOMP_MAP_FROM:
331 case GOMP_MAP_FORCE_ALLOC:
332 case GOMP_MAP_FORCE_FROM:
333 case GOMP_MAP_ALWAYS_FROM:
334 return false;
335 default:
336 return true;
337 }
338 }
339
340 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
    341    non-NULL), when the source data is on the stack or may otherwise be deallocated
342 before the asynchronous copy takes place, EPHEMERAL must be passed as
343 TRUE. */
344
345 attribute_hidden void
346 gomp_copy_host2dev (struct gomp_device_descr *devicep,
347 struct goacc_asyncqueue *aq,
348 void *d, const void *h, size_t sz,
349 bool ephemeral, struct gomp_coalesce_buf *cbuf)
350 {
351 if (__builtin_expect (aq != NULL, 0))
352 {
353 /* See 'gomp_coalesce_buf_add'. */
354 assert (!cbuf);
355
356 void *h_buf = (void *) h;
357 if (ephemeral)
358 {
359 /* We're queueing up an asynchronous copy from data that may
360 disappear before the transfer takes place (i.e. because it is a
361 stack local in a function that is no longer executing). Make a
362 copy of the data into a temporary buffer in those cases. */
363 h_buf = gomp_malloc (sz);
364 memcpy (h_buf, h, sz);
365 }
366 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
367 "dev", d, "host", h_buf, h, sz, aq);
368 if (ephemeral)
369 /* Free temporary buffer once the transfer has completed. */
370 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
371
372 return;
373 }
374
375 if (cbuf)
376 {
377 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
378 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
379 {
380 long first = 0;
381 long last = cbuf->chunk_cnt - 1;
382 while (first <= last)
383 {
384 long middle = (first + last) >> 1;
385 if (cbuf->chunks[middle].end <= doff)
386 first = middle + 1;
387 else if (cbuf->chunks[middle].start <= doff)
388 {
389 if (doff + sz > cbuf->chunks[middle].end)
390 {
391 gomp_mutex_unlock (&devicep->lock);
392 gomp_fatal ("internal libgomp cbuf error");
393 }
394 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
395 h, sz);
396 return;
397 }
398 else
399 last = middle - 1;
400 }
401 }
402 }
403
404 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
405 }
406
407 attribute_hidden void
408 gomp_copy_dev2host (struct gomp_device_descr *devicep,
409 struct goacc_asyncqueue *aq,
410 void *h, const void *d, size_t sz)
411 {
412 if (__builtin_expect (aq != NULL, 0))
413 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
414 "host", h, "dev", d, NULL, sz, aq);
415 else
416 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
417 }
418
419 static void
420 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
421 {
422 if (!devicep->free_func (devicep->target_id, devptr))
423 {
424 gomp_mutex_unlock (&devicep->lock);
425 gomp_fatal ("error in freeing device memory block at %p", devptr);
426 }
427 }
428
429 /* Increment reference count of a splay_tree_key region K by 1.
430 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
431 increment the value if refcount is not yet contained in the set (used for
432 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
433 once for each construct). */
434
435 static inline void
436 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
437 {
438 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
439 return;
440
441 uintptr_t *refcount_ptr = &k->refcount;
442
443 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
444 refcount_ptr = &k->structelem_refcount;
445 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
446 refcount_ptr = k->structelem_refcount_ptr;
447
448 if (refcount_set)
449 {
450 if (htab_find (*refcount_set, refcount_ptr))
451 return;
452 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
453 *slot = refcount_ptr;
454 }
455
456 *refcount_ptr += 1;
457 return;
458 }
459
460 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
461 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
462 track already seen refcounts, and only adjust the value if refcount is not
463 yet contained in the set (like gomp_increment_refcount).
464
465 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
466 it is already zero and we know we decremented it earlier. This signals that
467 associated maps should be copied back to host.
468
    469    *DO_REMOVE is set to true when this is the first handling of this refcount
470 and we are setting it to zero. This signals a removal of this key from the
471 splay-tree map.
472
473 Copy and removal are separated due to cases like handling of structure
474 elements, e.g. each map of a structure element representing a possible copy
475 out of a structure field has to be handled individually, but we only signal
    476    removal for one (the first encountered) sibling map.  */
477
478 static inline void
479 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
480 bool *do_copy, bool *do_remove)
481 {
482 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
483 {
484 *do_copy = *do_remove = false;
485 return;
486 }
487
488 uintptr_t *refcount_ptr = &k->refcount;
489
490 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
491 refcount_ptr = &k->structelem_refcount;
492 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
493 refcount_ptr = k->structelem_refcount_ptr;
494
495 bool new_encountered_refcount;
496 bool set_to_zero = false;
497 bool is_zero = false;
498
499 uintptr_t orig_refcount = *refcount_ptr;
500
501 if (refcount_set)
502 {
503 if (htab_find (*refcount_set, refcount_ptr))
504 {
505 new_encountered_refcount = false;
506 goto end;
507 }
508
509 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
510 *slot = refcount_ptr;
511 new_encountered_refcount = true;
512 }
513 else
514 /* If no refcount_set being used, assume all keys are being decremented
515 for the first time. */
516 new_encountered_refcount = true;
517
518 if (delete_p)
519 *refcount_ptr = 0;
520 else if (*refcount_ptr > 0)
521 *refcount_ptr -= 1;
522
523 end:
524 if (*refcount_ptr == 0)
525 {
526 if (orig_refcount > 0)
527 set_to_zero = true;
528
529 is_zero = true;
530 }
531
532 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
533 *do_remove = (new_encountered_refcount && set_to_zero);
534 }
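/* In short: the first time a refcount is brought to zero, both *DO_COPY and
   *DO_REMOVE are set; when a shared (structure-element) refcount that is
   already zero is encountered again, only *DO_COPY is set; otherwise neither
   is set.  */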
535
536 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
537 gomp_map_0len_lookup found oldn for newn.
538 Helper function of gomp_map_vars. */
539
540 static inline void
541 gomp_map_vars_existing (struct gomp_device_descr *devicep,
542 struct goacc_asyncqueue *aq, splay_tree_key oldn,
543 splay_tree_key newn, struct target_var_desc *tgt_var,
544 unsigned char kind, bool always_to_flag, bool implicit,
545 struct gomp_coalesce_buf *cbuf,
546 htab_t *refcount_set)
547 {
    548   assert (kind != GOMP_MAP_ATTACH
    549           && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
550
551 tgt_var->key = oldn;
552 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
553 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
554 tgt_var->is_attach = false;
555 tgt_var->offset = newn->host_start - oldn->host_start;
556
557 /* For implicit maps, old contained in new is valid. */
558 bool implicit_subset = (implicit
559 && newn->host_start <= oldn->host_start
560 && oldn->host_end <= newn->host_end);
561 if (implicit_subset)
562 tgt_var->length = oldn->host_end - oldn->host_start;
563 else
564 tgt_var->length = newn->host_end - newn->host_start;
565
566 if ((kind & GOMP_MAP_FLAG_FORCE)
567 /* For implicit maps, old contained in new is valid. */
568 || !(implicit_subset
569 /* Otherwise, new contained inside old is considered valid. */
570 || (oldn->host_start <= newn->host_start
571 && newn->host_end <= oldn->host_end)))
572 {
573 gomp_mutex_unlock (&devicep->lock);
574 gomp_fatal ("Trying to map into device [%p..%p) object when "
575 "[%p..%p) is already mapped",
576 (void *) newn->host_start, (void *) newn->host_end,
577 (void *) oldn->host_start, (void *) oldn->host_end);
578 }
579
580 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
581 {
    582       /* Implicit + always should not happen.  If this does occur, the
    583          address/length adjustment below is a TODO.  */
584 assert (!implicit_subset);
585
586 if (oldn->aux && oldn->aux->attach_count)
587 {
588 /* We have to be careful not to overwrite still attached pointers
589 during the copyback to host. */
590 uintptr_t addr = newn->host_start;
591 while (addr < newn->host_end)
592 {
593 size_t i = (addr - oldn->host_start) / sizeof (void *);
594 if (oldn->aux->attach_count[i] == 0)
595 gomp_copy_host2dev (devicep, aq,
596 (void *) (oldn->tgt->tgt_start
597 + oldn->tgt_offset
598 + addr - oldn->host_start),
599 (void *) addr,
600 sizeof (void *), false, cbuf);
601 addr += sizeof (void *);
602 }
603 }
604 else
605 gomp_copy_host2dev (devicep, aq,
606 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
607 + newn->host_start - oldn->host_start),
608 (void *) newn->host_start,
609 newn->host_end - newn->host_start, false, cbuf);
610 }
611
612 gomp_increment_refcount (oldn, refcount_set);
613 }
614
615 static int
616 get_kind (bool short_mapkind, void *kinds, int idx)
617 {
618 if (!short_mapkind)
619 return ((unsigned char *) kinds)[idx];
620
621 int val = ((unsigned short *) kinds)[idx];
622 if (GOMP_MAP_IMPLICIT_P (val))
623 val &= ~GOMP_MAP_IMPLICIT;
624 return val;
625 }
626
627
628 static bool
629 get_implicit (bool short_mapkind, void *kinds, int idx)
630 {
631 if (!short_mapkind)
632 return false;
633
634 int val = ((unsigned short *) kinds)[idx];
635 return GOMP_MAP_IMPLICIT_P (val);
636 }
637
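/* Store at device address TGT->tgt_start + TARGET_OFFSET the translation of
   the host pointer HOST_PTR: the lookup is done at HOST_PTR + BIAS and BIAS
   is subtracted from the resulting device address again.  A null host pointer
   stores null; an unmapped pointer target is fatal unless
   ALLOW_ZERO_LENGTH_ARRAY_SECTIONS is set, in which case zero is stored.  */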
638 static void
639 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
640 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
641 struct gomp_coalesce_buf *cbuf,
642 bool allow_zero_length_array_sections)
643 {
644 struct gomp_device_descr *devicep = tgt->device_descr;
645 struct splay_tree_s *mem_map = &devicep->mem_map;
646 struct splay_tree_key_s cur_node;
647
648 cur_node.host_start = host_ptr;
649 if (cur_node.host_start == (uintptr_t) NULL)
650 {
651 cur_node.tgt_offset = (uintptr_t) NULL;
652 gomp_copy_host2dev (devicep, aq,
653 (void *) (tgt->tgt_start + target_offset),
654 (void *) &cur_node.tgt_offset, sizeof (void *),
655 true, cbuf);
656 return;
657 }
658 /* Add bias to the pointer value. */
659 cur_node.host_start += bias;
660 cur_node.host_end = cur_node.host_start;
661 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
662 if (n == NULL)
663 {
664 if (allow_zero_length_array_sections)
665 cur_node.tgt_offset = 0;
666 else
667 {
668 gomp_mutex_unlock (&devicep->lock);
669 gomp_fatal ("Pointer target of array section wasn't mapped");
670 }
671 }
672 else
673 {
674 cur_node.host_start -= n->host_start;
675 cur_node.tgt_offset
676 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
677 /* At this point tgt_offset is target address of the
678 array section. Now subtract bias to get what we want
679 to initialize the pointer with. */
680 cur_node.tgt_offset -= bias;
681 }
682 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
683 (void *) &cur_node.tgt_offset, sizeof (void *),
684 true, cbuf);
685 }
686
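/* Handle structure element I of a GOMP_MAP_STRUCT mapping whose enclosing
   structure is already mapped as N (FIRST is the index of the first element):
   check that the element lies within the same existing mapping and register
   it via gomp_map_vars_existing, or fail with a fatal error otherwise.  */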
687 static void
688 gomp_map_fields_existing (struct target_mem_desc *tgt,
689 struct goacc_asyncqueue *aq, splay_tree_key n,
690 size_t first, size_t i, void **hostaddrs,
691 size_t *sizes, void *kinds,
692 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
693 {
694 struct gomp_device_descr *devicep = tgt->device_descr;
695 struct splay_tree_s *mem_map = &devicep->mem_map;
696 struct splay_tree_key_s cur_node;
697 int kind;
698 bool implicit;
699 const bool short_mapkind = true;
700 const int typemask = short_mapkind ? 0xff : 0x7;
701
702 cur_node.host_start = (uintptr_t) hostaddrs[i];
703 cur_node.host_end = cur_node.host_start + sizes[i];
704 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
705 kind = get_kind (short_mapkind, kinds, i);
706 implicit = get_implicit (short_mapkind, kinds, i);
707 if (n2
708 && n2->tgt == n->tgt
709 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
710 {
711 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
712 kind & typemask, false, implicit, cbuf,
713 refcount_set);
714 return;
715 }
716 if (sizes[i] == 0)
717 {
718 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
719 {
720 cur_node.host_start--;
721 n2 = splay_tree_lookup (mem_map, &cur_node);
722 cur_node.host_start++;
723 if (n2
724 && n2->tgt == n->tgt
725 && n2->host_start - n->host_start
726 == n2->tgt_offset - n->tgt_offset)
727 {
728 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
729 kind & typemask, false, implicit, cbuf,
730 refcount_set);
731 return;
732 }
733 }
734 cur_node.host_end++;
735 n2 = splay_tree_lookup (mem_map, &cur_node);
736 cur_node.host_end--;
737 if (n2
738 && n2->tgt == n->tgt
739 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
740 {
741 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
742 kind & typemask, false, implicit, cbuf,
743 refcount_set);
744 return;
745 }
746 }
747 gomp_mutex_unlock (&devicep->lock);
748 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
749 "other mapped elements from the same structure weren't mapped "
750 "together with it", (void *) cur_node.host_start,
751 (void *) cur_node.host_end);
752 }
753
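/* Record an attachment of the pointer located at host address ATTACH_TO,
   which lies within the block mapped as N, by incrementing its per-pointer
   attach count.  On the first attachment, the pointer's target is looked up
   (with BIAS applied) and the resulting device address is written to the
   pointer's location in device memory.  */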
754 attribute_hidden void
755 gomp_attach_pointer (struct gomp_device_descr *devicep,
756 struct goacc_asyncqueue *aq, splay_tree mem_map,
757 splay_tree_key n, uintptr_t attach_to, size_t bias,
758 struct gomp_coalesce_buf *cbufp,
759 bool allow_zero_length_array_sections)
760 {
761 struct splay_tree_key_s s;
762 size_t size, idx;
763
764 if (n == NULL)
765 {
766 gomp_mutex_unlock (&devicep->lock);
767 gomp_fatal ("enclosing struct not mapped for attach");
768 }
769
770 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
771 /* We might have a pointer in a packed struct: however we cannot have more
772 than one such pointer in each pointer-sized portion of the struct, so
773 this is safe. */
774 idx = (attach_to - n->host_start) / sizeof (void *);
775
776 if (!n->aux)
777 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
778
779 if (!n->aux->attach_count)
780 n->aux->attach_count
781 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
782
783 if (n->aux->attach_count[idx] < UINTPTR_MAX)
784 n->aux->attach_count[idx]++;
785 else
786 {
787 gomp_mutex_unlock (&devicep->lock);
788 gomp_fatal ("attach count overflow");
789 }
790
791 if (n->aux->attach_count[idx] == 1)
792 {
793 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
794 - n->host_start;
795 uintptr_t target = (uintptr_t) *(void **) attach_to;
796 splay_tree_key tn;
797 uintptr_t data;
798
799 if ((void *) target == NULL)
800 {
801 gomp_mutex_unlock (&devicep->lock);
802 gomp_fatal ("attempt to attach null pointer");
803 }
804
805 s.host_start = target + bias;
806 s.host_end = s.host_start + 1;
807 tn = splay_tree_lookup (mem_map, &s);
808
809 if (!tn)
810 {
811 if (allow_zero_length_array_sections)
812 /* When allowing attachment to zero-length array sections, we
813 allow attaching to NULL pointers when the target region is not
814 mapped. */
815 data = 0;
816 else
817 {
818 gomp_mutex_unlock (&devicep->lock);
819 gomp_fatal ("pointer target not mapped for attach");
820 }
821 }
822 else
823 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
824
825 gomp_debug (1,
826 "%s: attaching host %p, target %p (struct base %p) to %p\n",
827 __FUNCTION__, (void *) attach_to, (void *) devptr,
828 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
829
830 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
831 sizeof (void *), true, cbufp);
832 }
833 else
834 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
835 (void *) attach_to, (int) n->aux->attach_count[idx]);
836 }
837
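/* Undo gomp_attach_pointer for the pointer at host address DETACH_FROM within
   the block mapped as N: decrement its attach count (forcing it to zero if
   FINALIZE), and once the count reaches zero write the current host pointer
   value back to the pointer's location in device memory.  */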
838 attribute_hidden void
839 gomp_detach_pointer (struct gomp_device_descr *devicep,
840 struct goacc_asyncqueue *aq, splay_tree_key n,
841 uintptr_t detach_from, bool finalize,
842 struct gomp_coalesce_buf *cbufp)
843 {
844 size_t idx;
845
846 if (n == NULL)
847 {
848 gomp_mutex_unlock (&devicep->lock);
849 gomp_fatal ("enclosing struct not mapped for detach");
850 }
851
852 idx = (detach_from - n->host_start) / sizeof (void *);
853
854 if (!n->aux || !n->aux->attach_count)
855 {
856 gomp_mutex_unlock (&devicep->lock);
857 gomp_fatal ("no attachment counters for struct");
858 }
859
860 if (finalize)
861 n->aux->attach_count[idx] = 1;
862
863 if (n->aux->attach_count[idx] == 0)
864 {
865 gomp_mutex_unlock (&devicep->lock);
866 gomp_fatal ("attach count underflow");
867 }
868 else
869 n->aux->attach_count[idx]--;
870
871 if (n->aux->attach_count[idx] == 0)
872 {
873 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
874 - n->host_start;
875 uintptr_t target = (uintptr_t) *(void **) detach_from;
876
877 gomp_debug (1,
878 "%s: detaching host %p, target %p (struct base %p) to %p\n",
879 __FUNCTION__, (void *) detach_from, (void *) devptr,
880 (void *) (n->tgt->tgt_start + n->tgt_offset),
881 (void *) target);
882
883 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
884 sizeof (void *), true, cbufp);
885 }
886 else
887 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
888 (void *) detach_from, (int) n->aux->attach_count[idx]);
889 }
890
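/* Return the device address to use for map entry I of TGT given the host
   addresses HOSTADDRS: the address inside an existing mapping, an inlined
   host value, a structure base derived from the following entry, or an offset
   into the newly allocated device block.  */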
891 attribute_hidden uintptr_t
892 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
893 {
894 if (tgt->list[i].key != NULL)
895 return tgt->list[i].key->tgt->tgt_start
896 + tgt->list[i].key->tgt_offset
897 + tgt->list[i].offset;
898
899 switch (tgt->list[i].offset)
900 {
901 case OFFSET_INLINED:
902 return (uintptr_t) hostaddrs[i];
903
904 case OFFSET_POINTER:
905 return 0;
906
907 case OFFSET_STRUCT:
908 return tgt->list[i + 1].key->tgt->tgt_start
909 + tgt->list[i + 1].key->tgt_offset
910 + tgt->list[i + 1].offset
911 + (uintptr_t) hostaddrs[i]
912 - (uintptr_t) hostaddrs[i + 1];
913
914 default:
915 return tgt->tgt_start + tgt->list[i].offset;
916 }
917 }
918
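/* Worker for gomp_map_vars and goacc_map_vars: map MAPNUM variables described
   by HOSTADDRS, SIZES and KINDS on DEVICEP and return a target_mem_desc for
   them.  A first pass computes the required device allocation and handles
   variables that are already mapped; after allocating device memory (or
   reusing a caller-supplied address in DEVADDRS), a second pass creates the
   new mappings and performs the host-to-device copies, coalescing small
   copies through a temporary buffer where possible.  For
   GOMP_MAP_VARS_TARGET, the device addresses of all entries are additionally
   stored at the start of the block.  */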
919 static inline __attribute__((always_inline)) struct target_mem_desc *
920 gomp_map_vars_internal (struct gomp_device_descr *devicep,
921 struct goacc_asyncqueue *aq, size_t mapnum,
922 void **hostaddrs, void **devaddrs, size_t *sizes,
923 void *kinds, bool short_mapkind,
924 htab_t *refcount_set,
925 enum gomp_map_vars_kind pragma_kind)
926 {
927 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
928 bool has_firstprivate = false;
929 bool has_always_ptrset = false;
930 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
931 const int rshift = short_mapkind ? 8 : 3;
932 const int typemask = short_mapkind ? 0xff : 0x7;
933 struct splay_tree_s *mem_map = &devicep->mem_map;
934 struct splay_tree_key_s cur_node;
935 struct target_mem_desc *tgt
936 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
937 tgt->list_count = mapnum;
938 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
939 tgt->device_descr = devicep;
940 tgt->prev = NULL;
941 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
942
943 if (mapnum == 0)
944 {
945 tgt->tgt_start = 0;
946 tgt->tgt_end = 0;
947 return tgt;
948 }
949
950 tgt_align = sizeof (void *);
951 tgt_size = 0;
952 cbuf.chunks = NULL;
953 cbuf.chunk_cnt = -1;
954 cbuf.use_cnt = 0;
955 cbuf.buf = NULL;
956 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
957 {
958 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
959 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
960 cbuf.chunk_cnt = 0;
961 }
962 if (pragma_kind == GOMP_MAP_VARS_TARGET)
963 {
964 size_t align = 4 * sizeof (void *);
965 tgt_align = align;
966 tgt_size = mapnum * sizeof (void *);
967 cbuf.chunk_cnt = 1;
968 cbuf.use_cnt = 1 + (mapnum > 1);
969 cbuf.chunks[0].start = 0;
970 cbuf.chunks[0].end = tgt_size;
971 }
972
973 gomp_mutex_lock (&devicep->lock);
974 if (devicep->state == GOMP_DEVICE_FINALIZED)
975 {
976 gomp_mutex_unlock (&devicep->lock);
977 free (tgt);
978 return NULL;
979 }
980
981 for (i = 0; i < mapnum; i++)
982 {
983 int kind = get_kind (short_mapkind, kinds, i);
984 bool implicit = get_implicit (short_mapkind, kinds, i);
985 if (hostaddrs[i] == NULL
986 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
987 {
988 tgt->list[i].key = NULL;
989 tgt->list[i].offset = OFFSET_INLINED;
990 continue;
991 }
992 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
993 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
994 {
995 tgt->list[i].key = NULL;
996 if (!not_found_cnt)
997 {
998 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
999 on a separate construct prior to using use_device_{addr,ptr}.
1000 In OpenMP 5.0, map directives need to be ordered by the
1001 middle-end before the use_device_* clauses. If
1002 !not_found_cnt, all mappings requested (if any) are already
1003 mapped, so use_device_{addr,ptr} can be resolved right away.
1004 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1005 now but would succeed after performing the mappings in the
1006 following loop. We can't defer this always to the second
1007 loop, because it is not even invoked when !not_found_cnt
1008 after the first loop. */
1009 cur_node.host_start = (uintptr_t) hostaddrs[i];
1010 cur_node.host_end = cur_node.host_start;
1011 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1012 if (n != NULL)
1013 {
1014 cur_node.host_start -= n->host_start;
1015 hostaddrs[i]
1016 = (void *) (n->tgt->tgt_start + n->tgt_offset
1017 + cur_node.host_start);
1018 }
1019 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1020 {
1021 gomp_mutex_unlock (&devicep->lock);
1022 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1023 }
1024 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1025 /* If not present, continue using the host address. */
1026 ;
1027 else
1028 __builtin_unreachable ();
1029 tgt->list[i].offset = OFFSET_INLINED;
1030 }
1031 else
1032 tgt->list[i].offset = 0;
1033 continue;
1034 }
1035 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1036 {
1037 size_t first = i + 1;
1038 size_t last = i + sizes[i];
1039 cur_node.host_start = (uintptr_t) hostaddrs[i];
1040 cur_node.host_end = (uintptr_t) hostaddrs[last]
1041 + sizes[last];
1042 tgt->list[i].key = NULL;
1043 tgt->list[i].offset = OFFSET_STRUCT;
1044 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1045 if (n == NULL)
1046 {
1047 size_t align = (size_t) 1 << (kind >> rshift);
1048 if (tgt_align < align)
1049 tgt_align = align;
1050 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1051 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1052 tgt_size += cur_node.host_end - cur_node.host_start;
1053 not_found_cnt += last - i;
1054 for (i = first; i <= last; i++)
1055 {
1056 tgt->list[i].key = NULL;
1057 if (!aq
1058 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1059 & typemask))
1060 gomp_coalesce_buf_add (&cbuf,
1061 tgt_size - cur_node.host_end
1062 + (uintptr_t) hostaddrs[i],
1063 sizes[i]);
1064 }
1065 i--;
1066 continue;
1067 }
1068 for (i = first; i <= last; i++)
1069 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1070 sizes, kinds, NULL, refcount_set);
1071 i--;
1072 continue;
1073 }
1074 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1075 {
1076 tgt->list[i].key = NULL;
1077 tgt->list[i].offset = OFFSET_POINTER;
1078 has_firstprivate = true;
1079 continue;
1080 }
1081 else if ((kind & typemask) == GOMP_MAP_ATTACH
1082 || ((kind & typemask)
1083 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1084 {
1085 tgt->list[i].key = NULL;
1086 has_firstprivate = true;
1087 continue;
1088 }
1089 cur_node.host_start = (uintptr_t) hostaddrs[i];
1090 if (!GOMP_MAP_POINTER_P (kind & typemask))
1091 cur_node.host_end = cur_node.host_start + sizes[i];
1092 else
1093 cur_node.host_end = cur_node.host_start + sizeof (void *);
1094 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1095 {
1096 tgt->list[i].key = NULL;
1097
1098 size_t align = (size_t) 1 << (kind >> rshift);
1099 if (tgt_align < align)
1100 tgt_align = align;
1101 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1102 if (!aq)
1103 gomp_coalesce_buf_add (&cbuf, tgt_size,
1104 cur_node.host_end - cur_node.host_start);
1105 tgt_size += cur_node.host_end - cur_node.host_start;
1106 has_firstprivate = true;
1107 continue;
1108 }
1109 splay_tree_key n;
1110 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1111 {
1112 n = gomp_map_0len_lookup (mem_map, &cur_node);
1113 if (!n)
1114 {
1115 tgt->list[i].key = NULL;
1116 tgt->list[i].offset = OFFSET_POINTER;
1117 continue;
1118 }
1119 }
1120 else
1121 n = splay_tree_lookup (mem_map, &cur_node);
1122 if (n && n->refcount != REFCOUNT_LINK)
1123 {
1124 int always_to_cnt = 0;
1125 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1126 {
1127 bool has_nullptr = false;
1128 size_t j;
1129 for (j = 0; j < n->tgt->list_count; j++)
1130 if (n->tgt->list[j].key == n)
1131 {
1132 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1133 break;
1134 }
1135 if (n->tgt->list_count == 0)
1136 {
   1137                 /* 'declare target'; assume has_nullptr; it could also be
   1138                    a statically assigned pointer, but then it should point to
   1139                    the equivalent variable on the host.  */
1140 assert (n->refcount == REFCOUNT_INFINITY);
1141 has_nullptr = true;
1142 }
1143 else
1144 assert (j < n->tgt->list_count);
   1145             /* Re-map the data if there is an 'always' modifier, or if a
   1146                null pointer was there and now a nonnull one has been found; that
   1147                permits transparent re-mapping for Fortran array descriptors
   1148                which were previously mapped unallocated.  */
1149 for (j = i + 1; j < mapnum; j++)
1150 {
1151 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1152 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1153 && (!has_nullptr
1154 || !GOMP_MAP_POINTER_P (ptr_kind)
1155 || *(void **) hostaddrs[j] == NULL))
1156 break;
1157 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1158 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1159 > cur_node.host_end))
1160 break;
1161 else
1162 {
1163 has_always_ptrset = true;
1164 ++always_to_cnt;
1165 }
1166 }
1167 }
1168 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1169 kind & typemask, always_to_cnt > 0, implicit,
1170 NULL, refcount_set);
1171 i += always_to_cnt;
1172 }
1173 else
1174 {
1175 tgt->list[i].key = NULL;
1176
1177 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1178 {
   1179             /* Not present, hence skip the entry, including its MAP_POINTER
   1180                if it exists.  */
1181 tgt->list[i].offset = OFFSET_POINTER;
1182 if (i + 1 < mapnum
1183 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1184 == GOMP_MAP_POINTER))
1185 {
1186 ++i;
1187 tgt->list[i].key = NULL;
1188 tgt->list[i].offset = 0;
1189 }
1190 continue;
1191 }
1192 size_t align = (size_t) 1 << (kind >> rshift);
1193 not_found_cnt++;
1194 if (tgt_align < align)
1195 tgt_align = align;
1196 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1197 if (!aq
1198 && gomp_to_device_kind_p (kind & typemask))
1199 gomp_coalesce_buf_add (&cbuf, tgt_size,
1200 cur_node.host_end - cur_node.host_start);
1201 tgt_size += cur_node.host_end - cur_node.host_start;
1202 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1203 {
1204 size_t j;
1205 int kind;
1206 for (j = i + 1; j < mapnum; j++)
1207 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1208 kinds, j)) & typemask))
1209 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1210 break;
1211 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1212 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1213 > cur_node.host_end))
1214 break;
1215 else
1216 {
1217 tgt->list[j].key = NULL;
1218 i++;
1219 }
1220 }
1221 }
1222 }
1223
1224 if (devaddrs)
1225 {
1226 if (mapnum != 1)
1227 {
1228 gomp_mutex_unlock (&devicep->lock);
1229 gomp_fatal ("unexpected aggregation");
1230 }
1231 tgt->to_free = devaddrs[0];
1232 tgt->tgt_start = (uintptr_t) tgt->to_free;
1233 tgt->tgt_end = tgt->tgt_start + sizes[0];
1234 }
1235 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1236 {
1237 /* Allocate tgt_align aligned tgt_size block of memory. */
1238 /* FIXME: Perhaps change interface to allocate properly aligned
1239 memory. */
1240 tgt->to_free = devicep->alloc_func (devicep->target_id,
1241 tgt_size + tgt_align - 1);
1242 if (!tgt->to_free)
1243 {
1244 gomp_mutex_unlock (&devicep->lock);
1245 gomp_fatal ("device memory allocation fail");
1246 }
1247
1248 tgt->tgt_start = (uintptr_t) tgt->to_free;
1249 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1250 tgt->tgt_end = tgt->tgt_start + tgt_size;
1251
1252 if (cbuf.use_cnt == 1)
1253 cbuf.chunk_cnt--;
1254 if (cbuf.chunk_cnt > 0)
1255 {
1256 cbuf.buf
1257 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1258 if (cbuf.buf)
1259 {
1260 cbuf.tgt = tgt;
1261 cbufp = &cbuf;
1262 }
1263 }
1264 }
1265 else
1266 {
1267 tgt->to_free = NULL;
1268 tgt->tgt_start = 0;
1269 tgt->tgt_end = 0;
1270 }
1271
1272 tgt_size = 0;
1273 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1274 tgt_size = mapnum * sizeof (void *);
1275
1276 tgt->array = NULL;
1277 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1278 {
1279 if (not_found_cnt)
1280 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1281 splay_tree_node array = tgt->array;
1282 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1283 uintptr_t field_tgt_base = 0;
1284 splay_tree_key field_tgt_structelem_first = NULL;
1285
1286 for (i = 0; i < mapnum; i++)
1287 if (has_always_ptrset
1288 && tgt->list[i].key
1289 && (get_kind (short_mapkind, kinds, i) & typemask)
1290 == GOMP_MAP_TO_PSET)
1291 {
1292 splay_tree_key k = tgt->list[i].key;
1293 bool has_nullptr = false;
1294 size_t j;
1295 for (j = 0; j < k->tgt->list_count; j++)
1296 if (k->tgt->list[j].key == k)
1297 {
1298 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1299 break;
1300 }
1301 if (k->tgt->list_count == 0)
1302 has_nullptr = true;
1303 else
1304 assert (j < k->tgt->list_count);
1305
1306 tgt->list[i].has_null_ptr_assoc = false;
1307 for (j = i + 1; j < mapnum; j++)
1308 {
1309 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1310 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1311 && (!has_nullptr
1312 || !GOMP_MAP_POINTER_P (ptr_kind)
1313 || *(void **) hostaddrs[j] == NULL))
1314 break;
1315 else if ((uintptr_t) hostaddrs[j] < k->host_start
1316 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1317 > k->host_end))
1318 break;
1319 else
1320 {
1321 if (*(void **) hostaddrs[j] == NULL)
1322 tgt->list[i].has_null_ptr_assoc = true;
1323 tgt->list[j].key = k;
1324 tgt->list[j].copy_from = false;
1325 tgt->list[j].always_copy_from = false;
1326 tgt->list[j].is_attach = false;
1327 gomp_increment_refcount (k, refcount_set);
1328 gomp_map_pointer (k->tgt, aq,
1329 (uintptr_t) *(void **) hostaddrs[j],
1330 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1331 - k->host_start),
1332 sizes[j], cbufp, false);
1333 }
1334 }
1335 i = j - 1;
1336 }
1337 else if (tgt->list[i].key == NULL)
1338 {
1339 int kind = get_kind (short_mapkind, kinds, i);
1340 bool implicit = get_implicit (short_mapkind, kinds, i);
1341 if (hostaddrs[i] == NULL)
1342 continue;
1343 switch (kind & typemask)
1344 {
1345 size_t align, len, first, last;
1346 splay_tree_key n;
1347 case GOMP_MAP_FIRSTPRIVATE:
1348 align = (size_t) 1 << (kind >> rshift);
1349 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1350 tgt->list[i].offset = tgt_size;
1351 len = sizes[i];
1352 gomp_copy_host2dev (devicep, aq,
1353 (void *) (tgt->tgt_start + tgt_size),
1354 (void *) hostaddrs[i], len, false, cbufp);
1355 tgt_size += len;
1356 continue;
1357 case GOMP_MAP_FIRSTPRIVATE_INT:
1358 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1359 continue;
1360 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1361 /* The OpenACC 'host_data' construct only allows 'use_device'
1362 "mapping" clauses, so in the first loop, 'not_found_cnt'
1363 must always have been zero, so all OpenACC 'use_device'
1364 clauses have already been handled. (We can only easily test
1365 'use_device' with 'if_present' clause here.) */
1366 assert (tgt->list[i].offset == OFFSET_INLINED);
1367 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1368 code conceptually simple, similar to the first loop. */
1369 case GOMP_MAP_USE_DEVICE_PTR:
1370 if (tgt->list[i].offset == 0)
1371 {
1372 cur_node.host_start = (uintptr_t) hostaddrs[i];
1373 cur_node.host_end = cur_node.host_start;
1374 n = gomp_map_lookup (mem_map, &cur_node);
1375 if (n != NULL)
1376 {
1377 cur_node.host_start -= n->host_start;
1378 hostaddrs[i]
1379 = (void *) (n->tgt->tgt_start + n->tgt_offset
1380 + cur_node.host_start);
1381 }
1382 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1383 {
1384 gomp_mutex_unlock (&devicep->lock);
1385 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1386 }
1387 else if ((kind & typemask)
1388 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1389 /* If not present, continue using the host address. */
1390 ;
1391 else
1392 __builtin_unreachable ();
1393 tgt->list[i].offset = OFFSET_INLINED;
1394 }
1395 continue;
1396 case GOMP_MAP_STRUCT:
1397 first = i + 1;
1398 last = i + sizes[i];
1399 cur_node.host_start = (uintptr_t) hostaddrs[i];
1400 cur_node.host_end = (uintptr_t) hostaddrs[last]
1401 + sizes[last];
1402 if (tgt->list[first].key != NULL)
1403 continue;
1404 n = splay_tree_lookup (mem_map, &cur_node);
1405 if (n == NULL)
1406 {
1407 size_t align = (size_t) 1 << (kind >> rshift);
1408 tgt_size -= (uintptr_t) hostaddrs[first]
1409 - (uintptr_t) hostaddrs[i];
1410 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1411 tgt_size += (uintptr_t) hostaddrs[first]
1412 - (uintptr_t) hostaddrs[i];
1413 field_tgt_base = (uintptr_t) hostaddrs[first];
1414 field_tgt_offset = tgt_size;
1415 field_tgt_clear = last;
1416 field_tgt_structelem_first = NULL;
1417 tgt_size += cur_node.host_end
1418 - (uintptr_t) hostaddrs[first];
1419 continue;
1420 }
1421 for (i = first; i <= last; i++)
1422 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1423 sizes, kinds, cbufp, refcount_set);
1424 i--;
1425 continue;
1426 case GOMP_MAP_ALWAYS_POINTER:
1427 cur_node.host_start = (uintptr_t) hostaddrs[i];
1428 cur_node.host_end = cur_node.host_start + sizeof (void *);
1429 n = splay_tree_lookup (mem_map, &cur_node);
1430 if (n == NULL
1431 || n->host_start > cur_node.host_start
1432 || n->host_end < cur_node.host_end)
1433 {
1434 gomp_mutex_unlock (&devicep->lock);
1435 gomp_fatal ("always pointer not mapped");
1436 }
1437 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1438 != GOMP_MAP_ALWAYS_POINTER)
1439 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1440 if (cur_node.tgt_offset)
1441 cur_node.tgt_offset -= sizes[i];
1442 gomp_copy_host2dev (devicep, aq,
1443 (void *) (n->tgt->tgt_start
1444 + n->tgt_offset
1445 + cur_node.host_start
1446 - n->host_start),
1447 (void *) &cur_node.tgt_offset,
1448 sizeof (void *), true, cbufp);
1449 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1450 + cur_node.host_start - n->host_start;
1451 continue;
1452 case GOMP_MAP_IF_PRESENT:
1453 /* Not present - otherwise handled above. Skip over its
1454 MAP_POINTER as well. */
1455 if (i + 1 < mapnum
1456 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1457 == GOMP_MAP_POINTER))
1458 ++i;
1459 continue;
1460 case GOMP_MAP_ATTACH:
1461 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1462 {
1463 cur_node.host_start = (uintptr_t) hostaddrs[i];
1464 cur_node.host_end = cur_node.host_start + sizeof (void *);
1465 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1466 if (n != NULL)
1467 {
1468 tgt->list[i].key = n;
1469 tgt->list[i].offset = cur_node.host_start - n->host_start;
1470 tgt->list[i].length = n->host_end - n->host_start;
1471 tgt->list[i].copy_from = false;
1472 tgt->list[i].always_copy_from = false;
1473 tgt->list[i].is_attach = true;
1474 /* OpenACC 'attach'/'detach' doesn't affect
1475 structured/dynamic reference counts ('n->refcount',
1476 'n->dynamic_refcount'). */
1477
1478 bool zlas
1479 = ((kind & typemask)
1480 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1481 gomp_attach_pointer (devicep, aq, mem_map, n,
1482 (uintptr_t) hostaddrs[i], sizes[i],
1483 cbufp, zlas);
1484 }
1485 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1486 {
1487 gomp_mutex_unlock (&devicep->lock);
1488 gomp_fatal ("outer struct not mapped for attach");
1489 }
1490 continue;
1491 }
1492 default:
1493 break;
1494 }
1495 splay_tree_key k = &array->key;
1496 k->host_start = (uintptr_t) hostaddrs[i];
1497 if (!GOMP_MAP_POINTER_P (kind & typemask))
1498 k->host_end = k->host_start + sizes[i];
1499 else
1500 k->host_end = k->host_start + sizeof (void *);
1501 splay_tree_key n = splay_tree_lookup (mem_map, k);
1502 if (n && n->refcount != REFCOUNT_LINK)
1503 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1504 kind & typemask, false, implicit, cbufp,
1505 refcount_set);
1506 else
1507 {
1508 k->aux = NULL;
1509 if (n && n->refcount == REFCOUNT_LINK)
1510 {
1511 /* Replace target address of the pointer with target address
1512 of mapped object in the splay tree. */
1513 splay_tree_remove (mem_map, n);
1514 k->aux
1515 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1516 k->aux->link_key = n;
1517 }
1518 size_t align = (size_t) 1 << (kind >> rshift);
1519 tgt->list[i].key = k;
1520 k->tgt = tgt;
1521 k->refcount = 0;
1522 k->dynamic_refcount = 0;
1523 if (field_tgt_clear != FIELD_TGT_EMPTY)
1524 {
1525 k->tgt_offset = k->host_start - field_tgt_base
1526 + field_tgt_offset;
1527 if (openmp_p)
1528 {
1529 k->refcount = REFCOUNT_STRUCTELEM;
1530 if (field_tgt_structelem_first == NULL)
1531 {
1532 /* Set to first structure element of sequence. */
1533 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1534 field_tgt_structelem_first = k;
1535 }
1536 else
1537 /* Point to refcount of leading element, but do not
1538 increment again. */
1539 k->structelem_refcount_ptr
1540 = &field_tgt_structelem_first->structelem_refcount;
1541
1542 if (i == field_tgt_clear)
1543 {
1544 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1545 field_tgt_structelem_first = NULL;
1546 }
1547 }
1548 if (i == field_tgt_clear)
1549 field_tgt_clear = FIELD_TGT_EMPTY;
1550 }
1551 else
1552 {
1553 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1554 k->tgt_offset = tgt_size;
1555 tgt_size += k->host_end - k->host_start;
1556 }
1557 /* First increment, from 0 to 1. gomp_increment_refcount
1558 encapsulates the different increment cases, so use this
1559 instead of directly setting 1 during initialization. */
1560 gomp_increment_refcount (k, refcount_set);
1561
1562 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1563 tgt->list[i].always_copy_from
1564 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1565 tgt->list[i].is_attach = false;
1566 tgt->list[i].offset = 0;
1567 tgt->list[i].length = k->host_end - k->host_start;
1568 tgt->refcount++;
1569 array->left = NULL;
1570 array->right = NULL;
1571 splay_tree_insert (mem_map, array);
1572 switch (kind & typemask)
1573 {
1574 case GOMP_MAP_ALLOC:
1575 case GOMP_MAP_FROM:
1576 case GOMP_MAP_FORCE_ALLOC:
1577 case GOMP_MAP_FORCE_FROM:
1578 case GOMP_MAP_ALWAYS_FROM:
1579 break;
1580 case GOMP_MAP_TO:
1581 case GOMP_MAP_TOFROM:
1582 case GOMP_MAP_FORCE_TO:
1583 case GOMP_MAP_FORCE_TOFROM:
1584 case GOMP_MAP_ALWAYS_TO:
1585 case GOMP_MAP_ALWAYS_TOFROM:
1586 gomp_copy_host2dev (devicep, aq,
1587 (void *) (tgt->tgt_start
1588 + k->tgt_offset),
1589 (void *) k->host_start,
1590 k->host_end - k->host_start,
1591 false, cbufp);
1592 break;
1593 case GOMP_MAP_POINTER:
1594 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1595 gomp_map_pointer
1596 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1597 k->tgt_offset, sizes[i], cbufp,
1598 ((kind & typemask)
1599 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1600 break;
1601 case GOMP_MAP_TO_PSET:
1602 gomp_copy_host2dev (devicep, aq,
1603 (void *) (tgt->tgt_start
1604 + k->tgt_offset),
1605 (void *) k->host_start,
1606 k->host_end - k->host_start,
1607 false, cbufp);
1608 tgt->list[i].has_null_ptr_assoc = false;
1609
1610 for (j = i + 1; j < mapnum; j++)
1611 {
1612 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1613 & typemask);
1614 if (!GOMP_MAP_POINTER_P (ptr_kind)
1615 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1616 break;
1617 else if ((uintptr_t) hostaddrs[j] < k->host_start
1618 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1619 > k->host_end))
1620 break;
1621 else
1622 {
1623 tgt->list[j].key = k;
1624 tgt->list[j].copy_from = false;
1625 tgt->list[j].always_copy_from = false;
1626 tgt->list[j].is_attach = false;
1627 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1628 /* For OpenMP, the use of refcount_sets causes
1629 errors if we set k->refcount = 1 above but also
1630 increment it again here, for decrementing will
1631 not properly match, since we decrement only once
1632 for each key's refcount. Therefore avoid this
1633 increment for OpenMP constructs. */
1634 if (!openmp_p)
1635 gomp_increment_refcount (k, refcount_set);
1636 gomp_map_pointer (tgt, aq,
1637 (uintptr_t) *(void **) hostaddrs[j],
1638 k->tgt_offset
1639 + ((uintptr_t) hostaddrs[j]
1640 - k->host_start),
1641 sizes[j], cbufp, false);
1642 }
1643 }
1644 i = j - 1;
1645 break;
1646 case GOMP_MAP_FORCE_PRESENT:
1647 {
1648 /* We already looked up the memory region above and it
1649 was missing. */
1650 size_t size = k->host_end - k->host_start;
1651 gomp_mutex_unlock (&devicep->lock);
1652 #ifdef HAVE_INTTYPES_H
1653 gomp_fatal ("present clause: !acc_is_present (%p, "
1654 "%"PRIu64" (0x%"PRIx64"))",
1655 (void *) k->host_start,
1656 (uint64_t) size, (uint64_t) size);
1657 #else
1658 gomp_fatal ("present clause: !acc_is_present (%p, "
1659 "%lu (0x%lx))", (void *) k->host_start,
1660 (unsigned long) size, (unsigned long) size);
1661 #endif
1662 }
1663 break;
1664 case GOMP_MAP_FORCE_DEVICEPTR:
1665 assert (k->host_end - k->host_start == sizeof (void *));
1666 gomp_copy_host2dev (devicep, aq,
1667 (void *) (tgt->tgt_start
1668 + k->tgt_offset),
1669 (void *) k->host_start,
1670 sizeof (void *), false, cbufp);
1671 break;
1672 default:
1673 gomp_mutex_unlock (&devicep->lock);
1674 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1675 kind);
1676 }
1677
1678 if (k->aux && k->aux->link_key)
1679 {
1680 /* Set link pointer on target to the device address of the
1681 mapped object. */
1682 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1683 /* We intentionally do not use coalescing here, as it's not
1684 data allocated by the current call to this function. */
1685 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1686 &tgt_addr, sizeof (void *), true, NULL);
1687 }
1688 array++;
1689 }
1690 }
1691 }
1692
1693 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1694 {
1695 for (i = 0; i < mapnum; i++)
1696 {
1697 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1698 gomp_copy_host2dev (devicep, aq,
1699 (void *) (tgt->tgt_start + i * sizeof (void *)),
1700 (void *) &cur_node.tgt_offset, sizeof (void *),
1701 true, cbufp);
1702 }
1703 }
1704
1705 if (cbufp)
1706 {
1707 /* See 'gomp_coalesce_buf_add'. */
1708 assert (!aq);
1709
1710 long c = 0;
1711 for (c = 0; c < cbuf.chunk_cnt; ++c)
1712 gomp_copy_host2dev (devicep, aq,
1713 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1714 (char *) cbuf.buf + (cbuf.chunks[c].start
1715 - cbuf.chunks[0].start),
1716 cbuf.chunks[c].end - cbuf.chunks[c].start,
1717 true, NULL);
1718 free (cbuf.buf);
1719 cbuf.buf = NULL;
1720 cbufp = NULL;
1721 }
1722
1723 /* If the variable from "omp target enter data" map-list was already mapped,
1724 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1725 gomp_exit_data. */
1726 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1727 {
1728 free (tgt);
1729 tgt = NULL;
1730 }
1731
1732 gomp_mutex_unlock (&devicep->lock);
1733 return tgt;
1734 }
1735
1736 static struct target_mem_desc *
1737 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1738 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1739 bool short_mapkind, htab_t *refcount_set,
1740 enum gomp_map_vars_kind pragma_kind)
1741 {
1742 /* This management of a local refcount_set is for convenience of callers
1743 who do not share a refcount_set over multiple map/unmap uses. */
1744 htab_t local_refcount_set = NULL;
1745 if (refcount_set == NULL)
1746 {
1747 local_refcount_set = htab_create (mapnum);
1748 refcount_set = &local_refcount_set;
1749 }
1750
1751 struct target_mem_desc *tgt;
1752 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1753 sizes, kinds, short_mapkind, refcount_set,
1754 pragma_kind);
1755 if (local_refcount_set)
1756 htab_free (local_refcount_set);
1757
1758 return tgt;
1759 }
1760
1761 attribute_hidden struct target_mem_desc *
1762 goacc_map_vars (struct gomp_device_descr *devicep,
1763 struct goacc_asyncqueue *aq, size_t mapnum,
1764 void **hostaddrs, void **devaddrs, size_t *sizes,
1765 void *kinds, bool short_mapkind,
1766 enum gomp_map_vars_kind pragma_kind)
1767 {
1768 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1769 sizes, kinds, short_mapkind, NULL,
1770 GOMP_MAP_VARS_OPENACC | pragma_kind);
1771 }
1772
1773 static void
1774 gomp_unmap_tgt (struct target_mem_desc *tgt)
1775 {
1776 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1777 if (tgt->tgt_end)
1778 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1779
1780 free (tgt->array);
1781 free (tgt);
1782 }
1783
1784 static bool
1785 gomp_unref_tgt (void *ptr)
1786 {
1787 bool is_tgt_unmapped = false;
1788
1789 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1790
1791 if (tgt->refcount > 1)
1792 tgt->refcount--;
1793 else
1794 {
1795 gomp_unmap_tgt (tgt);
1796 is_tgt_unmapped = true;
1797 }
1798
1799 return is_tgt_unmapped;
1800 }
1801
1802 static void
1803 gomp_unref_tgt_void (void *ptr)
1804 {
1805 (void) gomp_unref_tgt (ptr);
1806 }
1807
1808 static void
1809 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1810 {
1811 splay_tree_remove (sp, k);
1812 if (k->aux)
1813 {
1814 if (k->aux->link_key)
1815 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1816 if (k->aux->attach_count)
1817 free (k->aux->attach_count);
1818 free (k->aux);
1819 k->aux = NULL;
1820 }
1821 }
1822
1823 static inline __attribute__((always_inline)) bool
1824 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1825 struct goacc_asyncqueue *aq)
1826 {
1827 bool is_tgt_unmapped = false;
1828
1829 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1830 {
1831 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1832 /* Infer the splay_tree_key of the first structelem key using the
1833 pointer to the first structelem_refcount. */
1834 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1835 - offsetof (struct splay_tree_key_s,
1836 structelem_refcount));
1837 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1838
1839 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1840 with the splay_tree_keys embedded inside. */
1841 splay_tree_node node =
1842 (splay_tree_node) ((char *) k
1843 - offsetof (struct splay_tree_node_s, key));
1844 while (true)
1845 {
1846 /* Start from the _FIRST key, and continue for all following
1847 sibling keys. */
1848 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1849 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1850 break;
1851 else
1852 k = &(++node)->key;
1853 }
1854 }
1855 else
1856 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1857
1858 if (aq)
1859 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1860 (void *) k->tgt);
1861 else
1862 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1863 return is_tgt_unmapped;
1864 }
1865
1866 attribute_hidden bool
1867 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1868 {
1869 return gomp_remove_var_internal (devicep, k, NULL);
1870 }
1871
1872 /* Remove a variable asynchronously. This actually removes the variable
1873 mapping immediately, but retains the linked target_mem_desc until the
1874 asynchronous operation has completed (as it may still refer to target
1875 memory). The device lock must be held before entry, and remains locked on
1876 exit. */
1877
1878 attribute_hidden void
1879 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1880 struct goacc_asyncqueue *aq)
1881 {
1882 (void) gomp_remove_var_internal (devicep, k, aq);
1883 }
1884
1885 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1886 variables back from device to host: if it is false, it is assumed that this
1887 has been done already. */
1888
1889 static inline __attribute__((always_inline)) void
1890 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1891 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1892 {
1893 struct gomp_device_descr *devicep = tgt->device_descr;
1894
1895 if (tgt->list_count == 0)
1896 {
1897 free (tgt);
1898 return;
1899 }
1900
1901 gomp_mutex_lock (&devicep->lock);
1902 if (devicep->state == GOMP_DEVICE_FINALIZED)
1903 {
1904 gomp_mutex_unlock (&devicep->lock);
1905 free (tgt->array);
1906 free (tgt);
1907 return;
1908 }
1909
1910 size_t i;
1911
1912 /* We must perform detachments before any copies back to the host. */
1913 for (i = 0; i < tgt->list_count; i++)
1914 {
1915 splay_tree_key k = tgt->list[i].key;
1916
1917 if (k != NULL && tgt->list[i].is_attach)
1918 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1919 + tgt->list[i].offset,
1920 false, NULL);
1921 }
1922
1923 for (i = 0; i < tgt->list_count; i++)
1924 {
1925 splay_tree_key k = tgt->list[i].key;
1926 if (k == NULL)
1927 continue;
1928
1929 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1930 counts ('n->refcount', 'n->dynamic_refcount'). */
1931 if (tgt->list[i].is_attach)
1932 continue;
1933
1934 bool do_copy, do_remove;
1935 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1936
1937 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1938 || tgt->list[i].always_copy_from)
1939 gomp_copy_dev2host (devicep, aq,
1940 (void *) (k->host_start + tgt->list[i].offset),
1941 (void *) (k->tgt->tgt_start + k->tgt_offset
1942 + tgt->list[i].offset),
1943 tgt->list[i].length);
1944 if (do_remove)
1945 {
1946 struct target_mem_desc *k_tgt = k->tgt;
1947 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1948 /* It would be bad if TGT got unmapped while we're still iterating
1949 over its LIST_COUNT and also still expect to use it in the following
1950 code. */
1951 assert (!is_tgt_unmapped
1952 || k_tgt != tgt);
1953 }
1954 }
1955
1956 if (aq)
1957 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1958 (void *) tgt);
1959 else
1960 gomp_unref_tgt ((void *) tgt);
1961
1962 gomp_mutex_unlock (&devicep->lock);
1963 }
1964
1965 static void
1966 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1967 htab_t *refcount_set)
1968 {
1969 /* This management of a local refcount_set is for convenience of callers
1970 who do not share a refcount_set over multiple map/unmap uses. */
1971 htab_t local_refcount_set = NULL;
1972 if (refcount_set == NULL)
1973 {
1974 local_refcount_set = htab_create (tgt->list_count);
1975 refcount_set = &local_refcount_set;
1976 }
1977
1978 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
1979
1980 if (local_refcount_set)
1981 htab_free (local_refcount_set);
1982 }
1983
1984 attribute_hidden void
1985 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1986 struct goacc_asyncqueue *aq)
1987 {
1988 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1989 }
1990
1991 static void
1992 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1993 size_t *sizes, void *kinds, bool short_mapkind)
1994 {
1995 size_t i;
1996 struct splay_tree_key_s cur_node;
1997 const int typemask = short_mapkind ? 0xff : 0x7;
1998
1999 if (!devicep)
2000 return;
2001
2002 if (mapnum == 0)
2003 return;
2004
2005 gomp_mutex_lock (&devicep->lock);
2006 if (devicep->state == GOMP_DEVICE_FINALIZED)
2007 {
2008 gomp_mutex_unlock (&devicep->lock);
2009 return;
2010 }
2011
2012 for (i = 0; i < mapnum; i++)
2013 if (sizes[i])
2014 {
2015 cur_node.host_start = (uintptr_t) hostaddrs[i];
2016 cur_node.host_end = cur_node.host_start + sizes[i];
2017 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2018 if (n)
2019 {
2020 int kind = get_kind (short_mapkind, kinds, i);
2021 if (n->host_start > cur_node.host_start
2022 || n->host_end < cur_node.host_end)
2023 {
2024 gomp_mutex_unlock (&devicep->lock);
2025 gomp_fatal ("Trying to update [%p..%p) object when "
2026 "only [%p..%p) is mapped",
2027 (void *) cur_node.host_start,
2028 (void *) cur_node.host_end,
2029 (void *) n->host_start,
2030 (void *) n->host_end);
2031 }
2032
2033 if (n->aux && n->aux->attach_count)
2034 {
2035 uintptr_t addr = cur_node.host_start;
2036 while (addr < cur_node.host_end)
2037 {
2038 /* We have to be careful not to overwrite still attached
2039 pointers during host<->device updates. */
2040 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2041 if (n->aux->attach_count[i] == 0)
2042 {
2043 void *devaddr = (void *) (n->tgt->tgt_start
2044 + n->tgt_offset
2045 + addr - n->host_start);
2046 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2047 gomp_copy_host2dev (devicep, NULL,
2048 devaddr, (void *) addr,
2049 sizeof (void *), false, NULL);
2050 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2051 gomp_copy_dev2host (devicep, NULL,
2052 (void *) addr, devaddr,
2053 sizeof (void *));
2054 }
2055 addr += sizeof (void *);
2056 }
2057 }
2058 else
2059 {
2060 void *hostaddr = (void *) cur_node.host_start;
2061 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2062 + cur_node.host_start
2063 - n->host_start);
2064 size_t size = cur_node.host_end - cur_node.host_start;
2065
2066 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2067 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2068 false, NULL);
2069 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2070 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2071 }
2072 }
2073 }
2074 gomp_mutex_unlock (&devicep->lock);
2075 }
2076
2077 /* Load the image pointed to by TARGET_DATA to the device specified by
2078 DEVICEP, and insert into the splay tree the mapping between addresses from
2079 HOST_TABLE and from the loaded target image. We rely on the host and
2080 device compilers emitting variables and functions in the same order. */
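/* For orientation, the HOST_TABLE consumed by this function can be pictured
   roughly as follows (an illustrative sketch only; the real table is emitted
   by the compiler and the names used here are placeholders):

     void *host_table[4] = { func_addrs, func_addrs + num_funcs,
                             var_pairs, var_pairs + 2 * num_vars };

   Functions contribute one address each, while variables contribute an
   address/size pair, which is why the variable count below is halved.  */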
2081
2082 static void
2083 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2084 const void *host_table, const void *target_data,
2085 bool is_register_lock)
2086 {
2087 void **host_func_table = ((void ***) host_table)[0];
2088 void **host_funcs_end = ((void ***) host_table)[1];
2089 void **host_var_table = ((void ***) host_table)[2];
2090 void **host_vars_end = ((void ***) host_table)[3];
2091
2092 /* The func table contains only addresses, the var table contains addresses
2093 and corresponding sizes. */
2094 int num_funcs = host_funcs_end - host_func_table;
2095 int num_vars = (host_vars_end - host_var_table) / 2;
2096
2097 /* Currently the only 'other' entry is 'device_num'. */
2098 int num_others = 1;
2099
2100 /* Load image to device and get target addresses for the image. */
2101 struct addr_pair *target_table = NULL;
2102 int i, num_target_entries;
2103
2104 num_target_entries
2105 = devicep->load_image_func (devicep->target_id, version,
2106 target_data, &target_table);
2107
2108 if (num_target_entries != num_funcs + num_vars
2109 /* Others (device_num) are included as trailing entries in the pair list. */
2110 && num_target_entries != num_funcs + num_vars + num_others)
2111 {
2112 gomp_mutex_unlock (&devicep->lock);
2113 if (is_register_lock)
2114 gomp_mutex_unlock (&register_lock);
2115 gomp_fatal ("Cannot map target functions or variables"
2116 " (expected %u, have %u)", num_funcs + num_vars,
2117 num_target_entries);
2118 }
2119
2120 /* Insert host-target address mapping into splay tree. */
2121 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2122 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2123 tgt->refcount = REFCOUNT_INFINITY;
2124 tgt->tgt_start = 0;
2125 tgt->tgt_end = 0;
2126 tgt->to_free = NULL;
2127 tgt->prev = NULL;
2128 tgt->list_count = 0;
2129 tgt->device_descr = devicep;
2130 splay_tree_node array = tgt->array;
2131
2132 for (i = 0; i < num_funcs; i++)
2133 {
2134 splay_tree_key k = &array->key;
2135 k->host_start = (uintptr_t) host_func_table[i];
2136 k->host_end = k->host_start + 1;
2137 k->tgt = tgt;
2138 k->tgt_offset = target_table[i].start;
2139 k->refcount = REFCOUNT_INFINITY;
2140 k->dynamic_refcount = 0;
2141 k->aux = NULL;
2142 array->left = NULL;
2143 array->right = NULL;
2144 splay_tree_insert (&devicep->mem_map, array);
2145 array++;
2146 }
2147
2148 /* Most significant bit of the size in host and target tables marks
2149 "omp declare target link" variables. */
2150 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2151 const uintptr_t size_mask = ~link_bit;
2152
2153 for (i = 0; i < num_vars; i++)
2154 {
2155 struct addr_pair *target_var = &target_table[num_funcs + i];
2156 uintptr_t target_size = target_var->end - target_var->start;
2157 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2158
2159 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2160 {
2161 gomp_mutex_unlock (&devicep->lock);
2162 if (is_register_lock)
2163 gomp_mutex_unlock (&register_lock);
2164 gomp_fatal ("Cannot map target variables (size mismatch)");
2165 }
2166
2167 splay_tree_key k = &array->key;
2168 k->host_start = (uintptr_t) host_var_table[i * 2];
2169 k->host_end
2170 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2171 k->tgt = tgt;
2172 k->tgt_offset = target_var->start;
2173 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2174 k->dynamic_refcount = 0;
2175 k->aux = NULL;
2176 array->left = NULL;
2177 array->right = NULL;
2178 splay_tree_insert (&devicep->mem_map, array);
2179 array++;
2180 }
2181
2182 /* The last entry is for the on-device 'device_num' variable. Tolerate the
2183 case where the plugin does not return this entry. */
2184 if (num_funcs + num_vars < num_target_entries)
2185 {
2186 struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2187 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2188 was found in this image. */
2189 if (device_num_var->start != 0)
2190 {
2191 /* The index of the devicep within devices[] is regarded as its
2192 'device number', which is different from the per-device type
2193 devicep->target_id. */
2194 int device_num_val = (int) (devicep - &devices[0]);
2195 if (device_num_var->end - device_num_var->start != sizeof (int))
2196 {
2197 gomp_mutex_unlock (&devicep->lock);
2198 if (is_register_lock)
2199 gomp_mutex_unlock (&register_lock);
2200 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2201 "format");
2202 }
2203
2204 /* Copy the device_num value to its place in device memory, thereby
2205 actually putting the device number into effect. */
2206 gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2207 &device_num_val, sizeof (int), false, NULL);
2208 }
2209 }
2210
2211 free (target_table);
2212 }
2213
2214 /* Unload the mappings described by TARGET_DATA from device DEVICEP.
2215 The device must be locked. */
2216
2217 static void
2218 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2219 unsigned version,
2220 const void *host_table, const void *target_data)
2221 {
2222 void **host_func_table = ((void ***) host_table)[0];
2223 void **host_funcs_end = ((void ***) host_table)[1];
2224 void **host_var_table = ((void ***) host_table)[2];
2225 void **host_vars_end = ((void ***) host_table)[3];
2226
2227 /* The func table contains only addresses, the var table contains addresses
2228 and corresponding sizes. */
2229 int num_funcs = host_funcs_end - host_func_table;
2230 int num_vars = (host_vars_end - host_var_table) / 2;
2231
2232 struct splay_tree_key_s k;
2233 splay_tree_key node = NULL;
2234
2235 /* Find the mapping at the start of the node array. */
2236 if (num_funcs || num_vars)
2237 {
2238 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2239 : (uintptr_t) host_var_table[0]);
2240 k.host_end = k.host_start + 1;
2241 node = splay_tree_lookup (&devicep->mem_map, &k);
2242 }
2243
2244 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2245 {
2246 gomp_mutex_unlock (&devicep->lock);
2247 gomp_fatal ("image unload fail");
2248 }
2249
2250 /* Remove mappings from splay tree. */
2251 int i;
2252 for (i = 0; i < num_funcs; i++)
2253 {
2254 k.host_start = (uintptr_t) host_func_table[i];
2255 k.host_end = k.host_start + 1;
2256 splay_tree_remove (&devicep->mem_map, &k);
2257 }
2258
2259 /* Most significant bit of the size in host and target tables marks
2260 "omp declare target link" variables. */
2261 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2262 const uintptr_t size_mask = ~link_bit;
2263 bool is_tgt_unmapped = false;
2264
2265 for (i = 0; i < num_vars; i++)
2266 {
2267 k.host_start = (uintptr_t) host_var_table[i * 2];
2268 k.host_end
2269 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2270
2271 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2272 splay_tree_remove (&devicep->mem_map, &k);
2273 else
2274 {
2275 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2276 is_tgt_unmapped = gomp_remove_var (devicep, n);
2277 }
2278 }
2279
2280 if (node && !is_tgt_unmapped)
2281 {
2282 free (node->tgt);
2283 free (node);
2284 }
2285 }
2286
2287 /* This function should be called from every offload image while loading.
2288 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2289 the target, and TARGET_DATA needed by the target plugin. */
2290
2291 void
2292 GOMP_offload_register_ver (unsigned version, const void *host_table,
2293 int target_type, const void *target_data)
2294 {
2295 int i;
2296
2297 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2298 gomp_fatal ("Library too old for offload (version %u < %u)",
2299 GOMP_VERSION, GOMP_VERSION_LIB (version));
2300
2301 gomp_mutex_lock (&register_lock);
2302
2303 /* Load image to all initialized devices. */
2304 for (i = 0; i < num_devices; i++)
2305 {
2306 struct gomp_device_descr *devicep = &devices[i];
2307 gomp_mutex_lock (&devicep->lock);
2308 if (devicep->type == target_type
2309 && devicep->state == GOMP_DEVICE_INITIALIZED)
2310 gomp_load_image_to_device (devicep, version,
2311 host_table, target_data, true);
2312 gomp_mutex_unlock (&devicep->lock);
2313 }
2314
2315 /* Insert image to array of pending images. */
2316 offload_images
2317 = gomp_realloc_unlock (offload_images,
2318 (num_offload_images + 1)
2319 * sizeof (struct offload_image_descr));
2320 offload_images[num_offload_images].version = version;
2321 offload_images[num_offload_images].type = target_type;
2322 offload_images[num_offload_images].host_table = host_table;
2323 offload_images[num_offload_images].target_data = target_data;
2324
2325 num_offload_images++;
2326 gomp_mutex_unlock (&register_lock);
2327 }
2328
2329 void
2330 GOMP_offload_register (const void *host_table, int target_type,
2331 const void *target_data)
2332 {
2333 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2334 }
2335
2336 /* This function should be called from every offload image while unloading.
2337 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2338 the target, and TARGET_DATA needed by the target plugin. */
2339
2340 void
2341 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2342 int target_type, const void *target_data)
2343 {
2344 int i;
2345
2346 gomp_mutex_lock (&register_lock);
2347
2348 /* Unload image from all initialized devices. */
2349 for (i = 0; i < num_devices; i++)
2350 {
2351 struct gomp_device_descr *devicep = &devices[i];
2352 gomp_mutex_lock (&devicep->lock);
2353 if (devicep->type == target_type
2354 && devicep->state == GOMP_DEVICE_INITIALIZED)
2355 gomp_unload_image_from_device (devicep, version,
2356 host_table, target_data);
2357 gomp_mutex_unlock (&devicep->lock);
2358 }
2359
2360 /* Remove image from array of pending images. */
2361 for (i = 0; i < num_offload_images; i++)
2362 if (offload_images[i].target_data == target_data)
2363 {
2364 offload_images[i] = offload_images[--num_offload_images];
2365 break;
2366 }
2367
2368 gomp_mutex_unlock (&register_lock);
2369 }
2370
2371 void
2372 GOMP_offload_unregister (const void *host_table, int target_type,
2373 const void *target_data)
2374 {
2375 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2376 }
2377
2378 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2379 must be locked on entry, and remains locked on return. */
2380
2381 attribute_hidden void
2382 gomp_init_device (struct gomp_device_descr *devicep)
2383 {
2384 int i;
2385 if (!devicep->init_device_func (devicep->target_id))
2386 {
2387 gomp_mutex_unlock (&devicep->lock);
2388 gomp_fatal ("device initialization failed");
2389 }
2390
2391 /* Load to the device all images registered so far. */
2392 for (i = 0; i < num_offload_images; i++)
2393 {
2394 struct offload_image_descr *image = &offload_images[i];
2395 if (image->type == devicep->type)
2396 gomp_load_image_to_device (devicep, image->version,
2397 image->host_table, image->target_data,
2398 false);
2399 }
2400
2401 /* Initialize OpenACC asynchronous queues. */
2402 goacc_init_asyncqueues (devicep);
2403
2404 devicep->state = GOMP_DEVICE_INITIALIZED;
2405 }
2406
2407 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2408 must be locked on entry, and remains locked on return. */
2409
2410 attribute_hidden bool
2411 gomp_fini_device (struct gomp_device_descr *devicep)
2412 {
2413 bool ret = goacc_fini_asyncqueues (devicep);
2414 ret &= devicep->fini_device_func (devicep->target_id);
2415 devicep->state = GOMP_DEVICE_FINALIZED;
2416 return ret;
2417 }
2418
2419 attribute_hidden void
2420 gomp_unload_device (struct gomp_device_descr *devicep)
2421 {
2422 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2423 {
2424 unsigned i;
2425
2426 /* Unload from the device all images registered so far. */
2427 for (i = 0; i < num_offload_images; i++)
2428 {
2429 struct offload_image_descr *image = &offload_images[i];
2430 if (image->type == devicep->type)
2431 gomp_unload_image_from_device (devicep, image->version,
2432 image->host_table,
2433 image->target_data);
2434 }
2435 }
2436 }
2437
2438 /* Host fallback for GOMP_target{,_ext} routines. */
2439
2440 static void
2441 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2442 struct gomp_device_descr *devicep, void **args)
2443 {
2444 struct gomp_thread old_thr, *thr = gomp_thread ();
2445
2446 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2447 && devicep != NULL)
2448 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2449 "be used for offloading");
2450
2451 old_thr = *thr;
2452 memset (thr, '\0', sizeof (*thr));
2453 if (gomp_places_list)
2454 {
2455 thr->place = old_thr.place;
2456 thr->ts.place_partition_len = gomp_places_list_len;
2457 }
2458 if (args)
2459 while (*args)
2460 {
2461 intptr_t id = (intptr_t) *args++, val;
2462 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2463 val = (intptr_t) *args++;
2464 else
2465 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2466 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2467 continue;
2468 id &= GOMP_TARGET_ARG_ID_MASK;
2469 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2470 continue;
2471 val = val > INT_MAX ? INT_MAX : val;
2472 if (val)
2473 gomp_icv (true)->thread_limit_var = val;
2474 break;
2475 }
2476
2477 fn (hostaddrs);
2478 gomp_free_thread (thr);
2479 *thr = old_thr;
2480 }
2481
2482 /* Calculate alignment and size requirements of a private copy of data shared
2483 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2484
2485 static inline void
2486 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2487 unsigned short *kinds, size_t *tgt_align,
2488 size_t *tgt_size)
2489 {
2490 size_t i;
2491 for (i = 0; i < mapnum; i++)
2492 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2493 {
2494 size_t align = (size_t) 1 << (kinds[i] >> 8);
2495 if (*tgt_align < align)
2496 *tgt_align = align;
2497 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2498 *tgt_size += sizes[i];
2499 }
2500 }
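/* As a worked example of the rounding in the function above: with
   *tgt_size == 13 and a required alignment of 8, (13 + 8 - 1) & ~(8 - 1)
   == 16, so the next firstprivate copy is placed at an 8-byte-aligned
   offset before its own size is added.  */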
2501
2502 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2503
2504 static inline void
2505 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2506 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2507 size_t tgt_size)
2508 {
2509 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2510 if (al)
2511 tgt += tgt_align - al;
2512 tgt_size = 0;
2513 size_t i;
2514 for (i = 0; i < mapnum; i++)
2515 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2516 {
2517 size_t align = (size_t) 1 << (kinds[i] >> 8);
2518 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2519 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2520 hostaddrs[i] = tgt + tgt_size;
2521 tgt_size = tgt_size + sizes[i];
2522 }
2523 }
2524
2525 /* Helper function of GOMP_target{,_ext} routines. */
2526
2527 static void *
2528 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2529 void (*host_fn) (void *))
2530 {
2531 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2532 return (void *) host_fn;
2533 else
2534 {
2535 gomp_mutex_lock (&devicep->lock);
2536 if (devicep->state == GOMP_DEVICE_FINALIZED)
2537 {
2538 gomp_mutex_unlock (&devicep->lock);
2539 return NULL;
2540 }
2541
2542 struct splay_tree_key_s k;
2543 k.host_start = (uintptr_t) host_fn;
2544 k.host_end = k.host_start + 1;
2545 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2546 gomp_mutex_unlock (&devicep->lock);
2547 if (tgt_fn == NULL)
2548 return NULL;
2549
2550 return (void *) tgt_fn->tgt_offset;
2551 }
2552 }
2553
2554 /* Called when encountering a target directive. If DEVICE
2555 is GOMP_DEVICE_ICV, it means use the default-device-var ICV. If it is
2556 GOMP_DEVICE_HOST_FALLBACK (or any value
2557 larger than the last available hw device), use the host fallback.
2558 FN is the address of the host code, UNUSED is part of the current ABI, but
2559 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2560 with MAPNUM entries, holding addresses of the host objects,
2561 sizes of the host objects (resp. for pointer kinds the pointer bias,
2562 with an assumed sizeof (void *) size) and kinds. */
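/* As an illustrative sketch only (not necessarily what any given compiler
   version emits verbatim), a directive such as

     #pragma omp target map(tofrom: x)

   might be lowered to a call along the lines of

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (x) };
     unsigned char kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target (device, fn, NULL, 1, hostaddrs, sizes, kinds);

   where fn is the outlined host version of the target region body.  */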
2563
2564 void
2565 GOMP_target (int device, void (*fn) (void *), const void *unused,
2566 size_t mapnum, void **hostaddrs, size_t *sizes,
2567 unsigned char *kinds)
2568 {
2569 struct gomp_device_descr *devicep = resolve_device (device);
2570
2571 void *fn_addr;
2572 if (devicep == NULL
2573 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2574 /* All shared memory devices should use the GOMP_target_ext function. */
2575 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2576 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2577 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2578
2579 htab_t refcount_set = htab_create (mapnum);
2580 struct target_mem_desc *tgt_vars
2581 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2582 &refcount_set, GOMP_MAP_VARS_TARGET);
2583 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2584 NULL);
2585 htab_clear (refcount_set);
2586 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2587 htab_free (refcount_set);
2588 }
2589
2590 static inline unsigned int
2591 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2592 {
2593 /* If we cannot run asynchronously, simply ignore nowait. */
2594 if (devicep != NULL && devicep->async_run_func == NULL)
2595 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2596
2597 return flags;
2598 }
2599
2600 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2601 and several arguments have been added:
2602 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2603 DEPEND is an array of dependencies, see GOMP_task for details.
2604
2605 ARGS is a pointer to an array consisting of a variable number of both
2606 device-independent and device-specific arguments, each of which can take
2607 one or two elements, where the first element specifies for which device it
2608 is intended, the type and optionally also the value. If the value is not
2609 present in the first element, the whole second element is the actual
2610 value. The last element of the array is a single NULL. Among the
2611 device-independent ones are, for example, NUM_TEAMS and THREAD_LIMIT.
2612
2613 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2614 that value, or 1 if the teams construct is not present, or 0 if the
2615 teams construct does not have a num_teams clause and so the choice is
2616 implementation defined, and -1 if it can't be determined on the host
2617 what value GOMP_teams will have on the device.
2618 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2619 body with that value, or 0 if the teams construct does not have a
2620 thread_limit clause or the teams construct is not present, or -1 if it
2621 can't be determined on the host what value GOMP_teams will have. */
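/* As an illustrative sketch of the ARGS encoding, using the
   GOMP_TARGET_ARG_* constants from gomp-constants.h: a thread_limit of 4
   that applies to all devices and fits into the shifted value field could
   be passed as a single element,

     void *args[2]
       = { (void *) (uintptr_t) (GOMP_TARGET_ARG_THREAD_LIMIT
                                 | GOMP_TARGET_ARG_DEVICE_ALL
                                 | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
           NULL };

   whereas a value too large for the shifted field would instead set
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and carry the value in a second
   element.  */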
2622
2623 void
2624 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2625 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2626 unsigned int flags, void **depend, void **args)
2627 {
2628 struct gomp_device_descr *devicep = resolve_device (device);
2629 size_t tgt_align = 0, tgt_size = 0;
2630 bool fpc_done = false;
2631
2632 flags = clear_unsupported_flags (devicep, flags);
2633
2634 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2635 {
2636 struct gomp_thread *thr = gomp_thread ();
2637 /* Create a team if we don't have any around, as nowait
2638 target tasks make sense to run asynchronously even when
2639 outside of any parallel. */
2640 if (__builtin_expect (thr->ts.team == NULL, 0))
2641 {
2642 struct gomp_team *team = gomp_new_team (1);
2643 struct gomp_task *task = thr->task;
2644 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2645 team->prev_ts = thr->ts;
2646 thr->ts.team = team;
2647 thr->ts.team_id = 0;
2648 thr->ts.work_share = &team->work_shares[0];
2649 thr->ts.last_work_share = NULL;
2650 #ifdef HAVE_SYNC_BUILTINS
2651 thr->ts.single_count = 0;
2652 #endif
2653 thr->ts.static_trip = 0;
2654 thr->task = &team->implicit_task[0];
2655 gomp_init_task (thr->task, NULL, icv);
2656 if (task)
2657 {
2658 thr->task = task;
2659 gomp_end_task ();
2660 free (task);
2661 thr->task = &team->implicit_task[0];
2662 }
2663 else
2664 pthread_setspecific (gomp_thread_destructor, thr);
2665 }
2666 if (thr->ts.team
2667 && !thr->task->final_task)
2668 {
2669 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2670 sizes, kinds, flags, depend, args,
2671 GOMP_TARGET_TASK_BEFORE_MAP);
2672 return;
2673 }
2674 }
2675
2676 /* If there are depend clauses, but nowait is not present
2677 (or we are in a final task), block the parent task until the
2678 dependencies are resolved and then just continue with the rest
2679 of the function as if it is a merged task. */
2680 if (depend != NULL)
2681 {
2682 struct gomp_thread *thr = gomp_thread ();
2683 if (thr->task && thr->task->depend_hash)
2684 {
2685 /* If we might need to wait, copy firstprivate now. */
2686 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2687 &tgt_align, &tgt_size);
2688 if (tgt_align)
2689 {
2690 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2691 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2692 tgt_align, tgt_size);
2693 }
2694 fpc_done = true;
2695 gomp_task_maybe_wait_for_dependencies (depend);
2696 }
2697 }
2698
2699 void *fn_addr;
2700 if (devicep == NULL
2701 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2702 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2703 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2704 {
2705 if (!fpc_done)
2706 {
2707 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2708 &tgt_align, &tgt_size);
2709 if (tgt_align)
2710 {
2711 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2712 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2713 tgt_align, tgt_size);
2714 }
2715 }
2716 gomp_target_fallback (fn, hostaddrs, devicep, args);
2717 return;
2718 }
2719
2720 struct target_mem_desc *tgt_vars;
2721 htab_t refcount_set = NULL;
2722
2723 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2724 {
2725 if (!fpc_done)
2726 {
2727 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2728 &tgt_align, &tgt_size);
2729 if (tgt_align)
2730 {
2731 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2732 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2733 tgt_align, tgt_size);
2734 }
2735 }
2736 tgt_vars = NULL;
2737 }
2738 else
2739 {
2740 refcount_set = htab_create (mapnum);
2741 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2742 true, &refcount_set, GOMP_MAP_VARS_TARGET);
2743 }
2744 devicep->run_func (devicep->target_id, fn_addr,
2745 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2746 args);
2747 if (tgt_vars)
2748 {
2749 htab_clear (refcount_set);
2750 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2751 }
2752 if (refcount_set)
2753 htab_free (refcount_set);
2754 }
2755
2756 /* Host fallback for GOMP_target_data{,_ext} routines. */
2757
2758 static void
2759 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2760 {
2761 struct gomp_task_icv *icv = gomp_icv (false);
2762
2763 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2764 && devicep != NULL)
2765 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2766 "be used for offloading");
2767
2768 if (icv->target_data)
2769 {
2770 /* Even when doing a host fallback, if there are any active
2771 #pragma omp target data constructs, we need to remember the
2772 new #pragma omp target data, otherwise GOMP_target_end_data
2773 would get out of sync. */
2774 struct target_mem_desc *tgt
2775 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2776 NULL, GOMP_MAP_VARS_DATA);
2777 tgt->prev = icv->target_data;
2778 icv->target_data = tgt;
2779 }
2780 }
2781
2782 void
2783 GOMP_target_data (int device, const void *unused, size_t mapnum,
2784 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2785 {
2786 struct gomp_device_descr *devicep = resolve_device (device);
2787
2788 if (devicep == NULL
2789 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2790 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2791 return gomp_target_data_fallback (devicep);
2792
2793 struct target_mem_desc *tgt
2794 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2795 NULL, GOMP_MAP_VARS_DATA);
2796 struct gomp_task_icv *icv = gomp_icv (true);
2797 tgt->prev = icv->target_data;
2798 icv->target_data = tgt;
2799 }
2800
2801 void
2802 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2803 size_t *sizes, unsigned short *kinds)
2804 {
2805 struct gomp_device_descr *devicep = resolve_device (device);
2806
2807 if (devicep == NULL
2808 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2809 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2810 return gomp_target_data_fallback (devicep);
2811
2812 struct target_mem_desc *tgt
2813 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2814 NULL, GOMP_MAP_VARS_DATA);
2815 struct gomp_task_icv *icv = gomp_icv (true);
2816 tgt->prev = icv->target_data;
2817 icv->target_data = tgt;
2818 }
2819
2820 void
2821 GOMP_target_end_data (void)
2822 {
2823 struct gomp_task_icv *icv = gomp_icv (false);
2824 if (icv->target_data)
2825 {
2826 struct target_mem_desc *tgt = icv->target_data;
2827 icv->target_data = tgt->prev;
2828 gomp_unmap_vars (tgt, true, NULL);
2829 }
2830 }
2831
2832 void
2833 GOMP_target_update (int device, const void *unused, size_t mapnum,
2834 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2835 {
2836 struct gomp_device_descr *devicep = resolve_device (device);
2837
2838 if (devicep == NULL
2839 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2840 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2841 return;
2842
2843 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2844 }
2845
2846 void
2847 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2848 size_t *sizes, unsigned short *kinds,
2849 unsigned int flags, void **depend)
2850 {
2851 struct gomp_device_descr *devicep = resolve_device (device);
2852
2853 /* If there are depend clauses, but nowait is not present,
2854 block the parent task until the dependencies are resolved
2855 and then just continue with the rest of the function as if it
2856 is a merged task. Until we are able to schedule tasks during
2857 variable mapping or unmapping, ignore nowait if depend clauses
2858 are not present. */
2859 if (depend != NULL)
2860 {
2861 struct gomp_thread *thr = gomp_thread ();
2862 if (thr->task && thr->task->depend_hash)
2863 {
2864 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2865 && thr->ts.team
2866 && !thr->task->final_task)
2867 {
2868 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2869 mapnum, hostaddrs, sizes, kinds,
2870 flags | GOMP_TARGET_FLAG_UPDATE,
2871 depend, NULL, GOMP_TARGET_TASK_DATA))
2872 return;
2873 }
2874 else
2875 {
2876 struct gomp_team *team = thr->ts.team;
2877 /* If parallel or taskgroup has been cancelled, don't start new
2878 tasks. */
2879 if (__builtin_expect (gomp_cancel_var, 0) && team)
2880 {
2881 if (gomp_team_barrier_cancelled (&team->barrier))
2882 return;
2883 if (thr->task->taskgroup)
2884 {
2885 if (thr->task->taskgroup->cancelled)
2886 return;
2887 if (thr->task->taskgroup->workshare
2888 && thr->task->taskgroup->prev
2889 && thr->task->taskgroup->prev->cancelled)
2890 return;
2891 }
2892 }
2893
2894 gomp_task_maybe_wait_for_dependencies (depend);
2895 }
2896 }
2897 }
2898
2899 if (devicep == NULL
2900 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2901 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2902 return;
2903
2904 struct gomp_thread *thr = gomp_thread ();
2905 struct gomp_team *team = thr->ts.team;
2906 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2907 if (__builtin_expect (gomp_cancel_var, 0) && team)
2908 {
2909 if (gomp_team_barrier_cancelled (&team->barrier))
2910 return;
2911 if (thr->task->taskgroup)
2912 {
2913 if (thr->task->taskgroup->cancelled)
2914 return;
2915 if (thr->task->taskgroup->workshare
2916 && thr->task->taskgroup->prev
2917 && thr->task->taskgroup->prev->cancelled)
2918 return;
2919 }
2920 }
2921
2922 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2923 }
2924
2925 static void
2926 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2927 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2928 htab_t *refcount_set)
2929 {
2930 const int typemask = 0xff;
2931 size_t i;
2932 gomp_mutex_lock (&devicep->lock);
2933 if (devicep->state == GOMP_DEVICE_FINALIZED)
2934 {
2935 gomp_mutex_unlock (&devicep->lock);
2936 return;
2937 }
2938
2939 for (i = 0; i < mapnum; i++)
2940 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2941 {
2942 struct splay_tree_key_s cur_node;
2943 cur_node.host_start = (uintptr_t) hostaddrs[i];
2944 cur_node.host_end = cur_node.host_start + sizeof (void *);
2945 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2946
2947 if (n)
2948 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2949 false, NULL);
2950 }
2951
2952 int nrmvars = 0;
2953 splay_tree_key remove_vars[mapnum];
2954
2955 for (i = 0; i < mapnum; i++)
2956 {
2957 struct splay_tree_key_s cur_node;
2958 unsigned char kind = kinds[i] & typemask;
2959 switch (kind)
2960 {
2961 case GOMP_MAP_FROM:
2962 case GOMP_MAP_ALWAYS_FROM:
2963 case GOMP_MAP_DELETE:
2964 case GOMP_MAP_RELEASE:
2965 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2966 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2967 cur_node.host_start = (uintptr_t) hostaddrs[i];
2968 cur_node.host_end = cur_node.host_start + sizes[i];
2969 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2970 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2971 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2972 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2973 if (!k)
2974 continue;
2975
2976 bool delete_p = (kind == GOMP_MAP_DELETE
2977 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2978 bool do_copy, do_remove;
2979 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2980 &do_remove);
2981
2982 if ((kind == GOMP_MAP_FROM && do_copy)
2983 || kind == GOMP_MAP_ALWAYS_FROM)
2984 {
2985 if (k->aux && k->aux->attach_count)
2986 {
2987 /* We have to be careful not to overwrite still attached
2988 pointers during the copyback to host. */
2989 uintptr_t addr = k->host_start;
2990 while (addr < k->host_end)
2991 {
2992 size_t i = (addr - k->host_start) / sizeof (void *);
2993 if (k->aux->attach_count[i] == 0)
2994 gomp_copy_dev2host (devicep, NULL, (void *) addr,
2995 (void *) (k->tgt->tgt_start
2996 + k->tgt_offset
2997 + addr - k->host_start),
2998 sizeof (void *));
2999 addr += sizeof (void *);
3000 }
3001 }
3002 else
3003 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3004 (void *) (k->tgt->tgt_start + k->tgt_offset
3005 + cur_node.host_start
3006 - k->host_start),
3007 cur_node.host_end - cur_node.host_start);
3008 }
3009
3010 /* Structure element lists are removed altogether at once, which
3011 may cause immediate deallocation of the target_mem_desc, causing
3012 errors if we still have following element siblings to copy back.
3013 While we're at it, it also seems more disciplined to simply
3014 queue all removals together for processing below.
3015
3016 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3017 not have this problem, since it maintains an additional
3018 tgt->refcount = 1 reference to the target_mem_desc to start with.
3019 */
3020 if (do_remove)
3021 remove_vars[nrmvars++] = k;
3022 break;
3023
3024 case GOMP_MAP_DETACH:
3025 break;
3026 default:
3027 gomp_mutex_unlock (&devicep->lock);
3028 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3029 kind);
3030 }
3031 }
3032
3033 for (int i = 0; i < nrmvars; i++)
3034 gomp_remove_var (devicep, remove_vars[i]);
3035
3036 gomp_mutex_unlock (&devicep->lock);
3037 }
3038
3039 void
3040 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3041 size_t *sizes, unsigned short *kinds,
3042 unsigned int flags, void **depend)
3043 {
3044 struct gomp_device_descr *devicep = resolve_device (device);
3045
3046 /* If there are depend clauses, but nowait is not present,
3047 block the parent task until the dependencies are resolved
3048 and then just continue with the rest of the function as if it
3049 is a merged task. Until we are able to schedule tasks during
3050 variable mapping or unmapping, ignore nowait if depend clauses
3051 are not present. */
3052 if (depend != NULL)
3053 {
3054 struct gomp_thread *thr = gomp_thread ();
3055 if (thr->task && thr->task->depend_hash)
3056 {
3057 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3058 && thr->ts.team
3059 && !thr->task->final_task)
3060 {
3061 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3062 mapnum, hostaddrs, sizes, kinds,
3063 flags, depend, NULL,
3064 GOMP_TARGET_TASK_DATA))
3065 return;
3066 }
3067 else
3068 {
3069 struct gomp_team *team = thr->ts.team;
3070 /* If parallel or taskgroup has been cancelled, don't start new
3071 tasks. */
3072 if (__builtin_expect (gomp_cancel_var, 0) && team)
3073 {
3074 if (gomp_team_barrier_cancelled (&team->barrier))
3075 return;
3076 if (thr->task->taskgroup)
3077 {
3078 if (thr->task->taskgroup->cancelled)
3079 return;
3080 if (thr->task->taskgroup->workshare
3081 && thr->task->taskgroup->prev
3082 && thr->task->taskgroup->prev->cancelled)
3083 return;
3084 }
3085 }
3086
3087 gomp_task_maybe_wait_for_dependencies (depend);
3088 }
3089 }
3090 }
3091
3092 if (devicep == NULL
3093 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3094 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3095 return;
3096
3097 struct gomp_thread *thr = gomp_thread ();
3098 struct gomp_team *team = thr->ts.team;
3099 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3100 if (__builtin_expect (gomp_cancel_var, 0) && team)
3101 {
3102 if (gomp_team_barrier_cancelled (&team->barrier))
3103 return;
3104 if (thr->task->taskgroup)
3105 {
3106 if (thr->task->taskgroup->cancelled)
3107 return;
3108 if (thr->task->taskgroup->workshare
3109 && thr->task->taskgroup->prev
3110 && thr->task->taskgroup->prev->cancelled)
3111 return;
3112 }
3113 }
3114
3115 htab_t refcount_set = htab_create (mapnum);
3116
3117 /* The variables are mapped separately such that they can be released
3118 independently. */
3119 size_t i, j;
3120 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3121 for (i = 0; i < mapnum; i++)
3122 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3123 {
3124 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3125 &kinds[i], true, &refcount_set,
3126 GOMP_MAP_VARS_ENTER_DATA);
3127 i += sizes[i];
3128 }
3129 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3130 {
3131 for (j = i + 1; j < mapnum; j++)
3132 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3133 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3134 break;
3135 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3136 &kinds[i], true, &refcount_set,
3137 GOMP_MAP_VARS_ENTER_DATA);
3138 i += j - i - 1;
3139 }
3140 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3141 {
3142 /* An attach operation must be processed together with the mapped
3143 base-pointer list item. */
3144 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3145 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3146 i += 1;
3147 }
3148 else
3149 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3150 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3151 else
3152 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3153 htab_free (refcount_set);
3154 }
3155
3156 bool
3157 gomp_target_task_fn (void *data)
3158 {
3159 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3160 struct gomp_device_descr *devicep = ttask->devicep;
3161
3162 if (ttask->fn != NULL)
3163 {
3164 void *fn_addr;
3165 if (devicep == NULL
3166 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3167 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3168 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3169 {
3170 ttask->state = GOMP_TARGET_TASK_FALLBACK;
3171 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3172 ttask->args);
3173 return false;
3174 }
3175
3176 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3177 {
3178 if (ttask->tgt)
3179 gomp_unmap_vars (ttask->tgt, true, NULL);
3180 return false;
3181 }
3182
3183 void *actual_arguments;
3184 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3185 {
3186 ttask->tgt = NULL;
3187 actual_arguments = ttask->hostaddrs;
3188 }
3189 else
3190 {
3191 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3192 NULL, ttask->sizes, ttask->kinds, true,
3193 NULL, GOMP_MAP_VARS_TARGET);
3194 actual_arguments = (void *) ttask->tgt->tgt_start;
3195 }
3196 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3197
3198 assert (devicep->async_run_func);
3199 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3200 ttask->args, (void *) ttask);
3201 return true;
3202 }
3203 else if (devicep == NULL
3204 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3205 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3206 return false;
3207
3208 size_t i;
3209 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3210 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3211 ttask->kinds, true);
3212 else
3213 {
3214 htab_t refcount_set = htab_create (ttask->mapnum);
3215 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3216 for (i = 0; i < ttask->mapnum; i++)
3217 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3218 {
3219 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3220 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3221 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3222 i += ttask->sizes[i];
3223 }
3224 else
3225 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3226 &ttask->kinds[i], true, &refcount_set,
3227 GOMP_MAP_VARS_ENTER_DATA);
3228 else
3229 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3230 ttask->kinds, &refcount_set);
3231 htab_free (refcount_set);
3232 }
3233 return false;
3234 }
3235
3236 void
3237 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3238 {
3239 if (thread_limit)
3240 {
3241 struct gomp_task_icv *icv = gomp_icv (true);
3242 icv->thread_limit_var
3243 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3244 }
3245 (void) num_teams;
3246 }
3247
3248 bool
3249 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3250 unsigned int thread_limit, bool first)
3251 {
3252 struct gomp_thread *thr = gomp_thread ();
3253 if (first)
3254 {
3255 if (thread_limit)
3256 {
3257 struct gomp_task_icv *icv = gomp_icv (true);
3258 icv->thread_limit_var
3259 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3260 }
3261 (void) num_teams_high;
3262 if (num_teams_low == 0)
3263 num_teams_low = 1;
3264 thr->num_teams = num_teams_low - 1;
3265 thr->team_num = 0;
3266 }
3267 else if (thr->team_num == thr->num_teams)
3268 return false;
3269 else
3270 ++thr->team_num;
3271 return true;
3272 }
3273
3274 void *
3275 omp_target_alloc (size_t size, int device_num)
3276 {
3277 if (device_num == gomp_get_num_devices ())
3278 return malloc (size);
3279
3280 if (device_num < 0)
3281 return NULL;
3282
3283 struct gomp_device_descr *devicep = resolve_device (device_num);
3284 if (devicep == NULL)
3285 return NULL;
3286
3287 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3288 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3289 return malloc (size);
3290
3291 gomp_mutex_lock (&devicep->lock);
3292 void *ret = devicep->alloc_func (devicep->target_id, size);
3293 gomp_mutex_unlock (&devicep->lock);
3294 return ret;
3295 }
3296
3297 void
3298 omp_target_free (void *device_ptr, int device_num)
3299 {
3300 if (device_ptr == NULL)
3301 return;
3302
3303 if (device_num == gomp_get_num_devices ())
3304 {
3305 free (device_ptr);
3306 return;
3307 }
3308
3309 if (device_num < 0)
3310 return;
3311
3312 struct gomp_device_descr *devicep = resolve_device (device_num);
3313 if (devicep == NULL)
3314 return;
3315
3316 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3317 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3318 {
3319 free (device_ptr);
3320 return;
3321 }
3322
3323 gomp_mutex_lock (&devicep->lock);
3324 gomp_free_device_memory (devicep, device_ptr);
3325 gomp_mutex_unlock (&devicep->lock);
3326 }
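/* Illustrative usage of the two routines above from user code (a sketch
   only; use_on_device is a hypothetical helper):

     int dev = omp_get_default_device ();
     double *p = (double *) omp_target_alloc (n * sizeof (double), dev);
     if (p == NULL)
       abort ();
     use_on_device (p, n, dev);
     omp_target_free (p, dev);  */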
3327
3328 int
3329 omp_target_is_present (const void *ptr, int device_num)
3330 {
3331 if (ptr == NULL)
3332 return 1;
3333
3334 if (device_num == gomp_get_num_devices ())
3335 return 1;
3336
3337 if (device_num < 0)
3338 return 0;
3339
3340 struct gomp_device_descr *devicep = resolve_device (device_num);
3341 if (devicep == NULL)
3342 return 0;
3343
3344 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3345 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3346 return 1;
3347
3348 gomp_mutex_lock (&devicep->lock);
3349 struct splay_tree_s *mem_map = &devicep->mem_map;
3350 struct splay_tree_key_s cur_node;
3351
3352 cur_node.host_start = (uintptr_t) ptr;
3353 cur_node.host_end = cur_node.host_start;
3354 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3355 int ret = n != NULL;
3356 gomp_mutex_unlock (&devicep->lock);
3357 return ret;
3358 }
3359
3360 static int
3361 omp_target_memcpy_check (int dst_device_num, int src_device_num,
3362 struct gomp_device_descr **dst_devicep,
3363 struct gomp_device_descr **src_devicep)
3364 {
3365 if (dst_device_num != gomp_get_num_devices ())
3366 {
3367 if (dst_device_num < 0)
3368 return EINVAL;
3369
3370 *dst_devicep = resolve_device (dst_device_num);
3371 if (*dst_devicep == NULL)
3372 return EINVAL;
3373
3374 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3375 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3376 *dst_devicep = NULL;
3377 }
3378
3379 if (src_device_num != num_devices_openmp)
3380 {
3381 if (src_device_num < 0)
3382 return EINVAL;
3383
3384 *src_devicep = resolve_device (src_device_num);
3385 if (*src_devicep == NULL)
3386 return EINVAL;
3387
3388 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3389 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3390 *src_devicep = NULL;
3391 }
3392
3393 return 0;
3394 }
3395
3396 static int
3397 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
3398 size_t dst_offset, size_t src_offset,
3399 struct gomp_device_descr *dst_devicep,
3400 struct gomp_device_descr *src_devicep)
3401 {
3402 bool ret;
3403 if (src_devicep == NULL && dst_devicep == NULL)
3404 {
3405 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3406 return 0;
3407 }
3408 if (src_devicep == NULL)
3409 {
3410 gomp_mutex_lock (&dst_devicep->lock);
3411 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3412 (char *) dst + dst_offset,
3413 (char *) src + src_offset, length);
3414 gomp_mutex_unlock (&dst_devicep->lock);
3415 return (ret ? 0 : EINVAL);
3416 }
3417 if (dst_devicep == NULL)
3418 {
3419 gomp_mutex_lock (&src_devicep->lock);
3420 ret = src_devicep->dev2host_func (src_devicep->target_id,
3421 (char *) dst + dst_offset,
3422 (char *) src + src_offset, length);
3423 gomp_mutex_unlock (&src_devicep->lock);
3424 return (ret ? 0 : EINVAL);
3425 }
3426 if (src_devicep == dst_devicep)
3427 {
3428 gomp_mutex_lock (&src_devicep->lock);
3429 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3430 (char *) dst + dst_offset,
3431 (char *) src + src_offset, length);
3432 gomp_mutex_unlock (&src_devicep->lock);
3433 return (ret ? 0 : EINVAL);
3434 }
3435 return EINVAL;
3436 }
3437
3438 int
3439 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
3440 size_t src_offset, int dst_device_num, int src_device_num)
3441 {
3442 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3443 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
3444 &dst_devicep, &src_devicep);
3445
3446 if (ret)
3447 return ret;
3448
3449 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
3450 dst_devicep, src_devicep);
3451
3452 return ret;
3453 }
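/* Illustrative host-to-device copy using omp_target_memcpy (a sketch;
   dev_ptr is assumed to come from omp_target_alloc on device dev):

     int host_buf[64];
     if (omp_target_memcpy (dev_ptr, host_buf, sizeof (host_buf),
                            0, 0, dev, omp_get_initial_device ()))
       abort ();

   Here omp_get_initial_device () names the host, which corresponds to the
   device_num == gomp_get_num_devices () case handled above.  */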
3454
3455 typedef struct
3456 {
3457 void *dst;
3458 const void *src;
3459 size_t length;
3460 size_t dst_offset;
3461 size_t src_offset;
3462 struct gomp_device_descr *dst_devicep;
3463 struct gomp_device_descr *src_devicep;
3464 } omp_target_memcpy_data;
3465
3466 static void
3467 omp_target_memcpy_async_helper (void *args)
3468 {
3469 omp_target_memcpy_data *a = args;
3470 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
3471 a->src_offset, a->dst_devicep, a->src_devicep))
3472 gomp_fatal ("omp_target_memcpy failed");
3473 }
3474
3475 int
3476 omp_target_memcpy_async (void *dst, const void *src, size_t length,
3477 size_t dst_offset, size_t src_offset,
3478 int dst_device_num, int src_device_num,
3479 int depobj_count, omp_depend_t *depobj_list)
3480 {
3481 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3482 unsigned int flags = 0;
3483 void *depend[depobj_count + 5];
3484 int i;
3485 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
3486 &dst_devicep, &src_devicep);
3487
3488 omp_target_memcpy_data s = {
3489 .dst = dst,
3490 .src = src,
3491 .length = length,
3492 .dst_offset = dst_offset,
3493 .src_offset = src_offset,
3494 .dst_devicep = dst_devicep,
3495 .src_devicep = src_devicep
3496 };
3497
3498 if (check)
3499 return check;
3500
3501 if (depobj_count > 0 && depobj_list != NULL)
3502 {
3503 flags |= GOMP_TASK_FLAG_DEPEND;
3504 depend[0] = 0;
3505 depend[1] = (void *) (uintptr_t) depobj_count;
3506 depend[2] = depend[3] = depend[4] = 0;
3507 for (i = 0; i < depobj_count; ++i)
3508 depend[i + 5] = &depobj_list[i];
3509 }
3510
3511 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
3512 __alignof__ (s), true, flags, depend, 0, NULL);
3513
3514 return 0;
3515 }
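/* Illustrative use of the asynchronous variant above (a sketch): with no
   dependences the copy is performed by a deferred task, so completion has
   to be awaited explicitly, e.g.

     omp_target_memcpy_async (dev_ptr, host_buf, nbytes, 0, 0,
                              dev, omp_get_initial_device (), 0, NULL);
     #pragma omp taskwait
   */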
3516
3517 static int
3518 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3519 int num_dims, const size_t *volume,
3520 const size_t *dst_offsets,
3521 const size_t *src_offsets,
3522 const size_t *dst_dimensions,
3523 const size_t *src_dimensions,
3524 struct gomp_device_descr *dst_devicep,
3525 struct gomp_device_descr *src_devicep)
3526 {
3527 size_t dst_slice = element_size;
3528 size_t src_slice = element_size;
3529 size_t j, dst_off, src_off, length;
3530 int i, ret;
3531
3532 if (num_dims == 1)
3533 {
3534 if (__builtin_mul_overflow (element_size, volume[0], &length)
3535 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3536 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3537 return EINVAL;
3538 if (dst_devicep == NULL && src_devicep == NULL)
3539 {
3540 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3541 length);
3542 ret = 1;
3543 }
3544 else if (src_devicep == NULL)
3545 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3546 (char *) dst + dst_off,
3547 (const char *) src + src_off,
3548 length);
3549 else if (dst_devicep == NULL)
3550 ret = src_devicep->dev2host_func (src_devicep->target_id,
3551 (char *) dst + dst_off,
3552 (const char *) src + src_off,
3553 length);
3554 else if (src_devicep == dst_devicep)
3555 ret = src_devicep->dev2dev_func (src_devicep->target_id,
3556 (char *) dst + dst_off,
3557 (const char *) src + src_off,
3558 length);
3559 else
3560 ret = 0;
3561 return ret ? 0 : EINVAL;
3562 }
3563
3564 /* FIXME: it would be nice to have some plugin function to handle
3565 num_dims == 2 and num_dims == 3 more efficiently.  Larger values can
3566 be handled by the generic recursion below, and for host-to-host copies
3567 that recursion should be used even for num_dims >= 2. */
3568
3569 for (i = 1; i < num_dims; i++)
3570 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3571 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3572 return EINVAL;
3573 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3574 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3575 return EINVAL;
3576 for (j = 0; j < volume[0]; j++)
3577 {
3578 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3579 (const char *) src + src_off,
3580 element_size, num_dims - 1,
3581 volume + 1, dst_offsets + 1,
3582 src_offsets + 1, dst_dimensions + 1,
3583 src_dimensions + 1, dst_devicep,
3584 src_devicep);
3585 if (ret)
3586 return ret;
3587 dst_off += dst_slice;
3588 src_off += src_slice;
3589 }
3590 return 0;
3591 }
3592
3593 static int
3594 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
3595 int src_device_num,
3596 struct gomp_device_descr **dst_devicep,
3597 struct gomp_device_descr **src_devicep)
3598 {
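  /* When both pointers are NULL, omp_target_memcpy_rect{,_async} must report
     the maximum number of dimensions supported; INT_MAX is returned because
     the generic recursion in omp_target_memcpy_rect_worker handles any
     number of dimensions.  */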
3599 if (!dst && !src)
3600 return INT_MAX;
3601
3602 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
3603 dst_devicep, src_devicep);
3604 if (ret)
3605 return ret;
3606
3607 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
3608 return EINVAL;
3609
3610 return 0;
3611 }
3612
3613 static int
3614 omp_target_memcpy_rect_copy (void *dst, const void *src,
3615 size_t element_size, int num_dims,
3616 const size_t *volume, const size_t *dst_offsets,
3617 const size_t *src_offsets,
3618 const size_t *dst_dimensions,
3619 const size_t *src_dimensions,
3620 struct gomp_device_descr *dst_devicep,
3621 struct gomp_device_descr *src_devicep)
3622 {
3623 if (src_devicep)
3624 gomp_mutex_lock (&src_devicep->lock);
3625 else if (dst_devicep)
3626 gomp_mutex_lock (&dst_devicep->lock);
3627 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3628 volume, dst_offsets, src_offsets,
3629 dst_dimensions, src_dimensions,
3630 dst_devicep, src_devicep);
3631 if (src_devicep)
3632 gomp_mutex_unlock (&src_devicep->lock);
3633 else if (dst_devicep)
3634 gomp_mutex_unlock (&dst_devicep->lock);
3635
3636 return ret;
3637 }
3638
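/* Copy a rectangular (multi-dimensional) sub-array between host and/or device
   memory.  An illustrative call, assuming hypothetical names `dst', `src' and
   `dev': copy a 2x3 block of doubles starting at row 1, column 2 of a 4x5
   host array into the top-left corner of a 3x4 array in device memory:

     size_t volume[2]      = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 2 };
     size_t dst_dims[2]    = { 3, 4 };
     size_t src_dims[2]    = { 4, 5 };
     omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                             dst_offsets, src_offsets, dst_dims, src_dims,
                             dev, omp_get_initial_device ());
*/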
3639 int
3640 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3641 int num_dims, const size_t *volume,
3642 const size_t *dst_offsets,
3643 const size_t *src_offsets,
3644 const size_t *dst_dimensions,
3645 const size_t *src_dimensions,
3646 int dst_device_num, int src_device_num)
3647 {
3648 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3649
3650 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
3651 src_device_num, &dst_devicep,
3652 &src_devicep);
3653
3654 if (check)
3655 return check;
3656
3657 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
3658 volume, dst_offsets, src_offsets,
3659 dst_dimensions, src_dimensions,
3660 dst_devicep, src_devicep);
3661
3662 return ret;
3663 }
3664
3665 typedef struct
3666 {
3667 void *dst;
3668 const void *src;
3669 size_t element_size;
3670 const size_t *volume;
3671 const size_t *dst_offsets;
3672 const size_t *src_offsets;
3673 const size_t *dst_dimensions;
3674 const size_t *src_dimensions;
3675 struct gomp_device_descr *dst_devicep;
3676 struct gomp_device_descr *src_devicep;
3677 int num_dims;
3678 } omp_target_memcpy_rect_data;
3679
3680 static void
3681 omp_target_memcpy_rect_async_helper (void *args)
3682 {
3683 omp_target_memcpy_rect_data *a = args;
3684 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
3685 a->num_dims, a->volume, a->dst_offsets,
3686 a->src_offsets, a->dst_dimensions,
3687 a->src_dimensions, a->dst_devicep,
3688 a->src_devicep);
3689 if (ret)
3690 gomp_fatal ("omp_target_memcpy_rect failed");
3691 }
3692
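/* Asynchronous version of omp_target_memcpy_rect: the copy is performed by a
   task created with GOMP_task, optionally dependent on the DEPOBJ_COUNT
   dependence objects in DEPOBJ_LIST, analogously to omp_target_memcpy_async
   above.  */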
3693 int
3694 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
3695 int num_dims, const size_t *volume,
3696 const size_t *dst_offsets,
3697 const size_t *src_offsets,
3698 const size_t *dst_dimensions,
3699 const size_t *src_dimensions,
3700 int dst_device_num, int src_device_num,
3701 int depobj_count, omp_depend_t *depobj_list)
3702 {
3703 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3704 unsigned flags = 0;
3705 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
3706 src_device_num, &dst_devicep,
3707 &src_devicep);
3708 void *depend[depobj_count + 5];
3709 int i;
3710
3711 omp_target_memcpy_rect_data s = {
3712 .dst = dst,
3713 .src = src,
3714 .element_size = element_size,
3715 .num_dims = num_dims,
3716 .volume = volume,
3717 .dst_offsets = dst_offsets,
3718 .src_offsets = src_offsets,
3719 .dst_dimensions = dst_dimensions,
3720 .src_dimensions = src_dimensions,
3721 .dst_devicep = dst_devicep,
3722 .src_devicep = src_devicep
3723 };
3724
3725 if (check)
3726 return check;
3727
3728 if (depobj_count > 0 && depobj_list != NULL)
3729 {
3730 flags |= GOMP_TASK_FLAG_DEPEND;
3731 depend[0] = 0;
3732 depend[1] = (void *) (uintptr_t) depobj_count;
3733 depend[2] = depend[3] = depend[4] = 0;
3734 for (i = 0; i < depobj_count; ++i)
3735 depend[i + 5] = &depobj_list[i];
3736 }
3737
3738 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
3739 __alignof__ (s), true, flags, depend, 0, NULL);
3740
3741 return 0;
3742 }
3743
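/* Associate SIZE bytes starting at HOST_PTR with the device address
   DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM, so that later lookups of
   the host range reuse that device storage instead of creating a new mapping.
   Returns 0 on success and EINVAL otherwise.  An illustrative sequence
   (the names `buf', `dev_mem' and `dev' are hypothetical):

     double buf[1024];
     void *dev_mem = omp_target_alloc (sizeof buf, dev);
     if (omp_target_associate_ptr (buf, dev_mem, sizeof buf, 0, dev) == 0)
       {
         ... use buf in target regions executing on device dev ...
         omp_target_disassociate_ptr (buf, dev);
       }
     omp_target_free (dev_mem, dev);
*/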
3744 int
3745 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3746 size_t size, size_t device_offset, int device_num)
3747 {
3748 if (device_num == gomp_get_num_devices ())
3749 return EINVAL;
3750
3751 if (device_num < 0)
3752 return EINVAL;
3753
3754 struct gomp_device_descr *devicep = resolve_device (device_num);
3755 if (devicep == NULL)
3756 return EINVAL;
3757
3758 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3759 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3760 return EINVAL;
3761
3762 gomp_mutex_lock (&devicep->lock);
3763
3764 struct splay_tree_s *mem_map = &devicep->mem_map;
3765 struct splay_tree_key_s cur_node;
3766 int ret = EINVAL;
3767
3768 cur_node.host_start = (uintptr_t) host_ptr;
3769 cur_node.host_end = cur_node.host_start + size;
3770 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3771 if (n)
3772 {
3773 if (n->tgt->tgt_start + n->tgt_offset
3774 == (uintptr_t) device_ptr + device_offset
3775 && n->host_start <= cur_node.host_start
3776 && n->host_end >= cur_node.host_end)
3777 ret = 0;
3778 }
3779 else
3780 {
3781 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3782 tgt->array = gomp_malloc (sizeof (*tgt->array));
3783 tgt->refcount = 1;
3784 tgt->tgt_start = 0;
3785 tgt->tgt_end = 0;
3786 tgt->to_free = NULL;
3787 tgt->prev = NULL;
3788 tgt->list_count = 0;
3789 tgt->device_descr = devicep;
3790 splay_tree_node array = tgt->array;
3791 splay_tree_key k = &array->key;
3792 k->host_start = cur_node.host_start;
3793 k->host_end = cur_node.host_end;
3794 k->tgt = tgt;
3795 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3796 k->refcount = REFCOUNT_INFINITY;
3797 k->dynamic_refcount = 0;
3798 k->aux = NULL;
3799 array->left = NULL;
3800 array->right = NULL;
3801 splay_tree_insert (&devicep->mem_map, array);
3802 ret = 0;
3803 }
3804 gomp_mutex_unlock (&devicep->lock);
3805 return ret;
3806 }
3807
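/* Remove an association previously created by omp_target_associate_ptr: PTR
   must be the host pointer of such an association on device DEVICE_NUM and
   the entry must not be in use by any other mapping.  Returns 0 on success
   and EINVAL otherwise.  */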
3808 int
3809 omp_target_disassociate_ptr (const void *ptr, int device_num)
3810 {
3811 if (device_num == gomp_get_num_devices ())
3812 return EINVAL;
3813
3814 if (device_num < 0)
3815 return EINVAL;
3816
3817 struct gomp_device_descr *devicep = resolve_device (device_num);
3818 if (devicep == NULL)
3819 return EINVAL;
3820
3821 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3822 return EINVAL;
3823
3824 gomp_mutex_lock (&devicep->lock);
3825
3826 struct splay_tree_s *mem_map = &devicep->mem_map;
3827 struct splay_tree_key_s cur_node;
3828 int ret = EINVAL;
3829
3830 cur_node.host_start = (uintptr_t) ptr;
3831 cur_node.host_end = cur_node.host_start;
3832 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3833 if (n
3834 && n->host_start == cur_node.host_start
3835 && n->refcount == REFCOUNT_INFINITY
3836 && n->tgt->tgt_start == 0
3837 && n->tgt->to_free == NULL
3838 && n->tgt->refcount == 1
3839 && n->tgt->list_count == 0)
3840 {
3841 splay_tree_remove (&devicep->mem_map, n);
3842 gomp_unmap_tgt (n->tgt);
3843 ret = 0;
3844 }
3845
3846 gomp_mutex_unlock (&devicep->lock);
3847 return ret;
3848 }
3849
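/* Return the device address that PTR is mapped to on device DEVICE_NUM, or
   NULL if it is not mapped there.  For the initial device and for devices
   that share memory with the host, PTR itself is returned.  */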
3850 void *
3851 omp_get_mapped_ptr (const void *ptr, int device_num)
3852 {
3853 if (device_num < 0 || device_num > gomp_get_num_devices ())
3854 return NULL;
3855
3856 if (device_num == omp_get_initial_device ())
3857 return (void *) ptr;
3858
3859 struct gomp_device_descr *devicep = resolve_device (device_num);
3860 if (devicep == NULL)
3861 return NULL;
3862
3863 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3864 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3865 return (void *) ptr;
3866
3867 gomp_mutex_lock (&devicep->lock);
3868
3869 struct splay_tree_s *mem_map = &devicep->mem_map;
3870 struct splay_tree_key_s cur_node;
3871 void *ret = NULL;
3872
3873 cur_node.host_start = (uintptr_t) ptr;
3874 cur_node.host_end = cur_node.host_start;
3875 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3876
3877 if (n)
3878 {
3879 uintptr_t offset = cur_node.host_start - n->host_start;
3880 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
3881 }
3882
3883 gomp_mutex_unlock (&devicep->lock);
3884
3885 return ret;
3886 }
3887
3888 int
3889 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
3890 {
3891 if (device_num < 0 || device_num > gomp_get_num_devices ())
3892 return false;
3893
3894 if (device_num == gomp_get_num_devices ())
3895 return true;
3896
3897 struct gomp_device_descr *devicep = resolve_device (device_num);
3898 if (devicep == NULL)
3899 return false;
3900
3901 /* TODO: Unified shared memory must be handled when available. */
3902
3903 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
3904 }
3905
3906 int
3907 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3908 {
3909 (void) kind;
3910 if (device_num == gomp_get_num_devices ())
3911 return gomp_pause_host ();
3912 if (device_num < 0 || device_num >= num_devices_openmp)
3913 return -1;
3914 /* Do nothing for target devices for now. */
3915 return 0;
3916 }
3917
3918 int
3919 omp_pause_resource_all (omp_pause_resource_t kind)
3920 {
3921 (void) kind;
3922 if (gomp_pause_host ())
3923 return -1;
3924 /* Do nothing for target devices for now. */
3925 return 0;
3926 }
3927
3928 ialias (omp_pause_resource)
3929 ialias (omp_pause_resource_all)
3930
3931 #ifdef PLUGIN_SUPPORT
3932
3933 /* This function tries to load a plugin for DEVICE.  The name of the plugin
3934 is passed in PLUGIN_NAME.
3935 The handles of the found functions are stored in the corresponding fields
3936 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3937
3938 static bool
3939 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3940 const char *plugin_name)
3941 {
3942 const char *err = NULL, *last_missing = NULL;
3943
3944 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3945 if (!plugin_handle)
3946 #if OFFLOAD_DEFAULTED
3947 return 0;
3948 #else
3949 goto dl_fail;
3950 #endif
3951
3952 /* Check whether all required functions are available in the plugin and
3953 store their handles.  None of the symbols can legitimately be NULL,
3954 so we don't need to check dlerror all the time. */
3955 #define DLSYM(f) \
3956 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3957 goto dl_fail
3958 /* Similar to DLSYM, but a missing function is not an error.  The
3959 expression is false if the lookup failed, true otherwise. */
3960 #define DLSYM_OPT(f, n) \
3961 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3962 || (last_missing = #n, 0))
3963
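/* E.g. DLSYM (alloc) stores the address of the plugin's GOMP_OFFLOAD_alloc
   entry point in device->alloc_func and jumps to dl_fail if the symbol is
   missing, whereas DLSYM_OPT merely records the missing name.  */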
3964 DLSYM (version);
3965 if (device->version_func () != GOMP_VERSION)
3966 {
3967 err = "plugin version mismatch";
3968 goto fail;
3969 }
3970
3971 DLSYM (get_name);
3972 DLSYM (get_caps);
3973 DLSYM (get_type);
3974 DLSYM (get_num_devices);
3975 DLSYM (init_device);
3976 DLSYM (fini_device);
3977 DLSYM (load_image);
3978 DLSYM (unload_image);
3979 DLSYM (alloc);
3980 DLSYM (free);
3981 DLSYM (dev2host);
3982 DLSYM (host2dev);
3983 device->capabilities = device->get_caps_func ();
3984 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3985 {
3986 DLSYM (run);
3987 DLSYM_OPT (async_run, async_run);
3988 DLSYM_OPT (can_run, can_run);
3989 DLSYM (dev2dev);
3990 }
3991 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3992 {
3993 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3994 || !DLSYM_OPT (openacc.create_thread_data,
3995 openacc_create_thread_data)
3996 || !DLSYM_OPT (openacc.destroy_thread_data,
3997 openacc_destroy_thread_data)
3998 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3999 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
4000 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
4001 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
4002 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
4003 || !DLSYM_OPT (openacc.async.queue_callback,
4004 openacc_async_queue_callback)
4005 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
4006 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
4007 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
4008 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
4009 {
4010 /* Require all the OpenACC handlers if we have
4011 GOMP_OFFLOAD_CAP_OPENACC_200. */
4012 err = "plugin missing OpenACC handler function";
4013 goto fail;
4014 }
4015
4016 unsigned cuda = 0;
4017 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
4018 openacc_cuda_get_current_device);
4019 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
4020 openacc_cuda_get_current_context);
4021 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
4022 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
4023 if (cuda && cuda != 4)
4024 {
4025 /* Make sure all the CUDA functions are there if any of them are. */
4026 err = "plugin missing OpenACC CUDA handler function";
4027 goto fail;
4028 }
4029 }
4030 #undef DLSYM
4031 #undef DLSYM_OPT
4032
4033 return 1;
4034
4035 dl_fail:
4036 err = dlerror ();
4037 fail:
4038 gomp_error ("while loading %s: %s", plugin_name, err);
4039 if (last_missing)
4040 gomp_error ("missing function was %s", last_missing);
4041 if (plugin_handle)
4042 dlclose (plugin_handle);
4043
4044 return 0;
4045 }
4046
4047 /* This function finalizes all initialized devices. */
4048
4049 static void
4050 gomp_target_fini (void)
4051 {
4052 int i;
4053 for (i = 0; i < num_devices; i++)
4054 {
4055 bool ret = true;
4056 struct gomp_device_descr *devicep = &devices[i];
4057 gomp_mutex_lock (&devicep->lock);
4058 if (devicep->state == GOMP_DEVICE_INITIALIZED)
4059 ret = gomp_fini_device (devicep);
4060 gomp_mutex_unlock (&devicep->lock);
4061 if (!ret)
4062 gomp_fatal ("device finalization failed");
4063 }
4064 }
4065
4066 /* This function initializes the runtime for offloading.
4067 It parses the list of offload plugins and tries to load them.
4068 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4069 will be set, and the array DEVICES initialized, containing descriptors for
4070 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
4071 followed by the others. */
4072
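/* For example, if OFFLOAD_PLUGINS is "nvptx,gcn", the loop below tries to
   dlopen "libgomp-plugin-nvptx" and "libgomp-plugin-gcn" with the configured
   shared-object suffix appended (".so.1" on typical ELF targets); these
   plugin names are purely illustrative.  */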
4073 static void
4074 gomp_target_init (void)
4075 {
4076 const char *prefix = "libgomp-plugin-";
4077 const char *suffix = SONAME_SUFFIX (1);
4078 const char *cur, *next;
4079 char *plugin_name;
4080 int i, new_num_devs;
4081 int num_devs = 0, num_devs_openmp;
4082 struct gomp_device_descr *devs = NULL;
4083
4084 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
4085 return;
4086
4087 cur = OFFLOAD_PLUGINS;
4088 if (*cur)
4089 do
4090 {
4091 struct gomp_device_descr current_device;
4092 size_t prefix_len, suffix_len, cur_len;
4093
4094 next = strchr (cur, ',');
4095
4096 prefix_len = strlen (prefix);
4097 cur_len = next ? next - cur : strlen (cur);
4098 suffix_len = strlen (suffix);
4099
4100 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
4101 if (!plugin_name)
4102 {
4103 num_devs = 0;
4104 break;
4105 }
4106
4107 memcpy (plugin_name, prefix, prefix_len);
4108 memcpy (plugin_name + prefix_len, cur, cur_len);
4109 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
4110
4111 if (gomp_load_plugin_for_device (&current_device, plugin_name))
4112 {
4113 new_num_devs = current_device.get_num_devices_func ();
4114 if (new_num_devs >= 1)
4115 {
4116 /* Augment DEVICES and NUM_DEVICES. */
4117
4118 devs = realloc (devs, (num_devs + new_num_devs)
4119 * sizeof (struct gomp_device_descr));
4120 if (!devs)
4121 {
4122 num_devs = 0;
4123 free (plugin_name);
4124 break;
4125 }
4126
4127 current_device.name = current_device.get_name_func ();
4128 /* current_device.capabilities has already been set. */
4129 current_device.type = current_device.get_type_func ();
4130 current_device.mem_map.root = NULL;
4131 current_device.state = GOMP_DEVICE_UNINITIALIZED;
4132 for (i = 0; i < new_num_devs; i++)
4133 {
4134 current_device.target_id = i;
4135 devs[num_devs] = current_device;
4136 gomp_mutex_init (&devs[num_devs].lock);
4137 num_devs++;
4138 }
4139 }
4140 }
4141
4142 free (plugin_name);
4143 cur = next + 1;
4144 }
4145 while (next);
4146
4147 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
4148 NUM_DEVICES_OPENMP. */
4149 struct gomp_device_descr *devs_s
4150 = malloc (num_devs * sizeof (struct gomp_device_descr));
4151 if (!devs_s)
4152 {
4153 num_devs = 0;
4154 free (devs);
4155 devs = NULL;
4156 }
4157 num_devs_openmp = 0;
4158 for (i = 0; i < num_devs; i++)
4159 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4160 devs_s[num_devs_openmp++] = devs[i];
4161 int num_devs_after_openmp = num_devs_openmp;
4162 for (i = 0; i < num_devs; i++)
4163 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4164 devs_s[num_devs_after_openmp++] = devs[i];
4165 free (devs);
4166 devs = devs_s;
4167
4168 for (i = 0; i < num_devs; i++)
4169 {
4170 /* The 'devices' array can be moved (by the realloc call) until we have
4171 found all the plugins, so registering with the OpenACC runtime (which
4172 takes a copy of the pointer argument) must be delayed until now. */
4173 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
4174 goacc_register (&devs[i]);
4175 }
4176
4177 num_devices = num_devs;
4178 num_devices_openmp = num_devs_openmp;
4179 devices = devs;
4180 if (atexit (gomp_target_fini) != 0)
4181 gomp_fatal ("atexit failed");
4182 }
4183
4184 #else /* PLUGIN_SUPPORT */
4185 /* If dlfcn.h is unavailable we always fall back to host execution.
4186 GOMP_target* routines are just stubs for this case. */
4187 static void
4188 gomp_target_init (void)
4189 {
4190 }
4191 #endif /* PLUGIN_SUPPORT */