libgomp/target.c: '-foffload-memory=pinned' using offloading device interfaces
1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_c
51 #include "splay-tree.h"
52
53
54 typedef uintptr_t *hash_entry_type;
55 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
56 static inline void htab_free (void *ptr) { free (ptr); }
57 #include "hashtab.h"
58
59 ialias_redirect (GOMP_task)
60
61 static inline hashval_t
62 htab_hash (hash_entry_type element)
63 {
64 return hash_pointer ((void *) element);
65 }
66
67 static inline bool
68 htab_eq (hash_entry_type x, hash_entry_type y)
69 {
70 return x == y;
71 }
72
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
74
75 static void gomp_target_init (void);
76
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
79
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock;
82
83 /* This structure describes an offload image.
84 It contains the type of the target device, a pointer to the host table
85 descriptor, and a pointer to the target data. */
86 struct offload_image_descr {
87 unsigned version;
88 enum offload_target_type type;
89 const void *host_table;
90 const void *target_data;
91 };
92
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr *offload_images;
95
96 /* Total number of offload images. */
97 static int num_offload_images;
98
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr *devices;
101
102 /* Total number of available devices. */
103 static int num_devices;
104
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp;
107
108 /* Mask of OpenMP 'requires' directive clauses in effect. */
109 static int omp_requires_mask;
110
111
112 static void *gomp_page_locked_host_alloc_dev (struct gomp_device_descr *,
113 size_t, bool);
114 static bool gomp_page_locked_host_free_dev (struct gomp_device_descr *,
115 void *,
116 struct goacc_asyncqueue *);
117 static void *gomp_page_locked_host_aligned_alloc_dev (struct gomp_device_descr *,
118 size_t, size_t);
119 static bool gomp_page_locked_host_aligned_free_dev (struct gomp_device_descr *,
120 void *,
121 struct goacc_asyncqueue *);
122
123 /* Use (that is, allocate or register) page-locked host memory for memory
124 objects participating in host <-> device memory transfers.
125
126 When this is enabled, there is no fallback to non-page-locked host
127 memory. */
128
129 attribute_hidden
130 bool always_pinned_mode = false;
131
132 /* This function is called by the compiler when -foffload-memory=pinned
133 is used. */
134
135 void
136 GOMP_enable_pinned_mode ()
137 {
138 always_pinned_mode = true;
139 }
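/* Illustrative sketch only: code compiled with '-foffload-memory=pinned'
   has the compiler arrange a call to the entry point above before any
   mapping takes place, conceptually equivalent to

     __attribute__((constructor))
     static void
     enable_pinned (void)
     {
       GOMP_enable_pinned_mode ();
     }

   so that page-locked host memory is then used for all memory objects
   participating in host <-> device transfers.  The constructor shown here
   is a hedged illustration of the calling convention, not the actual
   mechanism emitted by the compiler.  */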
140
141 /* Verify that page-locked host memory is used for memory objects participating
142 in host <-> device memory transfers. */
143
144 static const bool verify_always_pinned_mode = false;
145
146 static bool
147 gomp_verify_always_pinned_mode (struct gomp_device_descr *device,
148 const void *ptr, size_t size)
149 {
150 gomp_debug (0, "%s: device=%p (%s), ptr=%p, size=%llu\n",
151 __FUNCTION__,
152 device, device->name, ptr, (unsigned long long) size);
153
154 if (size == 0)
155 /* Skip zero-size requests; for those we've got no actual region of
156 page-locked host memory. */
157 ;
158 else if (device->page_locked_host_register_func)
159 {
160 int page_locked_host_p
161 = device->page_locked_host_p_func (device->target_id, ptr, size);
162 if (page_locked_host_p < 0)
163 {
164 gomp_error ("Failed to test page-locked host memory"
165 " via %s libgomp plugin",
166 device->name);
167 return false;
168 }
169 if (!page_locked_host_p)
170 {
171 gomp_error ("Failed page-locked host memory test");
172 return false;
173 }
174 }
175 return true;
176 }
177
178
179 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
180
181 static void *
182 gomp_realloc_unlock (void *old, size_t size)
183 {
184 void *ret = realloc (old, size);
185 if (ret == NULL)
186 {
187 gomp_mutex_unlock (&register_lock);
188 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
189 }
190 return ret;
191 }
192
193 attribute_hidden void
194 gomp_init_targets_once (void)
195 {
196 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
197 }
198
199 attribute_hidden int
200 gomp_get_num_devices (void)
201 {
202 gomp_init_targets_once ();
203 return num_devices_openmp;
204 }
205
206 static struct gomp_device_descr *
207 resolve_device (int device_id, bool remapped)
208 {
209 if (remapped && device_id == GOMP_DEVICE_ICV)
210 {
211 struct gomp_task_icv *icv = gomp_icv (false);
212 device_id = icv->default_device_var;
213 remapped = false;
214 }
215
216 if (device_id < 0)
217 {
218 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
219 : omp_initial_device))
220 return NULL;
221 if (device_id == omp_invalid_device)
222 gomp_fatal ("omp_invalid_device encountered");
223 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
224 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
225 "but device not found");
226
227 return NULL;
228 }
229 else if (device_id >= gomp_get_num_devices ())
230 {
231 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
232 && device_id != num_devices_openmp)
233 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
234 "but device not found");
235
236 return NULL;
237 }
238
239 gomp_mutex_lock (&devices[device_id].lock);
240 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
241 gomp_init_device (&devices[device_id]);
242 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
243 {
244 gomp_mutex_unlock (&devices[device_id].lock);
245
246 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
247 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
248 "but device is finalized");
249
250 return NULL;
251 }
252 gomp_mutex_unlock (&devices[device_id].lock);
253
254 return &devices[device_id];
255 }
256
257
258 static inline splay_tree_key
259 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
260 {
261 if (key->host_start != key->host_end)
262 return splay_tree_lookup (mem_map, key);
263
264 key->host_end++;
265 splay_tree_key n = splay_tree_lookup (mem_map, key);
266 key->host_end--;
267 if (n)
268 return n;
269 key->host_start--;
270 n = splay_tree_lookup (mem_map, key);
271 key->host_start++;
272 if (n)
273 return n;
274 return splay_tree_lookup (mem_map, key);
275 }
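/* Note: for a zero-length key (host_start == host_end), the lookup above
   first probes for a mapping covering the following byte, then for one
   covering the preceding byte, and finally falls back to an exact lookup.
   This lets zero-length array sections resolve against an enclosing
   mapping on either side of the address.  */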
276
277 static inline reverse_splay_tree_key
278 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
279 {
280 return reverse_splay_tree_lookup (mem_map_rev, key);
281 }
282
283 static inline splay_tree_key
284 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
285 {
286 if (key->host_start != key->host_end)
287 return splay_tree_lookup (mem_map, key);
288
289 key->host_end++;
290 splay_tree_key n = splay_tree_lookup (mem_map, key);
291 key->host_end--;
292 return n;
293 }
294
295 static inline void
296 gomp_device_copy (struct gomp_device_descr *devicep,
297 bool (*copy_func) (int, void *, const void *, size_t),
298 const char *dst, void *dstaddr,
299 const char *src, const void *srcaddr,
300 size_t size)
301 {
302 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
303 {
304 gomp_mutex_unlock (&devicep->lock);
305 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
306 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
307 }
308 }
309
310 static inline void
311 goacc_device_copy_async (struct gomp_device_descr *devicep,
312 bool (*copy_func) (int, void *, const void *, size_t,
313 struct goacc_asyncqueue *),
314 const char *dst, void *dstaddr,
315 const char *src, const void *srcaddr,
316 const void *srcaddr_orig,
317 size_t size, struct goacc_asyncqueue *aq)
318 {
319 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
320 {
321 gomp_mutex_unlock (&devicep->lock);
322 if (srcaddr_orig && srcaddr_orig != srcaddr)
323 gomp_fatal ("Copying of %s object [%p..%p)"
324 " via buffer %s object [%p..%p)"
325 " to %s object [%p..%p) failed",
326 src, srcaddr_orig, srcaddr_orig + size,
327 src, srcaddr, srcaddr + size,
328 dst, dstaddr, dstaddr + size);
329 else
330 gomp_fatal ("Copying of %s object [%p..%p)"
331 " to %s object [%p..%p) failed",
332 src, srcaddr, srcaddr + size,
333 dst, dstaddr, dstaddr + size);
334 }
335 }
336
337 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
338 host to device memory transfers. */
339
340 struct gomp_coalesce_chunk
341 {
342 /* The starting and ending point of a coalesced chunk of memory. */
343 size_t start, end;
344 };
345
346 struct gomp_coalesce_buf
347 {
348 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
349 it will be copied to the device. */
350 void *buf;
351 struct target_mem_desc *tgt;
352 /* Array with offsets, chunks[i].start is the starting offset and
353 chunks[i].end ending offset relative to tgt->tgt_start device address
354 of chunks which are to be copied to buf and later copied to device. */
355 struct gomp_coalesce_chunk *chunks;
356 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
357 be performed. */
358 long chunk_cnt;
359 /* During construction of chunks array, how many memory regions are within
360 the last chunk. If there is just one memory region for a chunk, we copy
361 it directly to device rather than going through buf. */
362 long use_cnt;
363 };
364
365 /* Maximum size of memory region considered for coalescing. Larger copies
366 are performed directly. */
367 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
368
369 /* Maximum size of a gap in between regions to consider them being copied
370 within the same chunk. All the device offsets considered are within
371 newly allocated device memory, so it isn't fatal if we copy some padding
372 in between from host to device. The gaps come either from alignment
373 padding or from memory regions which are not supposed to be copied from
374 host to device (e.g. map(alloc:), map(from:) etc.). */
375 #define MAX_COALESCE_BUF_GAP (4 * 1024)
376
377 /* Add region with device tgt_start relative offset and length to CBUF.
378
379 This must not be used for asynchronous copies, because the host data might
380 not be computed yet (by an earlier asynchronous compute region, for
381 example). The exception is for EPHEMERAL data, that we know is available
382 already "by construction". */
383
384 static inline void
385 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
386 {
387 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
388 return;
389 if (cbuf->chunk_cnt)
390 {
391 if (cbuf->chunk_cnt < 0)
392 return;
393 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
394 {
395 cbuf->chunk_cnt = -1;
396 return;
397 }
398 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
399 {
400 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
401 cbuf->use_cnt++;
402 return;
403 }
404 /* If the last chunk is only used by one mapping, discard it,
405 as it will be one host to device copy anyway and
406 memcpying it around will only waste cycles. */
407 if (cbuf->use_cnt == 1)
408 cbuf->chunk_cnt--;
409 }
410 cbuf->chunks[cbuf->chunk_cnt].start = start;
411 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
412 cbuf->chunk_cnt++;
413 cbuf->use_cnt = 1;
414 }
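/* Worked example of the chunk merging above (purely illustrative, with
   MAX_COALESCE_BUF_GAP == 4 KiB): adding regions at device offsets
   [0, 100) and then [140, 200) yields a single chunk [0, 200) with
   use_cnt == 2, because the 40-byte gap is below the threshold.  A third
   region starting at offset 8192 opens a new chunk instead, since the gap
   from 200 exceeds MAX_COALESCE_BUF_GAP.  */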
415
416 /* Return true for mapping kinds which need to copy data from the
417 host to device for regions that weren't previously mapped. */
418
419 static inline bool
420 gomp_to_device_kind_p (int kind)
421 {
422 switch (kind)
423 {
424 case GOMP_MAP_ALLOC:
425 case GOMP_MAP_FROM:
426 case GOMP_MAP_FORCE_ALLOC:
427 case GOMP_MAP_FORCE_FROM:
428 case GOMP_MAP_ALWAYS_FROM:
429 case GOMP_MAP_PRESENT_FROM:
430 case GOMP_MAP_ALWAYS_PRESENT_FROM:
431 return false;
432 default:
433 return true;
434 }
435 }
436
437 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
438 non-NULL), when the source data is on the stack or may otherwise be deallocated
439 before the asynchronous copy takes place, EPHEMERAL must be passed as
440 TRUE. */
441
442 attribute_hidden void
443 gomp_copy_host2dev (struct gomp_device_descr *devicep,
444 struct goacc_asyncqueue *aq,
445 void *d, const void *h, size_t sz,
446 bool ephemeral, struct gomp_coalesce_buf *cbuf)
447 {
448 if (cbuf)
449 {
450 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
451 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
452 {
453 long first = 0;
454 long last = cbuf->chunk_cnt - 1;
455 while (first <= last)
456 {
457 long middle = (first + last) >> 1;
458 if (cbuf->chunks[middle].end <= doff)
459 first = middle + 1;
460 else if (cbuf->chunks[middle].start <= doff)
461 {
462 if (doff + sz > cbuf->chunks[middle].end)
463 {
464 gomp_mutex_unlock (&devicep->lock);
465 gomp_fatal ("internal libgomp cbuf error");
466 }
467
468 /* In an asynchronous context, verify that CBUF isn't used
469 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
470 if (__builtin_expect (aq != NULL, 0))
471 assert (ephemeral);
472
473 /* We're just filling the CBUF; 'always_pinned_mode' isn't
474 relevant. */
475
476 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
477 h, sz);
478 return;
479 }
480 else
481 last = middle - 1;
482 }
483 }
484 }
485
486 if (__builtin_expect (aq != NULL, 0))
487 {
488 void *h_buf = (void *) h;
489 if (ephemeral)
490 {
491 /* We're queueing up an asynchronous copy from data that may
492 disappear before the transfer takes place (i.e. because it is a
493 stack local in a function that is no longer executing). As we've
494 not been able to use CBUF, make a copy of the data into a
495 temporary buffer. */
496 if (always_pinned_mode)
497 {
498 h_buf = gomp_page_locked_host_alloc_dev (devicep, sz, false);
499 if (!h_buf)
500 {
501 gomp_mutex_unlock (&devicep->lock);
502 exit (EXIT_FAILURE);
503 }
504 }
505 else
506 h_buf = gomp_malloc (sz);
507 memcpy (h_buf, h, sz);
508 }
509
510 /* No 'gomp_verify_always_pinned_mode' for 'ephemeral'; have just
511 allocated. */
512 if (!ephemeral
513 && verify_always_pinned_mode
514 && always_pinned_mode)
515 if (!gomp_verify_always_pinned_mode (devicep, h_buf, sz))
516 {
517 gomp_mutex_unlock (&devicep->lock);
518 exit (EXIT_FAILURE);
519 }
520
521 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
522 "dev", d, "host", h_buf, h, sz, aq);
523
524 if (ephemeral)
525 {
526 if (always_pinned_mode)
527 {
528 if (!gomp_page_locked_host_free_dev (devicep, h_buf, aq))
529 {
530 gomp_mutex_unlock (&devicep->lock);
531 exit (EXIT_FAILURE);
532 }
533 }
534 else
535 /* Free once the transfer has completed. */
536 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
537 }
538 }
539 else
540 {
541 if (ephemeral
542 && always_pinned_mode)
543 {
544 /* TODO: Page-locking on the spot probably doesn't make a lot of
545 sense (performance-wise). Should we instead use a "page-locked
546 host memory bounce buffer" (per host thread, or per device,
547 or...)? */
548 void *ptr = (void *) h;
549 int page_locked_host_p
550 = gomp_page_locked_host_register_dev (devicep,
551 ptr, sz, GOMP_MAP_TO);
552 if (page_locked_host_p < 0)
553 {
554 gomp_mutex_unlock (&devicep->lock);
555 exit (EXIT_FAILURE);
556 }
557 /* Ephemeral data isn't already page-locked host memory. */
558 assert (page_locked_host_p);
559 }
560 else if (verify_always_pinned_mode
561 && always_pinned_mode)
562 if (!gomp_verify_always_pinned_mode (devicep, h, sz))
563 {
564 gomp_mutex_unlock (&devicep->lock);
565 exit (EXIT_FAILURE);
566 }
567
568 gomp_device_copy (devicep, devicep->host2dev_func,
569 "dev", d, "host", h, sz);
570
571 if (ephemeral
572 && always_pinned_mode)
573 {
574 void *ptr = (void *) h;
575 if (!gomp_page_locked_host_unregister_dev (devicep, ptr, sz, aq))
576 {
577 gomp_mutex_unlock (&devicep->lock);
578 exit (EXIT_FAILURE);
579 }
580 }
581 }
582 }
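/* Summary of the paths above: a synchronous copy (AQ == NULL) goes straight
   through the plugin's host2dev hook; for EPHEMERAL data in
   'always_pinned_mode', the host range is temporarily registered as
   page-locked around that copy.  An asynchronous copy of EPHEMERAL data is
   instead staged through a temporary host buffer (page-locked if required)
   that is released once the transfer has completed.  A typical synchronous
   call, with placeholder arguments, looks like

     gomp_copy_host2dev (devicep, NULL, (void *) devaddr, (void *) hostaddr,
                         size, false, NULL);

   where 'devaddr', 'hostaddr', and 'size' stand for an actual mapping's
   device address, host address, and length.  */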
583
584 attribute_hidden void
585 gomp_copy_dev2host (struct gomp_device_descr *devicep,
586 struct goacc_asyncqueue *aq,
587 void *h, const void *d, size_t sz)
588 {
589 if (verify_always_pinned_mode
590 && always_pinned_mode)
591 if (!gomp_verify_always_pinned_mode (devicep, h, sz))
592 {
593 gomp_mutex_unlock (&devicep->lock);
594 exit (EXIT_FAILURE);
595 }
596
597 if (__builtin_expect (aq != NULL, 0))
598 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
599 "host", h, "dev", d, NULL, sz, aq);
600 else
601 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
602 }
603
604 static void
605 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
606 {
607 if (!devicep->free_func (devicep->target_id, devptr))
608 {
609 gomp_mutex_unlock (&devicep->lock);
610 gomp_fatal ("error in freeing device memory block at %p", devptr);
611 }
612 }
613
614 /* Increment reference count of a splay_tree_key region K by 1.
615 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
616 increment the value if refcount is not yet contained in the set (used for
617 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
618 once for each construct). */
619
620 static inline void
621 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
622 {
623 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
624 return;
625
626 uintptr_t *refcount_ptr = &k->refcount;
627
628 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
629 refcount_ptr = &k->structelem_refcount;
630 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
631 refcount_ptr = k->structelem_refcount_ptr;
632
633 if (refcount_set)
634 {
635 if (htab_find (*refcount_set, refcount_ptr))
636 return;
637 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
638 *slot = refcount_ptr;
639 }
640
641 *refcount_ptr += 1;
642 return;
643 }
644
645 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
646 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
647 track already seen refcounts, and only adjust the value if refcount is not
648 yet contained in the set (like gomp_increment_refcount).
649
650 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
651 it is already zero and we know we decremented it earlier. This signals that
652 associated maps should be copied back to host.
653
654 *DO_REMOVE is set to true when this is the first handling of this refcount
655 and we are setting it to zero. This signals a removal of this key from the
656 splay-tree map.
657
658 Copy and removal are separated due to cases like handling of structure
659 elements, e.g. each map of a structure element representing a possible copy
660 out of a structure field has to be handled individually, but we only signal
661 removal for one (the first encountered) sibling map. */
662
663 static inline void
664 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
665 bool *do_copy, bool *do_remove)
666 {
667 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
668 {
669 *do_copy = *do_remove = false;
670 return;
671 }
672
673 uintptr_t *refcount_ptr = &k->refcount;
674
675 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
676 refcount_ptr = &k->structelem_refcount;
677 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
678 refcount_ptr = k->structelem_refcount_ptr;
679
680 bool new_encountered_refcount;
681 bool set_to_zero = false;
682 bool is_zero = false;
683
684 uintptr_t orig_refcount = *refcount_ptr;
685
686 if (refcount_set)
687 {
688 if (htab_find (*refcount_set, refcount_ptr))
689 {
690 new_encountered_refcount = false;
691 goto end;
692 }
693
694 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
695 *slot = refcount_ptr;
696 new_encountered_refcount = true;
697 }
698 else
699 /* If no refcount_set being used, assume all keys are being decremented
700 for the first time. */
701 new_encountered_refcount = true;
702
703 if (delete_p)
704 *refcount_ptr = 0;
705 else if (*refcount_ptr > 0)
706 *refcount_ptr -= 1;
707
708 end:
709 if (*refcount_ptr == 0)
710 {
711 if (orig_refcount > 0)
712 set_to_zero = true;
713
714 is_zero = true;
715 }
716
717 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
718 *do_remove = (new_encountered_refcount && set_to_zero);
719 }
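/* Worked example of the REFCOUNT_SET handling above (illustrative): if the
   same splay-tree key is referenced twice on one construct, the first
   gomp_decrement_refcount call inserts its refcount pointer into the set
   and decrements (say from 1 to 0), reporting both *DO_COPY and *DO_REMOVE
   as true; the second call finds the pointer already in the set, leaves
   the refcount at 0, and reports *DO_COPY true but *DO_REMOVE false, so
   data is still copied back for that map but the key is removed from the
   splay tree only once.  */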
720
721 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
722 gomp_map_0len_lookup found oldn for newn.
723 Helper function of gomp_map_vars. */
724
725 static inline void
726 gomp_map_vars_existing (struct gomp_device_descr *devicep,
727 struct goacc_asyncqueue *aq, splay_tree_key oldn,
728 splay_tree_key newn, struct target_var_desc *tgt_var,
729 unsigned char kind, bool always_to_flag, bool implicit,
730 struct gomp_coalesce_buf *cbuf,
731 htab_t *refcount_set)
732 {
733 assert (kind != GOMP_MAP_ATTACH
734 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
735
736 tgt_var->key = oldn;
737 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
738 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
739 tgt_var->is_attach = false;
740 tgt_var->offset = newn->host_start - oldn->host_start;
741
742 /* For implicit maps, old contained in new is valid. */
743 bool implicit_subset = (implicit
744 && newn->host_start <= oldn->host_start
745 && oldn->host_end <= newn->host_end);
746 if (implicit_subset)
747 tgt_var->length = oldn->host_end - oldn->host_start;
748 else
749 tgt_var->length = newn->host_end - newn->host_start;
750
751 if (GOMP_MAP_FORCE_P (kind)
752 /* For implicit maps, old contained in new is valid. */
753 || !(implicit_subset
754 /* Otherwise, new contained inside old is considered valid. */
755 || (oldn->host_start <= newn->host_start
756 && newn->host_end <= oldn->host_end)))
757 {
758 gomp_mutex_unlock (&devicep->lock);
759 gomp_fatal ("Trying to map into device [%p..%p) object when "
760 "[%p..%p) is already mapped",
761 (void *) newn->host_start, (void *) newn->host_end,
762 (void *) oldn->host_start, (void *) oldn->host_end);
763 }
764
765 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
766 {
767 /* Implicit + always should not happen. If this does occur, below
768 address/length adjustment is a TODO. */
769 assert (!implicit_subset);
770
771 if (oldn->aux && oldn->aux->attach_count)
772 {
773 /* We have to be careful not to overwrite still attached pointers
774 during the copyback to host. */
775 uintptr_t addr = newn->host_start;
776 while (addr < newn->host_end)
777 {
778 size_t i = (addr - oldn->host_start) / sizeof (void *);
779 if (oldn->aux->attach_count[i] == 0)
780 gomp_copy_host2dev (devicep, aq,
781 (void *) (oldn->tgt->tgt_start
782 + oldn->tgt_offset
783 + addr - oldn->host_start),
784 (void *) addr,
785 sizeof (void *), false, cbuf);
786 addr += sizeof (void *);
787 }
788 }
789 else
790 gomp_copy_host2dev (devicep, aq,
791 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
792 + newn->host_start - oldn->host_start),
793 (void *) newn->host_start,
794 newn->host_end - newn->host_start, false, cbuf);
795 }
796
797 gomp_increment_refcount (oldn, refcount_set);
798 }
799
800 static int
801 get_kind (bool short_mapkind, void *kinds, int idx)
802 {
803 if (!short_mapkind)
804 return ((unsigned char *) kinds)[idx];
805
806 int val = ((unsigned short *) kinds)[idx];
807 if (GOMP_MAP_IMPLICIT_P (val))
808 val &= ~GOMP_MAP_IMPLICIT;
809 return val;
810 }
811
812
813 static bool
814 get_implicit (bool short_mapkind, void *kinds, int idx)
815 {
816 if (!short_mapkind)
817 return false;
818
819 int val = ((unsigned short *) kinds)[idx];
820 return GOMP_MAP_IMPLICIT_P (val);
821 }
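/* Layout of the map-kind values decoded by the two helpers above: with
   short (16-bit) map kinds, the low byte of each entry holds the
   GOMP_MAP_* kind (plus the GOMP_MAP_IMPLICIT flag, which get_kind masks
   off and get_implicit reports separately), while the upper bits hold the
   log2 of the required alignment, recovered below as
   '(size_t) 1 << (kind >> rshift)'.  With the older char-sized encoding,
   only the low 3 bits carry the kind and the remaining bits carry the
   alignment.  */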
822
823 static void
824 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
825 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
826 struct gomp_coalesce_buf *cbuf,
827 bool allow_zero_length_array_sections)
828 {
829 struct gomp_device_descr *devicep = tgt->device_descr;
830 struct splay_tree_s *mem_map = &devicep->mem_map;
831 struct splay_tree_key_s cur_node;
832
833 cur_node.host_start = host_ptr;
834 if (cur_node.host_start == (uintptr_t) NULL)
835 {
836 cur_node.tgt_offset = (uintptr_t) NULL;
837 gomp_copy_host2dev (devicep, aq,
838 (void *) (tgt->tgt_start + target_offset),
839 (void *) &cur_node.tgt_offset, sizeof (void *),
840 true, cbuf);
841 return;
842 }
843 /* Add bias to the pointer value. */
844 cur_node.host_start += bias;
845 cur_node.host_end = cur_node.host_start;
846 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
847 if (n == NULL)
848 {
849 if (allow_zero_length_array_sections)
850 cur_node.tgt_offset = 0;
851 else if (devicep->is_usm_ptr_func
852 && devicep->is_usm_ptr_func ((void*)cur_node.host_start))
853 cur_node.tgt_offset = cur_node.host_start;
854 else
855 {
856 gomp_mutex_unlock (&devicep->lock);
857 gomp_fatal ("Pointer target of array section wasn't mapped");
858 }
859 }
860 else
861 {
862 cur_node.host_start -= n->host_start;
863 cur_node.tgt_offset
864 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
865 /* At this point tgt_offset is target address of the
866 array section. Now subtract bias to get what we want
867 to initialize the pointer with. */
868 cur_node.tgt_offset -= bias;
869 }
870 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
871 (void *) &cur_node.tgt_offset, sizeof (void *),
872 true, cbuf);
873 }
874
875 static void
876 gomp_map_fields_existing (struct target_mem_desc *tgt,
877 struct goacc_asyncqueue *aq, splay_tree_key n,
878 size_t first, size_t i, void **hostaddrs,
879 size_t *sizes, void *kinds,
880 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
881 {
882 struct gomp_device_descr *devicep = tgt->device_descr;
883 struct splay_tree_s *mem_map = &devicep->mem_map;
884 struct splay_tree_key_s cur_node;
885 int kind;
886 bool implicit;
887 const bool short_mapkind = true;
888 const int typemask = short_mapkind ? 0xff : 0x7;
889
890 cur_node.host_start = (uintptr_t) hostaddrs[i];
891 cur_node.host_end = cur_node.host_start + sizes[i];
892 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
893 kind = get_kind (short_mapkind, kinds, i);
894 implicit = get_implicit (short_mapkind, kinds, i);
895 if (n2
896 && n2->tgt == n->tgt
897 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
898 {
899 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
900 kind & typemask, false, implicit, cbuf,
901 refcount_set);
902 return;
903 }
904 if (sizes[i] == 0)
905 {
906 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
907 {
908 cur_node.host_start--;
909 n2 = splay_tree_lookup (mem_map, &cur_node);
910 cur_node.host_start++;
911 if (n2
912 && n2->tgt == n->tgt
913 && n2->host_start - n->host_start
914 == n2->tgt_offset - n->tgt_offset)
915 {
916 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
917 kind & typemask, false, implicit, cbuf,
918 refcount_set);
919 return;
920 }
921 }
922 cur_node.host_end++;
923 n2 = splay_tree_lookup (mem_map, &cur_node);
924 cur_node.host_end--;
925 if (n2
926 && n2->tgt == n->tgt
927 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
928 {
929 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
930 kind & typemask, false, implicit, cbuf,
931 refcount_set);
932 return;
933 }
934 }
935 gomp_mutex_unlock (&devicep->lock);
936 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
937 "other mapped elements from the same structure weren't mapped "
938 "together with it", (void *) cur_node.host_start,
939 (void *) cur_node.host_end);
940 }
941
942 attribute_hidden void
943 gomp_attach_pointer (struct gomp_device_descr *devicep,
944 struct goacc_asyncqueue *aq, splay_tree mem_map,
945 splay_tree_key n, uintptr_t attach_to, size_t bias,
946 struct gomp_coalesce_buf *cbufp,
947 bool allow_zero_length_array_sections)
948 {
949 struct splay_tree_key_s s;
950 size_t size, idx;
951
952 if (n == NULL)
953 {
954 gomp_mutex_unlock (&devicep->lock);
955 gomp_fatal ("enclosing struct not mapped for attach");
956 }
957
958 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
959 /* We might have a pointer in a packed struct: however we cannot have more
960 than one such pointer in each pointer-sized portion of the struct, so
961 this is safe. */
962 idx = (attach_to - n->host_start) / sizeof (void *);
963
964 if (!n->aux)
965 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
966
967 if (!n->aux->attach_count)
968 n->aux->attach_count
969 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
970
971 if (n->aux->attach_count[idx] < UINTPTR_MAX)
972 n->aux->attach_count[idx]++;
973 else
974 {
975 gomp_mutex_unlock (&devicep->lock);
976 gomp_fatal ("attach count overflow");
977 }
978
979 if (n->aux->attach_count[idx] == 1)
980 {
981 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
982 - n->host_start;
983 uintptr_t target = (uintptr_t) *(void **) attach_to;
984 splay_tree_key tn;
985 uintptr_t data;
986
987 if ((void *) target == NULL)
988 {
989 gomp_mutex_unlock (&devicep->lock);
990 gomp_fatal ("attempt to attach null pointer");
991 }
992
993 if (devicep->is_usm_ptr_func
994 && devicep->is_usm_ptr_func ((void*)(target + bias)))
995 /* Nothing to do here. */
996 return;
997
998 s.host_start = target + bias;
999 s.host_end = s.host_start + 1;
1000 tn = splay_tree_lookup (mem_map, &s);
1001
1002 if (!tn)
1003 {
1004 if (allow_zero_length_array_sections)
1005 /* When allowing attachment to zero-length array sections, we
1006 allow attaching to NULL pointers when the target region is not
1007 mapped. */
1008 data = 0;
1009 else
1010 {
1011 gomp_mutex_unlock (&devicep->lock);
1012 gomp_fatal ("pointer target not mapped for attach");
1013 }
1014 }
1015 else
1016 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
1017
1018 gomp_debug (1,
1019 "%s: attaching host %p, target %p (struct base %p) to %p\n",
1020 __FUNCTION__, (void *) attach_to, (void *) devptr,
1021 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
1022
1023 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
1024 sizeof (void *), true, cbufp);
1025 }
1026 else
1027 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
1028 (void *) attach_to, (int) n->aux->attach_count[idx]);
1029 }
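/* Note on the attach counters used above: one counter exists per
   pointer-sized slot of the enclosing mapping, so repeated 'attach'
   operations on the same slot only bump the count, and the device pointer
   is (re)written only on the 0 -> 1 transition; gomp_detach_pointer below
   restores the host pointer value on the matching 1 -> 0 transition.  */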
1030
1031 attribute_hidden void
1032 gomp_detach_pointer (struct gomp_device_descr *devicep,
1033 struct goacc_asyncqueue *aq, splay_tree_key n,
1034 uintptr_t detach_from, bool finalize,
1035 struct gomp_coalesce_buf *cbufp)
1036 {
1037 size_t idx;
1038
1039 if (n == NULL)
1040 {
1041 gomp_mutex_unlock (&devicep->lock);
1042 gomp_fatal ("enclosing struct not mapped for detach");
1043 }
1044
1045 idx = (detach_from - n->host_start) / sizeof (void *);
1046
1047 if (!n->aux || !n->aux->attach_count)
1048 {
1049 gomp_mutex_unlock (&devicep->lock);
1050 gomp_fatal ("no attachment counters for struct");
1051 }
1052
1053 if (finalize)
1054 n->aux->attach_count[idx] = 1;
1055
1056 if (n->aux->attach_count[idx] == 0)
1057 {
1058 gomp_mutex_unlock (&devicep->lock);
1059 gomp_fatal ("attach count underflow");
1060 }
1061 else
1062 n->aux->attach_count[idx]--;
1063
1064 if (n->aux->attach_count[idx] == 0)
1065 {
1066 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
1067 - n->host_start;
1068 uintptr_t target = (uintptr_t) *(void **) detach_from;
1069
1070 gomp_debug (1,
1071 "%s: detaching host %p, target %p (struct base %p) to %p\n",
1072 __FUNCTION__, (void *) detach_from, (void *) devptr,
1073 (void *) (n->tgt->tgt_start + n->tgt_offset),
1074 (void *) target);
1075
1076 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
1077 sizeof (void *), true, cbufp);
1078 }
1079 else
1080 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
1081 (void *) detach_from, (int) n->aux->attach_count[idx]);
1082 }
1083
1084 attribute_hidden uintptr_t
1085 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
1086 {
1087 if (tgt->list[i].key != NULL)
1088 return tgt->list[i].key->tgt->tgt_start
1089 + tgt->list[i].key->tgt_offset
1090 + tgt->list[i].offset;
1091
1092 switch (tgt->list[i].offset)
1093 {
1094 case OFFSET_INLINED:
1095 case OFFSET_USM:
1096 return (uintptr_t) hostaddrs[i];
1097
1098 case OFFSET_POINTER:
1099 return 0;
1100
1101 case OFFSET_STRUCT:
1102 return tgt->list[i + 1].key->tgt->tgt_start
1103 + tgt->list[i + 1].key->tgt_offset
1104 + tgt->list[i + 1].offset
1105 + (uintptr_t) hostaddrs[i]
1106 - (uintptr_t) hostaddrs[i + 1];
1107
1108 default:
1109 return tgt->tgt_start + tgt->list[i].offset;
1110 }
1111 }
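/* Note on the special 'offset' values handled above: OFFSET_INLINED and
   OFFSET_USM pass the host address through unchanged; OFFSET_POINTER
   yields 0; OFFSET_STRUCT derives the struct's device address from the
   first mapped element following it in the list; any other offset is
   relative to the target block's tgt_start.  */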
1112
1113 static inline __attribute__((always_inline)) struct target_mem_desc *
1114 gomp_map_vars_internal (struct gomp_device_descr *devicep,
1115 struct goacc_asyncqueue *aq, size_t mapnum,
1116 void **hostaddrs, void **devaddrs, size_t *sizes,
1117 void *kinds, struct goacc_ncarray_info *nca_info,
1118 bool short_mapkind, htab_t *refcount_set,
1119 enum gomp_map_vars_kind pragma_kind)
1120 {
1121 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
1122 size_t nca_data_row_num = (nca_info ? nca_info->num_data_rows : 0);
1123 bool has_firstprivate = false;
1124 bool has_always_ptrset = false;
1125 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
1126 const int rshift = short_mapkind ? 8 : 3;
1127 const int typemask = short_mapkind ? 0xff : 0x7;
1128 struct splay_tree_s *mem_map = &devicep->mem_map;
1129 struct splay_tree_key_s cur_node;
1130 struct target_mem_desc *tgt
1131 = gomp_malloc (sizeof (*tgt)
1132 + sizeof (tgt->list[0]) * (mapnum + nca_data_row_num));
1133 tgt->list_count = mapnum + nca_data_row_num;
1134 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
1135 tgt->device_descr = devicep;
1136 tgt->prev = NULL;
1137 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
1138
1139 if (mapnum == 0)
1140 {
1141 tgt->tgt_start = 0;
1142 tgt->tgt_end = 0;
1143 return tgt;
1144 }
1145
1146 tgt_align = sizeof (void *);
1147 tgt_size = 0;
1148 cbuf.chunks = NULL;
1149 cbuf.chunk_cnt = -1;
1150 cbuf.use_cnt = 0;
1151 cbuf.buf = NULL;
1152 if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
1153 {
1154 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
1155 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
1156 cbuf.chunk_cnt = 0;
1157 }
1158 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1159 {
1160 size_t align = 4 * sizeof (void *);
1161 tgt_align = align;
1162 tgt_size = mapnum * sizeof (void *);
1163 cbuf.chunk_cnt = 1;
1164 cbuf.use_cnt = 1 + (mapnum > 1);
1165 cbuf.chunks[0].start = 0;
1166 cbuf.chunks[0].end = tgt_size;
1167 }
1168
1169 gomp_mutex_lock (&devicep->lock);
1170 if (devicep->state == GOMP_DEVICE_FINALIZED)
1171 {
1172 gomp_mutex_unlock (&devicep->lock);
1173 free (tgt);
1174 return NULL;
1175 }
1176
1177 for (i = 0; i < mapnum; i++)
1178 {
1179 int kind = get_kind (short_mapkind, kinds, i);
1180 bool implicit = get_implicit (short_mapkind, kinds, i);
1181 tgt->list[i].offset = 0;
1182 if (hostaddrs[i] == NULL
1183 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1184 {
1185 tgt->list[i].key = NULL;
1186 tgt->list[i].offset = OFFSET_INLINED;
1187 continue;
1188 }
1189 else if (devicep->is_usm_ptr_func
1190 && devicep->is_usm_ptr_func (hostaddrs[i]))
1191 {
1192 /* The memory is visible from both host and target
1193 so nothing needs to be moved. */
1194 tgt->list[i].key = NULL;
1195 tgt->list[i].offset = OFFSET_USM;
1196 continue;
1197 }
1198 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1199 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1200 {
1201 tgt->list[i].key = NULL;
1202 if (!not_found_cnt)
1203 {
1204 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1205 on a separate construct prior to using use_device_{addr,ptr}.
1206 In OpenMP 5.0, map directives need to be ordered by the
1207 middle-end before the use_device_* clauses. If
1208 !not_found_cnt, all mappings requested (if any) are already
1209 mapped, so use_device_{addr,ptr} can be resolved right away.
1210 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1211 now but would succeed after performing the mappings in the
1212 following loop. We can't defer this always to the second
1213 loop, because it is not even invoked when !not_found_cnt
1214 after the first loop. */
1215 cur_node.host_start = (uintptr_t) hostaddrs[i];
1216 cur_node.host_end = cur_node.host_start;
1217 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1218 if (n != NULL)
1219 {
1220 cur_node.host_start -= n->host_start;
1221 hostaddrs[i]
1222 = (void *) (n->tgt->tgt_start + n->tgt_offset
1223 + cur_node.host_start);
1224 }
1225 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1226 {
1227 gomp_mutex_unlock (&devicep->lock);
1228 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1229 }
1230 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1231 /* If not present, continue using the host address. */
1232 ;
1233 else
1234 __builtin_unreachable ();
1235 tgt->list[i].offset = OFFSET_INLINED;
1236 }
1237 else
1238 tgt->list[i].offset = 0;
1239 continue;
1240 }
1241 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1242 {
1243 size_t first = i + 1;
1244 size_t last = i + sizes[i];
1245 cur_node.host_start = (uintptr_t) hostaddrs[i];
1246 cur_node.host_end = (uintptr_t) hostaddrs[last]
1247 + sizes[last];
1248 tgt->list[i].key = NULL;
1249 tgt->list[i].offset = OFFSET_STRUCT;
1250 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1251 if (n == NULL)
1252 {
1253 size_t align = (size_t) 1 << (kind >> rshift);
1254 if (tgt_align < align)
1255 tgt_align = align;
1256 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1257 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1258 tgt_size += cur_node.host_end - cur_node.host_start;
1259 not_found_cnt += last - i;
1260 for (i = first; i <= last; i++)
1261 {
1262 tgt->list[i].key = NULL;
1263 if (!aq
1264 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1265 & typemask))
1266 gomp_coalesce_buf_add (&cbuf,
1267 tgt_size - cur_node.host_end
1268 + (uintptr_t) hostaddrs[i],
1269 sizes[i]);
1270 }
1271 i--;
1272 continue;
1273 }
1274 for (i = first; i <= last; i++)
1275 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1276 sizes, kinds, NULL, refcount_set);
1277 i--;
1278 continue;
1279 }
1280 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1281 {
1282 tgt->list[i].key = NULL;
1283 tgt->list[i].offset = OFFSET_POINTER;
1284 has_firstprivate = true;
1285 continue;
1286 }
1287 else if ((kind & typemask) == GOMP_MAP_ATTACH
1288 || ((kind & typemask)
1289 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1290 {
1291 tgt->list[i].key = NULL;
1292 has_firstprivate = true;
1293 continue;
1294 }
1295 else if (GOMP_MAP_NONCONTIG_ARRAY_P (kind & typemask))
1296 {
1297 /* Ignore non-contiguous arrays for now, we process them together
1298 later. */
1299 tgt->list[i].key = NULL;
1300 tgt->list[i].offset = 0;
1301 not_found_cnt++;
1302
1303 /* The map for the non-contiguous array itself is never copied from
1304 during unmapping, it's the data rows that count. Set copy-from
1305 flags to false here. */
1306 tgt->list[i].copy_from = false;
1307 tgt->list[i].always_copy_from = false;
1308 tgt->list[i].is_attach = false;
1309
1310 size_t align = (size_t) 1 << (kind >> rshift);
1311 if (tgt_align < align)
1312 tgt_align = align;
1313
1314 continue;
1315 }
1316
1317 cur_node.host_start = (uintptr_t) hostaddrs[i];
1318 if (!GOMP_MAP_POINTER_P (kind & typemask))
1319 cur_node.host_end = cur_node.host_start + sizes[i];
1320 else
1321 cur_node.host_end = cur_node.host_start + sizeof (void *);
1322 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1323 {
1324 tgt->list[i].key = NULL;
1325
1326 size_t align = (size_t) 1 << (kind >> rshift);
1327 if (tgt_align < align)
1328 tgt_align = align;
1329 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1330 if (!aq)
1331 gomp_coalesce_buf_add (&cbuf, tgt_size,
1332 cur_node.host_end - cur_node.host_start);
1333 tgt_size += cur_node.host_end - cur_node.host_start;
1334 has_firstprivate = true;
1335 continue;
1336 }
1337 splay_tree_key n;
1338 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1339 {
1340 n = gomp_map_0len_lookup (mem_map, &cur_node);
1341 if (!n)
1342 {
1343 tgt->list[i].key = NULL;
1344 tgt->list[i].offset = OFFSET_POINTER;
1345 continue;
1346 }
1347 }
1348 else
1349 n = splay_tree_lookup (mem_map, &cur_node);
1350 if (n && n->refcount != REFCOUNT_LINK)
1351 {
1352 int always_to_cnt = 0;
1353 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1354 {
1355 bool has_nullptr = false;
1356 size_t j;
1357 for (j = 0; j < n->tgt->list_count; j++)
1358 if (n->tgt->list[j].key == n)
1359 {
1360 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1361 break;
1362 }
1363 if (n->tgt->list_count == 0)
1364 {
1365 /* 'declare target'; assume has_nullptr; it could also be
1366 a statically assigned pointer, but then it should point to
1367 the equivalent variable on the host. */
1368 assert (n->refcount == REFCOUNT_INFINITY);
1369 has_nullptr = true;
1370 }
1371 else
1372 assert (j < n->tgt->list_count);
1373 /* Re-map the data if there is an 'always' modifier, or if a
1374 null pointer was there and now a nonnull has been found; that
1375 permits transparent re-mapping for Fortran array descriptors
1376 which were previously mapped unallocated. */
1377 for (j = i + 1; j < mapnum; j++)
1378 {
1379 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1380 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1381 && (!has_nullptr
1382 || !GOMP_MAP_POINTER_P (ptr_kind)
1383 || *(void **) hostaddrs[j] == NULL))
1384 break;
1385 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1386 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1387 > cur_node.host_end))
1388 break;
1389 else
1390 {
1391 has_always_ptrset = true;
1392 ++always_to_cnt;
1393 }
1394 }
1395 }
1396 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1397 kind & typemask, always_to_cnt > 0, implicit,
1398 NULL, refcount_set);
1399 i += always_to_cnt;
1400 }
1401 else
1402 {
1403 tgt->list[i].key = NULL;
1404
1405 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1406 {
1407 /* Not present, hence skip the entry, including its MAP_POINTER
1408 if one follows. */
1409 tgt->list[i].offset = OFFSET_INLINED;
1410 if (i + 1 < mapnum
1411 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1412 == GOMP_MAP_POINTER))
1413 {
1414 ++i;
1415 tgt->list[i].key = NULL;
1416 tgt->list[i].offset = 0;
1417 }
1418 continue;
1419 }
1420 size_t align = (size_t) 1 << (kind >> rshift);
1421 not_found_cnt++;
1422 if (tgt_align < align)
1423 tgt_align = align;
1424 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1425 if (!aq
1426 && gomp_to_device_kind_p (kind & typemask))
1427 gomp_coalesce_buf_add (&cbuf, tgt_size,
1428 cur_node.host_end - cur_node.host_start);
1429 tgt_size += cur_node.host_end - cur_node.host_start;
1430 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1431 {
1432 size_t j;
1433 int kind;
1434 for (j = i + 1; j < mapnum; j++)
1435 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1436 kinds, j)) & typemask))
1437 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1438 break;
1439 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1440 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1441 > cur_node.host_end))
1442 break;
1443 else
1444 {
1445 tgt->list[j].key = NULL;
1446 i++;
1447 }
1448 }
1449 }
1450 }
1451
1452 /* For non-contiguous arrays. Each data row is one target item, separated
1453 from the normal map clause items, hence we order them after mapnum. */
1454 if (nca_info)
1455 {
1456 struct target_var_desc *next_var_desc = &tgt->list[mapnum];
1457 for (i = 0; i < nca_info->num_ncarray; i++)
1458 {
1459 struct goacc_ncarray *nca = &nca_info->ncarray[i];
1460 int kind = get_kind (short_mapkind, kinds, nca->map_index);
1461 size_t align = (size_t) 1 << (kind >> rshift);
1462 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1463 tgt_size += nca->ptrblock_size;
1464
1465 for (size_t j = 0; j < nca->data_row_num; j++)
1466 {
1467 struct target_var_desc *row_desc = next_var_desc++;
1468 void *row = nca->data_rows[j];
1469 cur_node.host_start = (uintptr_t) row;
1470 cur_node.host_end = cur_node.host_start + nca->data_row_size;
1471 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1472 if (n)
1473 {
1474 assert (n->refcount != REFCOUNT_LINK);
1475 gomp_map_vars_existing (devicep, aq, n, &cur_node, row_desc,
1476 kind & typemask, false, false,
1477 /* TODO: cbuf? */ NULL,
1478 refcount_set);
1479 }
1480 else
1481 {
1482 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1483 tgt_size += nca->data_row_size;
1484 not_found_cnt++;
1485 }
1486 }
1487 }
1488 assert (next_var_desc == &tgt->list[mapnum + nca_info->num_data_rows]);
1489 }
1490
1491 if (devaddrs)
1492 {
1493 if (mapnum != 1)
1494 {
1495 gomp_mutex_unlock (&devicep->lock);
1496 gomp_fatal ("unexpected aggregation");
1497 }
1498 tgt->to_free = devaddrs[0];
1499 tgt->tgt_start = (uintptr_t) tgt->to_free;
1500 tgt->tgt_end = tgt->tgt_start + sizes[0];
1501 }
1502 else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
1503 {
1504 /* Allocate tgt_align aligned tgt_size block of memory. */
1505 /* FIXME: Perhaps change interface to allocate properly aligned
1506 memory. */
1507 tgt->to_free = devicep->alloc_func (devicep->target_id,
1508 tgt_size + tgt_align - 1);
1509 if (!tgt->to_free)
1510 {
1511 gomp_mutex_unlock (&devicep->lock);
1512 gomp_fatal ("device memory allocation failed");
1513 }
1514
1515 tgt->tgt_start = (uintptr_t) tgt->to_free;
1516 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1517 tgt->tgt_end = tgt->tgt_start + tgt_size;
1518
1519 if (cbuf.use_cnt == 1)
1520 cbuf.chunk_cnt--;
1521 if (cbuf.chunk_cnt > 0)
1522 {
1523 size_t sz
1524 = cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start;
1525 if (always_pinned_mode)
1526 {
1527 cbuf.buf = gomp_page_locked_host_alloc_dev (devicep, sz, false);
1528 if (!cbuf.buf)
1529 {
1530 gomp_mutex_unlock (&devicep->lock);
1531 exit (EXIT_FAILURE);
1532 }
1533 }
1534 else
1535 cbuf.buf = malloc (sz);
1536 if (cbuf.buf)
1537 {
1538 cbuf.tgt = tgt;
1539 cbufp = &cbuf;
1540 }
1541 }
1542 }
1543 else
1544 {
1545 tgt->to_free = NULL;
1546 tgt->tgt_start = 0;
1547 tgt->tgt_end = 0;
1548 }
1549
1550 tgt_size = 0;
1551 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1552 tgt_size = mapnum * sizeof (void *);
1553
1554 tgt->array = NULL;
1555 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1556 {
1557 if (not_found_cnt)
1558 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1559 splay_tree_node array = tgt->array;
1560 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1561 uintptr_t field_tgt_base = 0;
1562 splay_tree_key field_tgt_structelem_first = NULL;
1563
1564 for (i = 0; i < mapnum; i++)
1565 if (has_always_ptrset
1566 && tgt->list[i].key
1567 && (get_kind (short_mapkind, kinds, i) & typemask)
1568 == GOMP_MAP_TO_PSET)
1569 {
1570 splay_tree_key k = tgt->list[i].key;
1571 bool has_nullptr = false;
1572 size_t j;
1573 for (j = 0; j < k->tgt->list_count; j++)
1574 if (k->tgt->list[j].key == k)
1575 {
1576 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1577 break;
1578 }
1579 if (k->tgt->list_count == 0)
1580 has_nullptr = true;
1581 else
1582 assert (j < k->tgt->list_count);
1583
1584 tgt->list[i].has_null_ptr_assoc = false;
1585 for (j = i + 1; j < mapnum; j++)
1586 {
1587 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1588 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1589 && (!has_nullptr
1590 || !GOMP_MAP_POINTER_P (ptr_kind)
1591 || *(void **) hostaddrs[j] == NULL))
1592 break;
1593 else if ((uintptr_t) hostaddrs[j] < k->host_start
1594 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1595 > k->host_end))
1596 break;
1597 else
1598 {
1599 if (*(void **) hostaddrs[j] == NULL)
1600 tgt->list[i].has_null_ptr_assoc = true;
1601 tgt->list[j].key = k;
1602 tgt->list[j].copy_from = false;
1603 tgt->list[j].always_copy_from = false;
1604 tgt->list[j].is_attach = false;
1605 gomp_increment_refcount (k, refcount_set);
1606 gomp_map_pointer (k->tgt, aq,
1607 (uintptr_t) *(void **) hostaddrs[j],
1608 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1609 - k->host_start),
1610 sizes[j], cbufp, false);
1611 }
1612 }
1613 i = j - 1;
1614 }
1615 else if (tgt->list[i].key == NULL)
1616 {
1617 int kind = get_kind (short_mapkind, kinds, i);
1618 bool implicit = get_implicit (short_mapkind, kinds, i);
1619 if (hostaddrs[i] == NULL)
1620 continue;
1621 if (tgt->list[i].offset == OFFSET_USM)
1622 continue;
1623 switch (kind & typemask)
1624 {
1625 size_t align, len, first, last;
1626 splay_tree_key n;
1627 case GOMP_MAP_FIRSTPRIVATE:
1628 align = (size_t) 1 << (kind >> rshift);
1629 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1630 tgt->list[i].offset = tgt_size;
1631 len = sizes[i];
1632 gomp_copy_host2dev (devicep, aq,
1633 (void *) (tgt->tgt_start + tgt_size),
1634 (void *) hostaddrs[i], len, false, cbufp);
1635 /* Save device address in hostaddr to permit later availability
1636 when doing a deep-firstprivate with pointer attach. */
1637 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1638 tgt_size += len;
1639
1640 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1641 firstprivate to hostaddrs[i+1], which is assumed to contain a
1642 device address. */
1643 if (i + 1 < mapnum
1644 && (GOMP_MAP_ATTACH
1645 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1646 {
1647 uintptr_t target = (uintptr_t) hostaddrs[i];
1648 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1649 /* Per
1650 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1651 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1652 this probably needs revision for 'aq' usage. */
1653 assert (!aq);
1654 gomp_copy_host2dev (devicep, aq, devptr, &target,
1655 sizeof (void *), false, cbufp);
1656 ++i;
1657 }
1658 continue;
1659 case GOMP_MAP_FIRSTPRIVATE_INT:
1660 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1661 continue;
1662 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1663 /* The OpenACC 'host_data' construct only allows 'use_device'
1664 "mapping" clauses, so in the first loop, 'not_found_cnt'
1665 must always have been zero, so all OpenACC 'use_device'
1666 clauses have already been handled. (We can only easily test
1667 'use_device' with 'if_present' clause here.) */
1668 assert (tgt->list[i].offset == OFFSET_INLINED);
1669 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1670 code conceptually simple, similar to the first loop. */
1671 case GOMP_MAP_USE_DEVICE_PTR:
1672 if (tgt->list[i].offset == 0)
1673 {
1674 cur_node.host_start = (uintptr_t) hostaddrs[i];
1675 cur_node.host_end = cur_node.host_start;
1676 n = gomp_map_lookup (mem_map, &cur_node);
1677 if (n != NULL)
1678 {
1679 cur_node.host_start -= n->host_start;
1680 hostaddrs[i]
1681 = (void *) (n->tgt->tgt_start + n->tgt_offset
1682 + cur_node.host_start);
1683 }
1684 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1685 {
1686 gomp_mutex_unlock (&devicep->lock);
1687 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1688 }
1689 else if ((kind & typemask)
1690 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1691 /* If not present, continue using the host address. */
1692 ;
1693 else
1694 __builtin_unreachable ();
1695 tgt->list[i].offset = OFFSET_INLINED;
1696 }
1697 continue;
1698 case GOMP_MAP_STRUCT:
1699 first = i + 1;
1700 last = i + sizes[i];
1701 cur_node.host_start = (uintptr_t) hostaddrs[i];
1702 cur_node.host_end = (uintptr_t) hostaddrs[last]
1703 + sizes[last];
1704 if (tgt->list[first].key != NULL)
1705 continue;
1706 n = splay_tree_lookup (mem_map, &cur_node);
1707 if (n == NULL)
1708 {
1709 size_t align = (size_t) 1 << (kind >> rshift);
1710 tgt_size -= (uintptr_t) hostaddrs[first]
1711 - (uintptr_t) hostaddrs[i];
1712 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1713 tgt_size += (uintptr_t) hostaddrs[first]
1714 - (uintptr_t) hostaddrs[i];
1715 field_tgt_base = (uintptr_t) hostaddrs[first];
1716 field_tgt_offset = tgt_size;
1717 field_tgt_clear = last;
1718 field_tgt_structelem_first = NULL;
1719 tgt_size += cur_node.host_end
1720 - (uintptr_t) hostaddrs[first];
1721 continue;
1722 }
1723 for (i = first; i <= last; i++)
1724 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1725 sizes, kinds, cbufp, refcount_set);
1726 i--;
1727 continue;
1728 case GOMP_MAP_ALWAYS_POINTER:
1729 cur_node.host_start = (uintptr_t) hostaddrs[i];
1730 cur_node.host_end = cur_node.host_start + sizeof (void *);
1731 n = splay_tree_lookup (mem_map, &cur_node);
1732 if (n == NULL
1733 || n->host_start > cur_node.host_start
1734 || n->host_end < cur_node.host_end)
1735 {
1736 gomp_mutex_unlock (&devicep->lock);
1737 gomp_fatal ("always pointer not mapped");
1738 }
1739 if (i > 0
1740 && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1741 != GOMP_MAP_ALWAYS_POINTER))
1742 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1743 if (cur_node.tgt_offset)
1744 cur_node.tgt_offset -= sizes[i];
1745 gomp_copy_host2dev (devicep, aq,
1746 (void *) (n->tgt->tgt_start
1747 + n->tgt_offset
1748 + cur_node.host_start
1749 - n->host_start),
1750 (void *) &cur_node.tgt_offset,
1751 sizeof (void *), true, cbufp);
1752 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1753 + cur_node.host_start - n->host_start;
1754 continue;
1755 case GOMP_MAP_IF_PRESENT:
1756 /* Not present - otherwise handled above. Skip over its
1757 MAP_POINTER as well. */
1758 if (i + 1 < mapnum
1759 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1760 == GOMP_MAP_POINTER))
1761 ++i;
1762 continue;
1763 case GOMP_MAP_ATTACH:
1764 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1765 {
1766 cur_node.host_start = (uintptr_t) hostaddrs[i];
1767 cur_node.host_end = cur_node.host_start + sizeof (void *);
1768 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1769 if (n != NULL)
1770 {
1771 tgt->list[i].key = n;
1772 tgt->list[i].offset = cur_node.host_start - n->host_start;
1773 tgt->list[i].length = n->host_end - n->host_start;
1774 tgt->list[i].copy_from = false;
1775 tgt->list[i].always_copy_from = false;
1776 tgt->list[i].is_attach = true;
1777 /* OpenACC 'attach'/'detach' doesn't affect
1778 structured/dynamic reference counts ('n->refcount',
1779 'n->dynamic_refcount'). */
1780
1781 bool zlas
1782 = ((kind & typemask)
1783 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1784 gomp_attach_pointer (devicep, aq, mem_map, n,
1785 (uintptr_t) hostaddrs[i], sizes[i],
1786 cbufp, zlas);
1787 }
1788 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1789 {
1790 gomp_mutex_unlock (&devicep->lock);
1791 gomp_fatal ("outer struct not mapped for attach");
1792 }
1793 continue;
1794 }
1795 default:
1796 if (tgt->list[i].offset == OFFSET_INLINED
1797 && !array)
1798 continue;
1799 break;
1800 }
1801
1802 if (GOMP_MAP_NONCONTIG_ARRAY_P (kind & typemask))
1803 {
1804 tgt->list[i].key = &array->key;
1805 tgt->list[i].key->tgt = tgt;
1806 array++;
1807 continue;
1808 }
1809
1810 splay_tree_key k = &array->key;
1811 k->host_start = (uintptr_t) hostaddrs[i];
1812 if (!GOMP_MAP_POINTER_P (kind & typemask))
1813 k->host_end = k->host_start + sizes[i];
1814 else
1815 k->host_end = k->host_start + sizeof (void *);
1816 splay_tree_key n = splay_tree_lookup (mem_map, k);
1817 if (n && n->refcount != REFCOUNT_LINK)
1818 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1819 kind & typemask, false, implicit, cbufp,
1820 refcount_set);
1821 else
1822 {
1823 k->aux = NULL;
1824 if (n && n->refcount == REFCOUNT_LINK)
1825 {
1826 /* Replace target address of the pointer with target address
1827 of mapped object in the splay tree. */
1828 splay_tree_remove (mem_map, n);
1829 k->aux
1830 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1831 k->aux->link_key = n;
1832 }
1833 size_t align = (size_t) 1 << (kind >> rshift);
1834 tgt->list[i].key = k;
1835 k->tgt = tgt;
1836 k->refcount = 0;
1837 k->dynamic_refcount = 0;
1838 k->page_locked_host_p = false;
1839 if (always_pinned_mode)
1840 {
1841 void *ptr = (void *) k->host_start;
1842 size_t size = k->host_end - k->host_start;
1843 int page_locked_host_p = 0;
1844 if (size != 0)
1845 page_locked_host_p = gomp_page_locked_host_register_dev
1846 (devicep, ptr, size, kind & typemask);
1847 if (page_locked_host_p < 0)
1848 {
1849 gomp_mutex_unlock (&devicep->lock);
1850 exit (EXIT_FAILURE);
1851 }
1852 if (page_locked_host_p)
1853 k->page_locked_host_p = true;
1854 }
1855 if (field_tgt_clear != FIELD_TGT_EMPTY)
1856 {
1857 k->tgt_offset = k->host_start - field_tgt_base
1858 + field_tgt_offset;
1859 if (openmp_p)
1860 {
1861 k->refcount = REFCOUNT_STRUCTELEM;
1862 if (field_tgt_structelem_first == NULL)
1863 {
1864 /* Set to first structure element of sequence. */
1865 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1866 field_tgt_structelem_first = k;
1867 }
1868 else
1869 /* Point to refcount of leading element, but do not
1870 increment again. */
1871 k->structelem_refcount_ptr
1872 = &field_tgt_structelem_first->structelem_refcount;
1873
1874 if (i == field_tgt_clear)
1875 {
1876 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1877 field_tgt_structelem_first = NULL;
1878 }
1879 }
1880 if (i == field_tgt_clear)
1881 field_tgt_clear = FIELD_TGT_EMPTY;
1882 }
1883 else
1884 {
1885 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1886 k->tgt_offset = tgt_size;
1887 tgt_size += k->host_end - k->host_start;
1888 }
1889 /* First increment, from 0 to 1. gomp_increment_refcount
1890 encapsulates the different increment cases, so use this
1891 instead of directly setting 1 during initialization. */
1892 gomp_increment_refcount (k, refcount_set);
1893
1894 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1895 tgt->list[i].always_copy_from
1896 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1897 tgt->list[i].is_attach = false;
1898 tgt->list[i].offset = 0;
1899 tgt->list[i].length = k->host_end - k->host_start;
1900 tgt->refcount++;
1901 array->left = NULL;
1902 array->right = NULL;
1903 splay_tree_insert (mem_map, array);
1904 switch (kind & typemask)
1905 {
1906 case GOMP_MAP_ALLOC:
1907 case GOMP_MAP_FROM:
1908 case GOMP_MAP_FORCE_ALLOC:
1909 case GOMP_MAP_FORCE_FROM:
1910 case GOMP_MAP_ALWAYS_FROM:
1911 break;
1912 case GOMP_MAP_TO:
1913 case GOMP_MAP_TOFROM:
1914 case GOMP_MAP_FORCE_TO:
1915 case GOMP_MAP_FORCE_TOFROM:
1916 case GOMP_MAP_ALWAYS_TO:
1917 case GOMP_MAP_ALWAYS_TOFROM:
1918 gomp_copy_host2dev (devicep, aq,
1919 (void *) (tgt->tgt_start
1920 + k->tgt_offset),
1921 (void *) k->host_start,
1922 k->host_end - k->host_start,
1923 false, cbufp);
1924 break;
1925 case GOMP_MAP_POINTER:
1926 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1927 gomp_map_pointer
1928 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1929 k->tgt_offset, sizes[i], cbufp,
1930 ((kind & typemask)
1931 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1932 break;
1933 case GOMP_MAP_TO_PSET:
1934 gomp_copy_host2dev (devicep, aq,
1935 (void *) (tgt->tgt_start
1936 + k->tgt_offset),
1937 (void *) k->host_start,
1938 k->host_end - k->host_start,
1939 false, cbufp);
1940 tgt->list[i].has_null_ptr_assoc = false;
1941
1942 for (j = i + 1; j < mapnum; j++)
1943 {
1944 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1945 & typemask);
1946 if (!GOMP_MAP_POINTER_P (ptr_kind)
1947 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1948 break;
1949 else if ((uintptr_t) hostaddrs[j] < k->host_start
1950 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1951 > k->host_end))
1952 break;
1953 else
1954 {
1955 tgt->list[j].key = k;
1956 tgt->list[j].copy_from = false;
1957 tgt->list[j].always_copy_from = false;
1958 tgt->list[j].is_attach = false;
1959 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1960 /* For OpenMP, the use of refcount_sets causes
1961 errors if we set k->refcount = 1 above but also
1962 increment it again here, for decrementing will
1963 not properly match, since we decrement only once
1964 for each key's refcount. Therefore avoid this
1965 increment for OpenMP constructs. */
1966 if (!openmp_p)
1967 gomp_increment_refcount (k, refcount_set);
1968 gomp_map_pointer (tgt, aq,
1969 (uintptr_t) *(void **) hostaddrs[j],
1970 k->tgt_offset
1971 + ((uintptr_t) hostaddrs[j]
1972 - k->host_start),
1973 sizes[j], cbufp, false);
1974 }
1975 }
1976 i = j - 1;
1977 break;
1978 case GOMP_MAP_FORCE_PRESENT:
1979 {
1980 /* We already looked up the memory region above and it
1981 was missing. */
1982 size_t size = k->host_end - k->host_start;
1983 gomp_mutex_unlock (&devicep->lock);
1984 #ifdef HAVE_INTTYPES_H
1985 gomp_fatal ("present clause: !acc_is_present (%p, "
1986 "%"PRIu64" (0x%"PRIx64"))",
1987 (void *) k->host_start,
1988 (uint64_t) size, (uint64_t) size);
1989 #else
1990 gomp_fatal ("present clause: !acc_is_present (%p, "
1991 "%lu (0x%lx))", (void *) k->host_start,
1992 (unsigned long) size, (unsigned long) size);
1993 #endif
1994 }
1995 break;
1996 case GOMP_MAP_PRESENT_ALLOC:
1997 case GOMP_MAP_PRESENT_TO:
1998 case GOMP_MAP_PRESENT_FROM:
1999 case GOMP_MAP_PRESENT_TOFROM:
2000 case GOMP_MAP_ALWAYS_PRESENT_TO:
2001 case GOMP_MAP_ALWAYS_PRESENT_FROM:
2002 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
2003 /* We already looked up the memory region above and it
2004 was missing. */
2005 gomp_mutex_unlock (&devicep->lock);
2006 gomp_fatal ("present clause: !omp_target_is_present "
2007 "(%p, %d)",
2008 (void *) k->host_start, devicep->target_id);
2009 break;
2010 case GOMP_MAP_FORCE_DEVICEPTR:
2011 assert (k->host_end - k->host_start == sizeof (void *));
2012 gomp_copy_host2dev (devicep, aq,
2013 (void *) (tgt->tgt_start
2014 + k->tgt_offset),
2015 (void *) k->host_start,
2016 sizeof (void *), false, cbufp);
2017 break;
2018 default:
2019 gomp_mutex_unlock (&devicep->lock);
2020 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
2021 kind);
2022 }
2023
2024 if (k->aux && k->aux->link_key)
2025 {
2026 /* Set link pointer on target to the device address of the
2027 mapped object. */
2028 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
2029 /* We intentionally do not use coalescing here, as it's not
2030 data allocated by the current call to this function. */
2031 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
2032 &tgt_addr, sizeof (void *), true, NULL);
2033 }
2034 array++;
2035 }
2036 }
2037
2038 /* Processing of non-contiguous array rows. */
2039 if (nca_info)
2040 {
2041 struct target_var_desc *next_var_desc = &tgt->list[mapnum];
2042 for (i = 0; i < nca_info->num_ncarray; i++)
2043 {
2044 struct goacc_ncarray *nca = &nca_info->ncarray[i];
2045 int kind = get_kind (short_mapkind, kinds, nca->map_index);
2046 size_t align = (size_t) 1 << (kind >> rshift);
2047 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2048
2049 assert (nca->ptr == hostaddrs[nca->map_index]);
2050
2051 /* For the map of the non-contiguous array itself, adjust so that
2052 the passed device address points to the beginning of the
2053 ptrblock. Remember to adjust the first-dimension's bias here. */
2054 tgt->list[nca->map_index].key->tgt_offset
2055 = tgt_size - nca->descr->dims[0].base;
2056
2057 void *target_ptrblock = (void*) tgt->tgt_start + tgt_size;
2058 tgt_size += nca->ptrblock_size;
2059
2060 /* Add splay key for each data row in current non-contiguous
2061 array. */
2062 for (size_t j = 0; j < nca->data_row_num; j++)
2063 {
2064 struct target_var_desc *row_desc = next_var_desc++;
2065 void *row = nca->data_rows[j];
2066 cur_node.host_start = (uintptr_t) row;
2067 cur_node.host_end = cur_node.host_start + nca->data_row_size;
2068 splay_tree_key k = splay_tree_lookup (mem_map, &cur_node);
2069 if (k)
2070 {
2071 assert (k->refcount != REFCOUNT_LINK);
2072 gomp_map_vars_existing (devicep, aq, k, &cur_node, row_desc,
2073 kind & typemask, false, false,
2074 cbufp, refcount_set);
2075 }
2076 else
2077 {
2078 tgt->refcount++;
2079 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2080
2081 k = &array->key;
2082 k->host_start = (uintptr_t) row;
2083 k->host_end = k->host_start + nca->data_row_size;
2084
2085 k->tgt = tgt;
2086 k->refcount = 1;
2087 k->dynamic_refcount = 0;
2088 k->aux = NULL;
2089 k->tgt_offset = tgt_size;
2090
2091 tgt_size += nca->data_row_size;
2092
2093 row_desc->key = k;
2094 row_desc->copy_from
2095 = GOMP_MAP_COPY_FROM_P (kind & typemask);
2096 row_desc->always_copy_from
2097 = GOMP_MAP_COPY_FROM_P (kind & typemask);
2098 row_desc->is_attach = false;
2099 row_desc->offset = 0;
2100 row_desc->length = nca->data_row_size;
2101
2102 array->left = NULL;
2103 array->right = NULL;
2104 splay_tree_insert (mem_map, array);
2105
2106 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2107 gomp_copy_host2dev (devicep, aq,
2108 (void *) tgt->tgt_start + k->tgt_offset,
2109 (void *) k->host_start,
2110 nca->data_row_size, false,
2111 cbufp);
2112 array++;
2113 }
2114 nca->tgt_data_rows[j]
2115 = (void *) (k->tgt->tgt_start + k->tgt_offset);
2116 }
2117
2118 /* Now that we have the target memory allocated and the target offsets
2119 of all row blocks assigned and calculated, we can construct the
2120 accelerator-side ptrblock and copy it in. */
2121 if (nca->ptrblock_size)
2122 {
2123 void *ptrblock = gomp_malloc (nca->ptrblock_size);
2124 goacc_noncontig_array_create_ptrblock
2125 (nca, ptrblock, target_ptrblock);
2126 gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock,
2127 nca->ptrblock_size, false, cbufp);
2128 if (aq)
2129 /* Free once the transfer has completed. */
2130 devicep->openacc.async.queue_callback_func (aq, free, ptrblock);
2131 else
2132 free (ptrblock);
2133 }
2134 }
2135 }
2136 }
2137
2138 if (pragma_kind & GOMP_MAP_VARS_TARGET)
2139 {
2140 for (i = 0; i < mapnum; i++)
2141 {
2142 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
2143 gomp_copy_host2dev (devicep, aq,
2144 (void *) (tgt->tgt_start + i * sizeof (void *)),
2145 (void *) &cur_node.tgt_offset, sizeof (void *),
2146 true, cbufp);
2147 }
2148 }
2149
2150 if (cbufp)
2151 {
2152 long c = 0;
2153 for (c = 0; c < cbuf.chunk_cnt; ++c)
2154 gomp_copy_host2dev (devicep, aq,
2155 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
2156 (char *) cbuf.buf + (cbuf.chunks[c].start
2157 - cbuf.chunks[0].start),
2158 cbuf.chunks[c].end - cbuf.chunks[c].start,
2159 false, NULL);
2160 if (always_pinned_mode)
2161 {
2162 if (!gomp_page_locked_host_free_dev (devicep, cbuf.buf, aq))
2163 {
2164 gomp_mutex_unlock (&devicep->lock);
2165 exit (EXIT_FAILURE);
2166 }
2167 }
2168 else
2169 {
2170 if (aq)
2171 /* Free once the transfer has completed. */
2172 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
2173 else
2174 free (cbuf.buf);
2175 }
2176 cbuf.buf = NULL;
2177 cbufp = NULL;
2178 }
2179
2180 /* If the variable from "omp target enter data" map-list was already mapped,
2181 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
2182 gomp_exit_data. */
2183 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
2184 {
2185 free (tgt);
2186 tgt = NULL;
2187 }
2188
2189 gomp_mutex_unlock (&devicep->lock);
2190 return tgt;
2191 }
2192
2193 static struct target_mem_desc *
2194 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
2195 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
2196 bool short_mapkind, htab_t *refcount_set,
2197 enum gomp_map_vars_kind pragma_kind)
2198 {
2199 /* This management of a local refcount_set is for convenience of callers
2200 who do not share a refcount_set over multiple map/unmap uses. */
2201 htab_t local_refcount_set = NULL;
2202 if (refcount_set == NULL)
2203 {
2204 local_refcount_set = htab_create (mapnum);
2205 refcount_set = &local_refcount_set;
2206 }
2207
2208 struct target_mem_desc *tgt;
2209 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
2210 sizes, kinds, NULL, short_mapkind,
2211 refcount_set, pragma_kind);
2212 if (local_refcount_set)
2213 htab_free (local_refcount_set);
2214
2215 return tgt;
2216 }
2217
2218 attribute_hidden struct target_mem_desc *
2219 goacc_map_vars (struct gomp_device_descr *devicep,
2220 struct goacc_asyncqueue *aq, size_t mapnum,
2221 void **hostaddrs, void **devaddrs, size_t *sizes,
2222 void *kinds, struct goacc_ncarray_info *nca_info,
2223 bool short_mapkind,
2224 enum gomp_map_vars_kind pragma_kind)
2225 {
2226 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
2227 sizes, kinds, nca_info, short_mapkind, NULL,
2228 GOMP_MAP_VARS_OPENACC | pragma_kind);
2229 }
2230
2231 static void
2232 gomp_unmap_tgt (struct target_mem_desc *tgt)
2233 {
2234 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
2235 if (tgt->tgt_end)
2236 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
2237
2238 free (tgt->array);
2239 free (tgt);
2240 }
2241
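/* Drop one reference to the target memory descriptor PTR (really a
   'struct target_mem_desc *').  Once the count reaches zero, the device
   memory and the descriptor itself are released via 'gomp_unmap_tgt'.
   Returns true iff the descriptor was actually unmapped.  */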
2242 static bool
2243 gomp_unref_tgt (void *ptr)
2244 {
2245 bool is_tgt_unmapped = false;
2246
2247 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
2248
2249 if (tgt->refcount > 1)
2250 tgt->refcount--;
2251 else
2252 {
2253 gomp_unmap_tgt (tgt);
2254 is_tgt_unmapped = true;
2255 }
2256
2257 return is_tgt_unmapped;
2258 }
2259
2260 static void
2261 gomp_unref_tgt_void (void *ptr)
2262 {
2263 (void) gomp_unref_tgt (ptr);
2264 }
2265
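/* Remove key K from splay tree SP.  If K carries auxiliary data, re-insert
   the 'omp declare target link' key it had displaced (if any), and free the
   attach counts and the aux block itself.  */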
2266 static void
2267 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
2268 {
2269 splay_tree_remove (sp, k);
2270 if (k->aux)
2271 {
2272 if (k->aux->link_key)
2273 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
2274 if (k->aux->attach_count)
2275 free (k->aux->attach_count);
2276 free (k->aux);
2277 k->aux = NULL;
2278 }
2279 }
2280
2281 static inline __attribute__((always_inline)) bool
2282 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
2283 struct goacc_asyncqueue *aq)
2284 {
2285 bool is_tgt_unmapped = false;
2286
2287 if (REFCOUNT_STRUCTELEM_P (k->refcount))
2288 {
2289 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
2290 /* Infer the splay_tree_key of the first structelem key using the
2291 pointer to the first structelem_refcount. */
2292 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
2293 - offsetof (struct splay_tree_key_s,
2294 structelem_refcount));
2295 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
2296
2297 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
2298 with the splay_tree_keys embedded inside. */
2299 splay_tree_node node =
2300 (splay_tree_node) ((char *) k
2301 - offsetof (struct splay_tree_node_s, key));
2302 while (true)
2303 {
2304 /* Starting from the _FIRST key, and continue for all following
2305 sibling keys. */
2306 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2307
2308 if (always_pinned_mode)
2309 {
2310 if (k->page_locked_host_p)
2311 {
2312 void *ptr = (void *) k->host_start;
2313 size_t size = k->host_end - k->host_start;
2314 if (!gomp_page_locked_host_unregister_dev (devicep,
2315 ptr, size, aq))
2316 {
2317 gomp_mutex_unlock (&devicep->lock);
2318 exit (EXIT_FAILURE);
2319 }
2320 k->page_locked_host_p = false;
2321 }
2322 }
2323
2324 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
2325 break;
2326 else
2327 k = &(++node)->key;
2328 }
2329 }
2330 else
2331 {
2332 gomp_remove_splay_tree_key (&devicep->mem_map, k);
2333
2334 if (always_pinned_mode)
2335 {
2336 if (k->page_locked_host_p)
2337 {
2338 void *ptr = (void *) k->host_start;
2339 size_t size = k->host_end - k->host_start;
2340 if (!gomp_page_locked_host_unregister_dev (devicep,
2341 ptr, size, aq))
2342 {
2343 gomp_mutex_unlock (&devicep->lock);
2344 exit (EXIT_FAILURE);
2345 }
2346 k->page_locked_host_p = false;
2347 }
2348 }
2349 }
2350
2351 if (aq)
2352 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2353 (void *) k->tgt);
2354 else
2355 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
2356 return is_tgt_unmapped;
2357 }
2358
2359 attribute_hidden bool
2360 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
2361 {
2362 return gomp_remove_var_internal (devicep, k, NULL);
2363 }
2364
2365 /* Remove a variable asynchronously. This actually removes the variable
2366 mapping immediately, but retains the linked target_mem_desc until the
2367 asynchronous operation has completed (as it may still refer to target
2368 memory). The device lock must be held before entry, and remains locked on
2369 exit. */
2370
2371 attribute_hidden void
2372 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
2373 struct goacc_asyncqueue *aq)
2374 {
2375 (void) gomp_remove_var_internal (devicep, k, aq);
2376 }
2377
2378 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2379 variables back from device to host; if it is false, it is assumed that this
2380 has been done already. */
2381
2382 static inline __attribute__((always_inline)) void
2383 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
2384 htab_t *refcount_set, struct goacc_asyncqueue *aq)
2385 {
2386 struct gomp_device_descr *devicep = tgt->device_descr;
2387
2388 if (tgt->list_count == 0)
2389 {
2390 free (tgt);
2391 return;
2392 }
2393
2394 gomp_mutex_lock (&devicep->lock);
2395 if (devicep->state == GOMP_DEVICE_FINALIZED)
2396 {
2397 gomp_mutex_unlock (&devicep->lock);
2398 free (tgt->array);
2399 free (tgt);
2400 return;
2401 }
2402
2403 size_t i;
2404
2405 /* We must perform detachments before any copies back to the host. */
2406 for (i = 0; i < tgt->list_count; i++)
2407 {
2408 splay_tree_key k = tgt->list[i].key;
2409
2410 if (k != NULL && tgt->list[i].is_attach)
2411 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2412 + tgt->list[i].offset,
2413 false, NULL);
2414 }
2415
2416 size_t nrmvars = 0;
2417 splay_tree_key remove_vars[tgt->list_count];
2418
2419 for (i = 0; i < tgt->list_count; i++)
2420 {
2421 splay_tree_key k = tgt->list[i].key;
2422 if (k == NULL)
2423 continue;
2424
2425 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2426 counts ('n->refcount', 'n->dynamic_refcount'). */
2427 if (tgt->list[i].is_attach)
2428 continue;
2429
2430 bool do_copy, do_remove;
2431 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2432
2433 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2434 || tgt->list[i].always_copy_from)
2435 gomp_copy_dev2host (devicep, aq,
2436 (void *) (k->host_start + tgt->list[i].offset),
2437 (void *) (k->tgt->tgt_start + k->tgt_offset
2438 + tgt->list[i].offset),
2439 tgt->list[i].length);
2440 /* Queue all removals together for processing below.
2441 We may unregister page-locked host memory only after all device to
2442 host memory transfers have completed.
2443 See also 'gomp_exit_data'. */
2444 if (do_remove)
2445 remove_vars[nrmvars++] = k;
2446 }
2447
2448 for (i = 0; i < nrmvars; i++)
2449 {
2450 splay_tree_key k = remove_vars[i];
2451 struct target_mem_desc *k_tgt = k->tgt;
2452 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2453 /* It would be bad if TGT got unmapped while we're still iterating over
2454 its LIST_COUNT, and also expect to use it in the following code. */
2455 assert (!is_tgt_unmapped
2456 || k_tgt != tgt);
2457 }
2458
2459 if (aq)
2460 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2461 (void *) tgt);
2462 else
2463 gomp_unref_tgt ((void *) tgt);
2464
2465 gomp_mutex_unlock (&devicep->lock);
2466 }
2467
2468 static void
2469 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2470 htab_t *refcount_set)
2471 {
2472 /* This management of a local refcount_set is for convenience of callers
2473 who do not share a refcount_set over multiple map/unmap uses. */
2474 htab_t local_refcount_set = NULL;
2475 if (refcount_set == NULL)
2476 {
2477 local_refcount_set = htab_create (tgt->list_count);
2478 refcount_set = &local_refcount_set;
2479 }
2480
2481 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2482
2483 if (local_refcount_set)
2484 htab_free (local_refcount_set);
2485 }
2486
2487 attribute_hidden void
2488 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2489 struct goacc_asyncqueue *aq)
2490 {
2491 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2492 }
2493
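/* Implement the data movement for a 'target update'-style operation on
   DEVICEP: for each of the MAPNUM (HOSTADDRS, SIZES, KINDS) tuples that is
   already mapped, copy to and/or from the device according to the mapping
   kind, taking care not to overwrite still-attached pointers.  It is a
   fatal error if an object is only partially mapped, or if it carries a
   'present' modifier but is not mapped at all.  */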
2494 static void
2495 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2496 size_t *sizes, void *kinds, bool short_mapkind)
2497 {
2498 size_t i;
2499 struct splay_tree_key_s cur_node;
2500 const int typemask = short_mapkind ? 0xff : 0x7;
2501
2502 if (!devicep)
2503 return;
2504
2505 if (mapnum == 0)
2506 return;
2507
2508 gomp_mutex_lock (&devicep->lock);
2509 if (devicep->state == GOMP_DEVICE_FINALIZED)
2510 {
2511 gomp_mutex_unlock (&devicep->lock);
2512 return;
2513 }
2514
2515 for (i = 0; i < mapnum; i++)
2516 if (sizes[i])
2517 {
2518 cur_node.host_start = (uintptr_t) hostaddrs[i];
2519 cur_node.host_end = cur_node.host_start + sizes[i];
2520 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2521 if (n)
2522 {
2523 int kind = get_kind (short_mapkind, kinds, i);
2524 if (n->host_start > cur_node.host_start
2525 || n->host_end < cur_node.host_end)
2526 {
2527 gomp_mutex_unlock (&devicep->lock);
2528 gomp_fatal ("Trying to update [%p..%p) object when "
2529 "only [%p..%p) is mapped",
2530 (void *) cur_node.host_start,
2531 (void *) cur_node.host_end,
2532 (void *) n->host_start,
2533 (void *) n->host_end);
2534 }
2535
2536 if (n->aux && n->aux->attach_count)
2537 {
2538 uintptr_t addr = cur_node.host_start;
2539 while (addr < cur_node.host_end)
2540 {
2541 /* We have to be careful not to overwrite still attached
2542 pointers during host<->device updates. */
2543 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2544 if (n->aux->attach_count[i] == 0)
2545 {
2546 void *devaddr = (void *) (n->tgt->tgt_start
2547 + n->tgt_offset
2548 + addr - n->host_start);
2549 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2550 gomp_copy_host2dev (devicep, NULL,
2551 devaddr, (void *) addr,
2552 sizeof (void *), false, NULL);
2553 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2554 gomp_copy_dev2host (devicep, NULL,
2555 (void *) addr, devaddr,
2556 sizeof (void *));
2557 }
2558 addr += sizeof (void *);
2559 }
2560 }
2561 else
2562 {
2563 void *hostaddr = (void *) cur_node.host_start;
2564 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2565 + cur_node.host_start
2566 - n->host_start);
2567 size_t size = cur_node.host_end - cur_node.host_start;
2568
2569 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2570 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2571 false, NULL);
2572 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2573 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2574 }
2575 }
2576 else
2577 {
2578 int kind = get_kind (short_mapkind, kinds, i);
2579
2580 if (GOMP_MAP_PRESENT_P (kind))
2581 {
2582 /* We already looked up the memory region above and it
2583 was missing. */
2584 gomp_mutex_unlock (&devicep->lock);
2585 gomp_fatal ("present clause: !omp_target_is_present "
2586 "(%p, %d)",
2587 (void *) hostaddrs[i], devicep->target_id);
2588 }
2589 }
2590 }
2591 gomp_mutex_unlock (&devicep->lock);
2592 }
2593
2594 static struct gomp_offload_icv_list *
2595 gomp_get_offload_icv_item (int dev_num)
2596 {
2597 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2598 while (l != NULL && l->device_num != dev_num)
2599 l = l->next;
2600
2601 return l;
2602 }
2603
2604 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2605 depending on the device num and the variable hierarchy
2606 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2607 device and thus no item with that device number is contained in
2608 gomp_offload_icv_list, then a new item is created and added to the list. */
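
/* Illustrative sketch of the precedence implemented below, assuming the
   usual _DEV_<n> / _DEV / _ALL environment variable suffixes mentioned
   above: with, say,
       OMP_NUM_TEAMS_DEV_0=4 OMP_NUM_TEAMS_DEV=8 OMP_NUM_TEAMS_ALL=16
   device 0 would get 'nteams == 4', every other device 'nteams == 8', and
   the _ALL value (or finally the library default) applies only when no more
   specific setting exists.  */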
2609
2610 static struct gomp_offload_icvs *
2611 get_gomp_offload_icvs (int dev_num)
2612 {
2613 struct gomp_icv_list *dev
2614 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2615 struct gomp_icv_list *all
2616 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2617 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2618 struct gomp_offload_icv_list *offload_icvs
2619 = gomp_get_offload_icv_item (dev_num);
2620
2621 if (offload_icvs != NULL)
2622 return &offload_icvs->icvs;
2623
2624 struct gomp_offload_icv_list *new;
2625 size_t size = sizeof (struct gomp_offload_icv_list);
2626 if (always_pinned_mode)
2627 {
2628 struct gomp_device_descr *device = &devices[dev_num];
2629 new = gomp_page_locked_host_alloc_dev (device, size, false);
2630 if (!new)
2631 exit (EXIT_FAILURE);
2632 }
2633 else
2634 new = gomp_malloc (size);
2635
2636 new->device_num = dev_num;
2637 new->icvs.device_num = dev_num;
2638 new->next = gomp_offload_icv_list;
2639
2640 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2641 new->icvs.nteams = dev_x->icvs.nteams_var;
2642 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2643 new->icvs.nteams = dev->icvs.nteams_var;
2644 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2645 new->icvs.nteams = all->icvs.nteams_var;
2646 else
2647 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2648
2649 if (dev_x != NULL
2650 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2651 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2652 else if (dev != NULL
2653 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2654 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2655 else if (all != NULL
2656 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2657 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2658 else
2659 new->icvs.teams_thread_limit
2660 = gomp_default_icv_values.teams_thread_limit_var;
2661
2662 if (dev_x != NULL
2663 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2664 new->icvs.default_device = dev_x->icvs.default_device_var;
2665 else if (dev != NULL
2666 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2667 new->icvs.default_device = dev->icvs.default_device_var;
2668 else if (all != NULL
2669 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2670 new->icvs.default_device = all->icvs.default_device_var;
2671 else
2672 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2673
2674 gomp_offload_icv_list = new;
2675 return &new->icvs;
2676 }
2677
2678 /* Load the image pointed to by TARGET_DATA onto the device specified by
2679 DEVICEP, and insert into the splay tree the mapping between addresses from
2680 HOST_TABLE and from the loaded target image. We rely on the host and
2681 device compilers emitting variables and functions in the same order. */
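
/* A sketch of the HOST_TABLE layout consumed below (the identifiers are
   illustrative, not the real linker-provided names):

     function table: addresses only; variable table: (address, size) pairs
     const void *host_table[4] = { funcs_begin, funcs_end,
                                   vars_begin, vars_end };

   'num_funcs' is then the number of function addresses, and 'num_vars' is
   half the number of entries in the variable table.  */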
2682
2683 static void
2684 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2685 const void *host_table, const void *target_data,
2686 bool is_register_lock)
2687 {
2688 gomp_debug (0, "%s: devicep=%p (%s)\n",
2689 __FUNCTION__, devicep, devicep->name);
2690 void **host_func_table = ((void ***) host_table)[0];
2691 void **host_funcs_end = ((void ***) host_table)[1];
2692 void **host_var_table = ((void ***) host_table)[2];
2693 void **host_vars_end = ((void ***) host_table)[3];
2694
2695 /* The func table contains only addresses, the var table contains addresses
2696 and corresponding sizes. */
2697 int num_funcs = host_funcs_end - host_func_table;
2698 int num_vars = (host_vars_end - host_var_table) / 2;
2699
2700 /* Load image to device and get target addresses for the image. */
2701 struct addr_pair *target_table = NULL;
2702 uint64_t *rev_target_fn_table = NULL;
2703 int i, num_target_entries;
2704
2705 /* With reverse offload, insert also target-host addresses. */
2706 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2707
2708 num_target_entries
2709 = devicep->load_image_func (devicep->target_id, version,
2710 target_data, &target_table,
2711 rev_lookup ? &rev_target_fn_table : NULL);
2712
2713 if (num_target_entries != num_funcs + num_vars
2714 /* "+1" due to the additional ICV struct. */
2715 && num_target_entries != num_funcs + num_vars + 1)
2716 {
2717 gomp_mutex_unlock (&devicep->lock);
2718 if (is_register_lock)
2719 gomp_mutex_unlock (&register_lock);
2720 gomp_fatal ("Cannot map target functions or variables"
2721 " (expected %u, have %u)", num_funcs + num_vars,
2722 num_target_entries);
2723 }
2724
2725 /* Insert host-target address mapping into splay tree. */
2726 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2727 /* "+1" due to the additional ICV struct. */
2728 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2729 * sizeof (*tgt->array));
2730 if (rev_target_fn_table)
2731 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2732 else
2733 tgt->rev_array = NULL;
2734 tgt->refcount = REFCOUNT_INFINITY;
2735 tgt->tgt_start = 0;
2736 tgt->tgt_end = 0;
2737 tgt->to_free = NULL;
2738 tgt->prev = NULL;
2739 tgt->list_count = 0;
2740 tgt->device_descr = devicep;
2741 splay_tree_node array = tgt->array;
2742 reverse_splay_tree_node rev_array = tgt->rev_array;
2743
2744 for (i = 0; i < num_funcs; i++)
2745 {
2746 splay_tree_key k = &array->key;
2747 k->host_start = (uintptr_t) host_func_table[i];
2748 k->host_end = k->host_start + 1;
2749 k->tgt = tgt;
2750 k->tgt_offset = target_table[i].start;
2751 k->refcount = REFCOUNT_INFINITY;
2752 k->dynamic_refcount = 0;
2753 k->aux = NULL;
2754 k->page_locked_host_p = false;
2755 array->left = NULL;
2756 array->right = NULL;
2757 splay_tree_insert (&devicep->mem_map, array);
2758 if (rev_target_fn_table)
2759 {
2760 reverse_splay_tree_key k2 = &rev_array->key;
2761 k2->dev = rev_target_fn_table[i];
2762 k2->k = k;
2763 rev_array->left = NULL;
2764 rev_array->right = NULL;
2765 if (k2->dev != 0)
2766 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2767 rev_array++;
2768 }
2769 array++;
2770 }
2771
2772 /* Most significant bit of the size in host and target tables marks
2773 "omp declare target link" variables. */
2774 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2775 const uintptr_t size_mask = ~link_bit;
2776
2777 for (i = 0; i < num_vars; i++)
2778 {
2779 struct addr_pair *target_var = &target_table[num_funcs + i];
2780 uintptr_t target_size = target_var->end - target_var->start;
2781 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2782
2783 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2784 {
2785 gomp_mutex_unlock (&devicep->lock);
2786 if (is_register_lock)
2787 gomp_mutex_unlock (&register_lock);
2788 gomp_fatal ("Cannot map target variables (size mismatch)");
2789 }
2790
2791 splay_tree_key k = &array->key;
2792 k->host_start = (uintptr_t) host_var_table[i * 2];
2793 k->host_end
2794 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2795 k->tgt = tgt;
2796 k->tgt_offset = target_var->start;
2797 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2798 k->dynamic_refcount = 0;
2799 k->aux = NULL;
2800 k->page_locked_host_p = false;
2801 if (always_pinned_mode)
2802 {
2803 void *ptr = (void *) k->host_start;
2804 size_t size = k->host_end - k->host_start;
2805 gomp_debug (0, " var %d: ptr=%p, size=%llu, is_link_var=%d\n",
2806 i, ptr, (unsigned long long) size, is_link_var);
2807 if (!is_link_var)
2808 {
2809 /* '#pragma omp declare target' variables typically are
2810 read/write, but in particular artificial ones, like Fortran
2811 array constructors, may be placed in section '.rodata'.
2812 We don't have the actual mapping kind available here, so we
2813 use a magic number. */
2814 const int kind = -1;
2815 int page_locked_host_p = gomp_page_locked_host_register_dev
2816 (devicep, ptr, size, kind);
2817 if (page_locked_host_p < 0)
2818 {
2819 gomp_mutex_unlock (&devicep->lock);
2820 if (is_register_lock)
2821 gomp_mutex_unlock (&register_lock);
2822 exit (EXIT_FAILURE);
2823 }
2824 if (page_locked_host_p)
2825 k->page_locked_host_p = true;
2826 }
2827 }
2828 array->left = NULL;
2829 array->right = NULL;
2830 splay_tree_insert (&devicep->mem_map, array);
2831 array++;
2832 }
2833
2834 /* The last entry is for the ICVs variable.
2835 Tolerate the case where the plugin does not return those entries. */
2836 if (num_funcs + num_vars < num_target_entries)
2837 {
2838 struct addr_pair *var = &target_table[num_funcs + num_vars];
2839
2840 /* Start address will be non-zero for the ICVs variable if
2841 the variable was found in this image. */
2842 if (var->start != 0)
2843 {
2844 /* The index of the devicep within devices[] is regarded as its
2845 'device number', which is different from the per-device type
2846 devicep->target_id. */
2847 int dev_num = (int) (devicep - &devices[0]);
2848 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2849 if (!icvs)
2850 {
2851 gomp_mutex_unlock (&devicep->lock);
2852 if (is_register_lock)
2853 gomp_mutex_unlock (&register_lock);
2854 gomp_fatal ("'get_gomp_offload_icvs' failed");
2855 }
2856 size_t var_size = var->end - var->start;
2857 if (var_size != sizeof (struct gomp_offload_icvs))
2858 {
2859 gomp_mutex_unlock (&devicep->lock);
2860 if (is_register_lock)
2861 gomp_mutex_unlock (&register_lock);
2862 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2863 "format");
2864 }
2865 /* Copy the ICVs variable into its place in device memory, thereby
2866 actually putting its device number into effect. */
2867 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2868 var_size, false, NULL);
2869 splay_tree_key k = &array->key;
2870 k->host_start = (uintptr_t) icvs;
2871 k->host_end =
2872 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2873 k->tgt = tgt;
2874 k->tgt_offset = var->start;
2875 k->refcount = REFCOUNT_INFINITY;
2876 k->dynamic_refcount = 0;
2877 k->aux = NULL;
2878 /* 'always_pinned_mode' handled via 'get_gomp_offload_icvs'. */
2879 k->page_locked_host_p = always_pinned_mode;
2880 array->left = NULL;
2881 array->right = NULL;
2882 splay_tree_insert (&devicep->mem_map, array);
2883 array++;
2884 }
2885 }
2886
2887 free (target_table);
2888 }
2889
2890 /* Unload the mappings described by TARGET_DATA from device DEVICEP.
2891 The device must be locked. */
2892
2893 static void
2894 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2895 unsigned version,
2896 const void *host_table, const void *target_data)
2897 {
2898 void **host_func_table = ((void ***) host_table)[0];
2899 void **host_funcs_end = ((void ***) host_table)[1];
2900 void **host_var_table = ((void ***) host_table)[2];
2901 void **host_vars_end = ((void ***) host_table)[3];
2902
2903 /* The func table contains only addresses, the var table contains addresses
2904 and corresponding sizes. */
2905 int num_funcs = host_funcs_end - host_func_table;
2906 int num_vars = (host_vars_end - host_var_table) / 2;
2907
2908 struct splay_tree_key_s k;
2909 splay_tree_key node = NULL;
2910
2911 /* Find the mapping at the start of the node array. */
2912 if (num_funcs || num_vars)
2913 {
2914 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2915 : (uintptr_t) host_var_table[0]);
2916 k.host_end = k.host_start + 1;
2917 node = splay_tree_lookup (&devicep->mem_map, &k);
2918 }
2919
2920 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2921 {
2922 gomp_mutex_unlock (&devicep->lock);
2923 gomp_fatal ("image unload fail");
2924 }
2925 if (devicep->mem_map_rev.root)
2926 {
2927 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2928 real allocation. */
2929 assert (node && node->tgt && node->tgt->rev_array);
2930 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2931 free (node->tgt->rev_array);
2932 devicep->mem_map_rev.root = NULL;
2933 }
2934
2935 /* Remove mappings from splay tree. */
2936 int i;
2937 for (i = 0; i < num_funcs; i++)
2938 {
2939 k.host_start = (uintptr_t) host_func_table[i];
2940 k.host_end = k.host_start + 1;
2941 splay_tree_remove (&devicep->mem_map, &k);
2942 }
2943
2944 /* Most significant bit of the size in host and target tables marks
2945 "omp declare target link" variables. */
2946 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2947 const uintptr_t size_mask = ~link_bit;
2948 bool is_tgt_unmapped = false;
2949
2950 for (i = 0; i < num_vars; i++)
2951 {
2952 k.host_start = (uintptr_t) host_var_table[i * 2];
2953 k.host_end
2954 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2955
2956 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2957 splay_tree_remove (&devicep->mem_map, &k);
2958 else
2959 {
2960 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2961 is_tgt_unmapped = gomp_remove_var (devicep, n);
2962 }
2963 }
2964
2965 if (node && !is_tgt_unmapped)
2966 {
2967 free (node->tgt);
2968 free (node);
2969 }
2970 }
2971
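/* Write into BUF (of SIZE bytes) a human-readable, comma-separated list of
   the OpenMP 'requires' clauses set in REQUIRES_MASK; e.g. a mask with both
   GOMP_REQUIRES_UNIFIED_SHARED_MEMORY and GOMP_REQUIRES_REVERSE_OFFLOAD set
   yields "unified_shared_memory, reverse_offload".  */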
2972 static void
2973 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2974 {
2975 char *end = buf + size, *p = buf;
2976 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2977 p += snprintf (p, end - p, "unified_address");
2978 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2979 p += snprintf (p, end - p, "%sunified_shared_memory",
2980 (p == buf ? "" : ", "));
2981 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2982 p += snprintf (p, end - p, "%sreverse_offload",
2983 (p == buf ? "" : ", "));
2984 }
2985
2986 /* This function should be called from every offload image while loading.
2987 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2988 the target, and DATA. */
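
/* For GOMP_VERSION_LIB (version) > 1, DATA is laid out roughly as sketched
   here (the layout is inferred from the unpacking code below):

     data[0]     = (void *) (size_t) <omp requires mask>;
     data[1 ...] = <actual target data>;

   so the first element carries the 'omp requires' mask and TARGET_DATA is
   set to point at the remainder; older images pass the target data
   directly.  */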
2989
2990 void
2991 GOMP_offload_register_ver (unsigned version, const void *host_table,
2992 int target_type, const void *data)
2993 {
2994 int i;
2995
2996 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2997 gomp_fatal ("Library too old for offload (version %u < %u)",
2998 GOMP_VERSION, GOMP_VERSION_LIB (version));
2999
3000 int omp_req;
3001 const void *target_data;
3002 if (GOMP_VERSION_LIB (version) > 1)
3003 {
3004 omp_req = (int) (size_t) ((void **) data)[0];
3005 target_data = &((void **) data)[1];
3006 }
3007 else
3008 {
3009 omp_req = 0;
3010 target_data = data;
3011 }
3012
3013 gomp_mutex_lock (&register_lock);
3014
3015 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
3016 {
3017 char buf1[sizeof ("unified_address, unified_shared_memory, "
3018 "reverse_offload")];
3019 char buf2[sizeof ("unified_address, unified_shared_memory, "
3020 "reverse_offload")];
3021 gomp_requires_to_name (buf2, sizeof (buf2),
3022 omp_req != GOMP_REQUIRES_TARGET_USED
3023 ? omp_req : omp_requires_mask);
3024 if (omp_req != GOMP_REQUIRES_TARGET_USED
3025 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
3026 {
3027 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
3028 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
3029 "in multiple compilation units: '%s' vs. '%s'",
3030 buf1, buf2);
3031 }
3032 else
3033 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
3034 "some compilation units", buf2);
3035 }
3036 omp_requires_mask = omp_req;
3037
3038 /* Load image to all initialized devices. */
3039 for (i = 0; i < num_devices; i++)
3040 {
3041 struct gomp_device_descr *devicep = &devices[i];
3042 gomp_mutex_lock (&devicep->lock);
3043 if (devicep->type == target_type
3044 && devicep->state == GOMP_DEVICE_INITIALIZED)
3045 gomp_load_image_to_device (devicep, version,
3046 host_table, target_data, true);
3047 gomp_mutex_unlock (&devicep->lock);
3048 }
3049
3050 /* Insert image to array of pending images. */
3051 offload_images
3052 = gomp_realloc_unlock (offload_images,
3053 (num_offload_images + 1)
3054 * sizeof (struct offload_image_descr));
3055 offload_images[num_offload_images].version = version;
3056 offload_images[num_offload_images].type = target_type;
3057 offload_images[num_offload_images].host_table = host_table;
3058 offload_images[num_offload_images].target_data = target_data;
3059
3060 num_offload_images++;
3061 gomp_mutex_unlock (&register_lock);
3062 }
3063
3064 /* Legacy entry point. */
3065
3066 void
3067 GOMP_offload_register (const void *host_table, int target_type,
3068 const void *target_data)
3069 {
3070 GOMP_offload_register_ver (0, host_table, target_type, target_data);
3071 }
3072
3073 /* This function should be called from every offload image while unloading.
3074 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
3075 the target, and DATA. */
3076
3077 void
3078 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
3079 int target_type, const void *data)
3080 {
3081 int i;
3082
3083 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
3084 gomp_fatal ("Library too old for offload (version %u < %u)",
3085 GOMP_VERSION, GOMP_VERSION_LIB (version));
3086
3087 const void *target_data;
3088 if (GOMP_VERSION_LIB (version) > 1)
3089 target_data = &((void **) data)[1];
3090 else
3091 target_data = data;
3092
3093 gomp_mutex_lock (&register_lock);
3094
3095 /* Unload image from all initialized devices. */
3096 for (i = 0; i < num_devices; i++)
3097 {
3098 struct gomp_device_descr *devicep = &devices[i];
3099 gomp_mutex_lock (&devicep->lock);
3100 if (devicep->type == target_type
3101 && devicep->state == GOMP_DEVICE_INITIALIZED)
3102 gomp_unload_image_from_device (devicep, version,
3103 host_table, target_data);
3104 gomp_mutex_unlock (&devicep->lock);
3105 }
3106
3107 /* Remove image from array of pending images. */
3108 for (i = 0; i < num_offload_images; i++)
3109 if (offload_images[i].target_data == target_data)
3110 {
3111 offload_images[i] = offload_images[--num_offload_images];
3112 break;
3113 }
3114
3115 gomp_mutex_unlock (&register_lock);
3116 }
3117
3118 /* Legacy entry point. */
3119
3120 void
3121 GOMP_offload_unregister (const void *host_table, int target_type,
3122 const void *target_data)
3123 {
3124 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
3125 }
3126
3127 /* This function initializes the target device, specified by DEVICEP. DEVICEP
3128 must be locked on entry, and remains locked on return. */
3129
3130 attribute_hidden void
3131 gomp_init_device (struct gomp_device_descr *devicep)
3132 {
3133 int i;
3134 if (!devicep->init_device_func (devicep->target_id))
3135 {
3136 gomp_mutex_unlock (&devicep->lock);
3137 gomp_fatal ("device initialization failed");
3138 }
3139
3140 /* Load to the device all images registered so far. */
3141 for (i = 0; i < num_offload_images; i++)
3142 {
3143 struct offload_image_descr *image = &offload_images[i];
3144 if (image->type == devicep->type)
3145 gomp_load_image_to_device (devicep, image->version,
3146 image->host_table, image->target_data,
3147 false);
3148 }
3149
3150 /* Initialize OpenACC asynchronous queues. */
3151 goacc_init_asyncqueues (devicep);
3152
3153 devicep->state = GOMP_DEVICE_INITIALIZED;
3154 }
3155
3156 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
3157 must be locked on entry, and remains locked on return. */
3158
3159 attribute_hidden bool
3160 gomp_fini_device (struct gomp_device_descr *devicep)
3161 {
3162 bool ret = goacc_fini_asyncqueues (devicep);
3163 ret &= devicep->fini_device_func (devicep->target_id);
3164 devicep->state = GOMP_DEVICE_FINALIZED;
3165 return ret;
3166 }
3167
3168 attribute_hidden void
3169 gomp_unload_device (struct gomp_device_descr *devicep)
3170 {
3171 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3172 {
3173 unsigned i;
3174
3175 /* Unload from the device all currently registered images. */
3176 for (i = 0; i < num_offload_images; i++)
3177 {
3178 struct offload_image_descr *image = &offload_images[i];
3179 if (image->type == devicep->type)
3180 gomp_unload_image_from_device (devicep, image->version,
3181 image->host_table,
3182 image->target_data);
3183 }
3184 }
3185 }
3186
3187 /* Host fallback for GOMP_target{,_ext} routines. */
3188
3189 static void
3190 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
3191 struct gomp_device_descr *devicep, void **args)
3192 {
3193 struct gomp_thread old_thr, *thr = gomp_thread ();
3194
3195 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3196 && devicep != NULL)
3197 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3198 "be used for offloading");
3199
3200 old_thr = *thr;
3201 memset (thr, '\0', sizeof (*thr));
3202 if (gomp_places_list)
3203 {
3204 thr->place = old_thr.place;
3205 thr->ts.place_partition_len = gomp_places_list_len;
3206 }
3207 if (args)
3208 while (*args)
3209 {
3210 intptr_t id = (intptr_t) *args++, val;
3211 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3212 val = (intptr_t) *args++;
3213 else
3214 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3215 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3216 continue;
3217 id &= GOMP_TARGET_ARG_ID_MASK;
3218 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
3219 continue;
3220 val = val > INT_MAX ? INT_MAX : val;
3221 if (val)
3222 gomp_icv (true)->thread_limit_var = val;
3223 break;
3224 }
3225
3226 fn (hostaddrs);
3227 gomp_free_thread (thr);
3228 *thr = old_thr;
3229 }
3230
3231 /* Calculate alignment and size requirements of a private copy of data shared
3232 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
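
/* For instance (purely illustrative): two firstprivate objects of size 4
   with alignment 4 and size 8 with alignment 8 yield *TGT_ALIGN == 8 and
   *TGT_SIZE == 16 (0 rounded up to 0, plus 4, rounded up to 8, plus 8),
   matching the layout later performed by copy_firstprivate_data.  */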
3233
3234 static inline void
3235 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
3236 unsigned short *kinds, size_t *tgt_align,
3237 size_t *tgt_size)
3238 {
3239 size_t i;
3240 for (i = 0; i < mapnum; i++)
3241 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
3242 {
3243 size_t align = (size_t) 1 << (kinds[i] >> 8);
3244 if (*tgt_align < align)
3245 *tgt_align = align;
3246 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
3247 *tgt_size += sizes[i];
3248 }
3249 }
3250
3251 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
3252
3253 static inline void
3254 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
3255 size_t *sizes, unsigned short *kinds, size_t tgt_align,
3256 size_t tgt_size)
3257 {
3258 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3259 if (al)
3260 tgt += tgt_align - al;
3261 tgt_size = 0;
3262 size_t i;
3263 for (i = 0; i < mapnum; i++)
3264 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
3265 {
3266 size_t align = (size_t) 1 << (kinds[i] >> 8);
3267 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3268 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
3269 hostaddrs[i] = tgt + tgt_size;
3270 tgt_size = tgt_size + sizes[i];
3271 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
3272 {
3273 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
3274 ++i;
3275 }
3276 }
3277 }
3278
3279 /* Helper function of GOMP_target{,_ext} routines. */
3280
3281 static void *
3282 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
3283 void (*host_fn) (void *))
3284 {
3285 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
3286 return (void *) host_fn;
3287 else
3288 {
3289 gomp_mutex_lock (&devicep->lock);
3290 if (devicep->state == GOMP_DEVICE_FINALIZED)
3291 {
3292 gomp_mutex_unlock (&devicep->lock);
3293 return NULL;
3294 }
3295
3296 struct splay_tree_key_s k;
3297 k.host_start = (uintptr_t) host_fn;
3298 k.host_end = k.host_start + 1;
3299 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
3300 gomp_mutex_unlock (&devicep->lock);
3301 if (tgt_fn == NULL)
3302 return NULL;
3303
3304 return (void *) tgt_fn->tgt_offset;
3305 }
3306 }
3307
3308 /* Called when encountering a target directive. If DEVICE
3309 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
3310 GOMP_DEVICE_HOST_FALLBACK (or any value
3311 larger than last available hw device), use host fallback.
3312 FN is address of host code, UNUSED is part of the current ABI, but
3313 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
3314 with MAPNUM entries, with addresses of the host objects,
3315 sizes of the host objects (resp. for pointer kind pointer bias
3316 and assumed sizeof (void *) size) and kinds. */
3317
3318 void
3319 GOMP_target (int device, void (*fn) (void *), const void *unused,
3320 size_t mapnum, void **hostaddrs, size_t *sizes,
3321 unsigned char *kinds)
3322 {
3323 struct gomp_device_descr *devicep = resolve_device (device, true);
3324
3325 void *fn_addr;
3326 if (devicep == NULL
3327 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3328 /* All shared memory devices should use the GOMP_target_ext function. */
3329 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
3330 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
3331 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
3332
3333 htab_t refcount_set = htab_create (mapnum);
3334 struct target_mem_desc *tgt_vars
3335 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3336 &refcount_set, GOMP_MAP_VARS_TARGET);
3337 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
3338 NULL);
3339 htab_clear (refcount_set);
3340 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3341 htab_free (refcount_set);
3342 }
3343
3344 static inline unsigned int
3345 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
3346 {
3347 /* If we cannot run asynchronously, simply ignore nowait. */
3348 if (devicep != NULL && devicep->async_run_func == NULL)
3349 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
3350
3351 return flags;
3352 }
3353
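/* Copy the ICV block for DEVICE back from device memory into the host-side
   copy kept in gomp_offload_icv_list, if such a block exists and is
   currently mapped on DEVICEP.  */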
3354 static void
3355 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
3356 {
3357 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3358 if (item == NULL)
3359 return;
3360
3361 void *host_ptr = &item->icvs;
3362 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
3363 if (dev_ptr != NULL)
3364 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
3365 sizeof (struct gomp_offload_icvs));
3366 }
3367
3368 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
3369 and several arguments have been added:
3370 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
3371 DEPEND is array of dependencies, see GOMP_task for details.
3372
3373 ARGS is a pointer to an array consisting of a variable number of both
3374 device-independent and device-specific arguments, each of which can take
3375 one or two elements where the first specifies for which device it is
3376 intended, the type and optionally also the value. If the value is not
3377 present in the first element, the whole second element holds the actual
3378 value. The last element of the array is a single NULL. Among the
3379 device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
3380
3381 NUM_TEAMS is positive if GOMP_teams will be called in the body with
3382 that value, or 1 if the teams construct is not present, or 0 if the
3383 teams construct does not have a num_teams clause and so the choice is
3384 implementation defined, and -1 if it can't be determined on the host
3385 what value GOMP_teams will have on the device.
3386 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
3387 body with that value, or 0 if the teams construct does not have a
3388 thread_limit clause or is not present, or -1 if it can't be determined
3389 on the host what value GOMP_teams will have on the device. */
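
/* A sketch of how a single combined ARGS element is encoded (constants from
   gomp-constants.h; the concrete values here are illustrative):

     void *args[] = {
       (void *) ((8 << GOMP_TARGET_ARG_VALUE_SHIFT)
                 | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS),
       NULL };

   requests NUM_TEAMS == 8 for all devices; a value too wide for the shifted
   form is instead passed as a second element after an id tagged with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM, exactly as the code below rebuilds the
   argument list when overriding TEAMS or THREAD_LIMIT.  */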
3390
3391 void
3392 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
3393 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3394 unsigned int flags, void **depend, void **args)
3395 {
3396 struct gomp_device_descr *devicep = resolve_device (device, true);
3397 size_t tgt_align = 0, tgt_size = 0;
3398 bool fpc_done = false;
3399
3400 /* Obtain the original TEAMS and THREADS values from ARGS. */
3401 intptr_t orig_teams = 1, orig_threads = 0;
3402 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
3403 void **tmpargs = args;
3404 while (*tmpargs)
3405 {
3406 intptr_t id = (intptr_t) *tmpargs++, val;
3407 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3408 {
3409 val = (intptr_t) *tmpargs++;
3410 len = 2;
3411 }
3412 else
3413 {
3414 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
3415 len = 1;
3416 }
3417 num_args += len;
3418 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
3419 continue;
3420 val = val > INT_MAX ? INT_MAX : val;
3421 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
3422 {
3423 orig_teams = val;
3424 teams_len = len;
3425 }
3426 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
3427 {
3428 orig_threads = val;
3429 threads_len = len;
3430 }
3431 }
3432
3433 intptr_t new_teams = orig_teams, new_threads = orig_threads;
3434 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3435 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3436 value could not be determined. No change.
3437 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3438 Set device-specific value.
3439 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3440 No change. */
3441 if (orig_teams == -2)
3442 new_teams = 1;
3443 else if (orig_teams == 0)
3444 {
3445 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3446 if (item != NULL)
3447 new_teams = item->icvs.nteams;
3448 }
3449 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3450 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3451 e.g. a THREAD_LIMIT clause. */
3452 if (orig_teams > -2 && orig_threads == 0)
3453 {
3454 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3455 if (item != NULL)
3456 new_threads = item->icvs.teams_thread_limit;
3457 }
3458
3459 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3460 updated. */
3461 void **new_args = args;
3462 if (orig_teams != new_teams || orig_threads != new_threads)
3463 {
3464 size_t tms_len = (orig_teams == new_teams
3465 ? teams_len
3466 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3467 ? 1 : 2));
3468 size_t ths_len = (orig_threads == new_threads
3469 ? threads_len
3470 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3471 ? 1 : 2));
3472 /* One additional item after the last arg must be NULL. */
3473 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3474 + ths_len + 1;
3475 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3476
3477 tmpargs = args;
3478 void **tmp_new_args = new_args;
3479 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3480 too if they have not been changed and skipped otherwise. */
3481 while (*tmpargs)
3482 {
3483 intptr_t id = (intptr_t) *tmpargs;
3484 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3485 && orig_teams != new_teams)
3486 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3487 && orig_threads != new_threads))
3488 {
3489 tmpargs++;
3490 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3491 tmpargs++;
3492 }
3493 else
3494 {
3495 *tmp_new_args++ = *tmpargs++;
3496 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3497 *tmp_new_args++ = *tmpargs++;
3498 }
3499 }
3500
3501 /* Add the new TEAMS arg to the new args list if it has been changed. */
3502 if (orig_teams != new_teams)
3503 {
3504 intptr_t new_val = new_teams;
3505 if (tms_len == 1)
3506 {
3507 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3508 | GOMP_TARGET_ARG_NUM_TEAMS;
3509 *tmp_new_args++ = (void *) new_val;
3510 }
3511 else
3512 {
3513 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3514 | GOMP_TARGET_ARG_NUM_TEAMS);
3515 *tmp_new_args++ = (void *) new_val;
3516 }
3517 }
3518
3519 /* Add the new THREADS arg to the new args list if it has been changed. */
3520 if (orig_threads != new_threads)
3521 {
3522 intptr_t new_val = new_threads;
3523 if (ths_len == 1)
3524 {
3525 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3526 | GOMP_TARGET_ARG_THREAD_LIMIT;
3527 *tmp_new_args++ = (void *) new_val;
3528 }
3529 else
3530 {
3531 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3532 | GOMP_TARGET_ARG_THREAD_LIMIT);
3533 *tmp_new_args++ = (void *) new_val;
3534 }
3535 }
3536
3537 *tmp_new_args = NULL;
3538 }
3539
3540 flags = clear_unsupported_flags (devicep, flags);
3541
3542 /* For 'nowait' we supposedly have to unregister/free page-locked host memory
3543 via 'GOMP_PLUGIN_target_task_completion'. There is no current
3544 configuration exercising this (and thus it is infeasible to test). */
3545 assert (!(flags & GOMP_TARGET_FLAG_NOWAIT)
3546 || !(devicep && devicep->page_locked_host_register_func));
3547
3548 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3549 {
3550 struct gomp_thread *thr = gomp_thread ();
3551 /* Create a team if we don't have any around, as nowait
3552 target tasks make sense to run asynchronously even when
3553 outside of any parallel. */
3554 if (__builtin_expect (thr->ts.team == NULL, 0))
3555 {
3556 struct gomp_team *team = gomp_new_team (1);
3557 struct gomp_task *task = thr->task;
3558 struct gomp_task **implicit_task = &task;
3559 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3560 team->prev_ts = thr->ts;
3561 thr->ts.team = team;
3562 thr->ts.team_id = 0;
3563 thr->ts.work_share = &team->work_shares[0];
3564 thr->ts.last_work_share = NULL;
3565 #ifdef HAVE_SYNC_BUILTINS
3566 thr->ts.single_count = 0;
3567 #endif
3568 thr->ts.static_trip = 0;
3569 thr->task = &team->implicit_task[0];
3570 gomp_init_task (thr->task, NULL, icv);
3571 while (*implicit_task
3572 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3573 implicit_task = &(*implicit_task)->parent;
3574 if (*implicit_task)
3575 {
3576 thr->task = *implicit_task;
3577 gomp_end_task ();
3578 free (*implicit_task);
3579 thr->task = &team->implicit_task[0];
3580 }
3581 else
3582 pthread_setspecific (gomp_thread_destructor, thr);
3583 if (implicit_task != &task)
3584 {
3585 *implicit_task = thr->task;
3586 thr->task = task;
3587 }
3588 }
3589 if (thr->ts.team
3590 && !thr->task->final_task)
3591 {
3592 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3593 sizes, kinds, flags, depend, new_args,
3594 GOMP_TARGET_TASK_BEFORE_MAP);
3595 return;
3596 }
3597 }
3598
3599 /* If there are depend clauses, but nowait is not present
3600 (or we are in a final task), block the parent task until the
3601 dependencies are resolved and then just continue with the rest
3602 of the function as if it were a merged task. */
3603 if (depend != NULL)
3604 {
3605 struct gomp_thread *thr = gomp_thread ();
3606 if (thr->task && thr->task->depend_hash)
3607 {
3608 /* If we might need to wait, copy firstprivate now. */
3609 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3610 &tgt_align, &tgt_size);
3611 if (tgt_align)
3612 {
3613 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3614 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3615 tgt_align, tgt_size);
3616 }
3617 fpc_done = true;
3618 gomp_task_maybe_wait_for_dependencies (depend);
3619 }
3620 }
3621
3622 void *fn_addr;
3623 if (devicep == NULL
3624 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3625 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3626 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3627 {
3628 if (!fpc_done)
3629 {
3630 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3631 &tgt_align, &tgt_size);
3632 if (tgt_align)
3633 {
3634 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3635 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3636 tgt_align, tgt_size);
3637 }
3638 }
3639 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3640 return;
3641 }
3642
3643 struct target_mem_desc *tgt_vars;
3644 htab_t refcount_set = NULL;
3645
3646 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3647 {
3648 if (!fpc_done)
3649 {
3650 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3651 &tgt_align, &tgt_size);
3652 if (tgt_align)
3653 {
3654 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3655 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3656 tgt_align, tgt_size);
3657 }
3658 }
3659 tgt_vars = NULL;
3660 }
3661 else
3662 {
3663 refcount_set = htab_create (mapnum);
3664 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3665 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3666 }
3667 devicep->run_func (devicep->target_id, fn_addr,
3668 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3669 new_args);
3670 if (tgt_vars)
3671 {
3672 htab_clear (refcount_set);
3673 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3674 }
3675 if (refcount_set)
3676 htab_free (refcount_set);
3677
3678 /* Copy back ICVs from device to host.
3679 HOST_PTR is expected to exist since it was added in
3680 gomp_load_image_to_device if not already available. */
3681 gomp_copy_back_icvs (devicep, device);
3682
3683 }
3684
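/* The argument-list rewriting in GOMP_target_ext above relies on the compact
   GOMP_TARGET_ARG encoding: each identifier word carries the argument kind
   and, for values that fit into 16 bits, the value itself shifted by
   GOMP_TARGET_ARG_VALUE_SHIFT; larger values spill into a second word flagged
   by GOMP_TARGET_ARG_SUBSEQUENT_PARAM.  Below is a minimal, self-contained
   sketch of that packing; the macro values are assumed to mirror
   gomp-constants.h and the helper names are illustrative only.  */

#include <stdint.h>
#include <stddef.h>

#define ARG_SUBSEQUENT_PARAM (1 << 7)          /* GOMP_TARGET_ARG_SUBSEQUENT_PARAM */
#define ARG_NUM_TEAMS        (1 << 8)          /* GOMP_TARGET_ARG_NUM_TEAMS */
#define ARG_ID_MASK          (((1 << 8) - 1) << 8)
#define ARG_VALUE_SHIFT      16                /* GOMP_TARGET_ARG_VALUE_SHIFT */

/* Pack VAL as a NUM_TEAMS argument; 16-bit values fit into the identifier
   word itself, larger ones need a second, subsequent word.  Returns the
   number of list entries written.  */
static size_t
pack_num_teams (void **args, intptr_t val)
{
  if (val > -(1 << 15) && val < (1 << 15))
    {
      args[0] = (void *) ((val << ARG_VALUE_SHIFT) | ARG_NUM_TEAMS);
      return 1;
    }
  args[0] = (void *) (intptr_t) (ARG_SUBSEQUENT_PARAM | ARG_NUM_TEAMS);
  args[1] = (void *) val;
  return 2;
}

/* Decode the identifier word the way the copy loop above does.  */
static int
is_num_teams (void *arg)
{
  return ((intptr_t) arg & ARG_ID_MASK) == ARG_NUM_TEAMS;
}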
3685
3686 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3687 keeping track of all variable handling - assuming that reverse offload occurs
3688 only very rarely. The downside is that the reverse search is slow. */
3689
3690 struct gomp_splay_tree_rev_lookup_data {
3691 uintptr_t tgt_start;
3692 uintptr_t tgt_end;
3693 splay_tree_key key;
3694 };
3695
3696 static int
3697 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3698 {
3699 struct gomp_splay_tree_rev_lookup_data *data;
3700 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3701 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3702
3703 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3704 return 0;
3705
3706 size_t j;
3707 for (j = 0; j < key->tgt->list_count; j++)
3708 if (key->tgt->list[j].key == key)
3709 break;
3710 assert (j < key->tgt->list_count);
3711 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3712
3713 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3714 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3715 {
3716 data->key = key;
3717 return 1;
3718 }
3719 return 0;
3720 }
3721
3722 static inline splay_tree_key
3723 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3724 bool zero_len)
3725 {
3726 struct gomp_splay_tree_rev_lookup_data data;
3727 data.key = NULL;
3728 data.tgt_start = tgt_start;
3729 data.tgt_end = tgt_end;
3730
3731 if (tgt_start != tgt_end)
3732 {
3733 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3734 return data.key;
3735 }
3736
3737 data.tgt_end++;
3738 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3739 if (data.key != NULL || zero_len)
3740 return data.key;
3741 data.tgt_end--;
3742
3743 data.tgt_start--;
3744 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3745 return data.key;
3746 }
3747
3748 struct cpy_data
3749 {
3750 uint64_t devaddr;
3751 bool present, aligned;
3752 };
3753
3754
3755 /* Search the just-mapped reverse-offload data; returns the index if found,
3756 otherwise a value >= n. */
3757
3758 static inline int
3759 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3760 unsigned short *kinds, uint64_t *sizes, size_t n,
3761 uint64_t tgt_start, uint64_t tgt_end)
3762 {
3763 const bool short_mapkind = true;
3764 const int typemask = short_mapkind ? 0xff : 0x7;
3765 size_t i;
3766 for (i = 0; i < n; i++)
3767 {
3768 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3769 == GOMP_MAP_STRUCT);
3770 uint64_t dev_end;
3771 if (!is_struct)
3772 dev_end = d[i].devaddr + sizes[i];
3773 else
3774 {
3775 if (i + sizes[i] < n)
3776 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3777 else
3778 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3779 }
3780 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3781 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3782 break;
3783 if (is_struct)
3784 i += sizes[i];
3785 }
3786 return i;
3787 }
3788
3789 static inline int
3790 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3791 unsigned short *kinds, uint64_t *sizes,
3792 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3793 bool zero_len)
3794 {
3795 size_t i;
3796 if (tgt_start != tgt_end)
3797 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3798 tgt_start, tgt_end);
3799 tgt_end++;
3800 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3801 tgt_start, tgt_end);
3802 if (i < n || zero_len)
3803 return i;
3804 tgt_end--;
3805
3806 tgt_start--;
3807 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3808 tgt_start, tgt_end);
3809 }
3810
3811 /* Handle reverse offload. This is called by the device plugins for a
3812 reverse offload; it is not called if the outer target runs on the host.
3813 The mapping is simplified: device-affecting constructs (except for target
3814 with device(ancestor:1)) must not be encountered; in particular, no
3815 target (enter/exit) data. */
3816
3817 void
3818 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3819 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3820 struct goacc_asyncqueue *aq)
3821 {
3822 /* Return early if there is no offload code. */
3823 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3824 return;
3825 /* Currently, this fails because of calculate_firstprivate_requirements
3826 below; it could be fixed but additional code needs to be updated to
3827 handle 32bit hosts - thus, it is not worthwhile. */
3828 if (sizeof (void *) != sizeof (uint64_t))
3829 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3830
3831 struct cpy_data *cdata = NULL;
3832 uint64_t *devaddrs;
3833 uint64_t *sizes;
3834 unsigned short *kinds;
3835 const bool short_mapkind = true;
3836 const int typemask = short_mapkind ? 0xff : 0x7;
3837 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3838
3839 reverse_splay_tree_key n;
3840 struct reverse_splay_tree_key_s k;
3841 k.dev = fn_ptr;
3842
3843 gomp_mutex_lock (&devicep->lock);
3844 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3845 gomp_mutex_unlock (&devicep->lock);
3846
3847 if (n == NULL)
3848 gomp_fatal ("Cannot find reverse-offload function");
3849 void (*host_fn)() = (void (*)()) n->k->host_start;
3850
3851 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3852 {
3853 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3854 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3855 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3856 }
3857 else
3858 {
3859 size_t devaddrs_size = mapnum * sizeof (uint64_t);
3860 size_t sizes_size = mapnum * sizeof (uint64_t);
3861 size_t kinds_size = mapnum * sizeof (unsigned short);
3862 if (always_pinned_mode)
3863 {
3864 if (!(devaddrs = gomp_page_locked_host_alloc_dev (devicep,
3865 devaddrs_size,
3866 false))
3867 || !(sizes = gomp_page_locked_host_alloc_dev (devicep,
3868 sizes_size,
3869 false))
3870 || !(kinds = gomp_page_locked_host_alloc_dev (devicep,
3871 kinds_size,
3872 false)))
3873 exit (EXIT_FAILURE);
3874 }
3875 else
3876 {
3877 devaddrs = gomp_malloc (devaddrs_size);
3878 sizes = gomp_malloc (sizes_size);
3879 kinds = gomp_malloc (kinds_size);
3880 }
3881 gomp_copy_dev2host (devicep, aq, devaddrs,
3882 (const void *) (uintptr_t) devaddrs_ptr,
3883 devaddrs_size);
3884 gomp_copy_dev2host (devicep, aq, sizes,
3885 (const void *) (uintptr_t) sizes_ptr,
3886 sizes_size);
3887 gomp_copy_dev2host (devicep, aq, kinds,
3888 (const void *) (uintptr_t) kinds_ptr,
3889 kinds_size);
3890 if (aq && !devicep->openacc.async.synchronize_func (aq))
3891 exit (EXIT_FAILURE);
3892 }
3893
3894 size_t tgt_align = 0, tgt_size = 0;
3895
3896 /* If actually executed on 32bit systems, the casts lead to wrong code;
3897 but 32bit with offloading is not supported; see top of this function. */
3898 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3899 (void *) (uintptr_t) kinds,
3900 &tgt_align, &tgt_size);
3901
3902 if (tgt_align)
3903 {
3904 size_t tgt_alloc_size = tgt_size + tgt_align - 1;
3905 char *tgt = gomp_alloca (tgt_alloc_size);
3906 if (always_pinned_mode)
3907 {
3908 /* TODO: See 'gomp_copy_host2dev' re "page-locking on the spot".
3909 On the other hand, performance isn't really a concern, here. */
3910 int page_locked_host_p = 0;
3911 if (tgt_alloc_size != 0)
3912 {
3913 page_locked_host_p = gomp_page_locked_host_register_dev
3914 (devicep, tgt, tgt_alloc_size, GOMP_MAP_TOFROM);
3915 if (page_locked_host_p < 0)
3916 exit (EXIT_FAILURE);
3917 /* 'gomp_alloca' isn't already page-locked host memory. */
3918 assert (page_locked_host_p);
3919 }
3920 }
3921 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3922 if (al)
3923 tgt += tgt_align - al;
3924 tgt_size = 0;
3925 for (uint64_t i = 0; i < mapnum; i++)
3926 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3927 && devaddrs[i] != 0)
3928 {
3929 size_t align = (size_t) 1 << (kinds[i] >> 8);
3930 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3931 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3932 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3933 (size_t) sizes[i]);
3934 else
3935 {
3936 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3937 (void *) (uintptr_t) devaddrs[i],
3938 (size_t) sizes[i]);
3939 if (aq && !devicep->openacc.async.synchronize_func (aq))
3940 exit (EXIT_FAILURE);
3941 }
3942 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3943 tgt_size = tgt_size + sizes[i];
3944 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3945 && i + 1 < mapnum
3946 && ((get_kind (short_mapkind, kinds, i) & typemask)
3947 == GOMP_MAP_ATTACH))
3948 {
3949 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3950 = (uint64_t) devaddrs[i];
3951 ++i;
3952 }
3953 }
3954 if (always_pinned_mode)
3955 {
3956 if (tgt_alloc_size != 0
3957 && !gomp_page_locked_host_unregister_dev (devicep,
3958 tgt, tgt_alloc_size,
3959 NULL))
3960 exit (EXIT_FAILURE);
3961 }
3962 }
3963
3964 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3965 {
3966 size_t j, struct_cpy = 0;
3967 splay_tree_key n2;
3968 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3969 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3970 gomp_mutex_lock (&devicep->lock);
3971 for (uint64_t i = 0; i < mapnum; i++)
3972 {
3973 if (devaddrs[i] == 0)
3974 continue;
3975 n = NULL;
3976 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3977 switch (kind)
3978 {
3979 case GOMP_MAP_FIRSTPRIVATE:
3980 case GOMP_MAP_FIRSTPRIVATE_INT:
3981 continue;
3982
3983 case GOMP_MAP_DELETE:
3984 case GOMP_MAP_RELEASE:
3985 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3986 /* Assume it is present; look it up - but ignore unless the
3987 present clause is there. */
3988 case GOMP_MAP_ALLOC:
3989 case GOMP_MAP_FROM:
3990 case GOMP_MAP_FORCE_ALLOC:
3991 case GOMP_MAP_FORCE_FROM:
3992 case GOMP_MAP_ALWAYS_FROM:
3993 case GOMP_MAP_TO:
3994 case GOMP_MAP_TOFROM:
3995 case GOMP_MAP_FORCE_TO:
3996 case GOMP_MAP_FORCE_TOFROM:
3997 case GOMP_MAP_ALWAYS_TO:
3998 case GOMP_MAP_ALWAYS_TOFROM:
3999 case GOMP_MAP_PRESENT_FROM:
4000 case GOMP_MAP_PRESENT_TO:
4001 case GOMP_MAP_PRESENT_TOFROM:
4002 case GOMP_MAP_ALWAYS_PRESENT_FROM:
4003 case GOMP_MAP_ALWAYS_PRESENT_TO:
4004 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
4005 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4006 cdata[i].devaddr = devaddrs[i];
4007 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4008 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
4009 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
4010 devaddrs[i],
4011 devaddrs[i] + sizes[i], zero_len);
4012 if (j < i)
4013 {
4014 n2 = NULL;
4015 cdata[i].present = true;
4016 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
4017 }
4018 else
4019 {
4020 n2 = gomp_map_rev_lookup (&devicep->mem_map,
4021 devaddrs[i],
4022 devaddrs[i] + sizes[i], zero_len);
4023 cdata[i].present = n2 != NULL;
4024 }
4025 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
4026 {
4027 gomp_mutex_unlock (&devicep->lock);
4028 #ifdef HAVE_INTTYPES_H
4029 gomp_fatal ("present clause: no corresponding data on "
4030 "parent device at %p with size %"PRIu64,
4031 (void *) (uintptr_t) devaddrs[i],
4032 (uint64_t) sizes[i]);
4033 #else
4034 gomp_fatal ("present clause: no corresponding data on "
4035 "parent device at %p with size %lu",
4036 (void *) (uintptr_t) devaddrs[i],
4037 (unsigned long) sizes[i]);
4038 #endif
4039 break;
4040 }
4041 else if (!cdata[i].present
4042 && kind != GOMP_MAP_DELETE
4043 && kind != GOMP_MAP_RELEASE
4044 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
4045 {
4046 cdata[i].aligned = true;
4047 size_t align = (size_t) 1 << (kinds[i] >> 8);
4048 void *ptr;
4049 if (always_pinned_mode)
4050 {
4051 ptr = gomp_page_locked_host_aligned_alloc_dev
4052 (devicep, align, sizes[i]);
4053 if (!ptr)
4054 {
4055 gomp_mutex_unlock (&devicep->lock);
4056 exit (EXIT_FAILURE);
4057 }
4058 }
4059 else
4060 ptr = gomp_aligned_alloc (align, sizes[i]);
4061 devaddrs[i] = (uint64_t) (uintptr_t) ptr;
4062 }
4063 else if (n2 != NULL)
4064 devaddrs[i] = (n2->host_start + cdata[i].devaddr
4065 - (n2->tgt->tgt_start + n2->tgt_offset));
4066 if (((!cdata[i].present || struct_cpy)
4067 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
4068 || kind == GOMP_MAP_FORCE_TO
4069 || kind == GOMP_MAP_FORCE_TOFROM
4070 || GOMP_MAP_ALWAYS_TO_P (kind))
4071 {
4072 gomp_copy_dev2host (devicep, aq,
4073 (void *) (uintptr_t) devaddrs[i],
4074 (void *) (uintptr_t) cdata[i].devaddr,
4075 sizes[i]);
4076 if (aq && !devicep->openacc.async.synchronize_func (aq))
4077 {
4078 gomp_mutex_unlock (&devicep->lock);
4079 exit (EXIT_FAILURE);
4080 }
4081 }
4082 if (struct_cpy)
4083 struct_cpy--;
4084 break;
4085 case GOMP_MAP_ATTACH:
4086 case GOMP_MAP_POINTER:
4087 case GOMP_MAP_ALWAYS_POINTER:
4088 n2 = gomp_map_rev_lookup (&devicep->mem_map,
4089 devaddrs[i] + sizes[i],
4090 devaddrs[i] + sizes[i]
4091 + sizeof (void*), false);
4092 cdata[i].present = n2 != NULL;
4093 cdata[i].devaddr = devaddrs[i];
4094 if (n2)
4095 devaddrs[i] = (n2->host_start + cdata[i].devaddr
4096 - (n2->tgt->tgt_start + n2->tgt_offset));
4097 else
4098 {
4099 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
4100 devaddrs[i] + sizes[i],
4101 devaddrs[i] + sizes[i]
4102 + sizeof (void*), false);
4103 if (j < i)
4104 {
4105 cdata[i].present = true;
4106 devaddrs[i] = (devaddrs[j] + devaddrs[i]
4107 - cdata[j].devaddr);
4108 }
4109 }
4110 if (!cdata[i].present)
4111 {
4112 void *ptr;
4113 size_t size = sizeof (void *);
4114 if (always_pinned_mode)
4115 {
4116 ptr = gomp_page_locked_host_alloc_dev (devicep,
4117 size, false);
4118 if (!ptr)
4119 {
4120 gomp_mutex_unlock (&devicep->lock);
4121 exit (EXIT_FAILURE);
4122 }
4123 }
4124 else
4125 ptr = gomp_malloc (size);
4126 devaddrs[i] = (uintptr_t) ptr;
4127 }
4128 /* Assume that when present, the pointer is already correct. */
4129 if (!n2)
4130 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
4131 = devaddrs[i-1];
4132 break;
4133 case GOMP_MAP_TO_PSET:
4134 /* Assume that when present, the pointers are fine and no 'to:'
4135 is required. */
4136 n2 = gomp_map_rev_lookup (&devicep->mem_map,
4137 devaddrs[i], devaddrs[i] + sizes[i],
4138 false);
4139 cdata[i].present = n2 != NULL;
4140 cdata[i].devaddr = devaddrs[i];
4141 if (n2)
4142 devaddrs[i] = (n2->host_start + cdata[i].devaddr
4143 - (n2->tgt->tgt_start + n2->tgt_offset));
4144 else
4145 {
4146 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
4147 devaddrs[i],
4148 devaddrs[i] + sizes[i], false);
4149 if (j < i)
4150 {
4151 cdata[i].present = true;
4152 devaddrs[i] = (devaddrs[j] + devaddrs[i]
4153 - cdata[j].devaddr);
4154 }
4155 }
4156 if (!cdata[i].present)
4157 {
4158 cdata[i].aligned = true;
4159 size_t align = (size_t) 1 << (kinds[i] >> 8);
4160 void *ptr;
4161 if (always_pinned_mode)
4162 {
4163 ptr = gomp_page_locked_host_aligned_alloc_dev
4164 (devicep, align, sizes[i]);
4165 if (!ptr)
4166 {
4167 gomp_mutex_unlock (&devicep->lock);
4168 exit (EXIT_FAILURE);
4169 }
4170 }
4171 else
4172 ptr = gomp_aligned_alloc (align, sizes[i]);
4173 devaddrs[i] = (uint64_t) (uintptr_t) ptr;
4174 gomp_copy_dev2host (devicep, aq,
4175 (void *) (uintptr_t) devaddrs[i],
4176 (void *) (uintptr_t) cdata[i].devaddr,
4177 sizes[i]);
4178 if (aq && !devicep->openacc.async.synchronize_func (aq))
4179 {
4180 gomp_mutex_unlock (&devicep->lock);
4181 exit (EXIT_FAILURE);
4182 }
4183 }
4184 for (j = i + 1; j < mapnum; j++)
4185 {
4186 kind = get_kind (short_mapkind, kinds, j) & typemask;
4187 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
4188 && !GOMP_MAP_POINTER_P (kind))
4189 break;
4190 if (devaddrs[j] < devaddrs[i])
4191 break;
4192 if (cdata[i].present)
4193 continue;
4194 if (devaddrs[j] == 0)
4195 {
4196 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
4197 continue;
4198 }
4199 int k;
4200 n2 = NULL;
4201 /* Dereference devaddrs[j] to get the device addr. */
4202 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
4203 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
4204 + sizes[j]);
4205 cdata[j].present = true;
4206 cdata[j].devaddr = devaddrs[j];
4207 if (devaddrs[j] == 0)
4208 continue;
4209 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
4210 devaddrs[j],
4211 devaddrs[j] + sizeof (void*),
4212 false);
4213 if (k < j)
4214 devaddrs[j] = (devaddrs[k] + devaddrs[j]
4215 - cdata[k].devaddr);
4216 else
4217 {
4218 n2 = gomp_map_rev_lookup (&devicep->mem_map,
4219 devaddrs[j],
4220 devaddrs[j] + sizeof (void*),
4221 false);
4222 if (n2 == NULL)
4223 {
4224 gomp_mutex_unlock (&devicep->lock);
4225 gomp_fatal ("Pointer target wasn't mapped");
4226 }
4227 devaddrs[j] = (n2->host_start + cdata[j].devaddr
4228 - (n2->tgt->tgt_start + n2->tgt_offset));
4229 }
4230 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
4231 = (void *) (uintptr_t) devaddrs[j];
4232 }
4233 i = j - 1;
4234 break;
4235 case GOMP_MAP_STRUCT:
4236 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
4237 devaddrs[i + sizes[i]]
4238 + sizes[i + sizes[i]], false);
4239 cdata[i].present = n2 != NULL;
4240 cdata[i].devaddr = devaddrs[i];
4241 struct_cpy = cdata[i].present ? 0 : sizes[i];
4242 if (!n2)
4243 {
4244 size_t sz = (size_t) (devaddrs[i + sizes[i]]
4245 - devaddrs[i+1]
4246 + sizes[i + sizes[i]]);
4247 size_t align = (size_t) 1 << (kinds[i] >> 8);
4248 cdata[i].aligned = true;
4249 void *ptr;
4250 if (always_pinned_mode)
4251 {
4252 ptr = gomp_page_locked_host_aligned_alloc_dev
4253 (devicep, align, sz);
4254 if (!ptr)
4255 {
4256 gomp_mutex_unlock (&devicep->lock);
4257 exit (EXIT_FAILURE);
4258 }
4259 }
4260 else
4261 ptr = gomp_aligned_alloc (align, sz);
4262 devaddrs[i] = (uintptr_t) ptr;
4263 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
4264 }
4265 else
4266 devaddrs[i] = (n2->host_start + cdata[i].devaddr
4267 - (n2->tgt->tgt_start + n2->tgt_offset));
4268 break;
4269 default:
4270 gomp_mutex_unlock (&devicep->lock);
4271 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
4272 }
4273 }
4274 gomp_mutex_unlock (&devicep->lock);
4275 }
4276
4277 host_fn (devaddrs);
4278
4279 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
4280 {
4281 uint64_t struct_cpy = 0;
4282 bool clean_struct = false;
4283 for (uint64_t i = 0; i < mapnum; i++)
4284 {
4285 if (cdata[i].devaddr == 0)
4286 continue;
4287 int kind = get_kind (short_mapkind, kinds, i) & typemask;
4288 bool copy = !cdata[i].present || struct_cpy;
4289 switch (kind)
4290 {
4291 case GOMP_MAP_FORCE_FROM:
4292 case GOMP_MAP_FORCE_TOFROM:
4293 case GOMP_MAP_ALWAYS_FROM:
4294 case GOMP_MAP_ALWAYS_TOFROM:
4295 case GOMP_MAP_PRESENT_FROM:
4296 case GOMP_MAP_PRESENT_TOFROM:
4297 case GOMP_MAP_ALWAYS_PRESENT_FROM:
4298 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
4299 copy = true;
4300 /* FALLTHRU */
4301 case GOMP_MAP_FROM:
4302 case GOMP_MAP_TOFROM:
4303 if (copy)
4304 {
4305 gomp_copy_host2dev (devicep, aq,
4306 (void *) (uintptr_t) cdata[i].devaddr,
4307 (void *) (uintptr_t) devaddrs[i],
4308 sizes[i], false, NULL);
4309 if (aq && !devicep->openacc.async.synchronize_func (aq))
4310 exit (EXIT_FAILURE);
4311 }
4312 default:
4313 break;
4314 }
4315 if (struct_cpy)
4316 {
4317 struct_cpy--;
4318 continue;
4319 }
4320 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
4321 {
4322 clean_struct = true;
4323 struct_cpy = sizes[i];
4324 }
4325 else if (!cdata[i].present && cdata[i].aligned)
4326 {
4327 void *ptr = (void *) (uintptr_t) devaddrs[i];
4328 if (always_pinned_mode)
4329 {
4330 if (!gomp_page_locked_host_aligned_free_dev (devicep,
4331 ptr,
4332 aq))
4333 exit (EXIT_FAILURE);
4334 }
4335 else
4336 gomp_aligned_free (ptr);
4337 }
4338 else if (!cdata[i].present)
4339 {
4340 void *ptr = (void *) (uintptr_t) devaddrs[i];
4341 if (always_pinned_mode)
4342 {
4343 if (!gomp_page_locked_host_free_dev (devicep, ptr, aq))
4344 exit (EXIT_FAILURE);
4345 }
4346 else
4347 free (ptr);
4348 }
4349 }
4350 if (clean_struct)
4351 for (uint64_t i = 0; i < mapnum; i++)
4352 if (!cdata[i].present
4353 && ((get_kind (short_mapkind, kinds, i) & typemask)
4354 == GOMP_MAP_STRUCT))
4355 {
4356 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
4357 void *ptr = (void *) (uintptr_t) devaddrs[i];
4358 if (always_pinned_mode)
4359 {
4360 if (!gomp_page_locked_host_aligned_free_dev (devicep,
4361 ptr, aq))
4362 exit (EXIT_FAILURE);
4363 }
4364 else
4365 gomp_aligned_free (ptr);
4366 }
4367
4368 if (always_pinned_mode)
4369 {
4370 if (!gomp_page_locked_host_free_dev (devicep, devaddrs, aq)
4371 || !gomp_page_locked_host_free_dev (devicep, sizes, aq)
4372 || !gomp_page_locked_host_free_dev (devicep, kinds, aq))
4373 exit (EXIT_FAILURE);
4374 }
4375 else
4376 {
4377 free (devaddrs);
4378 free (sizes);
4379 free (kinds);
4380 }
4381 }
4382 }
4383
4384 /* Host fallback for GOMP_target_data{,_ext} routines. */
4385
4386 static void
4387 gomp_target_data_fallback (struct gomp_device_descr *devicep)
4388 {
4389 struct gomp_task_icv *icv = gomp_icv (false);
4390
4391 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
4392 && devicep != NULL)
4393 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
4394 "be used for offloading");
4395
4396 if (icv->target_data)
4397 {
4398 /* Even when doing a host fallback, if there are any active
4399 #pragma omp target data constructs, need to remember the
4400 new #pragma omp target data, otherwise GOMP_target_end_data
4401 would get out of sync. */
4402 struct target_mem_desc *tgt
4403 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
4404 NULL, GOMP_MAP_VARS_DATA);
4405 tgt->prev = icv->target_data;
4406 icv->target_data = tgt;
4407 }
4408 }
4409
4410 void
4411 GOMP_target_data (int device, const void *unused, size_t mapnum,
4412 void **hostaddrs, size_t *sizes, unsigned char *kinds)
4413 {
4414 struct gomp_device_descr *devicep = resolve_device (device, true);
4415
4416 if (devicep == NULL
4417 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4418 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
4419 return gomp_target_data_fallback (devicep);
4420
4421 struct target_mem_desc *tgt
4422 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
4423 NULL, GOMP_MAP_VARS_DATA);
4424 struct gomp_task_icv *icv = gomp_icv (true);
4425 tgt->prev = icv->target_data;
4426 icv->target_data = tgt;
4427 }
4428
4429 void
4430 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
4431 size_t *sizes, unsigned short *kinds)
4432 {
4433 struct gomp_device_descr *devicep = resolve_device (device, true);
4434
4435 if (devicep == NULL
4436 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4437 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4438 return gomp_target_data_fallback (devicep);
4439
4440 struct target_mem_desc *tgt
4441 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
4442 NULL, GOMP_MAP_VARS_DATA);
4443 struct gomp_task_icv *icv = gomp_icv (true);
4444 tgt->prev = icv->target_data;
4445 icv->target_data = tgt;
4446 }
4447
4448 void
4449 GOMP_target_end_data (void)
4450 {
4451 struct gomp_task_icv *icv = gomp_icv (false);
4452 if (icv->target_data)
4453 {
4454 struct target_mem_desc *tgt = icv->target_data;
4455 icv->target_data = tgt->prev;
4456 gomp_unmap_vars (tgt, true, NULL);
4457 }
4458 }
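/* For orientation, a hedged sketch of user code whose nesting is what the
   tgt->prev chain maintained by GOMP_target_data{,_ext} and popped by
   GOMP_target_end_data keeps track of; the function name is illustrative.  */

#include <omp.h>

static void
example_nested_target_data (double *a, double *b, int n)
{
  #pragma omp target data map(tofrom: a[0:n])   /* outer region: push one descriptor */
  {
    #pragma omp target data map(to: b[0:n])     /* inner region: push another */
    {
      #pragma omp target
      for (int i = 0; i < n; i++)
        a[i] += 2.0 * b[i];
    }                                           /* inner GOMP_target_end_data pops */
  }                                             /* outer GOMP_target_end_data pops */
}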
4459
4460 void
4461 GOMP_target_update (int device, const void *unused, size_t mapnum,
4462 void **hostaddrs, size_t *sizes, unsigned char *kinds)
4463 {
4464 struct gomp_device_descr *devicep = resolve_device (device, true);
4465
4466 if (devicep == NULL
4467 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4468 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4469 return;
4470
4471 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
4472 }
4473
4474 void
4475 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
4476 size_t *sizes, unsigned short *kinds,
4477 unsigned int flags, void **depend)
4478 {
4479 struct gomp_device_descr *devicep = resolve_device (device, true);
4480
4481 /* If there are depend clauses, but nowait is not present,
4482 block the parent task until the dependencies are resolved
4483 and then just continue with the rest of the function as if it
4484 were a merged task. Until we are able to schedule tasks during
4485 variable mapping or unmapping, ignore nowait if depend clauses
4486 are not present. */
4487 if (depend != NULL)
4488 {
4489 struct gomp_thread *thr = gomp_thread ();
4490 if (thr->task && thr->task->depend_hash)
4491 {
4492 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4493 && thr->ts.team
4494 && !thr->task->final_task)
4495 {
4496 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4497 mapnum, hostaddrs, sizes, kinds,
4498 flags | GOMP_TARGET_FLAG_UPDATE,
4499 depend, NULL, GOMP_TARGET_TASK_DATA))
4500 return;
4501 }
4502 else
4503 {
4504 struct gomp_team *team = thr->ts.team;
4505 /* If parallel or taskgroup has been cancelled, don't start new
4506 tasks. */
4507 if (__builtin_expect (gomp_cancel_var, 0) && team)
4508 {
4509 if (gomp_team_barrier_cancelled (&team->barrier))
4510 return;
4511 if (thr->task->taskgroup)
4512 {
4513 if (thr->task->taskgroup->cancelled)
4514 return;
4515 if (thr->task->taskgroup->workshare
4516 && thr->task->taskgroup->prev
4517 && thr->task->taskgroup->prev->cancelled)
4518 return;
4519 }
4520 }
4521
4522 gomp_task_maybe_wait_for_dependencies (depend);
4523 }
4524 }
4525 }
4526
4527 if (devicep == NULL
4528 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4529 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4530 return;
4531
4532 struct gomp_thread *thr = gomp_thread ();
4533 struct gomp_team *team = thr->ts.team;
4534 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4535 if (__builtin_expect (gomp_cancel_var, 0) && team)
4536 {
4537 if (gomp_team_barrier_cancelled (&team->barrier))
4538 return;
4539 if (thr->task->taskgroup)
4540 {
4541 if (thr->task->taskgroup->cancelled)
4542 return;
4543 if (thr->task->taskgroup->workshare
4544 && thr->task->taskgroup->prev
4545 && thr->task->taskgroup->prev->cancelled)
4546 return;
4547 }
4548 }
4549
4550 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
4551 }
4552
4553 static void
4554 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
4555 void **hostaddrs, size_t *sizes, unsigned short *kinds,
4556 htab_t *refcount_set)
4557 {
4558 const int typemask = 0xff;
4559 size_t i;
4560 gomp_mutex_lock (&devicep->lock);
4561 if (devicep->state == GOMP_DEVICE_FINALIZED)
4562 {
4563 gomp_mutex_unlock (&devicep->lock);
4564 return;
4565 }
4566
4567 for (i = 0; i < mapnum; i++)
4568 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
4569 {
4570 struct splay_tree_key_s cur_node;
4571 cur_node.host_start = (uintptr_t) hostaddrs[i];
4572 cur_node.host_end = cur_node.host_start + sizeof (void *);
4573 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
4574
4575 if (n)
4576 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
4577 false, NULL);
4578 }
4579
4580 size_t nrmvars = 0;
4581 splay_tree_key remove_vars[mapnum];
4582
4583 for (i = 0; i < mapnum; i++)
4584 {
4585 struct splay_tree_key_s cur_node;
4586 unsigned char kind = kinds[i] & typemask;
4587 switch (kind)
4588 {
4589 case GOMP_MAP_FROM:
4590 case GOMP_MAP_ALWAYS_FROM:
4591 case GOMP_MAP_DELETE:
4592 case GOMP_MAP_RELEASE:
4593 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4594 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4595 cur_node.host_start = (uintptr_t) hostaddrs[i];
4596 cur_node.host_end = cur_node.host_start + sizes[i];
4597 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4598 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4599 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4600 : splay_tree_lookup (&devicep->mem_map, &cur_node);
4601 if (!k)
4602 continue;
4603
4604 bool delete_p = (kind == GOMP_MAP_DELETE
4605 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4606 bool do_copy, do_remove;
4607 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4608 &do_remove);
4609
4610 if ((kind == GOMP_MAP_FROM && do_copy)
4611 || kind == GOMP_MAP_ALWAYS_FROM)
4612 {
4613 if (k->aux && k->aux->attach_count)
4614 {
4615 /* We have to be careful not to overwrite still attached
4616 pointers during the copyback to host. */
4617 uintptr_t addr = k->host_start;
4618 while (addr < k->host_end)
4619 {
4620 size_t i = (addr - k->host_start) / sizeof (void *);
4621 if (k->aux->attach_count[i] == 0)
4622 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4623 (void *) (k->tgt->tgt_start
4624 + k->tgt_offset
4625 + addr - k->host_start),
4626 sizeof (void *));
4627 addr += sizeof (void *);
4628 }
4629 }
4630 else
4631 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4632 (void *) (k->tgt->tgt_start + k->tgt_offset
4633 + cur_node.host_start
4634 - k->host_start),
4635 cur_node.host_end - cur_node.host_start);
4636 }
4637
4638 /* Structure element lists are removed altogether at once, which
4639 may cause immediate deallocation of the target_mem_desc, causing
4640 errors if we still have following element siblings to copy back.
4641 While we're at it, it also seems more disciplined to simply
4642 queue all removals together for processing below.
4643 */
4644 if (do_remove)
4645 remove_vars[nrmvars++] = k;
4646 break;
4647
4648 case GOMP_MAP_DETACH:
4649 break;
4650 default:
4651 gomp_mutex_unlock (&devicep->lock);
4652 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4653 kind);
4654 }
4655 }
4656
4657 for (i = 0; i < nrmvars; i++)
4658 gomp_remove_var (devicep, remove_vars[i]);
4659
4660 gomp_mutex_unlock (&devicep->lock);
4661 }
4662
4663 void
4664 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4665 size_t *sizes, unsigned short *kinds,
4666 unsigned int flags, void **depend)
4667 {
4668 struct gomp_device_descr *devicep = resolve_device (device, true);
4669
4670 /* If there are depend clauses, but nowait is not present,
4671 block the parent task until the dependencies are resolved
4672 and then just continue with the rest of the function as if it
4673 were a merged task. Until we are able to schedule tasks during
4674 variable mapping or unmapping, ignore nowait if depend clauses
4675 are not present. */
4676 if (depend != NULL)
4677 {
4678 struct gomp_thread *thr = gomp_thread ();
4679 if (thr->task && thr->task->depend_hash)
4680 {
4681 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4682 && thr->ts.team
4683 && !thr->task->final_task)
4684 {
4685 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4686 mapnum, hostaddrs, sizes, kinds,
4687 flags, depend, NULL,
4688 GOMP_TARGET_TASK_DATA))
4689 return;
4690 }
4691 else
4692 {
4693 struct gomp_team *team = thr->ts.team;
4694 /* If parallel or taskgroup has been cancelled, don't start new
4695 tasks. */
4696 if (__builtin_expect (gomp_cancel_var, 0) && team)
4697 {
4698 if (gomp_team_barrier_cancelled (&team->barrier))
4699 return;
4700 if (thr->task->taskgroup)
4701 {
4702 if (thr->task->taskgroup->cancelled)
4703 return;
4704 if (thr->task->taskgroup->workshare
4705 && thr->task->taskgroup->prev
4706 && thr->task->taskgroup->prev->cancelled)
4707 return;
4708 }
4709 }
4710
4711 gomp_task_maybe_wait_for_dependencies (depend);
4712 }
4713 }
4714 }
4715
4716 if (devicep == NULL
4717 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4718 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4719 return;
4720
4721 struct gomp_thread *thr = gomp_thread ();
4722 struct gomp_team *team = thr->ts.team;
4723 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4724 if (__builtin_expect (gomp_cancel_var, 0) && team)
4725 {
4726 if (gomp_team_barrier_cancelled (&team->barrier))
4727 return;
4728 if (thr->task->taskgroup)
4729 {
4730 if (thr->task->taskgroup->cancelled)
4731 return;
4732 if (thr->task->taskgroup->workshare
4733 && thr->task->taskgroup->prev
4734 && thr->task->taskgroup->prev->cancelled)
4735 return;
4736 }
4737 }
4738
4739 htab_t refcount_set = htab_create (mapnum);
4740
4741 /* The variables are mapped separately such that they can be released
4742 independently. */
4743 size_t i, j;
4744 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4745 for (i = 0; i < mapnum; i++)
4746 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4747 {
4748 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4749 &kinds[i], true, &refcount_set,
4750 GOMP_MAP_VARS_ENTER_DATA);
4751 i += sizes[i];
4752 }
4753 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4754 {
4755 for (j = i + 1; j < mapnum; j++)
4756 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4757 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4758 break;
4759 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4760 &kinds[i], true, &refcount_set,
4761 GOMP_MAP_VARS_ENTER_DATA);
4762 i += j - i - 1;
4763 }
4764 else if (i + 1 < mapnum
4765 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4766 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4767 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4768 {
4769 /* An attach operation must be processed together with the mapped
4770 base-pointer list item. */
4771 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4772 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4773 i += 1;
4774 }
4775 else
4776 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4777 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4778 else
4779 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4780 htab_free (refcount_set);
4781 }
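/* A hedged user-level sketch of an enter/exit data pair of the kind the
   per-item mapping loop above services; the function name is illustrative.  */

#include <omp.h>

static void
example_enter_exit_data (double *a, double *b, int n)
{
  /* Each map list item is mapped separately, so it can be released on its
     own later.  */
  #pragma omp target enter data map(to: a[0:n]) map(alloc: b[0:n])

  #pragma omp target
  for (int i = 0; i < n; i++)
    b[i] = 2.0 * a[i];

  /* Copy B back and drop both mappings.  */
  #pragma omp target exit data map(from: b[0:n]) map(delete: a[0:n])
}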
4782
4783 bool
4784 gomp_target_task_fn (void *data)
4785 {
4786 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4787 struct gomp_device_descr *devicep = ttask->devicep;
4788
4789 if (ttask->fn != NULL)
4790 {
4791 void *fn_addr;
4792 if (devicep == NULL
4793 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4794 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4795 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4796 {
4797 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4798 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4799 ttask->args);
4800 return false;
4801 }
4802
4803 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4804 {
4805 if (ttask->tgt)
4806 gomp_unmap_vars (ttask->tgt, true, NULL);
4807 return false;
4808 }
4809
4810 void *actual_arguments;
4811 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4812 {
4813 ttask->tgt = NULL;
4814 actual_arguments = ttask->hostaddrs;
4815 }
4816 else
4817 {
4818 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4819 NULL, ttask->sizes, ttask->kinds, true,
4820 NULL, GOMP_MAP_VARS_TARGET);
4821 actual_arguments = (void *) ttask->tgt->tgt_start;
4822 }
4823 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4824
4825 assert (devicep->async_run_func);
4826 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4827 ttask->args, (void *) ttask);
4828 return true;
4829 }
4830 else if (devicep == NULL
4831 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4832 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4833 return false;
4834
4835 size_t i;
4836 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4837 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4838 ttask->kinds, true);
4839 else
4840 {
4841 htab_t refcount_set = htab_create (ttask->mapnum);
4842 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4843 for (i = 0; i < ttask->mapnum; i++)
4844 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4845 {
4846 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4847 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4848 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4849 i += ttask->sizes[i];
4850 }
4851 else
4852 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4853 &ttask->kinds[i], true, &refcount_set,
4854 GOMP_MAP_VARS_ENTER_DATA);
4855 else
4856 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4857 ttask->kinds, &refcount_set);
4858 htab_free (refcount_set);
4859 }
4860 return false;
4861 }
4862
4863 void
4864 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4865 {
4866 if (thread_limit)
4867 {
4868 struct gomp_task_icv *icv = gomp_icv (true);
4869 icv->thread_limit_var
4870 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4871 }
4872 (void) num_teams;
4873 }
4874
4875 bool
4876 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4877 unsigned int thread_limit, bool first)
4878 {
4879 struct gomp_thread *thr = gomp_thread ();
4880 if (first)
4881 {
4882 if (thread_limit)
4883 {
4884 struct gomp_task_icv *icv = gomp_icv (true);
4885 icv->thread_limit_var
4886 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4887 }
4888 (void) num_teams_high;
4889 if (num_teams_low == 0)
4890 num_teams_low = 1;
4891 thr->num_teams = num_teams_low - 1;
4892 thr->team_num = 0;
4893 }
4894 else if (thr->team_num == thr->num_teams)
4895 return false;
4896 else
4897 ++thr->team_num;
4898 return true;
4899 }
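/* GOMP_teams4 implements host-side team iteration: the first call records the
   requested team count and resets the team number, and each later call either
   advances to the next team or reports that all teams have run.  A hedged
   sketch of how a caller (e.g. compiler-generated host-fallback code for a
   'teams' region) might drive it; the exact expansion GCC emits may differ,
   and the helper name is illustrative.  */

#include <stdbool.h>

extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);

static void
example_run_teams_host (void (*body) (void), unsigned int num_teams,
			unsigned int thread_limit)
{
  /* The first call sets up the thread-limit ICV and team bookkeeping...  */
  if (!GOMP_teams4 (num_teams, num_teams, thread_limit, true))
    return;
  do
    body ();  /* ...then each iteration runs one team's copy of the region.  */
  while (GOMP_teams4 (num_teams, num_teams, thread_limit, false));
}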
4900
4901 void *
4902 omp_target_alloc (size_t size, int device_num)
4903 {
4904 if (device_num == omp_initial_device
4905 || device_num == gomp_get_num_devices ())
4906 return malloc (size);
4907
4908 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4909 if (devicep == NULL)
4910 return NULL;
4911
4912 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4913 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4914 return malloc (size);
4915
4916 gomp_mutex_lock (&devicep->lock);
4917 void *ret = devicep->alloc_func (devicep->target_id, size);
4918 gomp_mutex_unlock (&devicep->lock);
4919 return ret;
4920 }
4921
4922 void
4923 omp_target_free (void *device_ptr, int device_num)
4924 {
4925 if (device_num == omp_initial_device
4926 || device_num == gomp_get_num_devices ())
4927 {
4928 free (device_ptr);
4929 return;
4930 }
4931
4932 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4933 if (devicep == NULL || device_ptr == NULL)
4934 return;
4935
4936 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4937 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4938 {
4939 free (device_ptr);
4940 return;
4941 }
4942
4943 gomp_mutex_lock (&devicep->lock);
4944 gomp_free_device_memory (devicep, device_ptr);
4945 gomp_mutex_unlock (&devicep->lock);
4946 }
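/* A minimal usage sketch of the two entry points above: allocate device
   memory, shuttle data with omp_target_memcpy (defined further below), and
   release it.  The function name is illustrative and error handling is kept
   to a bare minimum.  */

#include <omp.h>
#include <string.h>

static int
example_target_alloc_roundtrip (void)
{
  int dev = omp_get_default_device ();
  int host = omp_get_initial_device ();
  double a[4] = { 1.0, 2.0, 3.0, 4.0 }, b[4];

  void *d_a = omp_target_alloc (sizeof a, dev);
  if (d_a == NULL)
    return -1;

  /* Host -> device, then device -> host round trip.  */
  omp_target_memcpy (d_a, a, sizeof a, 0, 0, dev, host);
  omp_target_memcpy (b, d_a, sizeof b, 0, 0, host, dev);

  omp_target_free (d_a, dev);
  return memcmp (a, b, sizeof a) == 0 ? 0 : -1;
}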
4947
4948 void *
4949 gomp_usm_alloc (size_t size, int device_num)
4950 {
4951 if (device_num == gomp_get_num_devices ())
4952 return malloc (size);
4953
4954 struct gomp_device_descr *devicep = resolve_device (device_num, true);
4955 if (devicep == NULL)
4956 return NULL;
4957
4958 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4959 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4960 return malloc (size);
4961
4962 void *ret = NULL;
4963 gomp_mutex_lock (&devicep->lock);
4964 if (devicep->usm_alloc_func)
4965 ret = devicep->usm_alloc_func (devicep->target_id, size);
4966 gomp_mutex_unlock (&devicep->lock);
4967 return ret;
4968 }
4969
4970 void
4971 gomp_usm_free (void *device_ptr, int device_num)
4972 {
4973 if (device_ptr == NULL)
4974 return;
4975
4976 if (device_num == gomp_get_num_devices ())
4977 {
4978 free (device_ptr);
4979 return;
4980 }
4981
4982 struct gomp_device_descr *devicep = resolve_device (device_num, true);
4983 if (devicep == NULL)
4984 return;
4985
4986 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4987 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4988 {
4989 free (device_ptr);
4990 return;
4991 }
4992
4993 gomp_mutex_lock (&devicep->lock);
4994 if (devicep->usm_free_func
4995 && !devicep->usm_free_func (devicep->target_id, device_ptr))
4996 {
4997 gomp_mutex_unlock (&devicep->lock);
4998 gomp_fatal ("error in freeing device memory block at %p", device_ptr);
4999 }
5000 gomp_mutex_unlock (&devicep->lock);
5001 }
5002
5003
5004 /* Allocate page-locked host memory via DEVICE. */
5005
5006 static void *
5007 gomp_page_locked_host_alloc_dev (struct gomp_device_descr *device,
5008 size_t size, bool allow_null)
5009 {
5010 gomp_debug (0, "%s: device=%p (%s), size=%llu\n",
5011 __FUNCTION__, device, device->name, (unsigned long long) size);
5012
5013 void *ret;
5014 if (!device->page_locked_host_alloc_func (&ret, size))
5015 {
5016 const char *fmt
5017 = "Failed to allocate page-locked host memory via %s libgomp plugin";
5018 if (allow_null)
5019 gomp_fatal (fmt, device->name);
5020 else
5021 gomp_error (fmt, device->name);
5022 ret = NULL;
5023 }
5024 else if (ret == NULL && !allow_null)
5025 gomp_error ("Out of memory allocating %lu bytes"
5026 " page-locked host memory"
5027 " via %s libgomp plugin",
5028 (unsigned long) size, device->name);
5029 else
5030 gomp_debug (0, " -> ret=[%p, %p)\n",
5031 ret, ret + size);
5032 return ret;
5033 }
5034
5035 /* Free page-locked host memory via DEVICE. */
5036
5037 static bool
5038 gomp_page_locked_host_free_dev (struct gomp_device_descr *device,
5039 void *ptr,
5040 struct goacc_asyncqueue *aq)
5041 {
5042 gomp_debug (0, "%s: device=%p (%s), ptr=%p, aq=%p\n",
5043 __FUNCTION__, device, device->name, ptr, aq);
5044
5045 if (!device->page_locked_host_free_func (ptr, aq))
5046 {
5047 gomp_error ("Failed to free page-locked host memory"
5048 " via %s libgomp plugin",
5049 device->name);
5050 return false;
5051 }
5052 return true;
5053 }
5054
5055 /* Allocate aligned page-locked host memory via DEVICE.
5056
5057 That is, 'gomp_aligned_alloc' (see 'alloc.c') for page-locked host
5058 memory. */
5059
5060 static void *
5061 gomp_page_locked_host_aligned_alloc_dev (struct gomp_device_descr *device,
5062 size_t al, size_t size)
5063 {
5064 gomp_debug (0, "%s: device=%p (%s), al=%llu, size=%llu\n",
5065 __FUNCTION__, device, device->name,
5066 (unsigned long long) al, (unsigned long long) size);
5067
5068 void *ret;
5069 if (al < sizeof (void *))
5070 al = sizeof (void *);
5071 ret = NULL;
5072 if ((al & (al - 1)) == 0 && size)
5073 {
5074 void *p = gomp_page_locked_host_alloc_dev (device, size + al, true);
5075 if (p)
5076 {
5077 void *ap = (void *) (((uintptr_t) p + al) & -al);
5078 ((void **) ap)[-1] = p;
5079 ret = ap;
5080 }
5081 }
5082 if (ret == NULL)
5083 gomp_error ("Out of memory allocating %lu bytes", (unsigned long) size);
5084 else
5085 gomp_debug (0, " -> ret=[%p, %p)\n",
5086 ret, ret + size);
5087 return ret;
5088 }
5089
5090 /* Free aligned page-locked host memory via DEVICE.
5091
5092 That is, 'gomp_aligned_free' (see 'alloc.c') for page-locked host
5093 memory. */
5094
5095 static bool
5096 gomp_page_locked_host_aligned_free_dev (struct gomp_device_descr *device,
5097 void *ptr,
5098 struct goacc_asyncqueue *aq)
5099 {
5100 gomp_debug (0, "%s: device=%p (%s), ptr=%p, aq=%p\n",
5101 __FUNCTION__, device, device->name, ptr, aq);
5102
5103 if (ptr)
5104 {
5105 ptr = ((void **) ptr)[-1];
5106 gomp_debug (0, " ptr=%p\n",
5107 ptr);
5108
5109 if (!gomp_page_locked_host_free_dev (device, ptr, aq))
5110 return false;
5111 }
5112 return true;
5113 }
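/* The two helpers above over-allocate by the alignment, round the result up,
   and stash the original pointer in the word just below the aligned address so
   that the free path can recover it.  The same technique in isolation, with
   plain malloc/free standing in for the page-locked allocator; the helper
   names are illustrative.  */

#include <stdint.h>
#include <stdlib.h>

static void *
example_aligned_alloc_stash (size_t al, size_t size)
{
  if (al < sizeof (void *))
    al = sizeof (void *);
  if ((al & (al - 1)) != 0 || size == 0)   /* Alignment must be a power of two.  */
    return NULL;
  void *p = malloc (size + al);
  if (p == NULL)
    return NULL;
  void *ap = (void *) (((uintptr_t) p + al) & -(uintptr_t) al);  /* Round up.  */
  ((void **) ap)[-1] = p;                  /* Remember the real allocation.  */
  return ap;
}

static void
example_aligned_free_stash (void *ap)
{
  if (ap)
    free (((void **) ap)[-1]);             /* Recover and free the real pointer.  */
}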
5114
5115 /* Register page-locked host memory via DEVICE. */
5116
5117 attribute_hidden int
5118 gomp_page_locked_host_register_dev (struct gomp_device_descr *device,
5119 void *ptr, size_t size, int kind)
5120 {
5121 gomp_debug (0, "%s: device=%p (%s), ptr=%p, size=%llu, kind=%d\n",
5122 __FUNCTION__, device, device->name,
5123 ptr, (unsigned long long) size, kind);
5124 assert (size != 0);
5125
5126 int ret = device->page_locked_host_register_func (device->target_id,
5127 ptr, size, kind);
5128 if (ret < 0)
5129 gomp_error ("Failed to register page-locked host memory"
5130 " via %s libgomp plugin",
5131 device->name);
5132 return ret;
5133 }
5134
5135 /* Unregister page-locked host memory via DEVICE. */
5136
5137 attribute_hidden bool
5138 gomp_page_locked_host_unregister_dev (struct gomp_device_descr *device,
5139 void *ptr, size_t size,
5140 struct goacc_asyncqueue *aq)
5141 {
5142 gomp_debug (0, "%s: device=%p (%s), ptr=%p, size=%llu, aq=%p\n",
5143 __FUNCTION__, device, device->name,
5144 ptr, (unsigned long long) size, aq);
5145 assert (size != 0);
5146
5147 if (!device->page_locked_host_unregister_func (ptr, size, aq))
5148 {
5149 gomp_error ("Failed to unregister page-locked host memory"
5150 " via %s libgomp plugin",
5151 device->name);
5152 return false;
5153 }
5154 return true;
5155 }
5156
5157
5158 /* Device (really: libgomp plugin) to use for page-locked memory. We
5159 assume there is either none or exactly one such device for the lifetime of
5160 the process. */
5161
5162 static struct gomp_device_descr *device_for_page_locked
5163 = /* uninitialized */ (void *) -1;
5164
5165 static struct gomp_device_descr *
5166 get_device_for_page_locked (void)
5167 {
5168 gomp_debug (0, "%s\n",
5169 __FUNCTION__);
5170
5171 struct gomp_device_descr *device;
5172 #ifdef HAVE_SYNC_BUILTINS
5173 device
5174 = __atomic_load_n (&device_for_page_locked, MEMMODEL_RELAXED);
5175 if (device == (void *) -1)
5176 {
5177 gomp_debug (0, " init\n");
5178
5179 gomp_init_targets_once ();
5180
5181 device = NULL;
5182 for (int i = 0; i < num_devices; ++i)
5183 {
5184 gomp_debug (0, " i=%d, target_id=%d\n",
5185 i, devices[i].target_id);
5186
5187 /* We consider only the first device of potentially several of the
5188 same type, as this functionality is not specific to an individual
5189 offloading device, but instead relates to the host side of the
5190 respective offloading implementation. */
5191 if (devices[i].target_id != 0)
5192 continue;
5193
5194 if (!devices[i].page_locked_host_alloc_func)
5195 continue;
5196
5197 gomp_debug (0, " found device: %p (%s)\n",
5198 &devices[i], devices[i].name);
5199 if (device)
5200 gomp_fatal ("Unclear how %s and %s libgomp plugins may"
5201 " simultaneously provide functionality"
5202 " for page-locked memory",
5203 device->name, devices[i].name);
5204 else
5205 device = &devices[i];
5206 }
5207
5208 struct gomp_device_descr *device_old
5209 = __atomic_exchange_n (&device_for_page_locked, device,
5210 MEMMODEL_RELAXED);
5211 gomp_debug (0, " old device_for_page_locked: %p\n",
5212 device_old);
5213 assert (device_old == (void *) -1
5214 /* We shouldn't have concurrently found a different or no
5215 device. */
5216 || device_old == device);
5217 }
5218 #else /* !HAVE_SYNC_BUILTINS */
5219 gomp_debug (0, " not implemented for '!HAVE_SYNC_BUILTINS'\n");
5220 (void) &device_for_page_locked;
5221 device = NULL;
5222 #endif /* HAVE_SYNC_BUILTINS */
5223
5224 gomp_debug (0, " -> device=%p (%s)\n",
5225 device, device ? device->name : "[none]");
5226 return device;
5227 }
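/* get_device_for_page_locked uses a lock-free publication pattern: a sentinel
   value marks 'not yet computed', the first caller computes the result and
   publishes it with an atomic exchange, and any concurrent caller must arrive
   at the same answer.  The same pattern in isolation, using the public
   __atomic builtins with relaxed ordering as above; the names and the
   looked-up value are illustrative.  */

#include <assert.h>

static void *
example_expensive_lookup (void)     /* Stands in for the device scan.  */
{
  return (void *) 0x1234;
}

static void *example_cached = (void *) -1;   /* (void *) -1 = not yet computed.  */

static void *
example_get_cached (void)
{
  void *val = __atomic_load_n (&example_cached, __ATOMIC_RELAXED);
  if (val == (void *) -1)
    {
      val = example_expensive_lookup ();
      void *old = __atomic_exchange_n (&example_cached, val, __ATOMIC_RELAXED);
      /* Either we were first, or a concurrent caller computed the same value.  */
      assert (old == (void *) -1 || old == val);
    }
  return val;
}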
5228
5229 /* Allocate page-locked host memory.
5230 Returns whether we have a device capable of that. */
5231
5232 attribute_hidden bool
5233 gomp_page_locked_host_alloc (void **ptr, size_t size)
5234 {
5235 gomp_debug (0, "%s: ptr=%p, size=%llu\n",
5236 __FUNCTION__, ptr, (unsigned long long) size);
5237
5238 struct gomp_device_descr *device = get_device_for_page_locked ();
5239 gomp_debug (0, " device=%p (%s)\n",
5240 device, device ? device->name : "[none]");
5241 if (device)
5242 {
5243 gomp_mutex_lock (&device->lock);
5244 if (device->state == GOMP_DEVICE_UNINITIALIZED)
5245 gomp_init_device (device);
5246 else if (device->state == GOMP_DEVICE_FINALIZED)
5247 {
5248 gomp_mutex_unlock (&device->lock);
5249 gomp_fatal ("Device %s used for page-locked memory is finalized",
5250 device->name);
5251 }
5252 gomp_mutex_unlock (&device->lock);
5253
5254 *ptr = gomp_page_locked_host_alloc_dev (device, size, true);
5255 }
5256 return device != NULL;
5257 }
5258
5259 /* Free page-locked host memory.
5260 This must only be called if 'gomp_page_locked_host_alloc' returned
5261 'true'. */
5262
5263 attribute_hidden void
5264 gomp_page_locked_host_free (void *ptr)
5265 {
5266 gomp_debug (0, "%s: ptr=%p\n",
5267 __FUNCTION__, ptr);
5268
5269 struct gomp_device_descr *device = get_device_for_page_locked ();
5270 gomp_debug (0, " device=%p (%s)\n",
5271 device, device ? device->name : "[none]");
5272 assert (device);
5273
5274 gomp_mutex_lock (&device->lock);
5275 assert (device->state != GOMP_DEVICE_UNINITIALIZED);
5276 if (device->state == GOMP_DEVICE_FINALIZED)
5277 {
5278 gomp_mutex_unlock (&device->lock);
5279 return;
5280 }
5281 gomp_mutex_unlock (&device->lock);
5282
5283 if (!gomp_page_locked_host_free_dev (device, ptr, NULL))
5284 exit (EXIT_FAILURE);
5285 }
5286
5287
5288 int
5289 omp_target_is_present (const void *ptr, int device_num)
5290 {
5291 if (device_num == omp_initial_device
5292 || device_num == gomp_get_num_devices ())
5293 return 1;
5294
5295 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5296 if (devicep == NULL)
5297 return 0;
5298
5299 if (ptr == NULL)
5300 return 1;
5301
5302 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5303 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5304 return 1;
5305
5306 gomp_mutex_lock (&devicep->lock);
5307 struct splay_tree_s *mem_map = &devicep->mem_map;
5308 struct splay_tree_key_s cur_node;
5309
5310 cur_node.host_start = (uintptr_t) ptr;
5311 cur_node.host_end = cur_node.host_start;
5312 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5313 int ret = n != NULL;
5314 gomp_mutex_unlock (&devicep->lock);
5315 return ret;
5316 }
5317
5318 static int
5319 omp_target_memcpy_check (int dst_device_num, int src_device_num,
5320 struct gomp_device_descr **dst_devicep,
5321 struct gomp_device_descr **src_devicep)
5322 {
5323 if (dst_device_num != gomp_get_num_devices ()
5324 /* Above gomp_get_num_devices has to be called unconditionally. */
5325 && dst_device_num != omp_initial_device)
5326 {
5327 *dst_devicep = resolve_device (dst_device_num, false);
5328 if (*dst_devicep == NULL)
5329 return EINVAL;
5330
5331 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5332 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5333 *dst_devicep = NULL;
5334 }
5335
5336 if (src_device_num != num_devices_openmp
5337 && src_device_num != omp_initial_device)
5338 {
5339 *src_devicep = resolve_device (src_device_num, false);
5340 if (*src_devicep == NULL)
5341 return EINVAL;
5342
5343 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5344 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5345 *src_devicep = NULL;
5346 }
5347
5348 return 0;
5349 }
5350
5351 static int
5352 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
5353 size_t dst_offset, size_t src_offset,
5354 struct gomp_device_descr *dst_devicep,
5355 struct gomp_device_descr *src_devicep)
5356 {
5357 bool ret;
5358 if (src_devicep == NULL && dst_devicep == NULL)
5359 {
5360 /* No 'gomp_verify_always_pinned_mode' here. */
5361 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
5362 return 0;
5363 }
5364 if (src_devicep == NULL)
5365 {
5366 gomp_mutex_lock (&dst_devicep->lock);
5367
5368 void *src_ptr = (void *) src + src_offset;
5369 int src_ptr_page_locked_host_p = 0;
5370
5371 if (always_pinned_mode)
5372 {
5373 if (length != 0)
5374 src_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
5375 (dst_devicep, src_ptr, length, GOMP_MAP_TO);
5376 if (src_ptr_page_locked_host_p < 0)
5377 {
5378 gomp_mutex_unlock (&dst_devicep->lock);
5379 return ENOMEM;
5380 }
5381 }
5382
5383 /* No 'gomp_verify_always_pinned_mode' here; have just registered. */
5384 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
5385 (char *) dst + dst_offset,
5386 src_ptr, length);
5387
5388 if (src_ptr_page_locked_host_p
5389 && !gomp_page_locked_host_unregister_dev (dst_devicep,
5390 src_ptr, length, NULL))
5391 {
5392 gomp_mutex_unlock (&dst_devicep->lock);
5393 return ENOMEM;
5394 }
5395
5396 gomp_mutex_unlock (&dst_devicep->lock);
5397 return (ret ? 0 : EINVAL);
5398 }
5399 if (dst_devicep == NULL)
5400 {
5401 gomp_mutex_lock (&src_devicep->lock);
5402
5403 void *dst_ptr = (void *) dst + dst_offset;
5404 int dst_ptr_page_locked_host_p = 0;
5405
5406 if (always_pinned_mode)
5407 {
5408 if (length != 0)
5409 dst_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
5410 (src_devicep, dst_ptr, length, GOMP_MAP_FROM);
5411 if (dst_ptr_page_locked_host_p < 0)
5412 {
5413 gomp_mutex_unlock (&src_devicep->lock);
5414 return ENOMEM;
5415 }
5416 }
5417
5418 /* No 'gomp_verify_always_pinned_mode' here; have just registered. */
5419 ret = src_devicep->dev2host_func (src_devicep->target_id,
5420 dst_ptr,
5421 (char *) src + src_offset, length);
5422
5423 if (dst_ptr_page_locked_host_p
5424 && !gomp_page_locked_host_unregister_dev (src_devicep,
5425 dst_ptr, length, NULL))
5426 {
5427 gomp_mutex_unlock (&src_devicep->lock);
5428 return ENOMEM;
5429 }
5430
5431 gomp_mutex_unlock (&src_devicep->lock);
5432 return (ret ? 0 : EINVAL);
5433 }
5434 if (src_devicep == dst_devicep)
5435 {
5436 gomp_mutex_lock (&src_devicep->lock);
5437 /* No 'gomp_verify_always_pinned_mode' here. */
5438 ret = src_devicep->dev2dev_func (src_devicep->target_id,
5439 (char *) dst + dst_offset,
5440 (char *) src + src_offset, length);
5441 gomp_mutex_unlock (&src_devicep->lock);
5442 return (ret ? 0 : EINVAL);
5443 }
5444 return EINVAL;
5445 }
5446
5447 int
5448 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
5449 size_t src_offset, int dst_device_num, int src_device_num)
5450 {
5451 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
5452 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
5453 &dst_devicep, &src_devicep);
5454
5455 if (ret)
5456 return ret;
5457
5458 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
5459 dst_devicep, src_devicep);
5460
5461 return ret;
5462 }
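
/* Illustrative usage sketch (not part of the library): copy host data into
   device memory obtained from omp_target_alloc and back again.  Error
   handling is kept minimal; N is an arbitrary element count.

     size_t n = 256, bytes = n * sizeof (double);
     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double *hbuf = malloc (bytes);
     double *dbuf = omp_target_alloc (bytes, dev);
     if (hbuf && dbuf
         && omp_target_memcpy (dbuf, hbuf, bytes, 0, 0, dev, host) == 0
         && omp_target_memcpy (hbuf, dbuf, bytes, 0, 0, host, dev) == 0)
       ;  /- round trip succeeded -/
     omp_target_free (dbuf, dev);
     free (hbuf);
*/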
5463
5464 typedef struct
5465 {
5466 void *dst;
5467 const void *src;
5468 size_t length;
5469 size_t dst_offset;
5470 size_t src_offset;
5471 struct gomp_device_descr *dst_devicep;
5472 struct gomp_device_descr *src_devicep;
5473 } omp_target_memcpy_data;
5474
5475 static void
5476 omp_target_memcpy_async_helper (void *args)
5477 {
5478 omp_target_memcpy_data *a = args;
5479 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
5480 a->src_offset, a->dst_devicep, a->src_devicep))
5481 gomp_fatal ("omp_target_memcpy failed");
5482 }
5483
5484 int
5485 omp_target_memcpy_async (void *dst, const void *src, size_t length,
5486 size_t dst_offset, size_t src_offset,
5487 int dst_device_num, int src_device_num,
5488 int depobj_count, omp_depend_t *depobj_list)
5489 {
5490 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
5491 unsigned int flags = 0;
5492 void *depend[depobj_count + 5];
5493 int i;
5494 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
5495 &dst_devicep, &src_devicep);
5496
5497 omp_target_memcpy_data s = {
5498 .dst = dst,
5499 .src = src,
5500 .length = length,
5501 .dst_offset = dst_offset,
5502 .src_offset = src_offset,
5503 .dst_devicep = dst_devicep,
5504 .src_devicep = src_devicep
5505 };
5506
5507 if (check)
5508 return check;
5509
5510 if (depobj_count > 0 && depobj_list != NULL)
5511 {
5512 flags |= GOMP_TASK_FLAG_DEPEND;
5513 depend[0] = 0;
5514 depend[1] = (void *) (uintptr_t) depobj_count;
5515 depend[2] = depend[3] = depend[4] = 0;
5516 for (i = 0; i < depobj_count; ++i)
5517 depend[i + 5] = &depobj_list[i];
5518 }
5519
5520 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
5521 __alignof__ (s), true, flags, depend, 0, NULL);
5522
5523 return 0;
5524 }
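
/* Illustrative usage sketch (not part of the library): the asynchronous
   variant queues the copy as a hidden task, optionally ordered by depend
   objects; completion can be enforced with 'taskwait'.  DBUF, HBUF, BYTES,
   DEV and HOST are as in the synchronous sketch above.

     omp_depend_t dep;
     #pragma omp depobj(dep) depend(inout: hbuf[0])
     omp_target_memcpy_async (dbuf, hbuf, bytes, 0, 0, dev, host, 1, &dep);
     #pragma omp taskwait
     #pragma omp depobj(dep) destroy
*/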
5525
5526 static int
5527 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
5528 int num_dims, const size_t *volume,
5529 const size_t *dst_offsets,
5530 const size_t *src_offsets,
5531 const size_t *dst_dimensions,
5532 const size_t *src_dimensions,
5533 struct gomp_device_descr *dst_devicep,
5534 struct gomp_device_descr *src_devicep)
5535 {
5536 size_t dst_slice = element_size;
5537 size_t src_slice = element_size;
5538 size_t j, dst_off, src_off, length;
5539 int i, ret;
5540
5541 if (num_dims == 1)
5542 {
5543 if (__builtin_mul_overflow (element_size, volume[0], &length)
5544 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
5545 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
5546 return EINVAL;
5547 if (dst_devicep == NULL && src_devicep == NULL)
5548 {
5549 /* No 'gomp_verify_always_pinned_mode' here. */
5550 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
5551 length);
5552 ret = 1;
5553 }
5554 else if (src_devicep == NULL)
5555 {
5556 void *src_ptr = (void *) src + src_off;
5557 int src_ptr_page_locked_host_p = 0;
5558
5559 if (always_pinned_mode)
5560 {
5561 if (length != 0)
5562 src_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
5563 (dst_devicep, src_ptr, length, GOMP_MAP_TO);
5564 if (src_ptr_page_locked_host_p < 0)
5565 return ENOMEM;
5566 }
5567
5568 /* No 'gomp_verify_always_pinned_mode' here; have just registered. */
5569 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
5570 (char *) dst + dst_off,
5571 src_ptr,
5572 length);
5573
5574 if (src_ptr_page_locked_host_p
5575 && !gomp_page_locked_host_unregister_dev (dst_devicep,
5576 src_ptr, length, NULL))
5577 return ENOMEM;
5578 }
5579 else if (dst_devicep == NULL)
5580 {
5581 void *dst_ptr = (void *) dst + dst_off;
5582 int dst_ptr_page_locked_host_p = 0;
5583
5584 if (always_pinned_mode)
5585 {
5586 if (length != 0)
5587 dst_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
5588 (src_devicep, dst_ptr, length, GOMP_MAP_FROM);
5589 if (dst_ptr_page_locked_host_p < 0)
5590 return ENOMEM;
5591 }
5592
5593 /* No 'gomp_verify_always_pinned_mode' here; have just registered. */
5594 ret = src_devicep->dev2host_func (src_devicep->target_id,
5595 dst_ptr,
5596 (const char *) src + src_off,
5597 length);
5598
5599 if (dst_ptr_page_locked_host_p
5600 && !gomp_page_locked_host_unregister_dev (src_devicep,
5601 dst_ptr, length, NULL))
5602 return ENOMEM;
5603 }
5604 else if (src_devicep == dst_devicep)
5605 /* No 'gomp_verify_always_pinned_mode' here. */
5606 ret = src_devicep->dev2dev_func (src_devicep->target_id,
5607 (char *) dst + dst_off,
5608 (const char *) src + src_off,
5609 length);
5610 else
5611 ret = 0;
5612 return ret ? 0 : EINVAL;
5613 }
5614
5615 /* FIXME: it would be nice to have a plugin function to handle
5616 num_dims == 2 and num_dims == 3 more efficiently. Larger dimension
5617 counts can be handled by the generic recursion below, and for host-to-host
5618 copies that recursion should be used even for any num_dims >= 2. */
5619
5620 for (i = 1; i < num_dims; i++)
5621 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
5622 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
5623 return EINVAL;
5624 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
5625 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
5626 return EINVAL;
5627 for (j = 0; j < volume[0]; j++)
5628 {
5629 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
5630 (const char *) src + src_off,
5631 element_size, num_dims - 1,
5632 volume + 1, dst_offsets + 1,
5633 src_offsets + 1, dst_dimensions + 1,
5634 src_dimensions + 1, dst_devicep,
5635 src_devicep);
5636 if (ret)
5637 return ret;
5638 dst_off += dst_slice;
5639 src_off += src_slice;
5640 }
5641 return 0;
5642 }
5643
5644 static int
5645 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
5646 int src_device_num,
5647 struct gomp_device_descr **dst_devicep,
5648 struct gomp_device_descr **src_devicep)
5649 {
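/* Per OpenMP, a call with both DST and SRC being NULL asks for the maximum
   number of dimensions supported; this implementation has no fixed limit,
   so report INT_MAX.  */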
5650 if (!dst && !src)
5651 return INT_MAX;
5652
5653 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
5654 dst_devicep, src_devicep);
5655 if (ret)
5656 return ret;
5657
5658 if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
5659 return EINVAL;
5660
5661 return 0;
5662 }
5663
5664 static int
5665 omp_target_memcpy_rect_copy (void *dst, const void *src,
5666 size_t element_size, int num_dims,
5667 const size_t *volume, const size_t *dst_offsets,
5668 const size_t *src_offsets,
5669 const size_t *dst_dimensions,
5670 const size_t *src_dimensions,
5671 struct gomp_device_descr *dst_devicep,
5672 struct gomp_device_descr *src_devicep)
5673 {
5674 if (src_devicep)
5675 gomp_mutex_lock (&src_devicep->lock);
5676 else if (dst_devicep)
5677 gomp_mutex_lock (&dst_devicep->lock);
5678 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
5679 volume, dst_offsets, src_offsets,
5680 dst_dimensions, src_dimensions,
5681 dst_devicep, src_devicep);
5682 if (src_devicep)
5683 gomp_mutex_unlock (&src_devicep->lock);
5684 else if (dst_devicep)
5685 gomp_mutex_unlock (&dst_devicep->lock);
5686
5687 return ret;
5688 }
5689
5690 int
5691 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
5692 int num_dims, const size_t *volume,
5693 const size_t *dst_offsets,
5694 const size_t *src_offsets,
5695 const size_t *dst_dimensions,
5696 const size_t *src_dimensions,
5697 int dst_device_num, int src_device_num)
5698 {
5699 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
5700
5701 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
5702 src_device_num, &dst_devicep,
5703 &src_devicep);
5704
5705 if (check)
5706 return check;
5707
5708 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
5709 volume, dst_offsets, src_offsets,
5710 dst_dimensions, src_dimensions,
5711 dst_devicep, src_devicep);
5712
5713 return ret;
5714 }
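
/* Illustrative usage sketch (not part of the library): copy a 2 x 3 element
   sub-rectangle out of a 4 x 5 host matrix into the top-left corner of a
   4 x 5 matrix in device memory.  All counts are in elements; ELEMENT_SIZE
   scales them to bytes.  DEV and HOST are as in the omp_target_memcpy
   sketch above.

     double hmat[4][5];
     double *dmat = omp_target_alloc (sizeof (hmat), dev);
     size_t volume[2]      = { 2, 3 };   /- rows, columns to copy -/
     size_t src_offsets[2] = { 1, 2 };   /- start at hmat[1][2]   -/
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_dims[2]    = { 4, 5 };
     size_t dst_dims[2]    = { 4, 5 };
     int err = omp_target_memcpy_rect (dmat, &hmat[0][0], sizeof (double),
                                       2, volume, dst_offsets, src_offsets,
                                       dst_dims, src_dims, dev, host);
*/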
5715
5716 typedef struct
5717 {
5718 void *dst;
5719 const void *src;
5720 size_t element_size;
5721 const size_t *volume;
5722 const size_t *dst_offsets;
5723 const size_t *src_offsets;
5724 const size_t *dst_dimensions;
5725 const size_t *src_dimensions;
5726 struct gomp_device_descr *dst_devicep;
5727 struct gomp_device_descr *src_devicep;
5728 int num_dims;
5729 } omp_target_memcpy_rect_data;
5730
5731 static void
5732 omp_target_memcpy_rect_async_helper (void *args)
5733 {
5734 omp_target_memcpy_rect_data *a = args;
5735 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
5736 a->num_dims, a->volume, a->dst_offsets,
5737 a->src_offsets, a->dst_dimensions,
5738 a->src_dimensions, a->dst_devicep,
5739 a->src_devicep);
5740 if (ret)
5741 gomp_fatal ("omp_target_memcpy_rect failed");
5742 }
5743
5744 int
5745 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
5746 int num_dims, const size_t *volume,
5747 const size_t *dst_offsets,
5748 const size_t *src_offsets,
5749 const size_t *dst_dimensions,
5750 const size_t *src_dimensions,
5751 int dst_device_num, int src_device_num,
5752 int depobj_count, omp_depend_t *depobj_list)
5753 {
5754 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
5755 unsigned flags = 0;
5756 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
5757 src_device_num, &dst_devicep,
5758 &src_devicep);
5759 void *depend[depobj_count + 5];
5760 int i;
5761
5762 omp_target_memcpy_rect_data s = {
5763 .dst = dst,
5764 .src = src,
5765 .element_size = element_size,
5766 .num_dims = num_dims,
5767 .volume = volume,
5768 .dst_offsets = dst_offsets,
5769 .src_offsets = src_offsets,
5770 .dst_dimensions = dst_dimensions,
5771 .src_dimensions = src_dimensions,
5772 .dst_devicep = dst_devicep,
5773 .src_devicep = src_devicep
5774 };
5775
5776 if (check)
5777 return check;
5778
5779 if (depobj_count > 0 && depobj_list != NULL)
5780 {
5781 flags |= GOMP_TASK_FLAG_DEPEND;
5782 depend[0] = 0;
5783 depend[1] = (void *) (uintptr_t) depobj_count;
5784 depend[2] = depend[3] = depend[4] = 0;
5785 for (i = 0; i < depobj_count; ++i)
5786 depend[i + 5] = &depobj_list[i];
5787 }
5788
5789 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
5790 __alignof__ (s), true, flags, depend, 0, NULL);
5791
5792 return 0;
5793 }
5794
5795 int
5796 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
5797 size_t size, size_t device_offset, int device_num)
5798 {
5799 if (device_num == omp_initial_device
5800 || device_num == gomp_get_num_devices ())
5801 return EINVAL;
5802
5803 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5804 if (devicep == NULL)
5805 return EINVAL;
5806
5807 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5808 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5809 return EINVAL;
5810
5811 gomp_mutex_lock (&devicep->lock);
5812
5813 struct splay_tree_s *mem_map = &devicep->mem_map;
5814 struct splay_tree_key_s cur_node;
5815 int ret = EINVAL;
5816
5817 cur_node.host_start = (uintptr_t) host_ptr;
5818 cur_node.host_end = cur_node.host_start + size;
5819 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
5820 if (n)
5821 {
5822 if (n->tgt->tgt_start + n->tgt_offset
5823 == (uintptr_t) device_ptr + device_offset
5824 && n->host_start <= cur_node.host_start
5825 && n->host_end >= cur_node.host_end)
5826 ret = 0;
5827 }
5828 else
5829 {
5830 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
5831 tgt->array = gomp_malloc (sizeof (*tgt->array));
5832 tgt->refcount = 1;
5833 tgt->tgt_start = 0;
5834 tgt->tgt_end = 0;
5835 tgt->to_free = NULL;
5836 tgt->prev = NULL;
5837 tgt->list_count = 0;
5838 tgt->device_descr = devicep;
5839 splay_tree_node array = tgt->array;
5840 splay_tree_key k = &array->key;
5841 k->host_start = cur_node.host_start;
5842 k->host_end = cur_node.host_end;
5843 k->tgt = tgt;
5844 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
5845 k->refcount = REFCOUNT_INFINITY;
5846 k->dynamic_refcount = 0;
5847 k->aux = NULL;
5848 k->page_locked_host_p = false;
5849 array->left = NULL;
5850 array->right = NULL;
5851 splay_tree_insert (&devicep->mem_map, array);
5852 ret = 0;
5853 }
5854 gomp_mutex_unlock (&devicep->lock);
5855 return ret;
5856 }
5857
5858 int
5859 omp_target_disassociate_ptr (const void *ptr, int device_num)
5860 {
5861 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5862 if (devicep == NULL)
5863 return EINVAL;
5864
5865 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5866 return EINVAL;
5867
5868 gomp_mutex_lock (&devicep->lock);
5869
5870 struct splay_tree_s *mem_map = &devicep->mem_map;
5871 struct splay_tree_key_s cur_node;
5872 int ret = EINVAL;
5873
5874 cur_node.host_start = (uintptr_t) ptr;
5875 cur_node.host_end = cur_node.host_start;
5876 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
5877 if (n
5878 && n->host_start == cur_node.host_start
5879 && n->refcount == REFCOUNT_INFINITY
5880 && n->tgt->tgt_start == 0
5881 && n->tgt->to_free == NULL
5882 && n->tgt->refcount == 1
5883 && n->tgt->list_count == 0)
5884 {
5885 splay_tree_remove (&devicep->mem_map, n);
5886 gomp_unmap_tgt (n->tgt);
5887 ret = 0;
5888 }
5889
5890 gomp_mutex_unlock (&devicep->lock);
5891 return ret;
5892 }
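
/* Illustrative usage sketch (not part of the library): associate a
   separately allocated device buffer with a host buffer so that later
   'target' regions reuse it instead of allocating their own copy, then undo
   the association before freeing.  DEV is as in the earlier sketches.

     double hbuf[128];
     double *dbuf = omp_target_alloc (sizeof (hbuf), dev);
     if (omp_target_associate_ptr (hbuf, dbuf, sizeof (hbuf), 0, dev) == 0)
       {
         #pragma omp target device(dev) map(tofrom: hbuf)
         hbuf[0] = 1.0;   /- runs on DEV, using the associated buffer -/
         omp_target_disassociate_ptr (hbuf, dev);
       }
     omp_target_free (dbuf, dev);
*/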
5893
5894 void *
5895 omp_get_mapped_ptr (const void *ptr, int device_num)
5896 {
5897 if (device_num == omp_initial_device
5898 || device_num == omp_get_initial_device ())
5899 return (void *) ptr;
5900
5901 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5902 if (devicep == NULL)
5903 return NULL;
5904
5905 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5906 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5907 return (void *) ptr;
5908
5909 gomp_mutex_lock (&devicep->lock);
5910
5911 struct splay_tree_s *mem_map = &devicep->mem_map;
5912 struct splay_tree_key_s cur_node;
5913 void *ret = NULL;
5914
5915 cur_node.host_start = (uintptr_t) ptr;
5916 cur_node.host_end = cur_node.host_start;
5917 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
5918
5919 if (n)
5920 {
5921 uintptr_t offset = cur_node.host_start - n->host_start;
5922 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
5923 }
5924
5925 gomp_mutex_unlock (&devicep->lock);
5926
5927 return ret;
5928 }
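
/* Illustrative usage sketch (not part of the library): retrieve the device
   address corresponding to a mapped host pointer, e.g. to hand it to a
   native device library.  BUF is a host array, DEV a non-host device.

     #pragma omp target enter data map(to: buf) device(dev)
     void *dptr = omp_get_mapped_ptr (buf, dev);

   DPTR is NULL if BUF is not mapped on DEV, and equals BUF for the host
   device and for shared-memory devices.  */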
5929
5930 int
5931 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5932 {
5933 if (device_num == omp_initial_device
5934 || device_num == gomp_get_num_devices ())
5935 return true;
5936
5937 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5938 if (devicep == NULL)
5939 return false;
5940
5941 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
5942 return true;
5943
5944 if (devicep->is_usm_ptr_func && devicep->is_usm_ptr_func ((void *) ptr))
5945 return true;
5946
5947 return false;
5948 }
5949
5950 int
5951 omp_pause_resource (omp_pause_resource_t kind, int device_num)
5952 {
5953 (void) kind;
5954 if (device_num == omp_initial_device
5955 || device_num == gomp_get_num_devices ())
5956 return gomp_pause_host ();
5957
5958 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5959 if (devicep == NULL)
5960 return -1;
5961
5962 /* Do nothing for target devices for now. */
5963 return 0;
5964 }
5965
5966 int
5967 omp_pause_resource_all (omp_pause_resource_t kind)
5968 {
5969 (void) kind;
5970 if (gomp_pause_host ())
5971 return -1;
5972 /* Do nothing for target devices for now. */
5973 return 0;
5974 }
5975
5976 ialias (omp_pause_resource)
5977 ialias (omp_pause_resource_all)
5978
5979 bool
5980 GOMP_evaluate_target_device (int device_num, const char *kind,
5981 const char *arch, const char *isa)
5982 {
5983 bool result = true;
5984
5985 if (device_num < 0)
5986 device_num = omp_get_default_device ();
5987
5988 if (kind && strcmp (kind, "any") == 0)
5989 kind = NULL;
5990
5991 gomp_debug (1, "%s: device_num = %d, kind=%s, arch=%s, isa=%s",
5992 __FUNCTION__, device_num, kind, arch, isa);
5993
5994 if (omp_get_device_num () == device_num)
5995 result = GOMP_evaluate_current_device (kind, arch, isa);
5996 else
5997 {
5998 if (!omp_is_initial_device ())
5999 /* Accelerators are not expected to know about other devices. */
6000 result = false;
6001 else
6002 {
6003 struct gomp_device_descr *device = resolve_device (device_num, true);
6004 if (device == NULL)
6005 result = false;
6006 else if (device->evaluate_device_func)
6007 result = device->evaluate_device_func (device_num, kind, arch,
6008 isa);
6009 }
6010 }
6011
6012 gomp_debug (1, " -> %s\n", result ? "true" : "false");
6013 return result;
6014 }
6015
6016 #ifdef PLUGIN_SUPPORT
6017
6018 /* This function tries to load a plugin for DEVICE. The name of the plugin
6019 is passed in PLUGIN_NAME.
6020 The handles of the found functions are stored in the corresponding fields
6021 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
6022
6023 static bool
6024 gomp_load_plugin_for_device (struct gomp_device_descr *device,
6025 const char *plugin_name)
6026 {
6027 const char *err = NULL, *last_missing = NULL;
6028
6029 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
6030 if (!plugin_handle)
6031 #if OFFLOAD_DEFAULTED
6032 return 0;
6033 #else
6034 goto dl_fail;
6035 #endif
6036
6037 /* Check if all required functions are available in the plugin and store
6038 their handles. None of the symbols can legitimately be NULL,
6039 so we don't need to check dlerror all the time. */
6040 #define DLSYM(f) \
6041 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
6042 goto dl_fail
6043 /* Similar, but a missing function is not an error; the expression
6044 evaluates to false if the lookup failed, true otherwise. */
6045 #define DLSYM_OPT(f, n) \
6046 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
6047 || (last_missing = #n, 0))
6048
6049 DLSYM (version);
6050 if (device->version_func () != GOMP_VERSION)
6051 {
6052 err = "plugin version mismatch";
6053 goto fail;
6054 }
6055
6056 DLSYM (get_name);
6057 DLSYM (get_caps);
6058 DLSYM (get_type);
6059 DLSYM (get_num_devices);
6060 DLSYM (init_device);
6061 DLSYM (fini_device);
6062 DLSYM (load_image);
6063 DLSYM (unload_image);
6064 DLSYM (alloc);
6065 DLSYM (free);
6066 DLSYM_OPT (usm_alloc, usm_alloc);
6067 DLSYM_OPT (usm_free, usm_free);
6068 DLSYM_OPT (is_usm_ptr, is_usm_ptr);
6069 DLSYM_OPT (page_locked_host_alloc, page_locked_host_alloc);
6070 DLSYM_OPT (page_locked_host_free, page_locked_host_free);
6071 DLSYM_OPT (page_locked_host_register, page_locked_host_register);
6072 DLSYM_OPT (page_locked_host_unregister, page_locked_host_unregister);
6073 DLSYM_OPT (page_locked_host_p, page_locked_host_p);
6074 DLSYM (dev2host);
6075 DLSYM (host2dev);
6076 DLSYM (evaluate_device);
6077 device->capabilities = device->get_caps_func ();
6078 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
6079 {
6080 DLSYM (run);
6081 DLSYM_OPT (async_run, async_run);
6082 DLSYM_OPT (can_run, can_run);
6083 DLSYM (dev2dev);
6084 }
6085 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
6086 {
6087 if (!DLSYM_OPT (openacc.exec, openacc_exec)
6088 || !DLSYM_OPT (openacc.create_thread_data,
6089 openacc_create_thread_data)
6090 || !DLSYM_OPT (openacc.destroy_thread_data,
6091 openacc_destroy_thread_data)
6092 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
6093 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
6094 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
6095 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
6096 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
6097 || !DLSYM_OPT (openacc.async.queue_callback,
6098 openacc_async_queue_callback)
6099 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
6100 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
6101 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
6102 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
6103 {
6104 /* Require all the OpenACC handlers if we have
6105 GOMP_OFFLOAD_CAP_OPENACC_200. */
6106 err = "plugin missing OpenACC handler function";
6107 goto fail;
6108 }
6109
6110 unsigned cuda = 0;
6111 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
6112 openacc_cuda_get_current_device);
6113 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
6114 openacc_cuda_get_current_context);
6115 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
6116 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
6117 if (cuda && cuda != 4)
6118 {
6119 /* Make sure all the CUDA functions are there if any of them are. */
6120 err = "plugin missing OpenACC CUDA handler function";
6121 goto fail;
6122 }
6123 }
6124 #undef DLSYM
6125 #undef DLSYM_OPT
6126
6127 return 1;
6128
6129 dl_fail:
6130 err = dlerror ();
6131 fail:
6132 gomp_error ("while loading %s: %s", plugin_name, err);
6133 if (last_missing)
6134 gomp_error ("missing function was %s", last_missing);
6135 if (plugin_handle)
6136 dlclose (plugin_handle);
6137
6138 return 0;
6139 }
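
/* For reference: the DLSYM/DLSYM_OPT machinery above resolves plugin entry
   points purely by naming convention, i.e. a plugin exports symbols named
   "GOMP_OFFLOAD_<name>" and the corresponding '<name>_func' field of the
   device descriptor is filled in.  A hand-expanded example of one such
   lookup (illustrative only):

     device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc");
     if (!device->alloc_func)
       goto dl_fail;
*/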
6140
6141 /* This function finalizes all initialized devices. */
6142
6143 static void
6144 gomp_target_fini (void)
6145 {
6146 int i;
6147 for (i = 0; i < num_devices; i++)
6148 {
6149 bool ret = true;
6150 struct gomp_device_descr *devicep = &devices[i];
6151 gomp_mutex_lock (&devicep->lock);
6152 if (devicep->state == GOMP_DEVICE_INITIALIZED)
6153 ret = gomp_fini_device (devicep);
6154 gomp_mutex_unlock (&devicep->lock);
6155 if (!ret)
6156 gomp_fatal ("device finalization failed");
6157 }
6158 }
6159
6160 /* This function initializes the runtime for offloading.
6161 It parses the list of offload plugins and tries to load them.
6162 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
6163 will be set, and the array DEVICES initialized, containing descriptors for
6164 the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
6165 followed by the others. */
6166
6167 static void
6168 gomp_target_init (void)
6169 {
6170 const char *prefix = "libgomp-plugin-";
6171 const char *suffix = SONAME_SUFFIX (1);
6172 const char *cur, *next;
6173 char *plugin_name;
6174 int i, new_num_devs;
6175 int num_devs = 0, num_devs_openmp;
6176 struct gomp_device_descr *devs = NULL;
6177
6178 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
6179 return;
6180
6181 cur = OFFLOAD_PLUGINS;
6182 if (*cur)
6183 do
6184 {
6185 struct gomp_device_descr current_device;
6186 size_t prefix_len, suffix_len, cur_len;
6187
6188 next = strchr (cur, ',');
6189
6190 prefix_len = strlen (prefix);
6191 cur_len = next ? next - cur : strlen (cur);
6192 suffix_len = strlen (suffix);
6193
6194 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
6195 if (!plugin_name)
6196 {
6197 num_devs = 0;
6198 break;
6199 }
6200
6201 memcpy (plugin_name, prefix, prefix_len);
6202 memcpy (plugin_name + prefix_len, cur, cur_len);
6203 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
6204
6205 if (gomp_load_plugin_for_device (&current_device, plugin_name))
6206 {
6207 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
6208 new_num_devs = current_device.get_num_devices_func (omp_req);
6209 if (gomp_debug_var > 0 && new_num_devs < 0)
6210 {
6211 bool found = false;
6212 int type = current_device.get_type_func ();
6213 for (int img = 0; img < num_offload_images; img++)
6214 if (type == offload_images[img].type)
6215 found = true;
6216 if (found)
6217 {
6218 char buf[sizeof ("unified_address, unified_shared_memory, "
6219 "reverse_offload")];
6220 gomp_requires_to_name (buf, sizeof (buf), omp_req);
6221 char *name = (char *) malloc (cur_len + 1);
6222 memcpy (name, cur, cur_len);
6223 name[cur_len] = '\0';
6224 gomp_debug (1,
6225 "%s devices present but 'omp requires %s' "
6226 "cannot be fulfilled\n", name, buf);
6227 free (name);
6228 }
6229 }
6230 else if (new_num_devs >= 1)
6231 {
6232 /* Augment DEVICES and NUM_DEVICES. */
6233
6234 devs = realloc (devs, (num_devs + new_num_devs)
6235 * sizeof (struct gomp_device_descr));
6236 if (!devs)
6237 {
6238 num_devs = 0;
6239 free (plugin_name);
6240 break;
6241 }
6242
6243 current_device.name = current_device.get_name_func ();
6244 /* current_device.capabilities has already been set. */
6245 current_device.type = current_device.get_type_func ();
6246 current_device.mem_map.root = NULL;
6247 current_device.mem_map_rev.root = NULL;
6248 current_device.state = GOMP_DEVICE_UNINITIALIZED;
6249 for (i = 0; i < new_num_devs; i++)
6250 {
6251 current_device.target_id = i;
6252 devs[num_devs] = current_device;
6253 gomp_mutex_init (&devs[num_devs].lock);
6254 num_devs++;
6255 }
6256 }
6257 }
6258
6259 free (plugin_name);
6260 cur = next + 1;
6261 }
6262 while (next);
6263
6264 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
6265 NUM_DEVICES_OPENMP. */
6266 struct gomp_device_descr *devs_s
6267 = malloc (num_devs * sizeof (struct gomp_device_descr));
6268 if (!devs_s)
6269 {
6270 num_devs = 0;
6271 free (devs);
6272 devs = NULL;
6273 }
6274 num_devs_openmp = 0;
6275 for (i = 0; i < num_devs; i++)
6276 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
6277 devs_s[num_devs_openmp++] = devs[i];
6278 int num_devs_after_openmp = num_devs_openmp;
6279 for (i = 0; i < num_devs; i++)
6280 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
6281 devs_s[num_devs_after_openmp++] = devs[i];
6282 free (devs);
6283 devs = devs_s;
6284
6285 for (i = 0; i < num_devs; i++)
6286 {
6287 /* The 'devices' array can be moved (by the realloc call) until we have
6288 found all the plugins, so registering with the OpenACC runtime (which
6289 takes a copy of the pointer argument) must be delayed until now. */
6290 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
6291 goacc_register (&devs[i]);
6292 }
6293
6294 num_devices = num_devs;
6295 num_devices_openmp = num_devs_openmp;
6296 devices = devs;
6297 if (atexit (gomp_target_fini) != 0)
6298 gomp_fatal ("atexit failed");
6299 }
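
/* For reference: OFFLOAD_PLUGINS is a comma-separated list fixed at
   configure time; each entry is expanded to
   "libgomp-plugin-<entry>" SONAME_SUFFIX (1) before dlopen.  Assuming a
   typical ELF configuration where SONAME_SUFFIX (1) is ".so.1", for
   example:

     OFFLOAD_PLUGINS = "nvptx,gcn"
       -> "libgomp-plugin-nvptx.so.1" and "libgomp-plugin-gcn.so.1"
*/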
6300
6301 #else /* PLUGIN_SUPPORT */
6302 /* If dlfcn.h is unavailable we always fall back to host execution.
6303 GOMP_target* routines are just stubs for this case. */
6304 static void
6305 gomp_target_init (void)
6306 {
6307 }
6308 #endif /* PLUGIN_SUPPORT */