libgomp/target.c
1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <stdio.h> /* For snprintf. */
40 #include <assert.h>
41 #include <errno.h>
42
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_static
51 #define splay_tree_c
52 #include "splay-tree.h"
53
54
55 typedef uintptr_t *hash_entry_type;
56 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
57 static inline void htab_free (void *ptr) { free (ptr); }
58 #include "hashtab.h"
59
60 ialias_redirect (GOMP_task)
61
62 static inline hashval_t
63 htab_hash (hash_entry_type element)
64 {
65 return hash_pointer ((void *) element);
66 }
67
68 static inline bool
69 htab_eq (hash_entry_type x, hash_entry_type y)
70 {
71 return x == y;
72 }
73
74 #define FIELD_TGT_EMPTY (~(size_t) 0)
75
76 static void gomp_target_init (void);
77
78 /* The whole initialization code for offloading plugins is only run once. */
79 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
80
81 /* Mutex for offload image registration. */
82 static gomp_mutex_t register_lock;
83
84 /* This structure describes an offload image.
85 It contains the type of the target device, a pointer to the host table
86 descriptor, and a pointer to the target data. */
87 struct offload_image_descr {
88 unsigned version;
89 enum offload_target_type type;
90 const void *host_table;
91 const void *target_data;
92 };
93
94 /* Array of descriptors of offload images. */
95 static struct offload_image_descr *offload_images;
96
97 /* Total number of offload images. */
98 static int num_offload_images;
99
100 /* Array of descriptors for all available devices. */
101 static struct gomp_device_descr *devices;
102
103 /* Total number of available devices. */
104 static int num_devices;
105
106 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
107 static int num_devices_openmp;
108
109 /* Mask of OpenMP 'requires' directive requirements (GOMP_REQUIRES_*). */
110 static int omp_requires_mask;
111
112 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
113
114 static void *
115 gomp_realloc_unlock (void *old, size_t size)
116 {
117 void *ret = realloc (old, size);
118 if (ret == NULL)
119 {
120 gomp_mutex_unlock (&register_lock);
121 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
122 }
123 return ret;
124 }
125
126 attribute_hidden void
127 gomp_init_targets_once (void)
128 {
129 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
130 }
131
132 attribute_hidden int
133 gomp_get_num_devices (void)
134 {
135 gomp_init_targets_once ();
136 return num_devices_openmp;
137 }
138
139 static struct gomp_device_descr *
140 resolve_device (int device_id, bool remapped)
141 {
142 /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
143 called, which must be done before using default_device_var. */
144 int num_devices = gomp_get_num_devices ();
145
146 if (remapped && device_id == GOMP_DEVICE_ICV)
147 {
148 struct gomp_task_icv *icv = gomp_icv (false);
149 device_id = icv->default_device_var;
150 remapped = false;
151 }
152
153 if (device_id < 0)
154 {
155 if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK
156 : omp_initial_device))
157 return NULL;
158 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
159 && num_devices == 0)
160 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
161 "but only the host device is available");
162 else if (device_id == omp_invalid_device)
163 gomp_fatal ("omp_invalid_device encountered");
164 else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
167
168 return NULL;
169 }
170 else if (device_id >= num_devices)
171 {
172 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
173 && device_id != num_devices)
174 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
175 "but device not found");
176
177 return NULL;
178 }
179
180 gomp_mutex_lock (&devices[device_id].lock);
181 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
182 gomp_init_device (&devices[device_id]);
183 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
184 {
185 gomp_mutex_unlock (&devices[device_id].lock);
186
187 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
188 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
189 "but device is finalized");
190
191 return NULL;
192 }
193 gomp_mutex_unlock (&devices[device_id].lock);
194
195 return &devices[device_id];
196 }
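/* Editor's note (illustrative sketch, not part of the upstream sources):
   with OMP_DEFAULT_DEVICE unset and at least one offload device present,
   resolve_device (GOMP_DEVICE_ICV, true) reads default_device_var (0) and
   returns &devices[0]; resolve_device (omp_initial_device, false) returns
   NULL, which callers treat as host fallback; an out-of-range nonnegative
   id likewise yields NULL, or a fatal error when
   OMP_TARGET_OFFLOAD=mandatory. */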
197
198
199 static inline splay_tree_key
200 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
201 {
202 if (key->host_start != key->host_end)
203 return splay_tree_lookup (mem_map, key);
204
205 key->host_end++;
206 splay_tree_key n = splay_tree_lookup (mem_map, key);
207 key->host_end--;
208 if (n)
209 return n;
210 key->host_start--;
211 n = splay_tree_lookup (mem_map, key);
212 key->host_start++;
213 if (n)
214 return n;
215 return splay_tree_lookup (mem_map, key);
216 }
217
218 static inline reverse_splay_tree_key
219 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev, reverse_splay_tree_key key)
220 {
221 return reverse_splay_tree_lookup (mem_map_rev, key);
222 }
223
224 static inline splay_tree_key
225 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
226 {
227 if (key->host_start != key->host_end)
228 return splay_tree_lookup (mem_map, key);
229
230 key->host_end++;
231 splay_tree_key n = splay_tree_lookup (mem_map, key);
232 key->host_end--;
233 return n;
234 }
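/* Editor's sketch of the two lookup helpers above (illustrative only):
   a key with host_start == host_end describes a zero-length region, which
   a plain interval lookup would never report as overlapping an existing
   mapping. gomp_map_lookup therefore retries with [start, end + 1) and
   then with [start - 1, end), so that e.g. for a mapping covering host
   addresses [0x1000, 0x1100) a zero-length key at 0x1000 is resolved via
   the first retry and one at 0x1100 (one past the end) via the second.
   gomp_map_0len_lookup only performs the first of those retries. */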
235
236 static inline void
237 gomp_device_copy (struct gomp_device_descr *devicep,
238 bool (*copy_func) (int, void *, const void *, size_t),
239 const char *dst, void *dstaddr,
240 const char *src, const void *srcaddr,
241 size_t size)
242 {
243 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
244 {
245 gomp_mutex_unlock (&devicep->lock);
246 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
247 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
248 }
249 }
250
251 static inline void
252 goacc_device_copy_async (struct gomp_device_descr *devicep,
253 bool (*copy_func) (int, void *, const void *, size_t,
254 struct goacc_asyncqueue *),
255 const char *dst, void *dstaddr,
256 const char *src, const void *srcaddr,
257 const void *srcaddr_orig,
258 size_t size, struct goacc_asyncqueue *aq)
259 {
260 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
261 {
262 gomp_mutex_unlock (&devicep->lock);
263 if (srcaddr_orig && srcaddr_orig != srcaddr)
264 gomp_fatal ("Copying of %s object [%p..%p)"
265 " via buffer %s object [%p..%p)"
266 " to %s object [%p..%p) failed",
267 src, srcaddr_orig, srcaddr_orig + size,
268 src, srcaddr, srcaddr + size,
269 dst, dstaddr, dstaddr + size);
270 else
271 gomp_fatal ("Copying of %s object [%p..%p)"
272 " to %s object [%p..%p) failed",
273 src, srcaddr, srcaddr + size,
274 dst, dstaddr, dstaddr + size);
275 }
276 }
277
278 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
279 host to device memory transfers. */
280
281 struct gomp_coalesce_chunk
282 {
283 /* The starting and ending point of a coalesced chunk of memory. */
284 size_t start, end;
285 };
286
287 struct gomp_coalesce_buf
288 {
289 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
290 it will be copied to the device. */
291 void *buf;
292 struct target_mem_desc *tgt;
293 /* Array with offsets, chunks[i].start is the starting offset and
294 chunks[i].end ending offset relative to tgt->tgt_start device address
295 of chunks which are to be copied to buf and later copied to device. */
296 struct gomp_coalesce_chunk *chunks;
297 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
298 be performed. */
299 long chunk_cnt;
300 /* During construction of chunks array, how many memory regions are within
301 the last chunk. If there is just one memory region for a chunk, we copy
302 it directly to device rather than going through buf. */
303 long use_cnt;
304 };
305
306 /* Maximum size of memory region considered for coalescing. Larger copies
307 are performed directly. */
308 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
309
310 /* Maximum size of a gap in between regions to consider them being copied
311 within the same chunk. All the device offsets considered are within
312 newly allocated device memory, so it isn't fatal if we copy some padding
313 in between from host to device. The gaps come either from alignment
314 padding or from memory regions which are not supposed to be copied from
315 host to device (e.g. map(alloc:), map(from:) etc.). */
316 #define MAX_COALESCE_BUF_GAP (4 * 1024)
317
318 /* Add region with device tgt_start relative offset and length to CBUF.
319
320 This must not be used for asynchronous copies, because the host data might
321 not be computed yet (by an earlier asynchronous compute region, for
322 example). The exception is for EPHEMERAL data, which we know is available
323 already "by construction". */
324
325 static inline void
326 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
327 {
328 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
329 return;
330 if (cbuf->chunk_cnt)
331 {
332 if (cbuf->chunk_cnt < 0)
333 return;
334 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
335 {
336 cbuf->chunk_cnt = -1;
337 return;
338 }
339 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
340 {
341 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
342 cbuf->use_cnt++;
343 return;
344 }
345 /* If the last chunk is only used by one mapping, discard it,
346 as it will be one host to device copy anyway and
347 memcpying it around will only waste cycles. */
348 if (cbuf->use_cnt == 1)
349 cbuf->chunk_cnt--;
350 }
351 cbuf->chunks[cbuf->chunk_cnt].start = start;
352 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
353 cbuf->chunk_cnt++;
354 cbuf->use_cnt = 1;
355 }
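/* Editor's worked example for gomp_coalesce_buf_add (illustrative only):
   assume the last chunk currently ends at offset 0x100 and a region of
   length 0x40 is added at offset 0x180. The gap (0x80) is smaller than
   MAX_COALESCE_BUF_GAP, so the chunk is simply extended to end at 0x1C0
   and use_cnt is bumped. Had the region started at offset 0x1200, a new
   chunk would be opened instead (after discarding the previous one if it
   only covered a single region, since such a region is cheaper to copy
   directly). A region overlapping an existing chunk disables coalescing
   altogether by setting chunk_cnt to -1. */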
356
357 /* Return true for mapping kinds which need to copy data from the
358 host to device for regions that weren't previously mapped. */
359
360 static inline bool
361 gomp_to_device_kind_p (int kind)
362 {
363 switch (kind)
364 {
365 case GOMP_MAP_ALLOC:
366 case GOMP_MAP_FROM:
367 case GOMP_MAP_FORCE_ALLOC:
368 case GOMP_MAP_FORCE_FROM:
369 case GOMP_MAP_ALWAYS_FROM:
370 case GOMP_MAP_ALWAYS_PRESENT_FROM:
371 case GOMP_MAP_FORCE_PRESENT:
372 return false;
373 default:
374 return true;
375 }
376 }
377
378 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
379 non-NULL), when the source data is on the stack or may otherwise be deallocated
380 before the asynchronous copy takes place, EPHEMERAL must be passed as
381 TRUE. */
382
383 attribute_hidden void
384 gomp_copy_host2dev (struct gomp_device_descr *devicep,
385 struct goacc_asyncqueue *aq,
386 void *d, const void *h, size_t sz,
387 bool ephemeral, struct gomp_coalesce_buf *cbuf)
388 {
389 if (cbuf)
390 {
391 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
392 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
393 {
394 long first = 0;
395 long last = cbuf->chunk_cnt - 1;
396 while (first <= last)
397 {
398 long middle = (first + last) >> 1;
399 if (cbuf->chunks[middle].end <= doff)
400 first = middle + 1;
401 else if (cbuf->chunks[middle].start <= doff)
402 {
403 if (doff + sz > cbuf->chunks[middle].end)
404 {
405 gomp_mutex_unlock (&devicep->lock);
406 gomp_fatal ("internal libgomp cbuf error");
407 }
408
409 /* In an asynchronous context, verify that CBUF isn't used
410 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
411 if (__builtin_expect (aq != NULL, 0))
412 assert (ephemeral);
413
414 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
415 h, sz);
416 return;
417 }
418 else
419 last = middle - 1;
420 }
421 }
422 }
423
424 if (__builtin_expect (aq != NULL, 0))
425 {
426 void *h_buf = (void *) h;
427 if (ephemeral)
428 {
429 /* We're queueing up an asynchronous copy from data that may
430 disappear before the transfer takes place (i.e. because it is a
431 stack local in a function that is no longer executing). As we've
432 not been able to use CBUF, make a copy of the data into a
433 temporary buffer. */
434 h_buf = gomp_malloc (sz);
435 memcpy (h_buf, h, sz);
436 }
437 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
438 "dev", d, "host", h_buf, h, sz, aq);
439 if (ephemeral)
440 /* Free once the transfer has completed. */
441 devicep->openacc.async.queue_callback_func (aq, free, h_buf);
442 }
443 else
444 gomp_device_copy (devicep, devicep->host2dev_func,
445 "dev", d, "host", h, sz);
446 }
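/* Editor's note on the function above (illustrative, not from the upstream
   sources): a typical synchronous call such as
     gomp_copy_host2dev (devicep, NULL, devptr, hostptr, len, false, cbufp);
   either lands in the coalesce buffer or goes through host2dev_func
   directly, whereas the asynchronous OpenACC path with EPHEMERAL data
   first duplicates the host bytes into a gomp_malloc'd staging buffer and
   registers free as a queue callback, so the source may legitimately go
   out of scope before the transfer is performed. DEVPTR, HOSTPTR and LEN
   above are placeholder names. */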
447
448 attribute_hidden void
449 gomp_copy_dev2host (struct gomp_device_descr *devicep,
450 struct goacc_asyncqueue *aq,
451 void *h, const void *d, size_t sz)
452 {
453 if (__builtin_expect (aq != NULL, 0))
454 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
455 "host", h, "dev", d, NULL, sz, aq);
456 else
457 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
458 }
459
460 static void
461 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
462 {
463 if (!devicep->free_func (devicep->target_id, devptr))
464 {
465 gomp_mutex_unlock (&devicep->lock);
466 gomp_fatal ("error in freeing device memory block at %p", devptr);
467 }
468 }
469
470 /* Increment reference count of a splay_tree_key region K by 1.
471 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
472 increment the value if refcount is not yet contained in the set (used for
473 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
474 once for each construct). */
475
476 static inline void
477 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
478 {
479 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
480 return;
481
482 uintptr_t *refcount_ptr = &k->refcount;
483
484 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
485 refcount_ptr = &k->structelem_refcount;
486 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
487 refcount_ptr = k->structelem_refcount_ptr;
488
489 if (refcount_set)
490 {
491 if (htab_find (*refcount_set, refcount_ptr))
492 return;
493 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
494 *slot = refcount_ptr;
495 }
496
497 *refcount_ptr += 1;
498 return;
499 }
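/* Editor's sketch of the refcounting scheme above (illustrative only):
   for a structure mapped as three element maps, only the first element's
   key carries the shared structelem_refcount; its siblings reference it
   through structelem_refcount_ptr. With a non-NULL REFCOUNT_SET,
   incrementing all three keys on one construct bumps the shared counter
   exactly once, because the second and third calls find the same pointer
   already recorded in the set and return early. */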
500
501 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
502 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
503 track already seen refcounts, and only adjust the value if refcount is not
504 yet contained in the set (like gomp_increment_refcount).
505
506 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
507 it is already zero and we know we decremented it earlier. This signals that
508 associated maps should be copied back to host.
509
510 *DO_REMOVE is set to true when this is the first handling of this refcount
511 and we are setting it to zero. This signals a removal of this key from the
512 splay-tree map.
513
514 Copy and removal are separated due to cases like handling of structure
515 elements, e.g. each map of a structure element representing a possible copy
516 out of a structure field has to be handled individually, but we only signal
517 removal for one (the first encountered) sibling map. */
518
519 static inline void
520 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
521 bool *do_copy, bool *do_remove)
522 {
523 if (k == NULL || k->refcount == REFCOUNT_INFINITY)
524 {
525 *do_copy = *do_remove = false;
526 return;
527 }
528
529 uintptr_t *refcount_ptr = &k->refcount;
530
531 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
532 refcount_ptr = &k->structelem_refcount;
533 else if (REFCOUNT_STRUCTELEM_P (k->refcount))
534 refcount_ptr = k->structelem_refcount_ptr;
535
536 bool new_encountered_refcount;
537 bool set_to_zero = false;
538 bool is_zero = false;
539
540 uintptr_t orig_refcount = *refcount_ptr;
541
542 if (refcount_set)
543 {
544 if (htab_find (*refcount_set, refcount_ptr))
545 {
546 new_encountered_refcount = false;
547 goto end;
548 }
549
550 uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
551 *slot = refcount_ptr;
552 new_encountered_refcount = true;
553 }
554 else
555 /* If no refcount_set being used, assume all keys are being decremented
556 for the first time. */
557 new_encountered_refcount = true;
558
559 if (delete_p)
560 *refcount_ptr = 0;
561 else if (*refcount_ptr > 0)
562 *refcount_ptr -= 1;
563
564 end:
565 if (*refcount_ptr == 0)
566 {
567 if (orig_refcount > 0)
568 set_to_zero = true;
569
570 is_zero = true;
571 }
572
573 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
574 *do_remove = (new_encountered_refcount && set_to_zero);
575 }
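/* Editor's summary of the out-values above (illustrative only): DO_COPY
   is true whenever the refcount is zero after the call and either this
   call brought it to zero or an earlier sibling on the same construct
   already did, so every sibling map still gets a chance to copy its data
   back to the host. DO_REMOVE is true only for the first-encountered
   sibling that drops the count to zero, so the splay-tree key is unlinked
   exactly once. */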
576
577 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
578 gomp_map_0len_lookup found oldn for newn.
579 Helper function of gomp_map_vars. */
580
581 static inline void
582 gomp_map_vars_existing (struct gomp_device_descr *devicep,
583 struct goacc_asyncqueue *aq, splay_tree_key oldn,
584 splay_tree_key newn, struct target_var_desc *tgt_var,
585 unsigned char kind, bool always_to_flag, bool implicit,
586 struct gomp_coalesce_buf *cbuf,
587 htab_t *refcount_set)
588 {
589 assert (kind != GOMP_MAP_ATTACH
590 && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
591
592 tgt_var->key = oldn;
593 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
594 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
595 tgt_var->is_attach = false;
596 tgt_var->offset = newn->host_start - oldn->host_start;
597
598 /* For implicit maps, old contained in new is valid. */
599 bool implicit_subset = (implicit
600 && newn->host_start <= oldn->host_start
601 && oldn->host_end <= newn->host_end);
602 if (implicit_subset)
603 tgt_var->length = oldn->host_end - oldn->host_start;
604 else
605 tgt_var->length = newn->host_end - newn->host_start;
606
607 if (GOMP_MAP_FORCE_P (kind)
608 /* For implicit maps, old contained in new is valid. */
609 || !(implicit_subset
610 /* Otherwise, new contained inside old is considered valid. */
611 || (oldn->host_start <= newn->host_start
612 && newn->host_end <= oldn->host_end)))
613 {
614 gomp_mutex_unlock (&devicep->lock);
615 gomp_fatal ("Trying to map into device [%p..%p) object when "
616 "[%p..%p) is already mapped",
617 (void *) newn->host_start, (void *) newn->host_end,
618 (void *) oldn->host_start, (void *) oldn->host_end);
619 }
620
621 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
622 {
623 /* Implicit + always should not happen. If this does occur, the
624 address/length adjustment below is a TODO. */
625 assert (!implicit_subset);
626
627 if (oldn->aux && oldn->aux->attach_count)
628 {
629 /* We have to be careful not to overwrite still attached pointers
630 during the copyback to host. */
631 uintptr_t addr = newn->host_start;
632 while (addr < newn->host_end)
633 {
634 size_t i = (addr - oldn->host_start) / sizeof (void *);
635 if (oldn->aux->attach_count[i] == 0)
636 gomp_copy_host2dev (devicep, aq,
637 (void *) (oldn->tgt->tgt_start
638 + oldn->tgt_offset
639 + addr - oldn->host_start),
640 (void *) addr,
641 sizeof (void *), false, cbuf);
642 addr += sizeof (void *);
643 }
644 }
645 else
646 gomp_copy_host2dev (devicep, aq,
647 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
648 + newn->host_start - oldn->host_start),
649 (void *) newn->host_start,
650 newn->host_end - newn->host_start, false, cbuf);
651 }
652
653 gomp_increment_refcount (oldn, refcount_set);
654 }
655
656 static int
657 get_kind (bool short_mapkind, void *kinds, int idx)
658 {
659 if (!short_mapkind)
660 return ((unsigned char *) kinds)[idx];
661
662 int val = ((unsigned short *) kinds)[idx];
663 if (GOMP_MAP_IMPLICIT_P (val))
664 val &= ~GOMP_MAP_IMPLICIT;
665 return val;
666 }
667
668
669 static bool
670 get_implicit (bool short_mapkind, void *kinds, int idx)
671 {
672 if (!short_mapkind)
673 return false;
674
675 int val = ((unsigned short *) kinds)[idx];
676 return GOMP_MAP_IMPLICIT_P (val);
677 }
678
679 static void
680 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
681 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
682 struct gomp_coalesce_buf *cbuf,
683 bool allow_zero_length_array_sections)
684 {
685 struct gomp_device_descr *devicep = tgt->device_descr;
686 struct splay_tree_s *mem_map = &devicep->mem_map;
687 struct splay_tree_key_s cur_node;
688
689 cur_node.host_start = host_ptr;
690 if (cur_node.host_start == (uintptr_t) NULL)
691 {
692 cur_node.tgt_offset = (uintptr_t) NULL;
693 gomp_copy_host2dev (devicep, aq,
694 (void *) (tgt->tgt_start + target_offset),
695 (void *) &cur_node.tgt_offset, sizeof (void *),
696 true, cbuf);
697 return;
698 }
699 /* Add bias to the pointer value. */
700 cur_node.host_start += bias;
701 cur_node.host_end = cur_node.host_start;
702 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
703 if (n == NULL)
704 {
705 if (allow_zero_length_array_sections)
706 cur_node.tgt_offset = cur_node.host_start;
707 else
708 {
709 gomp_mutex_unlock (&devicep->lock);
710 gomp_fatal ("Pointer target of array section wasn't mapped");
711 }
712 }
713 else
714 {
715 cur_node.host_start -= n->host_start;
716 cur_node.tgt_offset
717 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
718 /* At this point tgt_offset is target address of the
719 array section. Now subtract bias to get what we want
720 to initialize the pointer with. */
721 cur_node.tgt_offset -= bias;
722 }
723 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
724 (void *) &cur_node.tgt_offset, sizeof (void *),
725 true, cbuf);
726 }
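/* Editor's worked example for the bias handling above (illustrative
   only): if a host pointer points at A(1) but only the section A(10:20)
   is mapped, the compiler passes the pointer value as HOST_PTR and the
   byte offset from A(1) to A(10) as BIAS. HOST_PTR + BIAS then falls
   inside the mapped region, the lookup succeeds, and BIAS is subtracted
   from the resulting device address again, so the device-side pointer is
   initialized as if it pointed at A(1), mirroring the host pointer. */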
727
728 static void
729 gomp_map_fields_existing (struct target_mem_desc *tgt,
730 struct goacc_asyncqueue *aq, splay_tree_key n,
731 size_t first, size_t i, void **hostaddrs,
732 size_t *sizes, void *kinds,
733 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
734 {
735 struct gomp_device_descr *devicep = tgt->device_descr;
736 struct splay_tree_s *mem_map = &devicep->mem_map;
737 struct splay_tree_key_s cur_node;
738 int kind;
739 bool implicit;
740 const bool short_mapkind = true;
741 const int typemask = short_mapkind ? 0xff : 0x7;
742
743 cur_node.host_start = (uintptr_t) hostaddrs[i];
744 cur_node.host_end = cur_node.host_start + sizes[i];
745 splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node);
746 kind = get_kind (short_mapkind, kinds, i);
747 implicit = get_implicit (short_mapkind, kinds, i);
748 if (n2
749 && n2->tgt == n->tgt
750 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
751 {
752 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
753 kind & typemask, false, implicit, cbuf,
754 refcount_set);
755 return;
756 }
757 if (sizes[i] == 0)
758 {
759 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
760 {
761 cur_node.host_start--;
762 n2 = splay_tree_lookup (mem_map, &cur_node);
763 cur_node.host_start++;
764 if (n2
765 && n2->tgt == n->tgt
766 && n2->host_start - n->host_start
767 == n2->tgt_offset - n->tgt_offset)
768 {
769 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
770 kind & typemask, false, implicit, cbuf,
771 refcount_set);
772 return;
773 }
774 }
775 cur_node.host_end++;
776 n2 = splay_tree_lookup (mem_map, &cur_node);
777 cur_node.host_end--;
778 if (n2
779 && n2->tgt == n->tgt
780 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
781 {
782 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
783 kind & typemask, false, implicit, cbuf,
784 refcount_set);
785 return;
786 }
787 }
788 gomp_mutex_unlock (&devicep->lock);
789 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
790 "other mapped elements from the same structure weren't mapped "
791 "together with it", (void *) cur_node.host_start,
792 (void *) cur_node.host_end);
793 }
794
795 attribute_hidden void
796 gomp_attach_pointer (struct gomp_device_descr *devicep,
797 struct goacc_asyncqueue *aq, splay_tree mem_map,
798 splay_tree_key n, uintptr_t attach_to, size_t bias,
799 struct gomp_coalesce_buf *cbufp,
800 bool allow_zero_length_array_sections)
801 {
802 struct splay_tree_key_s s;
803 size_t size, idx;
804
805 if (n == NULL)
806 {
807 gomp_mutex_unlock (&devicep->lock);
808 gomp_fatal ("enclosing struct not mapped for attach");
809 }
810
811 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
812 /* We might have a pointer in a packed struct: however we cannot have more
813 than one such pointer in each pointer-sized portion of the struct, so
814 this is safe. */
815 idx = (attach_to - n->host_start) / sizeof (void *);
816
817 if (!n->aux)
818 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
819
820 if (!n->aux->attach_count)
821 n->aux->attach_count
822 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
823
824 if (n->aux->attach_count[idx] < UINTPTR_MAX)
825 n->aux->attach_count[idx]++;
826 else
827 {
828 gomp_mutex_unlock (&devicep->lock);
829 gomp_fatal ("attach count overflow");
830 }
831
832 if (n->aux->attach_count[idx] == 1)
833 {
834 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
835 - n->host_start;
836 uintptr_t target = (uintptr_t) *(void **) attach_to;
837 splay_tree_key tn;
838 uintptr_t data;
839
840 if ((void *) target == NULL)
841 {
842 /* As a special case, allow attaching NULL host pointers. This
843 allows e.g. unassociated Fortran pointers to be mapped
844 properly. */
845 data = 0;
846
847 gomp_debug (1,
848 "%s: attaching NULL host pointer, target %p "
849 "(struct base %p)\n", __FUNCTION__, (void *) devptr,
850 (void *) (n->tgt->tgt_start + n->tgt_offset));
851
852 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
853 sizeof (void *), true, cbufp);
854
855 return;
856 }
857
858 s.host_start = target + bias;
859 s.host_end = s.host_start + 1;
860 tn = splay_tree_lookup (mem_map, &s);
861
862 if (!tn)
863 {
864 if (allow_zero_length_array_sections)
865 /* When allowing attachment to zero-length array sections, we
866 copy the host pointer when the target region is not mapped. */
867 data = target;
868 else
869 {
870 gomp_mutex_unlock (&devicep->lock);
871 gomp_fatal ("pointer target not mapped for attach");
872 }
873 }
874 else
875 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
876
877 gomp_debug (1,
878 "%s: attaching host %p, target %p (struct base %p) to %p\n",
879 __FUNCTION__, (void *) attach_to, (void *) devptr,
880 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
881
882 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
883 sizeof (void *), true, cbufp);
884 }
885 else
886 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
887 (void *) attach_to, (int) n->aux->attach_count[idx]);
888 }
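/* Editor's note on the attachment counters above (illustrative only):
   the counters are kept per pointer-sized slot of the enclosing mapping,
   so attaching the same struct member twice rewrites the device pointer
   only on the first attach and merely bumps attach_count[idx] on the
   second. gomp_detach_pointer below restores the current host pointer
   value only once the count drops back to zero (or immediately when
   FINALIZE forces the count to one first). */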
889
890 attribute_hidden void
891 gomp_detach_pointer (struct gomp_device_descr *devicep,
892 struct goacc_asyncqueue *aq, splay_tree_key n,
893 uintptr_t detach_from, bool finalize,
894 struct gomp_coalesce_buf *cbufp)
895 {
896 size_t idx;
897
898 if (n == NULL)
899 {
900 gomp_mutex_unlock (&devicep->lock);
901 gomp_fatal ("enclosing struct not mapped for detach");
902 }
903
904 idx = (detach_from - n->host_start) / sizeof (void *);
905
906 if (!n->aux || !n->aux->attach_count)
907 {
908 gomp_mutex_unlock (&devicep->lock);
909 gomp_fatal ("no attachment counters for struct");
910 }
911
912 if (finalize)
913 n->aux->attach_count[idx] = 1;
914
915 if (n->aux->attach_count[idx] == 0)
916 {
917 gomp_mutex_unlock (&devicep->lock);
918 gomp_fatal ("attach count underflow");
919 }
920 else
921 n->aux->attach_count[idx]--;
922
923 if (n->aux->attach_count[idx] == 0)
924 {
925 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
926 - n->host_start;
927 uintptr_t target = (uintptr_t) *(void **) detach_from;
928
929 gomp_debug (1,
930 "%s: detaching host %p, target %p (struct base %p) to %p\n",
931 __FUNCTION__, (void *) detach_from, (void *) devptr,
932 (void *) (n->tgt->tgt_start + n->tgt_offset),
933 (void *) target);
934
935 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
936 sizeof (void *), true, cbufp);
937 }
938 else
939 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
940 (void *) detach_from, (int) n->aux->attach_count[idx]);
941 }
942
943 attribute_hidden uintptr_t
944 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
945 {
946 if (tgt->list[i].key != NULL)
947 return tgt->list[i].key->tgt->tgt_start
948 + tgt->list[i].key->tgt_offset
949 + tgt->list[i].offset;
950
951 switch (tgt->list[i].offset)
952 {
953 case OFFSET_INLINED:
954 return (uintptr_t) hostaddrs[i];
955
956 case OFFSET_POINTER:
957 return 0;
958
959 case OFFSET_STRUCT:
960 return tgt->list[i + 1].key->tgt->tgt_start
961 + tgt->list[i + 1].key->tgt_offset
962 + tgt->list[i + 1].offset
963 + (uintptr_t) hostaddrs[i]
964 - (uintptr_t) hostaddrs[i + 1];
965
966 default:
967 return tgt->tgt_start + tgt->list[i].offset;
968 }
969 }
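/* Editor's sketch of the offset encodings used above (illustrative
   only): entries without a splay-tree key reuse tgt->list[i].offset as a
   tag. OFFSET_INLINED means hostaddrs[i] already holds the value to pass
   (e.g. firstprivate scalars or resolved use_device pointers);
   OFFSET_POINTER yields 0, the pointer itself being handled by the
   ALWAYS_POINTER code; OFFSET_STRUCT recovers the device address of a
   struct base from its first mapped field at index i + 1 by subtracting
   the field's offset within the struct. Anything else is an offset from
   tgt->tgt_start. */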
970
971 static inline __attribute__((always_inline)) struct target_mem_desc *
972 gomp_map_vars_internal (struct gomp_device_descr *devicep,
973 struct goacc_asyncqueue *aq, size_t mapnum,
974 void **hostaddrs, void **devaddrs, size_t *sizes,
975 void *kinds, bool short_mapkind,
976 htab_t *refcount_set,
977 enum gomp_map_vars_kind pragma_kind)
978 {
979 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
980 bool has_firstprivate = false;
981 bool has_always_ptrset = false;
982 bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
983 const int rshift = short_mapkind ? 8 : 3;
984 const int typemask = short_mapkind ? 0xff : 0x7;
985 struct splay_tree_s *mem_map = &devicep->mem_map;
986 struct splay_tree_key_s cur_node;
987 struct target_mem_desc *tgt
988 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
989 tgt->list_count = mapnum;
990 tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
991 tgt->device_descr = devicep;
992 tgt->prev = NULL;
993 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
994
995 if (mapnum == 0)
996 {
997 tgt->tgt_start = 0;
998 tgt->tgt_end = 0;
999 return tgt;
1000 }
1001
1002 tgt_align = sizeof (void *);
1003 tgt_size = 0;
1004 cbuf.chunks = NULL;
1005 cbuf.chunk_cnt = -1;
1006 cbuf.use_cnt = 0;
1007 cbuf.buf = NULL;
1008 if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
1009 {
1010 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
1011 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
1012 cbuf.chunk_cnt = 0;
1013 }
1014 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1015 {
1016 size_t align = 4 * sizeof (void *);
1017 tgt_align = align;
1018 tgt_size = mapnum * sizeof (void *);
1019 cbuf.chunk_cnt = 1;
1020 cbuf.use_cnt = 1 + (mapnum > 1);
1021 cbuf.chunks[0].start = 0;
1022 cbuf.chunks[0].end = tgt_size;
1023 }
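/* Editor's note (illustrative only): for a GOMP_MAP_VARS_TARGET mapping,
   the first mapnum * sizeof (void *) bytes of the new device block are
   reserved for the array of device addresses that the target region
   receives as its argument, which is why chunk 0 of the coalesce buffer
   is pre-seeded to cover exactly that range; the addresses themselves are
   filled in near the end of this function via gomp_map_val. */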
1024
1025 gomp_mutex_lock (&devicep->lock);
1026 if (devicep->state == GOMP_DEVICE_FINALIZED)
1027 {
1028 gomp_mutex_unlock (&devicep->lock);
1029 free (tgt);
1030 return NULL;
1031 }
1032
1033 for (i = 0; i < mapnum; i++)
1034 {
1035 int kind = get_kind (short_mapkind, kinds, i);
1036 bool implicit = get_implicit (short_mapkind, kinds, i);
1037 if (hostaddrs[i] == NULL
1038 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
1039 {
1040 tgt->list[i].key = NULL;
1041 tgt->list[i].offset = OFFSET_INLINED;
1042 continue;
1043 }
1044 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
1045 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1046 {
1047 tgt->list[i].key = NULL;
1048 if (!not_found_cnt)
1049 {
1050 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1051 on a separate construct prior to using use_device_{addr,ptr}.
1052 In OpenMP 5.0, map directives need to be ordered by the
1053 middle-end before the use_device_* clauses. If
1054 !not_found_cnt, all mappings requested (if any) are already
1055 mapped, so use_device_{addr,ptr} can be resolved right away.
1056 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1057 now but would succeed after performing the mappings in the
1058 following loop. We can't defer this always to the second
1059 loop, because it is not even invoked when !not_found_cnt
1060 after the first loop. */
1061 cur_node.host_start = (uintptr_t) hostaddrs[i];
1062 cur_node.host_end = cur_node.host_start;
1063 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1064 if (n != NULL)
1065 {
1066 cur_node.host_start -= n->host_start;
1067 hostaddrs[i]
1068 = (void *) (n->tgt->tgt_start + n->tgt_offset
1069 + cur_node.host_start);
1070 }
1071 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1072 {
1073 gomp_mutex_unlock (&devicep->lock);
1074 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1075 }
1076 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1077 /* If not present, continue using the host address. */
1078 ;
1079 else
1080 __builtin_unreachable ();
1081 tgt->list[i].offset = OFFSET_INLINED;
1082 }
1083 else
1084 tgt->list[i].offset = 0;
1085 continue;
1086 }
1087 else if ((kind & typemask) == GOMP_MAP_STRUCT)
1088 {
1089 size_t first = i + 1;
1090 size_t last = i + sizes[i];
1091 cur_node.host_start = (uintptr_t) hostaddrs[i];
1092 cur_node.host_end = (uintptr_t) hostaddrs[last]
1093 + sizes[last];
1094 tgt->list[i].key = NULL;
1095 tgt->list[i].offset = OFFSET_STRUCT;
1096 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1097 if (n == NULL)
1098 {
1099 size_t align = (size_t) 1 << (kind >> rshift);
1100 if (tgt_align < align)
1101 tgt_align = align;
1102 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1103 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1104 tgt_size += cur_node.host_end - cur_node.host_start;
1105 not_found_cnt += last - i;
1106 for (i = first; i <= last; i++)
1107 {
1108 tgt->list[i].key = NULL;
1109 if (!aq
1110 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1111 & typemask)
1112 && sizes[i] != 0)
1113 gomp_coalesce_buf_add (&cbuf,
1114 tgt_size - cur_node.host_end
1115 + (uintptr_t) hostaddrs[i],
1116 sizes[i]);
1117 }
1118 i--;
1119 continue;
1120 }
1121 for (i = first; i <= last; i++)
1122 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1123 sizes, kinds, NULL, refcount_set);
1124 i--;
1125 continue;
1126 }
1127 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1128 {
1129 tgt->list[i].key = NULL;
1130 tgt->list[i].offset = OFFSET_POINTER;
1131 has_firstprivate = true;
1132 continue;
1133 }
1134 else if ((kind & typemask) == GOMP_MAP_ATTACH
1135 || ((kind & typemask)
1136 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1137 {
1138 tgt->list[i].key = NULL;
1139 has_firstprivate = true;
1140 continue;
1141 }
1142 cur_node.host_start = (uintptr_t) hostaddrs[i];
1143 if (!GOMP_MAP_POINTER_P (kind & typemask))
1144 cur_node.host_end = cur_node.host_start + sizes[i];
1145 else
1146 cur_node.host_end = cur_node.host_start + sizeof (void *);
1147 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1148 {
1149 tgt->list[i].key = NULL;
1150
1151 size_t align = (size_t) 1 << (kind >> rshift);
1152 if (tgt_align < align)
1153 tgt_align = align;
1154 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1155 if (!aq)
1156 gomp_coalesce_buf_add (&cbuf, tgt_size,
1157 cur_node.host_end - cur_node.host_start);
1158 tgt_size += cur_node.host_end - cur_node.host_start;
1159 has_firstprivate = true;
1160 continue;
1161 }
1162 splay_tree_key n;
1163 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1164 {
1165 n = gomp_map_0len_lookup (mem_map, &cur_node);
1166 if (!n)
1167 {
1168 tgt->list[i].key = NULL;
1169 tgt->list[i].offset = OFFSET_INLINED;
1170 continue;
1171 }
1172 }
1173 else
1174 n = splay_tree_lookup (mem_map, &cur_node);
1175 if (n && n->refcount != REFCOUNT_LINK)
1176 {
1177 int always_to_cnt = 0;
1178 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1179 {
1180 bool has_nullptr = false;
1181 size_t j;
1182 for (j = 0; j < n->tgt->list_count; j++)
1183 if (n->tgt->list[j].key == n)
1184 {
1185 has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1186 break;
1187 }
1188 if (n->tgt->list_count == 0)
1189 {
1190 /* 'declare target'; assume has_nullptr; it could also be a
1191 statically assigned pointer, but then it should point to
1192 the equivalent variable on the host. */
1193 assert (n->refcount == REFCOUNT_INFINITY);
1194 has_nullptr = true;
1195 }
1196 else
1197 assert (j < n->tgt->list_count);
1198 /* Re-map the data if there is an 'always' modifier or if a
1199 null pointer was there and now a nonnull one has been found; that
1200 permits transparent re-mapping for Fortran array descriptors
1201 which were previously mapped unallocated. */
1202 for (j = i + 1; j < mapnum; j++)
1203 {
1204 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1205 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1206 && (!has_nullptr
1207 || !GOMP_MAP_POINTER_P (ptr_kind)
1208 || *(void **) hostaddrs[j] == NULL))
1209 break;
1210 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1211 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1212 > cur_node.host_end))
1213 break;
1214 else
1215 {
1216 has_always_ptrset = true;
1217 ++always_to_cnt;
1218 }
1219 }
1220 }
1221 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1222 kind & typemask, always_to_cnt > 0, implicit,
1223 NULL, refcount_set);
1224 i += always_to_cnt;
1225 }
1226 else
1227 {
1228 tgt->list[i].key = NULL;
1229
1230 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1231 {
1232 /* Not present, hence skip the entry - including its MAP_POINTER,
1233 if it exists. */
1234 tgt->list[i].offset = OFFSET_INLINED;
1235 if (i + 1 < mapnum
1236 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1237 == GOMP_MAP_POINTER))
1238 {
1239 ++i;
1240 tgt->list[i].key = NULL;
1241 tgt->list[i].offset = 0;
1242 }
1243 continue;
1244 }
1245 size_t align = (size_t) 1 << (kind >> rshift);
1246 not_found_cnt++;
1247 if (tgt_align < align)
1248 tgt_align = align;
1249 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1250 if (!aq
1251 && gomp_to_device_kind_p (kind & typemask))
1252 gomp_coalesce_buf_add (&cbuf, tgt_size,
1253 cur_node.host_end - cur_node.host_start);
1254 tgt_size += cur_node.host_end - cur_node.host_start;
1255 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1256 {
1257 size_t j;
1258 int kind;
1259 for (j = i + 1; j < mapnum; j++)
1260 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1261 kinds, j)) & typemask))
1262 && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1263 break;
1264 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1265 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1266 > cur_node.host_end))
1267 break;
1268 else
1269 {
1270 tgt->list[j].key = NULL;
1271 i++;
1272 }
1273 }
1274 }
1275 }
1276
1277 if (devaddrs)
1278 {
1279 if (mapnum != 1)
1280 {
1281 gomp_mutex_unlock (&devicep->lock);
1282 gomp_fatal ("unexpected aggregation");
1283 }
1284 tgt->to_free = devaddrs[0];
1285 tgt->tgt_start = (uintptr_t) tgt->to_free;
1286 tgt->tgt_end = tgt->tgt_start + sizes[0];
1287 }
1288 else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
1289 {
1290 /* Allocate tgt_align aligned tgt_size block of memory. */
1291 /* FIXME: Perhaps change interface to allocate properly aligned
1292 memory. */
1293 tgt->to_free = devicep->alloc_func (devicep->target_id,
1294 tgt_size + tgt_align - 1);
1295 if (!tgt->to_free)
1296 {
1297 gomp_mutex_unlock (&devicep->lock);
1298 gomp_fatal ("device memory allocation failed");
1299 }
1300
1301 tgt->tgt_start = (uintptr_t) tgt->to_free;
1302 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1303 tgt->tgt_end = tgt->tgt_start + tgt_size;
1304
1305 if (cbuf.use_cnt == 1)
1306 cbuf.chunk_cnt--;
1307 if (cbuf.chunk_cnt > 0)
1308 {
1309 cbuf.buf
1310 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1311 if (cbuf.buf)
1312 {
1313 cbuf.tgt = tgt;
1314 cbufp = &cbuf;
1315 }
1316 }
1317 }
1318 else
1319 {
1320 tgt->to_free = NULL;
1321 tgt->tgt_start = 0;
1322 tgt->tgt_end = 0;
1323 }
1324
1325 tgt_size = 0;
1326 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1327 tgt_size = mapnum * sizeof (void *);
1328
1329 tgt->array = NULL;
1330 if (not_found_cnt || has_firstprivate || has_always_ptrset)
1331 {
1332 if (not_found_cnt)
1333 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1334 splay_tree_node array = tgt->array;
1335 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1336 uintptr_t field_tgt_base = 0;
1337 splay_tree_key field_tgt_structelem_first = NULL;
1338
1339 for (i = 0; i < mapnum; i++)
1340 if (has_always_ptrset
1341 && tgt->list[i].key
1342 && (get_kind (short_mapkind, kinds, i) & typemask)
1343 == GOMP_MAP_TO_PSET)
1344 {
1345 splay_tree_key k = tgt->list[i].key;
1346 bool has_nullptr = false;
1347 size_t j;
1348 for (j = 0; j < k->tgt->list_count; j++)
1349 if (k->tgt->list[j].key == k)
1350 {
1351 has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1352 break;
1353 }
1354 if (k->tgt->list_count == 0)
1355 has_nullptr = true;
1356 else
1357 assert (j < k->tgt->list_count);
1358
1359 tgt->list[i].has_null_ptr_assoc = false;
1360 for (j = i + 1; j < mapnum; j++)
1361 {
1362 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1363 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1364 && (!has_nullptr
1365 || !GOMP_MAP_POINTER_P (ptr_kind)
1366 || *(void **) hostaddrs[j] == NULL))
1367 break;
1368 else if ((uintptr_t) hostaddrs[j] < k->host_start
1369 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1370 > k->host_end))
1371 break;
1372 else
1373 {
1374 if (*(void **) hostaddrs[j] == NULL)
1375 tgt->list[i].has_null_ptr_assoc = true;
1376 tgt->list[j].key = k;
1377 tgt->list[j].copy_from = false;
1378 tgt->list[j].always_copy_from = false;
1379 tgt->list[j].is_attach = false;
1380 gomp_increment_refcount (k, refcount_set);
1381 gomp_map_pointer (k->tgt, aq,
1382 (uintptr_t) *(void **) hostaddrs[j],
1383 k->tgt_offset + ((uintptr_t) hostaddrs[j]
1384 - k->host_start),
1385 sizes[j], cbufp, false);
1386 }
1387 }
1388 i = j - 1;
1389 }
1390 else if (tgt->list[i].key == NULL)
1391 {
1392 int kind = get_kind (short_mapkind, kinds, i);
1393 bool implicit = get_implicit (short_mapkind, kinds, i);
1394 if (hostaddrs[i] == NULL)
1395 continue;
1396 switch (kind & typemask)
1397 {
1398 size_t align, len, first, last;
1399 splay_tree_key n;
1400 case GOMP_MAP_FIRSTPRIVATE:
1401 align = (size_t) 1 << (kind >> rshift);
1402 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1403 tgt->list[i].offset = tgt_size;
1404 len = sizes[i];
1405 gomp_copy_host2dev (devicep, aq,
1406 (void *) (tgt->tgt_start + tgt_size),
1407 (void *) hostaddrs[i], len, false, cbufp);
1408 /* Save the device address in hostaddrs[i] to permit later availability
1409 when doing a deep firstprivate with pointer attach. */
1410 hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size);
1411 tgt_size += len;
1412
1413 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1414 firstprivate to hostaddrs[i+1], which is assumed to contain a
1415 device address. */
1416 if (i + 1 < mapnum
1417 && (GOMP_MAP_ATTACH
1418 == (typemask & get_kind (short_mapkind, kinds, i+1))))
1419 {
1420 uintptr_t target = (uintptr_t) hostaddrs[i];
1421 void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1];
1422 /* Per
1423 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1424 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1425 this probably needs revision for 'aq' usage. */
1426 assert (!aq);
1427 gomp_copy_host2dev (devicep, aq, devptr, &target,
1428 sizeof (void *), false, cbufp);
1429 ++i;
1430 }
1431 continue;
1432 case GOMP_MAP_FIRSTPRIVATE_INT:
1433 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1434 continue;
1435 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1436 /* The OpenACC 'host_data' construct only allows 'use_device'
1437 "mapping" clauses, so in the first loop, 'not_found_cnt'
1438 must always have been zero, so all OpenACC 'use_device'
1439 clauses have already been handled. (We can only easily test
1440 'use_device' with 'if_present' clause here.) */
1441 assert (tgt->list[i].offset == OFFSET_INLINED);
1442 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1443 code conceptually simple, similar to the first loop. */
1444 case GOMP_MAP_USE_DEVICE_PTR:
1445 if (tgt->list[i].offset == 0)
1446 {
1447 cur_node.host_start = (uintptr_t) hostaddrs[i];
1448 cur_node.host_end = cur_node.host_start;
1449 n = gomp_map_lookup (mem_map, &cur_node);
1450 if (n != NULL)
1451 {
1452 cur_node.host_start -= n->host_start;
1453 hostaddrs[i]
1454 = (void *) (n->tgt->tgt_start + n->tgt_offset
1455 + cur_node.host_start);
1456 }
1457 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1458 {
1459 gomp_mutex_unlock (&devicep->lock);
1460 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1461 }
1462 else if ((kind & typemask)
1463 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1464 /* If not present, continue using the host address. */
1465 ;
1466 else
1467 __builtin_unreachable ();
1468 tgt->list[i].offset = OFFSET_INLINED;
1469 }
1470 continue;
1471 case GOMP_MAP_STRUCT:
1472 first = i + 1;
1473 last = i + sizes[i];
1474 cur_node.host_start = (uintptr_t) hostaddrs[i];
1475 cur_node.host_end = (uintptr_t) hostaddrs[last]
1476 + sizes[last];
1477 if (tgt->list[first].key != NULL)
1478 continue;
1479 if (sizes[last] == 0)
1480 cur_node.host_end++;
1481 n = splay_tree_lookup (mem_map, &cur_node);
1482 if (sizes[last] == 0)
1483 cur_node.host_end--;
1484 if (n == NULL && cur_node.host_start == cur_node.host_end)
1485 {
1486 gomp_mutex_unlock (&devicep->lock);
1487 gomp_fatal ("Struct pointer member not mapped (%p)",
1488 (void*) hostaddrs[first]);
1489 }
1490 if (n == NULL)
1491 {
1492 size_t align = (size_t) 1 << (kind >> rshift);
1493 tgt_size -= (uintptr_t) hostaddrs[first]
1494 - (uintptr_t) hostaddrs[i];
1495 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1496 tgt_size += (uintptr_t) hostaddrs[first]
1497 - (uintptr_t) hostaddrs[i];
1498 field_tgt_base = (uintptr_t) hostaddrs[first];
1499 field_tgt_offset = tgt_size;
1500 field_tgt_clear = last;
1501 field_tgt_structelem_first = NULL;
1502 tgt_size += cur_node.host_end
1503 - (uintptr_t) hostaddrs[first];
1504 continue;
1505 }
1506 for (i = first; i <= last; i++)
1507 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1508 sizes, kinds, cbufp, refcount_set);
1509 i--;
1510 continue;
1511 case GOMP_MAP_ALWAYS_POINTER:
1512 cur_node.host_start = (uintptr_t) hostaddrs[i];
1513 cur_node.host_end = cur_node.host_start + sizeof (void *);
1514 n = splay_tree_lookup (mem_map, &cur_node);
1515 if (n == NULL
1516 || n->host_start > cur_node.host_start
1517 || n->host_end < cur_node.host_end)
1518 {
1519 gomp_mutex_unlock (&devicep->lock);
1520 gomp_fatal ("always pointer not mapped");
1521 }
1522 if (i > 0
1523 && ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1524 != GOMP_MAP_ALWAYS_POINTER))
1525 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1526 if (cur_node.tgt_offset)
1527 cur_node.tgt_offset -= sizes[i];
1528 gomp_copy_host2dev (devicep, aq,
1529 (void *) (n->tgt->tgt_start
1530 + n->tgt_offset
1531 + cur_node.host_start
1532 - n->host_start),
1533 (void *) &cur_node.tgt_offset,
1534 sizeof (void *), true, cbufp);
1535 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1536 + cur_node.host_start - n->host_start;
1537 continue;
1538 case GOMP_MAP_IF_PRESENT:
1539 /* Not present - otherwise handled above. Skip over its
1540 MAP_POINTER as well. */
1541 if (i + 1 < mapnum
1542 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1543 == GOMP_MAP_POINTER))
1544 ++i;
1545 continue;
1546 case GOMP_MAP_ATTACH:
1547 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1548 {
1549 cur_node.host_start = (uintptr_t) hostaddrs[i];
1550 cur_node.host_end = cur_node.host_start + sizeof (void *);
1551 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1552 if (n != NULL)
1553 {
1554 tgt->list[i].key = n;
1555 tgt->list[i].offset = cur_node.host_start - n->host_start;
1556 tgt->list[i].length = n->host_end - n->host_start;
1557 tgt->list[i].copy_from = false;
1558 tgt->list[i].always_copy_from = false;
1559 tgt->list[i].is_attach = true;
1560 /* OpenACC 'attach'/'detach' doesn't affect
1561 structured/dynamic reference counts ('n->refcount',
1562 'n->dynamic_refcount'). */
1563
1564 bool zlas
1565 = ((kind & typemask)
1566 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1567 gomp_attach_pointer (devicep, aq, mem_map, n,
1568 (uintptr_t) hostaddrs[i], sizes[i],
1569 cbufp, zlas);
1570 }
1571 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1572 {
1573 gomp_mutex_unlock (&devicep->lock);
1574 gomp_fatal ("outer struct not mapped for attach");
1575 }
1576 continue;
1577 }
1578 default:
1579 break;
1580 }
1581 splay_tree_key k = &array->key;
1582 k->host_start = (uintptr_t) hostaddrs[i];
1583 if (!GOMP_MAP_POINTER_P (kind & typemask))
1584 k->host_end = k->host_start + sizes[i];
1585 else
1586 k->host_end = k->host_start + sizeof (void *);
1587 splay_tree_key n = splay_tree_lookup (mem_map, k);
1588 if (n && n->refcount != REFCOUNT_LINK)
1589 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1590 kind & typemask, false, implicit, cbufp,
1591 refcount_set);
1592 else
1593 {
1594 k->aux = NULL;
1595 if (n && n->refcount == REFCOUNT_LINK)
1596 {
1597 /* Replace target address of the pointer with target address
1598 of mapped object in the splay tree. */
1599 splay_tree_remove (mem_map, n);
1600 k->aux
1601 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1602 k->aux->link_key = n;
1603 }
1604 size_t align = (size_t) 1 << (kind >> rshift);
1605 tgt->list[i].key = k;
1606 k->tgt = tgt;
1607 k->refcount = 0;
1608 k->dynamic_refcount = 0;
1609 if (field_tgt_clear != FIELD_TGT_EMPTY)
1610 {
1611 k->tgt_offset = k->host_start - field_tgt_base
1612 + field_tgt_offset;
1613 if (openmp_p)
1614 {
1615 k->refcount = REFCOUNT_STRUCTELEM;
1616 if (field_tgt_structelem_first == NULL)
1617 {
1618 /* Set to first structure element of sequence. */
1619 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1620 field_tgt_structelem_first = k;
1621 }
1622 else
1623 /* Point to refcount of leading element, but do not
1624 increment again. */
1625 k->structelem_refcount_ptr
1626 = &field_tgt_structelem_first->structelem_refcount;
1627
1628 if (i == field_tgt_clear)
1629 {
1630 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1631 field_tgt_structelem_first = NULL;
1632 }
1633 }
1634 if (i == field_tgt_clear)
1635 field_tgt_clear = FIELD_TGT_EMPTY;
1636 }
1637 else
1638 {
1639 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1640 k->tgt_offset = tgt_size;
1641 tgt_size += k->host_end - k->host_start;
1642 }
1643 /* First increment, from 0 to 1. gomp_increment_refcount
1644 encapsulates the different increment cases, so use this
1645 instead of directly setting 1 during initialization. */
1646 gomp_increment_refcount (k, refcount_set);
1647
1648 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1649 tgt->list[i].always_copy_from
1650 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1651 tgt->list[i].is_attach = false;
1652 tgt->list[i].offset = 0;
1653 tgt->list[i].length = k->host_end - k->host_start;
1654 tgt->refcount++;
1655 array->left = NULL;
1656 array->right = NULL;
1657 splay_tree_insert (mem_map, array);
1658 switch (kind & typemask)
1659 {
1660 case GOMP_MAP_ALLOC:
1661 case GOMP_MAP_FROM:
1662 case GOMP_MAP_FORCE_ALLOC:
1663 case GOMP_MAP_FORCE_FROM:
1664 case GOMP_MAP_ALWAYS_FROM:
1665 break;
1666 case GOMP_MAP_TO:
1667 case GOMP_MAP_TOFROM:
1668 case GOMP_MAP_FORCE_TO:
1669 case GOMP_MAP_FORCE_TOFROM:
1670 case GOMP_MAP_ALWAYS_TO:
1671 case GOMP_MAP_ALWAYS_TOFROM:
1672 gomp_copy_host2dev (devicep, aq,
1673 (void *) (tgt->tgt_start
1674 + k->tgt_offset),
1675 (void *) k->host_start,
1676 k->host_end - k->host_start,
1677 false, cbufp);
1678 break;
1679 case GOMP_MAP_POINTER:
1680 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1681 gomp_map_pointer
1682 (tgt, aq, (uintptr_t) *(void **) k->host_start,
1683 k->tgt_offset, sizes[i], cbufp,
1684 ((kind & typemask)
1685 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1686 break;
1687 case GOMP_MAP_TO_PSET:
1688 gomp_copy_host2dev (devicep, aq,
1689 (void *) (tgt->tgt_start
1690 + k->tgt_offset),
1691 (void *) k->host_start,
1692 k->host_end - k->host_start,
1693 false, cbufp);
1694 tgt->list[i].has_null_ptr_assoc = false;
1695
1696 for (j = i + 1; j < mapnum; j++)
1697 {
1698 int ptr_kind = (get_kind (short_mapkind, kinds, j)
1699 & typemask);
1700 if (!GOMP_MAP_POINTER_P (ptr_kind)
1701 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1702 break;
1703 else if ((uintptr_t) hostaddrs[j] < k->host_start
1704 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1705 > k->host_end))
1706 break;
1707 else
1708 {
1709 tgt->list[j].key = k;
1710 tgt->list[j].copy_from = false;
1711 tgt->list[j].always_copy_from = false;
1712 tgt->list[j].is_attach = false;
1713 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1714 /* For OpenMP, the use of refcount_sets causes
1715 errors if we set k->refcount = 1 above but also
1716 increment it again here, for decrementing will
1717 not properly match, since we decrement only once
1718 for each key's refcount. Therefore avoid this
1719 increment for OpenMP constructs. */
1720 if (!openmp_p)
1721 gomp_increment_refcount (k, refcount_set);
1722 gomp_map_pointer (tgt, aq,
1723 (uintptr_t) *(void **) hostaddrs[j],
1724 k->tgt_offset
1725 + ((uintptr_t) hostaddrs[j]
1726 - k->host_start),
1727 sizes[j], cbufp, false);
1728 }
1729 }
1730 i = j - 1;
1731 break;
1732 case GOMP_MAP_FORCE_PRESENT:
1733 case GOMP_MAP_ALWAYS_PRESENT_TO:
1734 case GOMP_MAP_ALWAYS_PRESENT_FROM:
1735 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
1736 {
1737 /* We already looked up the memory region above and it
1738 was missing. */
1739 size_t size = k->host_end - k->host_start;
1740 gomp_mutex_unlock (&devicep->lock);
1741 #ifdef HAVE_INTTYPES_H
1742 gomp_fatal ("present clause: not present on the device "
1743 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
1744 "dev: %d)", (void *) k->host_start,
1745 (uint64_t) size, (uint64_t) size,
1746 devicep->target_id);
1747 #else
1748 gomp_fatal ("present clause: not present on the device "
1749 "(addr: %p, size: %lu (0x%lx), dev: %d)",
1750 (void *) k->host_start,
1751 (unsigned long) size, (unsigned long) size,
1752 devicep->target_id);
1753 #endif
1754 }
1755 break;
1756 case GOMP_MAP_FORCE_DEVICEPTR:
1757 assert (k->host_end - k->host_start == sizeof (void *));
1758 gomp_copy_host2dev (devicep, aq,
1759 (void *) (tgt->tgt_start
1760 + k->tgt_offset),
1761 (void *) k->host_start,
1762 sizeof (void *), false, cbufp);
1763 break;
1764 default:
1765 gomp_mutex_unlock (&devicep->lock);
1766 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1767 kind);
1768 }
1769
1770 if (k->aux && k->aux->link_key)
1771 {
1772 /* Set link pointer on target to the device address of the
1773 mapped object. */
1774 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1775 /* We intentionally do not use coalescing here, as it's not
1776 data allocated by the current call to this function. */
1777 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1778 &tgt_addr, sizeof (void *), true, NULL);
1779 }
1780 array++;
1781 }
1782 }
1783 }
1784
1785 if (pragma_kind & GOMP_MAP_VARS_TARGET)
1786 {
1787 for (i = 0; i < mapnum; i++)
1788 {
1789 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1790 gomp_copy_host2dev (devicep, aq,
1791 (void *) (tgt->tgt_start + i * sizeof (void *)),
1792 (void *) &cur_node.tgt_offset, sizeof (void *),
1793 true, cbufp);
1794 }
1795 }
1796
1797 if (cbufp)
1798 {
1799 long c = 0;
1800 for (c = 0; c < cbuf.chunk_cnt; ++c)
1801 gomp_copy_host2dev (devicep, aq,
1802 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1803 (char *) cbuf.buf + (cbuf.chunks[c].start
1804 - cbuf.chunks[0].start),
1805 cbuf.chunks[c].end - cbuf.chunks[c].start,
1806 false, NULL);
1807 if (aq)
1808 /* Free once the transfer has completed. */
1809 devicep->openacc.async.queue_callback_func (aq, free, cbuf.buf);
1810 else
1811 free (cbuf.buf);
1812 cbuf.buf = NULL;
1813 cbufp = NULL;
1814 }
1815
1816 /* If the variable from "omp target enter data" map-list was already mapped,
1817 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1818 gomp_exit_data. */
1819 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1820 {
1821 free (tgt);
1822 tgt = NULL;
1823 }
1824
1825 gomp_mutex_unlock (&devicep->lock);
1826 return tgt;
1827 }
1828
1829 static struct target_mem_desc *
1830 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1831 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1832 bool short_mapkind, htab_t *refcount_set,
1833 enum gomp_map_vars_kind pragma_kind)
1834 {
1835 /* This management of a local refcount_set is for convenience of callers
1836 who do not share a refcount_set over multiple map/unmap uses. */
1837 htab_t local_refcount_set = NULL;
1838 if (refcount_set == NULL)
1839 {
1840 local_refcount_set = htab_create (mapnum);
1841 refcount_set = &local_refcount_set;
1842 }
1843
1844 struct target_mem_desc *tgt;
1845 tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1846 sizes, kinds, short_mapkind, refcount_set,
1847 pragma_kind);
1848 if (local_refcount_set)
1849 htab_free (local_refcount_set);
1850
1851 return tgt;
1852 }
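
/* A minimal usage sketch, assuming a hypothetical call site where devicep,
   mapnum, hostaddrs, sizes and kinds are already set up; passing a NULL
   refcount set makes both wrappers create and free a local one:

     struct target_mem_desc *tgt
       = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                        true, NULL, GOMP_MAP_VARS_TARGET);
     ...   // run the target region using tgt->tgt_start
     gomp_unmap_vars (tgt, true, NULL);  */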
1853
1854 attribute_hidden struct target_mem_desc *
1855 goacc_map_vars (struct gomp_device_descr *devicep,
1856 struct goacc_asyncqueue *aq, size_t mapnum,
1857 void **hostaddrs, void **devaddrs, size_t *sizes,
1858 void *kinds, bool short_mapkind,
1859 enum gomp_map_vars_kind pragma_kind)
1860 {
1861 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1862 sizes, kinds, short_mapkind, NULL,
1863 GOMP_MAP_VARS_OPENACC | pragma_kind);
1864 }
1865
1866 static void
1867 gomp_unmap_tgt (struct target_mem_desc *tgt)
1868 {
1869 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1870 if (tgt->tgt_end)
1871 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1872
1873 free (tgt->array);
1874 free (tgt);
1875 }
1876
1877 static bool
1878 gomp_unref_tgt (void *ptr)
1879 {
1880 bool is_tgt_unmapped = false;
1881
1882 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1883
1884 if (tgt->refcount > 1)
1885 tgt->refcount--;
1886 else
1887 {
1888 gomp_unmap_tgt (tgt);
1889 is_tgt_unmapped = true;
1890 }
1891
1892 return is_tgt_unmapped;
1893 }
1894
1895 static void
1896 gomp_unref_tgt_void (void *ptr)
1897 {
1898 (void) gomp_unref_tgt (ptr);
1899 }
1900
1901 static void
1902 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1903 {
1904 splay_tree_remove (sp, k);
1905 if (k->aux)
1906 {
1907 if (k->aux->link_key)
1908 splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1909 if (k->aux->attach_count)
1910 free (k->aux->attach_count);
1911 free (k->aux);
1912 k->aux = NULL;
1913 }
1914 }
1915
1916 static inline __attribute__((always_inline)) bool
1917 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1918 struct goacc_asyncqueue *aq)
1919 {
1920 bool is_tgt_unmapped = false;
1921
1922 if (REFCOUNT_STRUCTELEM_P (k->refcount))
1923 {
1924 if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1925 /* Infer the splay_tree_key of the first structelem key using the
1926 pointer to the first structelem_refcount. */
1927 k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1928 - offsetof (struct splay_tree_key_s,
1929 structelem_refcount));
1930 assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1931
1932 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1933 with the splay_tree_keys embedded inside. */
1934 splay_tree_node node =
1935 (splay_tree_node) ((char *) k
1936 - offsetof (struct splay_tree_node_s, key));
1937 while (true)
1938 {
1939 /* Starting from the _FIRST key, continue for all following
1940 sibling keys. */
1941 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1942 if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1943 break;
1944 else
1945 k = &(++node)->key;
1946 }
1947 }
1948 else
1949 gomp_remove_splay_tree_key (&devicep->mem_map, k);
1950
1951 if (aq)
1952 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1953 (void *) k->tgt);
1954 else
1955 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1956 return is_tgt_unmapped;
1957 }
1958
1959 attribute_hidden bool
1960 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1961 {
1962 return gomp_remove_var_internal (devicep, k, NULL);
1963 }
1964
1965 /* Remove a variable asynchronously. This actually removes the variable
1966 mapping immediately, but retains the linked target_mem_desc until the
1967 asynchronous operation has completed (as it may still refer to target
1968 memory). The device lock must be held before entry, and remains locked on
1969 exit. */
1970
1971 attribute_hidden void
1972 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1973 struct goacc_asyncqueue *aq)
1974 {
1975 (void) gomp_remove_var_internal (devicep, k, aq);
1976 }
1977
1978 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1979 variables back from device to host; if it is false, it is assumed that this
1980 has been done already. */
1981
1982 static inline __attribute__((always_inline)) void
1983 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1984 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1985 {
1986 struct gomp_device_descr *devicep = tgt->device_descr;
1987
1988 if (tgt->list_count == 0)
1989 {
1990 free (tgt);
1991 return;
1992 }
1993
1994 gomp_mutex_lock (&devicep->lock);
1995 if (devicep->state == GOMP_DEVICE_FINALIZED)
1996 {
1997 gomp_mutex_unlock (&devicep->lock);
1998 free (tgt->array);
1999 free (tgt);
2000 return;
2001 }
2002
2003 size_t i;
2004
2005 /* We must perform detachments before any copies back to the host. */
2006 for (i = 0; i < tgt->list_count; i++)
2007 {
2008 splay_tree_key k = tgt->list[i].key;
2009
2010 if (k != NULL && tgt->list[i].is_attach)
2011 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
2012 + tgt->list[i].offset,
2013 false, NULL);
2014 }
2015
2016 for (i = 0; i < tgt->list_count; i++)
2017 {
2018 splay_tree_key k = tgt->list[i].key;
2019 if (k == NULL)
2020 continue;
2021
2022 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2023 counts ('n->refcount', 'n->dynamic_refcount'). */
2024 if (tgt->list[i].is_attach)
2025 continue;
2026
2027 bool do_copy, do_remove;
2028 gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
2029
2030 if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
2031 || tgt->list[i].always_copy_from)
2032 gomp_copy_dev2host (devicep, aq,
2033 (void *) (k->host_start + tgt->list[i].offset),
2034 (void *) (k->tgt->tgt_start + k->tgt_offset
2035 + tgt->list[i].offset),
2036 tgt->list[i].length);
2037 if (do_remove)
2038 {
2039 struct target_mem_desc *k_tgt = k->tgt;
2040 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
2041 /* It would be bad if TGT got unmapped while we're still iterating
2042 over its LIST_COUNT entries and still expect to use it in the
2043 following code. */
2044 assert (!is_tgt_unmapped
2045 || k_tgt != tgt);
2046 }
2047 }
2048
2049 if (aq)
2050 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
2051 (void *) tgt);
2052 else
2053 gomp_unref_tgt ((void *) tgt);
2054
2055 gomp_mutex_unlock (&devicep->lock);
2056 }
2057
2058 static void
2059 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2060 htab_t *refcount_set)
2061 {
2062 /* This management of a local refcount_set is for convenience of callers
2063 who do not share a refcount_set over multiple map/unmap uses. */
2064 htab_t local_refcount_set = NULL;
2065 if (refcount_set == NULL)
2066 {
2067 local_refcount_set = htab_create (tgt->list_count);
2068 refcount_set = &local_refcount_set;
2069 }
2070
2071 gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
2072
2073 if (local_refcount_set)
2074 htab_free (local_refcount_set);
2075 }
2076
2077 attribute_hidden void
2078 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
2079 struct goacc_asyncqueue *aq)
2080 {
2081 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
2082 }
2083
2084 static void
2085 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
2086 size_t *sizes, void *kinds, bool short_mapkind)
2087 {
2088 size_t i;
2089 struct splay_tree_key_s cur_node;
2090 const int typemask = short_mapkind ? 0xff : 0x7;
2091
2092 if (!devicep)
2093 return;
2094
2095 if (mapnum == 0)
2096 return;
2097
2098 gomp_mutex_lock (&devicep->lock);
2099 if (devicep->state == GOMP_DEVICE_FINALIZED)
2100 {
2101 gomp_mutex_unlock (&devicep->lock);
2102 return;
2103 }
2104
2105 for (i = 0; i < mapnum; i++)
2106 if (sizes[i])
2107 {
2108 cur_node.host_start = (uintptr_t) hostaddrs[i];
2109 cur_node.host_end = cur_node.host_start + sizes[i];
2110 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2111 if (n)
2112 {
2113 int kind = get_kind (short_mapkind, kinds, i);
2114 if (n->host_start > cur_node.host_start
2115 || n->host_end < cur_node.host_end)
2116 {
2117 gomp_mutex_unlock (&devicep->lock);
2118 gomp_fatal ("Trying to update [%p..%p) object when "
2119 "only [%p..%p) is mapped",
2120 (void *) cur_node.host_start,
2121 (void *) cur_node.host_end,
2122 (void *) n->host_start,
2123 (void *) n->host_end);
2124 }
2125
2126 if (n->aux && n->aux->attach_count)
2127 {
2128 uintptr_t addr = cur_node.host_start;
2129 while (addr < cur_node.host_end)
2130 {
2131 /* We have to be careful not to overwrite still attached
2132 pointers during host<->device updates. */
2133 size_t i = (addr - cur_node.host_start) / sizeof (void *);
2134 if (n->aux->attach_count[i] == 0)
2135 {
2136 void *devaddr = (void *) (n->tgt->tgt_start
2137 + n->tgt_offset
2138 + addr - n->host_start);
2139 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2140 gomp_copy_host2dev (devicep, NULL,
2141 devaddr, (void *) addr,
2142 sizeof (void *), false, NULL);
2143 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2144 gomp_copy_dev2host (devicep, NULL,
2145 (void *) addr, devaddr,
2146 sizeof (void *));
2147 }
2148 addr += sizeof (void *);
2149 }
2150 }
2151 else
2152 {
2153 void *hostaddr = (void *) cur_node.host_start;
2154 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2155 + cur_node.host_start
2156 - n->host_start);
2157 size_t size = cur_node.host_end - cur_node.host_start;
2158
2159 if (GOMP_MAP_COPY_TO_P (kind & typemask))
2160 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2161 false, NULL);
2162 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2163 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2164 }
2165 }
2166 else
2167 {
2168 int kind = get_kind (short_mapkind, kinds, i);
2169
2170 if (GOMP_MAP_PRESENT_P (kind))
2171 {
2172 /* We already looked up the memory region above and it
2173 was missing. */
2174 gomp_mutex_unlock (&devicep->lock);
2175 #ifdef HAVE_INTTYPES_H
2176 gomp_fatal ("present clause: not present on the device "
2177 "(addr: %p, size: %"PRIu64" (0x%"PRIx64"), "
2178 "dev: %d)", (void *) hostaddrs[i],
2179 (uint64_t) sizes[i], (uint64_t) sizes[i],
2180 devicep->target_id);
2181 #else
2182 gomp_fatal ("present clause: not present on the device "
2183 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2184 (void *) hostaddrs[i], (unsigned long) sizes[i],
2185 (unsigned long) sizes[i], devicep->target_id);
2186 #endif
2187 }
2188 }
2189 }
2190 gomp_mutex_unlock (&devicep->lock);
2191 }
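
/* Sketch of how this is typically driven: the 'omp target update' entry
   points elsewhere in libgomp resolve DEVICE and then call this function
   roughly as follows, with devicep, mapnum, hostaddrs, sizes and kinds
   standing in for already-prepared arguments:

     gomp_update (devicep, mapnum, hostaddrs, sizes, kinds,
                  true);  // short_mapkind, as used by the _ext entry point
*/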
2192
2193 static struct gomp_offload_icv_list *
2194 gomp_get_offload_icv_item (int dev_num)
2195 {
2196 struct gomp_offload_icv_list *l = gomp_offload_icv_list;
2197 while (l != NULL && l->device_num != dev_num)
2198 l = l->next;
2199
2200 return l;
2201 }
2202
2203 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2204 depending on the device num and the variable hierarchy
2205 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2206 device and thus no item with that device number is contained in
2207 gomp_offload_icv_list, then a new item is created and added to the list. */
2208
2209 static struct gomp_offload_icvs *
2210 get_gomp_offload_icvs (int dev_num)
2211 {
2212 struct gomp_icv_list *dev
2213 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV);
2214 struct gomp_icv_list *all
2215 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL);
2216 struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num);
2217 struct gomp_offload_icv_list *offload_icvs
2218 = gomp_get_offload_icv_item (dev_num);
2219
2220 if (offload_icvs != NULL)
2221 return &offload_icvs->icvs;
2222
2223 struct gomp_offload_icv_list *new
2224 = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list));
2225
2226 new->device_num = dev_num;
2227 new->icvs.device_num = dev_num;
2228 new->next = gomp_offload_icv_list;
2229
2230 if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS))
2231 new->icvs.nteams = dev_x->icvs.nteams_var;
2232 else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS))
2233 new->icvs.nteams = dev->icvs.nteams_var;
2234 else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS))
2235 new->icvs.nteams = all->icvs.nteams_var;
2236 else
2237 new->icvs.nteams = gomp_default_icv_values.nteams_var;
2238
2239 if (dev_x != NULL
2240 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2241 new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var;
2242 else if (dev != NULL
2243 && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2244 new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var;
2245 else if (all != NULL
2246 && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT))
2247 new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var;
2248 else
2249 new->icvs.teams_thread_limit
2250 = gomp_default_icv_values.teams_thread_limit_var;
2251
2252 if (dev_x != NULL
2253 && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE))
2254 new->icvs.default_device = dev_x->icvs.default_device_var;
2255 else if (dev != NULL
2256 && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE))
2257 new->icvs.default_device = dev->icvs.default_device_var;
2258 else if (all != NULL
2259 && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE))
2260 new->icvs.default_device = all->icvs.default_device_var;
2261 else
2262 new->icvs.default_device = gomp_default_icv_values.default_device_var;
2263
2264 gomp_offload_icv_list = new;
2265 return &new->icvs;
2266 }
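
/* Worked example (a sketch following the _DEV_<num> / _DEV / _ALL hierarchy
   described above): with, say, OMP_NUM_TEAMS_ALL=4 and OMP_NUM_TEAMS_DEV_2=8
   in the environment, the first image load on device 2 records
   icvs.nteams == 8, device 0 falls back to the _ALL value 4, and with neither
   variable set gomp_default_icv_values.nteams_var is used.  */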
2267
2268 /* Load the image pointed to by TARGET_DATA to the device specified by
2269 DEVICEP, and insert into the splay tree the mapping between addresses from
2270 HOST_TABLE and from the loaded target image. We rely on the host and device
2271 compilers emitting variables and functions in the same order. */
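/* As seen through the casts below, HOST_TABLE is effectively laid out as
   follows (a sketch only; the field names are illustrative):

     struct host_table_layout
     {
       void **funcs_start, **funcs_end;          // one address per function
       void **vars_start, **vars_end;            // address/size pairs
       void **ind_funcs_start, **ind_funcs_end;  // only when VERSION supports
                                                 // indirect functions
     };  */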
2272
2273 static void
2274 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2275 const void *host_table, const void *target_data,
2276 bool is_register_lock)
2277 {
2278 void **host_func_table = ((void ***) host_table)[0];
2279 void **host_funcs_end = ((void ***) host_table)[1];
2280 void **host_var_table = ((void ***) host_table)[2];
2281 void **host_vars_end = ((void ***) host_table)[3];
2282 void **host_ind_func_table = NULL;
2283 void **host_ind_funcs_end = NULL;
2284
2285 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
2286 {
2287 host_ind_func_table = ((void ***) host_table)[4];
2288 host_ind_funcs_end = ((void ***) host_table)[5];
2289 }
2290
2291 /* The func and ind_func tables contain only addresses, the var table
2292 contains addresses and corresponding sizes. */
2293 int num_funcs = host_funcs_end - host_func_table;
2294 int num_vars = (host_vars_end - host_var_table) / 2;
2295 int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
2296
2297 /* Load image to device and get target addresses for the image. */
2298 struct addr_pair *target_table = NULL;
2299 uint64_t *rev_target_fn_table = NULL;
2300 int i, num_target_entries;
2301
2302 /* With reverse offload, insert also target-host addresses. */
2303 bool rev_lookup = omp_requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD;
2304
2305 num_target_entries
2306 = devicep->load_image_func (devicep->target_id, version,
2307 target_data, &target_table,
2308 rev_lookup ? &rev_target_fn_table : NULL,
2309 num_ind_funcs
2310 ? (uint64_t *) host_ind_func_table : NULL);
2311
2312 if (num_target_entries != num_funcs + num_vars
2313 /* "+1" due to the additional ICV struct. */
2314 && num_target_entries != num_funcs + num_vars + 1)
2315 {
2316 gomp_mutex_unlock (&devicep->lock);
2317 if (is_register_lock)
2318 gomp_mutex_unlock (&register_lock);
2319 gomp_fatal ("Cannot map target functions or variables"
2320 " (expected %u, have %u)", num_funcs + num_vars,
2321 num_target_entries);
2322 }
2323
2324 /* Insert host-target address mapping into splay tree. */
2325 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2326 /* "+1" due to the additional ICV struct. */
2327 tgt->array = gomp_malloc ((num_funcs + num_vars + 1)
2328 * sizeof (*tgt->array));
2329 if (rev_target_fn_table)
2330 tgt->rev_array = gomp_malloc (num_funcs * sizeof (*tgt->rev_array));
2331 else
2332 tgt->rev_array = NULL;
2333 tgt->refcount = REFCOUNT_INFINITY;
2334 tgt->tgt_start = 0;
2335 tgt->tgt_end = 0;
2336 tgt->to_free = NULL;
2337 tgt->prev = NULL;
2338 tgt->list_count = 0;
2339 tgt->device_descr = devicep;
2340 splay_tree_node array = tgt->array;
2341 reverse_splay_tree_node rev_array = tgt->rev_array;
2342
2343 for (i = 0; i < num_funcs; i++)
2344 {
2345 splay_tree_key k = &array->key;
2346 k->host_start = (uintptr_t) host_func_table[i];
2347 k->host_end = k->host_start + 1;
2348 k->tgt = tgt;
2349 k->tgt_offset = target_table[i].start;
2350 k->refcount = REFCOUNT_INFINITY;
2351 k->dynamic_refcount = 0;
2352 k->aux = NULL;
2353 array->left = NULL;
2354 array->right = NULL;
2355 splay_tree_insert (&devicep->mem_map, array);
2356 if (rev_target_fn_table)
2357 {
2358 reverse_splay_tree_key k2 = &rev_array->key;
2359 k2->dev = rev_target_fn_table[i];
2360 k2->k = k;
2361 rev_array->left = NULL;
2362 rev_array->right = NULL;
2363 if (k2->dev != 0)
2364 reverse_splay_tree_insert (&devicep->mem_map_rev, rev_array);
2365 rev_array++;
2366 }
2367 array++;
2368 }
2369
2370 /* Most significant bit of the size in host and target tables marks
2371 "omp declare target link" variables. */
2372 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2373 const uintptr_t size_mask = ~link_bit;
2374
2375 for (i = 0; i < num_vars; i++)
2376 {
2377 struct addr_pair *target_var = &target_table[num_funcs + i];
2378 uintptr_t target_size = target_var->end - target_var->start;
2379 bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2380
2381 if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2382 {
2383 gomp_mutex_unlock (&devicep->lock);
2384 if (is_register_lock)
2385 gomp_mutex_unlock (&register_lock);
2386 gomp_fatal ("Cannot map target variables (size mismatch)");
2387 }
2388
2389 splay_tree_key k = &array->key;
2390 k->host_start = (uintptr_t) host_var_table[i * 2];
2391 k->host_end
2392 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2393 k->tgt = tgt;
2394 k->tgt_offset = target_var->start;
2395 k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2396 k->dynamic_refcount = 0;
2397 k->aux = NULL;
2398 array->left = NULL;
2399 array->right = NULL;
2400 splay_tree_insert (&devicep->mem_map, array);
2401 array++;
2402 }
2403
2404 /* Last entry is for the ICVs variable.
2405 Tolerate the case where the plugin does not return this entry. */
2406 if (num_funcs + num_vars < num_target_entries)
2407 {
2408 struct addr_pair *var = &target_table[num_funcs + num_vars];
2409
2410 /* Start address will be non-zero for the ICVs variable if
2411 the variable was found in this image. */
2412 if (var->start != 0)
2413 {
2414 /* The index of the devicep within devices[] is regarded as its
2415 'device number', which is different from the per-device type
2416 devicep->target_id. */
2417 int dev_num = (int) (devicep - &devices[0]);
2418 struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num);
2419 size_t var_size = var->end - var->start;
2420 if (var_size != sizeof (struct gomp_offload_icvs))
2421 {
2422 gomp_mutex_unlock (&devicep->lock);
2423 if (is_register_lock)
2424 gomp_mutex_unlock (&register_lock);
2425 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2426 "format");
2427 }
2428 /* Copy the ICVs variable to its place in device memory, thereby
2429 putting its device number into effect. */
2430 gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs,
2431 var_size, false, NULL);
2432 splay_tree_key k = &array->key;
2433 k->host_start = (uintptr_t) icvs;
2434 k->host_end =
2435 k->host_start + (size_mask & sizeof (struct gomp_offload_icvs));
2436 k->tgt = tgt;
2437 k->tgt_offset = var->start;
2438 k->refcount = REFCOUNT_INFINITY;
2439 k->dynamic_refcount = 0;
2440 k->aux = NULL;
2441 array->left = NULL;
2442 array->right = NULL;
2443 splay_tree_insert (&devicep->mem_map, array);
2444 array++;
2445 }
2446 }
2447
2448 free (target_table);
2449 }
2450
2451 /* Unload the mappings described by target_data from device DEVICEP.
2452 The device must be locked. */
2453
2454 static void
2455 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2456 unsigned version,
2457 const void *host_table, const void *target_data)
2458 {
2459 void **host_func_table = ((void ***) host_table)[0];
2460 void **host_funcs_end = ((void ***) host_table)[1];
2461 void **host_var_table = ((void ***) host_table)[2];
2462 void **host_vars_end = ((void ***) host_table)[3];
2463
2464 /* The func table contains only addresses, the var table contains addresses
2465 and corresponding sizes. */
2466 int num_funcs = host_funcs_end - host_func_table;
2467 int num_vars = (host_vars_end - host_var_table) / 2;
2468
2469 struct splay_tree_key_s k;
2470 splay_tree_key node = NULL;
2471
2472 /* Find the mapping at the start of the node array. */
2473 if (num_funcs || num_vars)
2474 {
2475 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2476 : (uintptr_t) host_var_table[0]);
2477 k.host_end = k.host_start + 1;
2478 node = splay_tree_lookup (&devicep->mem_map, &k);
2479 }
2480
2481 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2482 {
2483 gomp_mutex_unlock (&devicep->lock);
2484 gomp_fatal ("image unload fail");
2485 }
2486 if (devicep->mem_map_rev.root)
2487 {
2488 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2489 real allocation. */
2490 assert (node && node->tgt && node->tgt->rev_array);
2491 assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
2492 free (node->tgt->rev_array);
2493 devicep->mem_map_rev.root = NULL;
2494 }
2495
2496 /* Remove mappings from splay tree. */
2497 int i;
2498 for (i = 0; i < num_funcs; i++)
2499 {
2500 k.host_start = (uintptr_t) host_func_table[i];
2501 k.host_end = k.host_start + 1;
2502 splay_tree_remove (&devicep->mem_map, &k);
2503 }
2504
2505 /* Most significant bit of the size in host and target tables marks
2506 "omp declare target link" variables. */
2507 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2508 const uintptr_t size_mask = ~link_bit;
2509 bool is_tgt_unmapped = false;
2510
2511 for (i = 0; i < num_vars; i++)
2512 {
2513 k.host_start = (uintptr_t) host_var_table[i * 2];
2514 k.host_end
2515 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2516
2517 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2518 splay_tree_remove (&devicep->mem_map, &k);
2519 else
2520 {
2521 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2522 is_tgt_unmapped = gomp_remove_var (devicep, n);
2523 }
2524 }
2525
2526 if (node && !is_tgt_unmapped)
2527 {
2528 free (node->tgt);
2529 free (node);
2530 }
2531 }
2532
2533 static void
2534 gomp_requires_to_name (char *buf, size_t size, int requires_mask)
2535 {
2536 char *end = buf + size, *p = buf;
2537 if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
2538 p += snprintf (p, end - p, "unified_address");
2539 if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
2540 p += snprintf (p, end - p, "%sunified_shared_memory",
2541 (p == buf ? "" : ", "));
2542 if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
2543 p += snprintf (p, end - p, "%sreverse_offload",
2544 (p == buf ? "" : ", "));
2545 }
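
/* For example, a REQUIRES_MASK with GOMP_REQUIRES_UNIFIED_ADDRESS and
   GOMP_REQUIRES_REVERSE_OFFLOAD set (the flag values come from
   gomp-constants.h) produces "unified_address, reverse_offload" in BUF.  */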
2546
2547 /* This function should be called from every offload image while loading.
2548 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2549 the target, and DATA. */
2550
2551 void
2552 GOMP_offload_register_ver (unsigned version, const void *host_table,
2553 int target_type, const void *data)
2554 {
2555 int i;
2556
2557 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2558 gomp_fatal ("Library too old for offload (version %u < %u)",
2559 GOMP_VERSION, GOMP_VERSION_LIB (version));
2560
2561 int omp_req;
2562 const void *target_data;
2563 if (GOMP_VERSION_LIB (version) > 1)
2564 {
2565 omp_req = (int) (size_t) ((void **) data)[0];
2566 target_data = &((void **) data)[1];
2567 }
2568 else
2569 {
2570 omp_req = 0;
2571 target_data = data;
2572 }
2573
2574 gomp_mutex_lock (&register_lock);
2575
2576 if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
2577 {
2578 char buf1[sizeof ("unified_address, unified_shared_memory, "
2579 "reverse_offload")];
2580 char buf2[sizeof ("unified_address, unified_shared_memory, "
2581 "reverse_offload")];
2582 gomp_requires_to_name (buf2, sizeof (buf2),
2583 omp_req != GOMP_REQUIRES_TARGET_USED
2584 ? omp_req : omp_requires_mask);
2585 if (omp_req != GOMP_REQUIRES_TARGET_USED
2586 && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
2587 {
2588 gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
2589 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2590 "in multiple compilation units: '%s' vs. '%s'",
2591 buf1, buf2);
2592 }
2593 else
2594 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2595 "some compilation units", buf2);
2596 }
2597 omp_requires_mask = omp_req;
2598
2599 /* Load image to all initialized devices. */
2600 for (i = 0; i < num_devices; i++)
2601 {
2602 struct gomp_device_descr *devicep = &devices[i];
2603 gomp_mutex_lock (&devicep->lock);
2604 if (devicep->type == target_type
2605 && devicep->state == GOMP_DEVICE_INITIALIZED)
2606 gomp_load_image_to_device (devicep, version,
2607 host_table, target_data, true);
2608 gomp_mutex_unlock (&devicep->lock);
2609 }
2610
2611 /* Insert image to array of pending images. */
2612 offload_images
2613 = gomp_realloc_unlock (offload_images,
2614 (num_offload_images + 1)
2615 * sizeof (struct offload_image_descr));
2616 offload_images[num_offload_images].version = version;
2617 offload_images[num_offload_images].type = target_type;
2618 offload_images[num_offload_images].host_table = host_table;
2619 offload_images[num_offload_images].target_data = target_data;
2620
2621 num_offload_images++;
2622 gomp_mutex_unlock (&register_lock);
2623 }
2624
2625 /* Legacy entry point. */
2626
2627 void
2628 GOMP_offload_register (const void *host_table, int target_type,
2629 const void *target_data)
2630 {
2631 GOMP_offload_register_ver (0, host_table, target_type, target_data);
2632 }
2633
2634 /* This function should be called from every offload image while unloading.
2635 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2636 the target, and DATA. */
2637
2638 void
2639 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2640 int target_type, const void *data)
2641 {
2642 int i;
2643
2644 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2645 gomp_fatal ("Library too old for offload (version %u < %u)",
2646 GOMP_VERSION, GOMP_VERSION_LIB (version));
2647
2648 const void *target_data;
2649 if (GOMP_VERSION_LIB (version) > 1)
2650 target_data = &((void **) data)[1];
2651 else
2652 target_data = data;
2653
2654 gomp_mutex_lock (&register_lock);
2655
2656 /* Unload image from all initialized devices. */
2657 for (i = 0; i < num_devices; i++)
2658 {
2659 struct gomp_device_descr *devicep = &devices[i];
2660 gomp_mutex_lock (&devicep->lock);
2661 if (devicep->type == target_type
2662 && devicep->state == GOMP_DEVICE_INITIALIZED)
2663 gomp_unload_image_from_device (devicep, version,
2664 host_table, target_data);
2665 gomp_mutex_unlock (&devicep->lock);
2666 }
2667
2668 /* Remove image from array of pending images. */
2669 for (i = 0; i < num_offload_images; i++)
2670 if (offload_images[i].target_data == target_data)
2671 {
2672 offload_images[i] = offload_images[--num_offload_images];
2673 break;
2674 }
2675
2676 gomp_mutex_unlock (&register_lock);
2677 }
2678
2679 /* Legacy entry point. */
2680
2681 void
2682 GOMP_offload_unregister (const void *host_table, int target_type,
2683 const void *target_data)
2684 {
2685 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2686 }
2687
2688 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2689 must be locked on entry, and remains locked on return. */
2690
2691 attribute_hidden void
2692 gomp_init_device (struct gomp_device_descr *devicep)
2693 {
2694 int i;
2695 if (!devicep->init_device_func (devicep->target_id))
2696 {
2697 gomp_mutex_unlock (&devicep->lock);
2698 gomp_fatal ("device initialization failed");
2699 }
2700
2701 /* Load to the device all images registered so far. */
2702 for (i = 0; i < num_offload_images; i++)
2703 {
2704 struct offload_image_descr *image = &offload_images[i];
2705 if (image->type == devicep->type)
2706 gomp_load_image_to_device (devicep, image->version,
2707 image->host_table, image->target_data,
2708 false);
2709 }
2710
2711 /* Initialize OpenACC asynchronous queues. */
2712 goacc_init_asyncqueues (devicep);
2713
2714 devicep->state = GOMP_DEVICE_INITIALIZED;
2715 }
2716
2717 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2718 must be locked on entry, and remains locked on return. */
2719
2720 attribute_hidden bool
2721 gomp_fini_device (struct gomp_device_descr *devicep)
2722 {
2723 bool ret = goacc_fini_asyncqueues (devicep);
2724 ret &= devicep->fini_device_func (devicep->target_id);
2725 devicep->state = GOMP_DEVICE_FINALIZED;
2726 return ret;
2727 }
2728
2729 attribute_hidden void
2730 gomp_unload_device (struct gomp_device_descr *devicep)
2731 {
2732 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2733 {
2734 unsigned i;
2735
2736 /* Unload from the device all images currently registered. */
2737 for (i = 0; i < num_offload_images; i++)
2738 {
2739 struct offload_image_descr *image = &offload_images[i];
2740 if (image->type == devicep->type)
2741 gomp_unload_image_from_device (devicep, image->version,
2742 image->host_table,
2743 image->target_data);
2744 }
2745 }
2746 }
2747
2748 /* Host fallback for GOMP_target{,_ext} routines. */
2749
2750 static void
2751 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2752 struct gomp_device_descr *devicep, void **args)
2753 {
2754 struct gomp_thread old_thr, *thr = gomp_thread ();
2755
2756 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2757 && devicep != NULL)
2758 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2759 "be used for offloading");
2760
2761 old_thr = *thr;
2762 memset (thr, '\0', sizeof (*thr));
2763 if (gomp_places_list)
2764 {
2765 thr->place = old_thr.place;
2766 thr->ts.place_partition_len = gomp_places_list_len;
2767 }
2768 if (args)
2769 while (*args)
2770 {
2771 intptr_t id = (intptr_t) *args++, val;
2772 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2773 val = (intptr_t) *args++;
2774 else
2775 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2776 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2777 continue;
2778 id &= GOMP_TARGET_ARG_ID_MASK;
2779 if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2780 continue;
2781 val = val > INT_MAX ? INT_MAX : val;
2782 if (val)
2783 gomp_icv (true)->thread_limit_var = val;
2784 break;
2785 }
2786
2787 fn (hostaddrs);
2788 gomp_free_thread (thr);
2789 *thr = old_thr;
2790 }
2791
2792 /* Calculate alignment and size requirements of a private copy of data shared
2793 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2794
2795 static inline void
2796 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2797 unsigned short *kinds, size_t *tgt_align,
2798 size_t *tgt_size)
2799 {
2800 size_t i;
2801 for (i = 0; i < mapnum; i++)
2802 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2803 {
2804 size_t align = (size_t) 1 << (kinds[i] >> 8);
2805 if (*tgt_align < align)
2806 *tgt_align = align;
2807 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2808 *tgt_size += sizes[i];
2809 }
2810 }
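
/* Worked example (sketch): for two GOMP_MAP_FIRSTPRIVATE entries, an int
   (size 4, kinds[i] >> 8 == 2, i.e. 4-byte alignment) followed by a double
   (size 8, kinds[i] >> 8 == 3), this yields *TGT_ALIGN == 8 and
   *TGT_SIZE == 16 (4 bytes, padded to 8, plus 8 more).  */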
2811
2812 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
2813
2814 static inline void
2815 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2816 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2817 size_t tgt_size)
2818 {
2819 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2820 if (al)
2821 tgt += tgt_align - al;
2822 tgt_size = 0;
2823 size_t i;
2824 for (i = 0; i < mapnum; i++)
2825 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2826 {
2827 size_t align = (size_t) 1 << (kinds[i] >> 8);
2828 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2829 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2830 hostaddrs[i] = tgt + tgt_size;
2831 tgt_size = tgt_size + sizes[i];
2832 if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
2833 {
2834 *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2835 ++i;
2836 }
2837 }
2838 }
2839
2840 /* Helper function of GOMP_target{,_ext} routines. */
2841
2842 static void *
2843 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2844 void (*host_fn) (void *))
2845 {
2846 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2847 return (void *) host_fn;
2848 else
2849 {
2850 gomp_mutex_lock (&devicep->lock);
2851 if (devicep->state == GOMP_DEVICE_FINALIZED)
2852 {
2853 gomp_mutex_unlock (&devicep->lock);
2854 return NULL;
2855 }
2856
2857 struct splay_tree_key_s k;
2858 k.host_start = (uintptr_t) host_fn;
2859 k.host_end = k.host_start + 1;
2860 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2861 gomp_mutex_unlock (&devicep->lock);
2862 if (tgt_fn == NULL)
2863 return NULL;
2864
2865 return (void *) tgt_fn->tgt_offset;
2866 }
2867 }
2868
2869 /* Called when encountering a target directive. If DEVICE
2870 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2871 GOMP_DEVICE_HOST_FALLBACK (or any value
2872 larger than the last available hw device), use the host fallback.
2873 FN is the address of the host code, UNUSED is part of the current ABI, but
2874 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2875 with MAPNUM entries, holding the addresses of the host objects,
2876 the sizes of the host objects (resp., for pointer kinds, the pointer bias
2877 with an assumed size of sizeof (void *)) and the kinds. */
2878
2879 void
2880 GOMP_target (int device, void (*fn) (void *), const void *unused,
2881 size_t mapnum, void **hostaddrs, size_t *sizes,
2882 unsigned char *kinds)
2883 {
2884 struct gomp_device_descr *devicep = resolve_device (device, true);
2885
2886 void *fn_addr;
2887 if (devicep == NULL
2888 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2889 /* All shared memory devices should use the GOMP_target_ext function. */
2890 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2891 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2892 return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2893
2894 htab_t refcount_set = htab_create (mapnum);
2895 struct target_mem_desc *tgt_vars
2896 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2897 &refcount_set, GOMP_MAP_VARS_TARGET);
2898 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2899 NULL);
2900 htab_clear (refcount_set);
2901 gomp_unmap_vars (tgt_vars, true, &refcount_set);
2902 htab_free (refcount_set);
2903 }
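
/* A sketch of the kind of call the compiler emits for a simple
   'omp target map(tofrom: x)' region, with fn standing in for the outlined
   body (names are hypothetical; newer GCC releases emit GOMP_target_ext,
   below, instead):

     int x = 42;
     void *hostaddrs[] = { &x };
     size_t sizes[] = { sizeof (x) };
     unsigned char kinds[] = { GOMP_MAP_TOFROM };
     GOMP_target (GOMP_DEVICE_ICV, fn, NULL, 1, hostaddrs, sizes, kinds);  */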
2904
2905 static inline unsigned int
2906 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2907 {
2908 /* If we cannot run asynchronously, simply ignore nowait. */
2909 if (devicep != NULL && devicep->async_run_func == NULL)
2910 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2911
2912 return flags;
2913 }
2914
2915 static void
2916 gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device)
2917 {
2918 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
2919 if (item == NULL)
2920 return;
2921
2922 void *host_ptr = &item->icvs;
2923 void *dev_ptr = omp_get_mapped_ptr (host_ptr, device);
2924 if (dev_ptr != NULL)
2925 gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr,
2926 sizeof (struct gomp_offload_icvs));
2927 }
2928
2929 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2930 and several arguments have been added:
2931 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2932 DEPEND is array of dependencies, see GOMP_task for details.
2933
2934 ARGS is a pointer to an array consisting of a variable number of both
2935 device-independent and device-specific arguments; each argument takes one or
2936 two elements, where the first specifies for which device it is intended, the
2937 type and optionally also the value. If the value is not present in the first
2938 element, the whole second element holds the actual value. The last element
2939 of the array is a single NULL. Among the device-independent arguments are
2940 for example NUM_TEAMS and THREAD_LIMIT.
2941
2942 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2943 that value, or 1 if the teams construct is not present, or 0 if the
2944 teams construct does not have a num_teams clause and so the choice is
2945 implementation defined, and -1 if it can't be determined on the host
2946 what value GOMP_teams will have on the device.
2947 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2948 body with that value, or 0 if the teams construct does not have a
2949 thread_limit clause or the teams construct is not present, or -1 if it
2950 can't be determined on the host what value GOMP_teams will have on the device. */
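
/* Sketch of the ARGS encoding described above: a single device-independent
   NUM_TEAMS argument with the value 5 passed inline, followed by the NULL
   terminator, mirroring the construction used further below:

     void *args[] =
       { (void *) ((5 << GOMP_TARGET_ARG_VALUE_SHIFT)
                   | GOMP_TARGET_ARG_NUM_TEAMS),
         NULL };

   If a value does not fit inline, GOMP_TARGET_ARG_SUBSEQUENT_PARAM is set
   instead and the value occupies the following array element.  */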
2951
2952 void
2953 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2954 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2955 unsigned int flags, void **depend, void **args)
2956 {
2957 struct gomp_device_descr *devicep = resolve_device (device, true);
2958 size_t tgt_align = 0, tgt_size = 0;
2959 bool fpc_done = false;
2960
2961 /* Obtain the original TEAMS and THREADS values from ARGS. */
2962 intptr_t orig_teams = 1, orig_threads = 0;
2963 size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
2964 void **tmpargs = args;
2965 while (*tmpargs)
2966 {
2967 intptr_t id = (intptr_t) *tmpargs++, val;
2968 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2969 {
2970 val = (intptr_t) *tmpargs++;
2971 len = 2;
2972 }
2973 else
2974 {
2975 val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2976 len = 1;
2977 }
2978 num_args += len;
2979 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2980 continue;
2981 val = val > INT_MAX ? INT_MAX : val;
2982 if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
2983 {
2984 orig_teams = val;
2985 teams_len = len;
2986 }
2987 else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
2988 {
2989 orig_threads = val;
2990 threads_len = len;
2991 }
2992 }
2993
2994 intptr_t new_teams = orig_teams, new_threads = orig_threads;
2995 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2996 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2997 value could not be determined. No change.
2998 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2999 Set device-specific value.
3000 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3001 No change. */
3002 if (orig_teams == -2)
3003 new_teams = 1;
3004 else if (orig_teams == 0)
3005 {
3006 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3007 if (item != NULL)
3008 new_teams = item->icvs.nteams;
3009 }
3010 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3011 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3012 e.g. a THREAD_LIMIT clause. */
3013 if (orig_teams > -2 && orig_threads == 0)
3014 {
3015 struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
3016 if (item != NULL)
3017 new_threads = item->icvs.teams_thread_limit;
3018 }
3019
3020 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3021 updated. */
3022 void **new_args = args;
3023 if (orig_teams != new_teams || orig_threads != new_threads)
3024 {
3025 size_t tms_len = (orig_teams == new_teams
3026 ? teams_len
3027 : (new_teams > -(1 << 15) && new_teams < (1 << 15)
3028 ? 1 : 2));
3029 size_t ths_len = (orig_threads == new_threads
3030 ? threads_len
3031 : (new_threads > -(1 << 15) && new_threads < (1 << 15)
3032 ? 1 : 2));
3033 /* One additional item after the last arg must be NULL. */
3034 size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
3035 + ths_len + 1;
3036 new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
3037
3038 tmpargs = args;
3039 void **tmp_new_args = new_args;
3040 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3041 too if they have not been changed, and skipped otherwise. */
3042 while (*tmpargs)
3043 {
3044 intptr_t id = (intptr_t) *tmpargs;
3045 if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
3046 && orig_teams != new_teams)
3047 || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
3048 && orig_threads != new_threads))
3049 {
3050 tmpargs++;
3051 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3052 tmpargs++;
3053 }
3054 else
3055 {
3056 *tmp_new_args++ = *tmpargs++;
3057 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
3058 *tmp_new_args++ = *tmpargs++;
3059 }
3060 }
3061
3062 /* Add the new TEAMS arg to the new args list if it has been changed. */
3063 if (orig_teams != new_teams)
3064 {
3065 intptr_t new_val = new_teams;
3066 if (tms_len == 1)
3067 {
3068 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3069 | GOMP_TARGET_ARG_NUM_TEAMS;
3070 *tmp_new_args++ = (void *) new_val;
3071 }
3072 else
3073 {
3074 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3075 | GOMP_TARGET_ARG_NUM_TEAMS);
3076 *tmp_new_args++ = (void *) new_val;
3077 }
3078 }
3079
3080 /* Add the new THREADS arg to the new args list if it has been changed. */
3081 if (orig_threads != new_threads)
3082 {
3083 intptr_t new_val = new_threads;
3084 if (ths_len == 1)
3085 {
3086 new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
3087 | GOMP_TARGET_ARG_THREAD_LIMIT;
3088 *tmp_new_args++ = (void *) new_val;
3089 }
3090 else
3091 {
3092 *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3093 | GOMP_TARGET_ARG_THREAD_LIMIT);
3094 *tmp_new_args++ = (void *) new_val;
3095 }
3096 }
3097
3098 *tmp_new_args = NULL;
3099 }
3100
3101 flags = clear_unsupported_flags (devicep, flags);
3102
3103 if (flags & GOMP_TARGET_FLAG_NOWAIT)
3104 {
3105 struct gomp_thread *thr = gomp_thread ();
3106 /* Create a team if we don't have any around, as nowait
3107 target tasks make sense to run asynchronously even when
3108 outside of any parallel. */
3109 if (__builtin_expect (thr->ts.team == NULL, 0))
3110 {
3111 struct gomp_team *team = gomp_new_team (1);
3112 struct gomp_task *task = thr->task;
3113 struct gomp_task **implicit_task = &task;
3114 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
3115 team->prev_ts = thr->ts;
3116 thr->ts.team = team;
3117 thr->ts.team_id = 0;
3118 thr->ts.work_share = &team->work_shares[0];
3119 thr->ts.last_work_share = NULL;
3120 #ifdef HAVE_SYNC_BUILTINS
3121 thr->ts.single_count = 0;
3122 #endif
3123 thr->ts.static_trip = 0;
3124 thr->task = &team->implicit_task[0];
3125 gomp_init_task (thr->task, NULL, icv);
3126 while (*implicit_task
3127 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
3128 implicit_task = &(*implicit_task)->parent;
3129 if (*implicit_task)
3130 {
3131 thr->task = *implicit_task;
3132 gomp_end_task ();
3133 free (*implicit_task);
3134 thr->task = &team->implicit_task[0];
3135 }
3136 else
3137 pthread_setspecific (gomp_thread_destructor, thr);
3138 if (implicit_task != &task)
3139 {
3140 *implicit_task = thr->task;
3141 thr->task = task;
3142 }
3143 }
3144 if (thr->ts.team
3145 && !thr->task->final_task)
3146 {
3147 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
3148 sizes, kinds, flags, depend, new_args,
3149 GOMP_TARGET_TASK_BEFORE_MAP);
3150 return;
3151 }
3152 }
3153
3154 /* If there are depend clauses, but nowait is not present
3155 (or we are in a final task), block the parent task until the
3156 dependencies are resolved and then just continue with the rest
3157 of the function as if it is a merged task. */
3158 if (depend != NULL)
3159 {
3160 struct gomp_thread *thr = gomp_thread ();
3161 if (thr->task && thr->task->depend_hash)
3162 {
3163 /* If we might need to wait, copy firstprivate now. */
3164 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3165 &tgt_align, &tgt_size);
3166 if (tgt_align)
3167 {
3168 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3169 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3170 tgt_align, tgt_size);
3171 }
3172 fpc_done = true;
3173 gomp_task_maybe_wait_for_dependencies (depend);
3174 }
3175 }
3176
3177 void *fn_addr;
3178 if (devicep == NULL
3179 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3180 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
3181 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3182 {
3183 if (!fpc_done)
3184 {
3185 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3186 &tgt_align, &tgt_size);
3187 if (tgt_align)
3188 {
3189 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3190 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3191 tgt_align, tgt_size);
3192 }
3193 }
3194 gomp_target_fallback (fn, hostaddrs, devicep, new_args);
3195 return;
3196 }
3197
3198 struct target_mem_desc *tgt_vars;
3199 htab_t refcount_set = NULL;
3200
3201 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3202 {
3203 if (!fpc_done)
3204 {
3205 calculate_firstprivate_requirements (mapnum, sizes, kinds,
3206 &tgt_align, &tgt_size);
3207 if (tgt_align)
3208 {
3209 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3210 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
3211 tgt_align, tgt_size);
3212 }
3213 }
3214 tgt_vars = NULL;
3215 }
3216 else
3217 {
3218 refcount_set = htab_create (mapnum);
3219 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
3220 true, &refcount_set, GOMP_MAP_VARS_TARGET);
3221 }
3222 devicep->run_func (devicep->target_id, fn_addr,
3223 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
3224 new_args);
3225 if (tgt_vars)
3226 {
3227 htab_clear (refcount_set);
3228 gomp_unmap_vars (tgt_vars, true, &refcount_set);
3229 }
3230 if (refcount_set)
3231 htab_free (refcount_set);
3232
3233 /* Copy back ICVs from device to host.
3234 HOST_PTR is expected to exist since it was added in
3235 gomp_load_image_to_device if not already available. */
3236 gomp_copy_back_icvs (devicep, device);
3237
3238 }
3239
3240
3241 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3242 keeping track of all variable handling, assuming that reverse offload occurs
3243 only very rarely. The downside is that the reverse search is slow. */
3244
3245 struct gomp_splay_tree_rev_lookup_data {
3246 uintptr_t tgt_start;
3247 uintptr_t tgt_end;
3248 splay_tree_key key;
3249 };
3250
3251 static int
3252 gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
3253 {
3254 struct gomp_splay_tree_rev_lookup_data *data;
3255 data = (struct gomp_splay_tree_rev_lookup_data *)d;
3256 uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
3257
3258 if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
3259 return 0;
3260
3261 size_t j;
3262 for (j = 0; j < key->tgt->list_count; j++)
3263 if (key->tgt->list[j].key == key)
3264 break;
3265 assert (j < key->tgt->list_count);
3266 uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
3267
3268 if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
3269 || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
3270 {
3271 data->key = key;
3272 return 1;
3273 }
3274 return 0;
3275 }
3276
3277 static inline splay_tree_key
3278 gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
3279 bool zero_len)
3280 {
3281 struct gomp_splay_tree_rev_lookup_data data;
3282 data.key = NULL;
3283 data.tgt_start = tgt_start;
3284 data.tgt_end = tgt_end;
3285
3286 if (tgt_start != tgt_end)
3287 {
3288 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3289 return data.key;
3290 }
3291
3292 data.tgt_end++;
3293 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3294 if (data.key != NULL || zero_len)
3295 return data.key;
3296 data.tgt_end--;
3297
3298 data.tgt_start--;
3299 splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3300 return data.key;
3301 }
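
/* The tgt_end++ / tgt_start-- retries above handle zero-length lookups: for
   TGT_START == TGT_END, say a zero-length section at device address 0x1000,
   the search first probes [0x1000, 0x1001) for a mapping that starts there
   and, unless ZERO_LEN, then probes [0xfff, 0x1000) for one that ends there
   (the addresses are illustrative).  */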
3302
3303 struct cpy_data
3304 {
3305 uint64_t devaddr;
3306 bool present, aligned;
3307 };
3308
3309
3310 /* Search just-mapped reverse-offload data; returns the index if found,
3311 otherwise a value >= N. */
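/* Note (inferred from the loop below): for a GOMP_MAP_STRUCT entry, SIZES[i]
   holds the number of trailing member entries rather than a byte count, so
   the struct's device-side end address is taken from its last member and the
   member entries themselves are then skipped.  */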
3312
3313 static inline int
3314 gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
3315 unsigned short *kinds, uint64_t *sizes, size_t n,
3316 uint64_t tgt_start, uint64_t tgt_end)
3317 {
3318 const bool short_mapkind = true;
3319 const int typemask = short_mapkind ? 0xff : 0x7;
3320 size_t i;
3321 for (i = 0; i < n; i++)
3322 {
3323 bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
3324 == GOMP_MAP_STRUCT);
3325 uint64_t dev_end;
3326 if (!is_struct)
3327 dev_end = d[i].devaddr + sizes[i];
3328 else
3329 {
3330 if (i + sizes[i] < n)
3331 dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
3332 else
3333 dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
3334 }
3335 if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
3336 || (dev_end > tgt_start && d[i].devaddr < tgt_end))
3337 break;
3338 if (is_struct)
3339 i += sizes[i];
3340 }
3341 return i;
3342 }
3343
3344 static inline int
3345 gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
3346 unsigned short *kinds, uint64_t *sizes,
3347 size_t n, uint64_t tgt_start, uint64_t tgt_end,
3348 bool zero_len)
3349 {
3350 size_t i;
3351 if (tgt_start != tgt_end)
3352 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3353 tgt_start, tgt_end);
3354 tgt_end++;
3355 i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3356 tgt_start, tgt_end);
3357 if (i < n || zero_len)
3358 return i;
3359 tgt_end--;
3360
3361 tgt_start--;
3362 return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
3363 tgt_start, tgt_end);
3364 }
3365
3366 /* Handle reverse offload. This is called by the device plugins for a
3367 reverse offload; it is not called if the outer target runs on the host.
3368 The mapping is simplified: device-affecting constructs (except for target
3369 with device(ancestor:1)) must not be encountered; in particular not
3370 target (enter/exit) data. */
3371
3372 void
3373 gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
3374 uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num,
3375 struct goacc_asyncqueue *aq)
3376 {
3377 /* Return early if there is no offload code. */
3378 if (sizeof (OFFLOAD_PLUGINS) == sizeof (""))
3379 return;
3380 /* Currently, this fails because of calculate_firstprivate_requirements
3381 below; it could be fixed but additional code needs to be updated to
3382 handle 32bit hosts - thus, it is not worthwhile. */
3383 if (sizeof (void *) != sizeof (uint64_t))
3384 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3385
3386 struct cpy_data *cdata = NULL;
3387 uint64_t *devaddrs;
3388 uint64_t *sizes;
3389 unsigned short *kinds;
3390 const bool short_mapkind = true;
3391 const int typemask = short_mapkind ? 0xff : 0x7;
3392 struct gomp_device_descr *devicep = resolve_device (dev_num, false);
3393
3394 reverse_splay_tree_key n;
3395 struct reverse_splay_tree_key_s k;
3396 k.dev = fn_ptr;
3397
3398 gomp_mutex_lock (&devicep->lock);
3399 n = gomp_map_lookup_rev (&devicep->mem_map_rev, &k);
3400 gomp_mutex_unlock (&devicep->lock);
3401
3402 if (n == NULL)
3403 gomp_fatal ("Cannot find reverse-offload function");
3404 void (*host_fn)() = (void (*)()) n->k->host_start;
3405
3406 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
3407 {
3408 devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
3409 sizes = (uint64_t *) (uintptr_t) sizes_ptr;
3410 kinds = (unsigned short *) (uintptr_t) kinds_ptr;
3411 }
3412 else
3413 {
3414 devaddrs = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3415 sizes = (uint64_t *) gomp_malloc (mapnum * sizeof (uint64_t));
3416 kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short));
3417 gomp_copy_dev2host (devicep, aq, devaddrs,
3418 (const void *) (uintptr_t) devaddrs_ptr,
3419 mapnum * sizeof (uint64_t));
3420 gomp_copy_dev2host (devicep, aq, sizes,
3421 (const void *) (uintptr_t) sizes_ptr,
3422 mapnum * sizeof (uint64_t));
3423 gomp_copy_dev2host (devicep, aq, kinds,
3424 (const void *) (uintptr_t) kinds_ptr,
3425 mapnum * sizeof (unsigned short));
3426 if (aq && !devicep->openacc.async.synchronize_func (aq))
3427 exit (EXIT_FAILURE);
3428 }
3429
3430 size_t tgt_align = 0, tgt_size = 0;
3431
3432 /* If actually executed on 32bit systems, the casts lead to wrong code;
3433 but 32bit with offloading is not supported; see top of this function. */
3434 calculate_firstprivate_requirements (mapnum, (void *) (uintptr_t) sizes,
3435 (void *) (uintptr_t) kinds,
3436 &tgt_align, &tgt_size);
3437
3438 if (tgt_align)
3439 {
3440 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
3441 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
3442 if (al)
3443 tgt += tgt_align - al;
3444 tgt_size = 0;
3445 for (uint64_t i = 0; i < mapnum; i++)
3446 if (get_kind (short_mapkind, kinds, i) == GOMP_MAP_FIRSTPRIVATE
3447 && devaddrs[i] != 0)
3448 {
3449 size_t align = (size_t) 1 << (kinds[i] >> 8);
3450 tgt_size = (tgt_size + align - 1) & ~(align - 1);
3451 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3452 memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i],
3453 (size_t) sizes[i]);
3454 else
3455 {
3456 gomp_copy_dev2host (devicep, aq, tgt + tgt_size,
3457 (void *) (uintptr_t) devaddrs[i],
3458 (size_t) sizes[i]);
3459 if (aq && !devicep->openacc.async.synchronize_func (aq))
3460 exit (EXIT_FAILURE);
3461 }
3462 devaddrs[i] = (uint64_t) (uintptr_t) tgt + tgt_size;
3463 tgt_size = tgt_size + sizes[i];
3464 if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3465 && i + 1 < mapnum
3466 && ((get_kind (short_mapkind, kinds, i) & typemask)
3467 == GOMP_MAP_ATTACH))
3468 {
3469 *(uint64_t*) (uintptr_t) (devaddrs[i+1] + sizes[i+1])
3470 = (uint64_t) devaddrs[i];
3471 ++i;
3472 }
3473 }
3474 }
3475
3476 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3477 {
3478 size_t j, struct_cpy = 0;
3479 splay_tree_key n2;
3480 cdata = gomp_alloca (sizeof (*cdata) * mapnum);
3481 memset (cdata, '\0', sizeof (*cdata) * mapnum);
3482 gomp_mutex_lock (&devicep->lock);
3483 for (uint64_t i = 0; i < mapnum; i++)
3484 {
3485 if (devaddrs[i] == 0)
3486 continue;
3487 n = NULL;
3488 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3489 switch (kind)
3490 {
3491 case GOMP_MAP_FIRSTPRIVATE:
3492 case GOMP_MAP_FIRSTPRIVATE_INT:
3493 continue;
3494
3495 case GOMP_MAP_DELETE:
3496 case GOMP_MAP_RELEASE:
3497 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
3498 /* Assume it is present; look it up - but ignore unless the
3499 present clause is there. */
3500 case GOMP_MAP_ALLOC:
3501 case GOMP_MAP_FROM:
3502 case GOMP_MAP_FORCE_ALLOC:
3503 case GOMP_MAP_FORCE_FROM:
3504 case GOMP_MAP_ALWAYS_FROM:
3505 case GOMP_MAP_TO:
3506 case GOMP_MAP_TOFROM:
3507 case GOMP_MAP_FORCE_TO:
3508 case GOMP_MAP_FORCE_TOFROM:
3509 case GOMP_MAP_ALWAYS_TO:
3510 case GOMP_MAP_ALWAYS_TOFROM:
3511 case GOMP_MAP_FORCE_PRESENT:
3512 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3513 case GOMP_MAP_ALWAYS_PRESENT_TO:
3514 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3515 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
3516 cdata[i].devaddr = devaddrs[i];
3517 bool zero_len = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3518 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
3519 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3520 devaddrs[i],
3521 devaddrs[i] + sizes[i], zero_len);
3522 if (j < i)
3523 {
3524 n2 = NULL;
3525 cdata[i].present = true;
3526 devaddrs[i] = devaddrs[j] + devaddrs[i] - cdata[j].devaddr;
3527 }
3528 else
3529 {
3530 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3531 devaddrs[i],
3532 devaddrs[i] + sizes[i], zero_len);
3533 cdata[i].present = n2 != NULL;
3534 }
3535 if (!cdata[i].present && GOMP_MAP_PRESENT_P (kind))
3536 {
3537 gomp_mutex_unlock (&devicep->lock);
3538 #ifdef HAVE_INTTYPES_H
3539 gomp_fatal ("present clause: no corresponding data on "
3540 "parent device at %p with size %"PRIu64,
3541 (void *) (uintptr_t) devaddrs[i],
3542 (uint64_t) sizes[i]);
3543 #else
3544 gomp_fatal ("present clause: no corresponding data on "
3545 "parent device at %p with size %lu",
3546 (void *) (uintptr_t) devaddrs[i],
3547 (unsigned long) sizes[i]);
3548 #endif
3549 break;
3550 }
3551 else if (!cdata[i].present
3552 && kind != GOMP_MAP_DELETE
3553 && kind != GOMP_MAP_RELEASE
3554 && kind != GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
3555 {
3556 cdata[i].aligned = true;
3557 size_t align = (size_t) 1 << (kinds[i] >> 8);
3558 devaddrs[i]
3559 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3560 sizes[i]);
3561 }
3562 else if (n2 != NULL)
3563 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3564 - (n2->tgt->tgt_start + n2->tgt_offset));
3565 if (((!cdata[i].present || struct_cpy)
3566 && (kind == GOMP_MAP_TO || kind == GOMP_MAP_TOFROM))
3567 || kind == GOMP_MAP_FORCE_TO
3568 || kind == GOMP_MAP_FORCE_TOFROM
3569 || GOMP_MAP_ALWAYS_TO_P (kind))
3570 {
3571 gomp_copy_dev2host (devicep, aq,
3572 (void *) (uintptr_t) devaddrs[i],
3573 (void *) (uintptr_t) cdata[i].devaddr,
3574 sizes[i]);
3575 if (aq && !devicep->openacc.async.synchronize_func (aq))
3576 {
3577 gomp_mutex_unlock (&devicep->lock);
3578 exit (EXIT_FAILURE);
3579 }
3580 }
3581 if (struct_cpy)
3582 struct_cpy--;
3583 break;
3584 case GOMP_MAP_ATTACH:
3585 case GOMP_MAP_POINTER:
3586 case GOMP_MAP_ALWAYS_POINTER:
3587 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3588 devaddrs[i] + sizes[i],
3589 devaddrs[i] + sizes[i]
3590 + sizeof (void*), false);
3591 cdata[i].present = n2 != NULL;
3592 cdata[i].devaddr = devaddrs[i];
3593 if (n2)
3594 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3595 - (n2->tgt->tgt_start + n2->tgt_offset));
3596 else
3597 {
3598 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3599 devaddrs[i] + sizes[i],
3600 devaddrs[i] + sizes[i]
3601 + sizeof (void*), false);
3602 if (j < i)
3603 {
3604 cdata[i].present = true;
3605 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3606 - cdata[j].devaddr);
3607 }
3608 }
3609 if (!cdata[i].present)
3610 devaddrs[i] = (uintptr_t) gomp_malloc (sizeof (void*));
3611 /* Assume that when present, the pointer is already correct. */
3612 if (!n2)
3613 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[i])
3614 = devaddrs[i-1];
3615 break;
3616 case GOMP_MAP_TO_PSET:
3617 /* Assume that when present, the pointers are fine and no 'to:'
3618 is required. */
3619 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3620 devaddrs[i], devaddrs[i] + sizes[i],
3621 false);
3622 cdata[i].present = n2 != NULL;
3623 cdata[i].devaddr = devaddrs[i];
3624 if (n2)
3625 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3626 - (n2->tgt->tgt_start + n2->tgt_offset));
3627 else
3628 {
3629 j = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, i,
3630 devaddrs[i],
3631 devaddrs[i] + sizes[i], false);
3632 if (j < i)
3633 {
3634 cdata[i].present = true;
3635 devaddrs[i] = (devaddrs[j] + devaddrs[i]
3636 - cdata[j].devaddr);
3637 }
3638 }
3639 if (!cdata[i].present)
3640 {
3641 cdata[i].aligned = true;
3642 size_t align = (size_t) 1 << (kinds[i] >> 8);
3643 devaddrs[i]
3644 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align,
3645 sizes[i]);
3646 gomp_copy_dev2host (devicep, aq,
3647 (void *) (uintptr_t) devaddrs[i],
3648 (void *) (uintptr_t) cdata[i].devaddr,
3649 sizes[i]);
3650 if (aq && !devicep->openacc.async.synchronize_func (aq))
3651 {
3652 gomp_mutex_unlock (&devicep->lock);
3653 exit (EXIT_FAILURE);
3654 }
3655 }
3656 for (j = i + 1; j < mapnum; j++)
3657 {
3658 kind = get_kind (short_mapkind, kinds, j) & typemask;
3659 if (!GOMP_MAP_ALWAYS_POINTER_P (kind)
3660 && !GOMP_MAP_POINTER_P (kind))
3661 break;
3662 if (devaddrs[j] < devaddrs[i])
3663 break;
3664 if (cdata[i].present)
3665 continue;
3666 if (devaddrs[j] == 0)
3667 {
3668 *(uint64_t *) (uintptr_t) (devaddrs[i] + sizes[j]) = 0;
3669 continue;
3670 }
3671 int k;
3672 n2 = NULL;
3673 /* Dereference devaddrs[j] to get the device addr. */
3674 assert (devaddrs[j] - sizes[j] == cdata[i].devaddr);
3675 devaddrs[j] = *(uint64_t *) (uintptr_t) (devaddrs[i]
3676 + sizes[j]);
3677 cdata[j].present = true;
3678 cdata[j].devaddr = devaddrs[j];
3679 if (devaddrs[j] == 0)
3680 continue;
3681 k = gomp_map_cdata_lookup (cdata, devaddrs, kinds, sizes, j,
3682 devaddrs[j],
3683 devaddrs[j] + sizeof (void*),
3684 false);
3685 if (k < j)
3686 devaddrs[j] = (devaddrs[k] + devaddrs[j]
3687 - cdata[k].devaddr);
3688 else
3689 {
3690 n2 = gomp_map_rev_lookup (&devicep->mem_map,
3691 devaddrs[j],
3692 devaddrs[j] + sizeof (void*),
3693 false);
3694 if (n2 == NULL)
3695 {
3696 gomp_mutex_unlock (&devicep->lock);
3697 gomp_fatal ("Pointer target wasn't mapped");
3698 }
3699 devaddrs[j] = (n2->host_start + cdata[j].devaddr
3700 - (n2->tgt->tgt_start + n2->tgt_offset));
3701 }
3702 *(void **) (uintptr_t) (devaddrs[i] + sizes[j])
3703 = (void *) (uintptr_t) devaddrs[j];
3704 }
3705 i = j - 1;
3706 break;
3707 case GOMP_MAP_STRUCT:
3708 n2 = gomp_map_rev_lookup (&devicep->mem_map, devaddrs[i+1],
3709 devaddrs[i + sizes[i]]
3710 + sizes[i + sizes[i]], false);
3711 cdata[i].present = n2 != NULL;
3712 cdata[i].devaddr = devaddrs[i];
3713 struct_cpy = cdata[i].present ? 0 : sizes[i];
3714 if (!n2)
3715 {
3716 size_t sz = (size_t) (devaddrs[i + sizes[i]]
3717 - devaddrs[i+1]
3718 + sizes[i + sizes[i]]);
3719 size_t align = (size_t) 1 << (kinds[i] >> 8);
3720 cdata[i].aligned = true;
3721 devaddrs[i] = (uintptr_t) gomp_aligned_alloc (align, sz);
3722 devaddrs[i] -= devaddrs[i+1] - cdata[i].devaddr;
3723 }
3724 else
3725 devaddrs[i] = (n2->host_start + cdata[i].devaddr
3726 - (n2->tgt->tgt_start + n2->tgt_offset));
3727 break;
3728 default:
3729 gomp_mutex_unlock (&devicep->lock);
3730 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds[i]);
3731 }
3732 }
3733 gomp_mutex_unlock (&devicep->lock);
3734 }
3735
3736 host_fn (devaddrs);
3737
3738 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
3739 {
3740 uint64_t struct_cpy = 0;
3741 bool clean_struct = false;
3742 for (uint64_t i = 0; i < mapnum; i++)
3743 {
3744 if (cdata[i].devaddr == 0)
3745 continue;
3746 int kind = get_kind (short_mapkind, kinds, i) & typemask;
3747 bool copy = !cdata[i].present || struct_cpy;
3748 switch (kind)
3749 {
3750 case GOMP_MAP_FORCE_FROM:
3751 case GOMP_MAP_FORCE_TOFROM:
3752 case GOMP_MAP_ALWAYS_FROM:
3753 case GOMP_MAP_ALWAYS_TOFROM:
3754 case GOMP_MAP_ALWAYS_PRESENT_FROM:
3755 case GOMP_MAP_ALWAYS_PRESENT_TOFROM:
3756 copy = true;
3757 /* FALLTHRU */
3758 case GOMP_MAP_FROM:
3759 case GOMP_MAP_TOFROM:
3760 if (copy)
3761 {
3762 gomp_copy_host2dev (devicep, aq,
3763 (void *) (uintptr_t) cdata[i].devaddr,
3764 (void *) (uintptr_t) devaddrs[i],
3765 sizes[i], false, NULL);
3766 if (aq && !devicep->openacc.async.synchronize_func (aq))
3767 exit (EXIT_FAILURE);
3768 }
3769 default:
3770 break;
3771 }
3772 if (struct_cpy)
3773 {
3774 struct_cpy--;
3775 continue;
3776 }
3777 if (kind == GOMP_MAP_STRUCT && !cdata[i].present)
3778 {
3779 clean_struct = true;
3780 struct_cpy = sizes[i];
3781 }
3782 else if (!cdata[i].present && cdata[i].aligned)
3783 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3784 else if (!cdata[i].present)
3785 free ((void *) (uintptr_t) devaddrs[i]);
3786 }
3787 if (clean_struct)
3788 for (uint64_t i = 0; i < mapnum; i++)
3789 if (!cdata[i].present
3790 && ((get_kind (short_mapkind, kinds, i) & typemask)
3791 == GOMP_MAP_STRUCT))
3792 {
3793 devaddrs[i] += cdata[i+1].devaddr - cdata[i].devaddr;
3794 gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
3795 }
3796
3797 free (devaddrs);
3798 free (sizes);
3799 free (kinds);
3800 }
3801 }
3802
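/* Editor's sketch (not part of libgomp): the reverse-offload marshalling
   above backs a host-directed target region nested inside an offloaded
   one.  A minimal user-level example, using only standard OpenMP syntax;
   'host_side_log' is a hypothetical host function:

     #pragma omp requires reverse_offload

     void host_side_log (int *a, int n);

     void f (int *a, int n)
     {
     #pragma omp target map(tofrom: a[0:n])
       {
         a[0] += 1;
     #pragma omp target device(ancestor: 1) map(always, tofrom: a[0:n])
         host_side_log (a, n);
       }
     }

   For the inner construct, the device hands DEVADDRS/SIZES/KINDS back to
   the host; the code above copies them over unless memory is shared,
   translates each device address to a host address via mem_map and
   mem_map_rev, runs HOST_FN, and copies 'from' data back to the device
   afterwards.  */
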
3803 /* Host fallback for GOMP_target_data{,_ext} routines. */
3804
3805 static void
3806 gomp_target_data_fallback (struct gomp_device_descr *devicep)
3807 {
3808 struct gomp_task_icv *icv = gomp_icv (false);
3809
3810 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
3811 && devicep != NULL)
3812 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3813 "be used for offloading");
3814
3815 if (icv->target_data)
3816 {
3817 /* Even when doing a host fallback, if there are any active
3818 #pragma omp target data constructs, we need to remember the
3819 new #pragma omp target data, otherwise GOMP_target_end_data
3820 would get out of sync. */
3821 struct target_mem_desc *tgt
3822 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
3823 NULL, GOMP_MAP_VARS_DATA);
3824 tgt->prev = icv->target_data;
3825 icv->target_data = tgt;
3826 }
3827 }
3828
3829 void
3830 GOMP_target_data (int device, const void *unused, size_t mapnum,
3831 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3832 {
3833 struct gomp_device_descr *devicep = resolve_device (device, true);
3834
3835 if (devicep == NULL
3836 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3837 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
3838 return gomp_target_data_fallback (devicep);
3839
3840 struct target_mem_desc *tgt
3841 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
3842 NULL, GOMP_MAP_VARS_DATA);
3843 struct gomp_task_icv *icv = gomp_icv (true);
3844 tgt->prev = icv->target_data;
3845 icv->target_data = tgt;
3846 }
3847
3848 void
3849 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
3850 size_t *sizes, unsigned short *kinds)
3851 {
3852 struct gomp_device_descr *devicep = resolve_device (device, true);
3853
3854 if (devicep == NULL
3855 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3856 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3857 return gomp_target_data_fallback (devicep);
3858
3859 struct target_mem_desc *tgt
3860 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
3861 NULL, GOMP_MAP_VARS_DATA);
3862 struct gomp_task_icv *icv = gomp_icv (true);
3863 tgt->prev = icv->target_data;
3864 icv->target_data = tgt;
3865 }
3866
3867 void
3868 GOMP_target_end_data (void)
3869 {
3870 struct gomp_task_icv *icv = gomp_icv (false);
3871 if (icv->target_data)
3872 {
3873 struct target_mem_desc *tgt = icv->target_data;
3874 icv->target_data = tgt->prev;
3875 gomp_unmap_vars (tgt, true, NULL);
3876 }
3877 }
3878
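/* Editor's sketch (not part of libgomp): a structured data region such as

     #pragma omp target data map(to: a[0:n]) map(from: b[0:n])
     {
       compute_on_device (a, b, n);
     }

   is lowered by the compiler into a call to GOMP_target_data_ext (or the
   older GOMP_target_data) at the opening brace and GOMP_target_end_data
   at the closing brace, with the map clauses encoded in
   HOSTADDRS/SIZES/KINDS.  'compute_on_device' is a placeholder; the exact
   lowering is compiler-internal and this note only connects the entry
   points above to the source-level construct.  */
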
3879 void
3880 GOMP_target_update (int device, const void *unused, size_t mapnum,
3881 void **hostaddrs, size_t *sizes, unsigned char *kinds)
3882 {
3883 struct gomp_device_descr *devicep = resolve_device (device, true);
3884
3885 if (devicep == NULL
3886 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3887 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3888 return;
3889
3890 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
3891 }
3892
3893 void
3894 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
3895 size_t *sizes, unsigned short *kinds,
3896 unsigned int flags, void **depend)
3897 {
3898 struct gomp_device_descr *devicep = resolve_device (device, true);
3899
3900 /* If there are depend clauses, but nowait is not present,
3901 block the parent task until the dependencies are resolved
3902 and then just continue with the rest of the function as if it
3903 were a merged task. Until we are able to schedule tasks during
3904 variable mapping or unmapping, ignore nowait if depend clauses
3905 are not present. */
3906 if (depend != NULL)
3907 {
3908 struct gomp_thread *thr = gomp_thread ();
3909 if (thr->task && thr->task->depend_hash)
3910 {
3911 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3912 && thr->ts.team
3913 && !thr->task->final_task)
3914 {
3915 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3916 mapnum, hostaddrs, sizes, kinds,
3917 flags | GOMP_TARGET_FLAG_UPDATE,
3918 depend, NULL, GOMP_TARGET_TASK_DATA))
3919 return;
3920 }
3921 else
3922 {
3923 struct gomp_team *team = thr->ts.team;
3924 /* If parallel or taskgroup has been cancelled, don't start new
3925 tasks. */
3926 if (__builtin_expect (gomp_cancel_var, 0) && team)
3927 {
3928 if (gomp_team_barrier_cancelled (&team->barrier))
3929 return;
3930 if (thr->task->taskgroup)
3931 {
3932 if (thr->task->taskgroup->cancelled)
3933 return;
3934 if (thr->task->taskgroup->workshare
3935 && thr->task->taskgroup->prev
3936 && thr->task->taskgroup->prev->cancelled)
3937 return;
3938 }
3939 }
3940
3941 gomp_task_maybe_wait_for_dependencies (depend);
3942 }
3943 }
3944 }
3945
3946 if (devicep == NULL
3947 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3948 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3949 return;
3950
3951 struct gomp_thread *thr = gomp_thread ();
3952 struct gomp_team *team = thr->ts.team;
3953 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3954 if (__builtin_expect (gomp_cancel_var, 0) && team)
3955 {
3956 if (gomp_team_barrier_cancelled (&team->barrier))
3957 return;
3958 if (thr->task->taskgroup)
3959 {
3960 if (thr->task->taskgroup->cancelled)
3961 return;
3962 if (thr->task->taskgroup->workshare
3963 && thr->task->taskgroup->prev
3964 && thr->task->taskgroup->prev->cancelled)
3965 return;
3966 }
3967 }
3968
3969 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
3970 }
3971
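/* Editor's sketch (not part of libgomp): a directive such as

     #pragma omp target update to(a[0:n]) depend(in: a) nowait

   reaches GOMP_target_update_ext above.  With 'nowait' and usable
   dependencies a target task is created via gomp_create_target_task
   (with GOMP_TARGET_FLAG_UPDATE added to FLAGS); otherwise the update is
   performed synchronously through gomp_update.  This is an editor's
   reading of the code above, not normative documentation.  */
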
3972 static void
3973 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
3974 void **hostaddrs, size_t *sizes, unsigned short *kinds,
3975 htab_t *refcount_set)
3976 {
3977 const int typemask = 0xff;
3978 size_t i;
3979 gomp_mutex_lock (&devicep->lock);
3980 if (devicep->state == GOMP_DEVICE_FINALIZED)
3981 {
3982 gomp_mutex_unlock (&devicep->lock);
3983 return;
3984 }
3985
3986 for (i = 0; i < mapnum; i++)
3987 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
3988 {
3989 struct splay_tree_key_s cur_node;
3990 cur_node.host_start = (uintptr_t) hostaddrs[i];
3991 cur_node.host_end = cur_node.host_start + sizeof (void *);
3992 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
3993
3994 if (n)
3995 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
3996 false, NULL);
3997 }
3998
3999 int nrmvars = 0;
4000 splay_tree_key remove_vars[mapnum];
4001
4002 for (i = 0; i < mapnum; i++)
4003 {
4004 struct splay_tree_key_s cur_node;
4005 unsigned char kind = kinds[i] & typemask;
4006 switch (kind)
4007 {
4008 case GOMP_MAP_FROM:
4009 case GOMP_MAP_ALWAYS_FROM:
4010 case GOMP_MAP_DELETE:
4011 case GOMP_MAP_RELEASE:
4012 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
4013 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
4014 cur_node.host_start = (uintptr_t) hostaddrs[i];
4015 cur_node.host_end = cur_node.host_start + sizes[i];
4016 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4017 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
4018 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
4019 : splay_tree_lookup (&devicep->mem_map, &cur_node);
4020 if (!k)
4021 continue;
4022
4023 bool delete_p = (kind == GOMP_MAP_DELETE
4024 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
4025 bool do_copy, do_remove;
4026 gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
4027 &do_remove);
4028
4029 if ((kind == GOMP_MAP_FROM && do_copy)
4030 || kind == GOMP_MAP_ALWAYS_FROM)
4031 {
4032 if (k->aux && k->aux->attach_count)
4033 {
4034 /* We have to be careful not to overwrite still attached
4035 pointers during the copyback to host. */
4036 uintptr_t addr = k->host_start;
4037 while (addr < k->host_end)
4038 {
4039 size_t i = (addr - k->host_start) / sizeof (void *);
4040 if (k->aux->attach_count[i] == 0)
4041 gomp_copy_dev2host (devicep, NULL, (void *) addr,
4042 (void *) (k->tgt->tgt_start
4043 + k->tgt_offset
4044 + addr - k->host_start),
4045 sizeof (void *));
4046 addr += sizeof (void *);
4047 }
4048 }
4049 else
4050 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
4051 (void *) (k->tgt->tgt_start + k->tgt_offset
4052 + cur_node.host_start
4053 - k->host_start),
4054 cur_node.host_end - cur_node.host_start);
4055 }
4056
4057 /* Structure element lists are removed all at once, which may cause
4058 immediate deallocation of the target_mem_desc and thus errors if we
4059 still have later element siblings to copy back. While we're at it,
4060 it also seems more disciplined to simply queue all removals together
4061 for processing below.
4062
4063 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4064 not have this problem, since it maintains an additional
4065 tgt->refcount = 1 reference to the target_mem_desc to start with.
4066 */
4067 if (do_remove)
4068 remove_vars[nrmvars++] = k;
4069 break;
4070
4071 case GOMP_MAP_DETACH:
4072 break;
4073 default:
4074 gomp_mutex_unlock (&devicep->lock);
4075 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4076 kind);
4077 }
4078 }
4079
4080 for (int i = 0; i < nrmvars; i++)
4081 gomp_remove_var (devicep, remove_vars[i]);
4082
4083 gomp_mutex_unlock (&devicep->lock);
4084 }
4085
4086 void
4087 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
4088 size_t *sizes, unsigned short *kinds,
4089 unsigned int flags, void **depend)
4090 {
4091 struct gomp_device_descr *devicep = resolve_device (device, true);
4092
4093 /* If there are depend clauses, but nowait is not present,
4094 block the parent task until the dependencies are resolved
4095 and then just continue with the rest of the function as if it
4096 were a merged task. Until we are able to schedule tasks during
4097 variable mapping or unmapping, ignore nowait if depend clauses
4098 are not present. */
4099 if (depend != NULL)
4100 {
4101 struct gomp_thread *thr = gomp_thread ();
4102 if (thr->task && thr->task->depend_hash)
4103 {
4104 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
4105 && thr->ts.team
4106 && !thr->task->final_task)
4107 {
4108 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
4109 mapnum, hostaddrs, sizes, kinds,
4110 flags, depend, NULL,
4111 GOMP_TARGET_TASK_DATA))
4112 return;
4113 }
4114 else
4115 {
4116 struct gomp_team *team = thr->ts.team;
4117 /* If parallel or taskgroup has been cancelled, don't start new
4118 tasks. */
4119 if (__builtin_expect (gomp_cancel_var, 0) && team)
4120 {
4121 if (gomp_team_barrier_cancelled (&team->barrier))
4122 return;
4123 if (thr->task->taskgroup)
4124 {
4125 if (thr->task->taskgroup->cancelled)
4126 return;
4127 if (thr->task->taskgroup->workshare
4128 && thr->task->taskgroup->prev
4129 && thr->task->taskgroup->prev->cancelled)
4130 return;
4131 }
4132 }
4133
4134 gomp_task_maybe_wait_for_dependencies (depend);
4135 }
4136 }
4137 }
4138
4139 if (devicep == NULL
4140 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4141 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4142 return;
4143
4144 struct gomp_thread *thr = gomp_thread ();
4145 struct gomp_team *team = thr->ts.team;
4146 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4147 if (__builtin_expect (gomp_cancel_var, 0) && team)
4148 {
4149 if (gomp_team_barrier_cancelled (&team->barrier))
4150 return;
4151 if (thr->task->taskgroup)
4152 {
4153 if (thr->task->taskgroup->cancelled)
4154 return;
4155 if (thr->task->taskgroup->workshare
4156 && thr->task->taskgroup->prev
4157 && thr->task->taskgroup->prev->cancelled)
4158 return;
4159 }
4160 }
4161
4162 htab_t refcount_set = htab_create (mapnum);
4163
4164 /* The variables are mapped separately such that they can be released
4165 independently. */
4166 size_t i, j;
4167 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4168 for (i = 0; i < mapnum; i++)
4169 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4170 {
4171 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
4172 &kinds[i], true, &refcount_set,
4173 GOMP_MAP_VARS_ENTER_DATA);
4174 i += sizes[i];
4175 }
4176 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
4177 {
4178 for (j = i + 1; j < mapnum; j++)
4179 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
4180 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
4181 break;
4182 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
4183 &kinds[i], true, &refcount_set,
4184 GOMP_MAP_VARS_ENTER_DATA);
4185 i += j - i - 1;
4186 }
4187 else if (i + 1 < mapnum
4188 && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
4189 || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4190 && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
4191 {
4192 /* An attach operation must be processed together with the mapped
4193 base-pointer list item. */
4194 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4195 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4196 i += 1;
4197 }
4198 else
4199 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
4200 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4201 else
4202 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
4203 htab_free (refcount_set);
4204 }
4205
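/* Editor's sketch (not part of libgomp): the single entry point above
   serves both directions, distinguished by GOMP_TARGET_FLAG_EXIT_DATA
   in FLAGS:

     #pragma omp target enter data map(to: a[0:n])
     #pragma omp target exit data map(from: a[0:n])

   On enter data each list item is mapped separately (so that items can
   later be released independently), with GOMP_MAP_STRUCT, GOMP_MAP_TO_PSET
   and attach operations grouped as the code above requires; exit data goes
   through gomp_exit_data, which decrements reference counts, copies 'from'
   data back and queues removals.  */
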
4206 bool
4207 gomp_target_task_fn (void *data)
4208 {
4209 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
4210 struct gomp_device_descr *devicep = ttask->devicep;
4211
4212 if (ttask->fn != NULL)
4213 {
4214 void *fn_addr;
4215 if (devicep == NULL
4216 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4217 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
4218 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
4219 {
4220 ttask->state = GOMP_TARGET_TASK_FALLBACK;
4221 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
4222 ttask->args);
4223 return false;
4224 }
4225
4226 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
4227 {
4228 if (ttask->tgt)
4229 gomp_unmap_vars (ttask->tgt, true, NULL);
4230 return false;
4231 }
4232
4233 void *actual_arguments;
4234 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4235 {
4236 ttask->tgt = NULL;
4237 actual_arguments = ttask->hostaddrs;
4238 }
4239 else
4240 {
4241 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
4242 NULL, ttask->sizes, ttask->kinds, true,
4243 NULL, GOMP_MAP_VARS_TARGET);
4244 actual_arguments = (void *) ttask->tgt->tgt_start;
4245 }
4246 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
4247
4248 assert (devicep->async_run_func);
4249 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
4250 ttask->args, (void *) ttask);
4251 return true;
4252 }
4253 else if (devicep == NULL
4254 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4255 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4256 return false;
4257
4258 size_t i;
4259 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
4260 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4261 ttask->kinds, true);
4262 else
4263 {
4264 htab_t refcount_set = htab_create (ttask->mapnum);
4265 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
4266 for (i = 0; i < ttask->mapnum; i++)
4267 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
4268 {
4269 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
4270 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
4271 &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
4272 i += ttask->sizes[i];
4273 }
4274 else
4275 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
4276 &ttask->kinds[i], true, &refcount_set,
4277 GOMP_MAP_VARS_ENTER_DATA);
4278 else
4279 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
4280 ttask->kinds, &refcount_set);
4281 htab_free (refcount_set);
4282 }
4283 return false;
4284 }
4285
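/* Editor's sketch (not part of libgomp): gomp_target_task_fn above is the
   task body used when a target construct is deferred, e.g.

     #pragma omp target nowait depend(out: x) map(tofrom: x)
       x = compute (x);

   In the editor's reading of the code above: if the device plugin
   provides async_run_func, the mapped kernel is launched asynchronously
   and true is returned so the task stays live until the plugin reports
   completion; host fallback and data-only target tasks finish
   synchronously and return false.  'compute' is a placeholder.  */
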
4286 void
4287 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
4288 {
4289 if (thread_limit)
4290 {
4291 struct gomp_task_icv *icv = gomp_icv (true);
4292 icv->thread_limit_var
4293 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4294 }
4295 (void) num_teams;
4296 }
4297
4298 bool
4299 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
4300 unsigned int thread_limit, bool first)
4301 {
4302 struct gomp_thread *thr = gomp_thread ();
4303 if (first)
4304 {
4305 if (thread_limit)
4306 {
4307 struct gomp_task_icv *icv = gomp_icv (true);
4308 icv->thread_limit_var
4309 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
4310 }
4311 (void) num_teams_high;
4312 if (num_teams_low == 0)
4313 num_teams_low = 1;
4314 thr->num_teams = num_teams_low - 1;
4315 thr->team_num = 0;
4316 }
4317 else if (thr->team_num == thr->num_teams)
4318 return false;
4319 else
4320 ++thr->team_num;
4321 return true;
4322 }
4323
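/* Editor's sketch (not part of libgomp): GOMP_teams4 above implements the
   iteration over teams used by this copy of libgomp for a construct such
   as

     #pragma omp target teams num_teams(2:8) thread_limit(64)

   On the first call (FIRST is true) it records the thread limit and the
   lower bound of the requested number of teams in thr->num_teams and
   resets thr->team_num; each later call advances team_num and returns
   false once all teams have run, so the caller can simply loop on its
   return value.  This describes the fallback path above, not the
   device-side lowering.  */
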
4324 void *
4325 omp_target_alloc (size_t size, int device_num)
4326 {
4327 if (device_num == omp_initial_device
4328 || device_num == gomp_get_num_devices ())
4329 return malloc (size);
4330
4331 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4332 if (devicep == NULL)
4333 return NULL;
4334
4335 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4336 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4337 return malloc (size);
4338
4339 gomp_mutex_lock (&devicep->lock);
4340 void *ret = devicep->alloc_func (devicep->target_id, size);
4341 gomp_mutex_unlock (&devicep->lock);
4342 return ret;
4343 }
4344
4345 void
4346 omp_target_free (void *device_ptr, int device_num)
4347 {
4348 if (device_num == omp_initial_device
4349 || device_num == gomp_get_num_devices ())
4350 {
4351 free (device_ptr);
4352 return;
4353 }
4354
4355 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4356 if (devicep == NULL || device_ptr == NULL)
4357 return;
4358
4359 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4360 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4361 {
4362 free (device_ptr);
4363 return;
4364 }
4365
4366 gomp_mutex_lock (&devicep->lock);
4367 gomp_free_device_memory (devicep, device_ptr);
4368 gomp_mutex_unlock (&devicep->lock);
4369 }
4370
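/* Editor's sketch (not part of libgomp): typical use of the two routines
   above, assuming <omp.h> is included; error handling is minimal:

     int dev = omp_get_default_device ();
     void *p = omp_target_alloc (1024, dev);
     if (p != NULL)
       {
         #pragma omp target is_device_ptr(p)
         ((char *) p)[0] = 1;
         omp_target_free (p, dev);
       }

   As implemented above, passing omp_initial_device (or the value returned
   by omp_get_num_devices ()) makes both routines degenerate to plain
   malloc/free on the host.  */
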
4371 int
4372 omp_target_is_present (const void *ptr, int device_num)
4373 {
4374 if (device_num == omp_initial_device
4375 || device_num == gomp_get_num_devices ())
4376 return 1;
4377
4378 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4379 if (devicep == NULL)
4380 return 0;
4381
4382 if (ptr == NULL)
4383 return 1;
4384
4385 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4386 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4387 return 1;
4388
4389 gomp_mutex_lock (&devicep->lock);
4390 struct splay_tree_s *mem_map = &devicep->mem_map;
4391 struct splay_tree_key_s cur_node;
4392
4393 cur_node.host_start = (uintptr_t) ptr;
4394 cur_node.host_end = cur_node.host_start;
4395 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4396 int ret = n != NULL;
4397 gomp_mutex_unlock (&devicep->lock);
4398 return ret;
4399 }
4400
4401 static int
4402 omp_target_memcpy_check (int dst_device_num, int src_device_num,
4403 struct gomp_device_descr **dst_devicep,
4404 struct gomp_device_descr **src_devicep)
4405 {
4406 if (dst_device_num != gomp_get_num_devices ()
4407 /* gomp_get_num_devices above has to be called unconditionally. */
4408 && dst_device_num != omp_initial_device)
4409 {
4410 *dst_devicep = resolve_device (dst_device_num, false);
4411 if (*dst_devicep == NULL)
4412 return EINVAL;
4413
4414 if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4415 || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4416 *dst_devicep = NULL;
4417 }
4418
4419 if (src_device_num != num_devices_openmp
4420 && src_device_num != omp_initial_device)
4421 {
4422 *src_devicep = resolve_device (src_device_num, false);
4423 if (*src_devicep == NULL)
4424 return EINVAL;
4425
4426 if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4427 || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4428 *src_devicep = NULL;
4429 }
4430
4431 return 0;
4432 }
4433
4434 static int
4435 omp_target_memcpy_copy (void *dst, const void *src, size_t length,
4436 size_t dst_offset, size_t src_offset,
4437 struct gomp_device_descr *dst_devicep,
4438 struct gomp_device_descr *src_devicep)
4439 {
4440 bool ret;
4441 if (src_devicep == NULL && dst_devicep == NULL)
4442 {
4443 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
4444 return 0;
4445 }
4446 if (src_devicep == NULL)
4447 {
4448 gomp_mutex_lock (&dst_devicep->lock);
4449 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4450 (char *) dst + dst_offset,
4451 (char *) src + src_offset, length);
4452 gomp_mutex_unlock (&dst_devicep->lock);
4453 return (ret ? 0 : EINVAL);
4454 }
4455 if (dst_devicep == NULL)
4456 {
4457 gomp_mutex_lock (&src_devicep->lock);
4458 ret = src_devicep->dev2host_func (src_devicep->target_id,
4459 (char *) dst + dst_offset,
4460 (char *) src + src_offset, length);
4461 gomp_mutex_unlock (&src_devicep->lock);
4462 return (ret ? 0 : EINVAL);
4463 }
4464 if (src_devicep == dst_devicep)
4465 {
4466 gomp_mutex_lock (&src_devicep->lock);
4467 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4468 (char *) dst + dst_offset,
4469 (char *) src + src_offset, length);
4470 gomp_mutex_unlock (&src_devicep->lock);
4471 return (ret ? 0 : EINVAL);
4472 }
4473 return EINVAL;
4474 }
4475
4476 int
4477 omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
4478 size_t src_offset, int dst_device_num, int src_device_num)
4479 {
4480 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4481 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4482 &dst_devicep, &src_devicep);
4483
4484 if (ret)
4485 return ret;
4486
4487 ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
4488 dst_devicep, src_devicep);
4489
4490 return ret;
4491 }
4492
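/* Editor's sketch (not part of libgomp): copying a host buffer into a
   device allocation with the routine above; omp_initial_device denotes
   the host side, and the return value is 0 on success or EINVAL, as
   implemented in omp_target_memcpy_copy:

     double host_buf[256] = { 0 };
     int dev = omp_get_default_device ();
     void *dev_buf = omp_target_alloc (sizeof host_buf, dev);
     int err = omp_target_memcpy (dev_buf, host_buf, sizeof host_buf,
                                  0, 0, dev, omp_initial_device);
 */
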
4493 typedef struct
4494 {
4495 void *dst;
4496 const void *src;
4497 size_t length;
4498 size_t dst_offset;
4499 size_t src_offset;
4500 struct gomp_device_descr *dst_devicep;
4501 struct gomp_device_descr *src_devicep;
4502 } omp_target_memcpy_data;
4503
4504 static void
4505 omp_target_memcpy_async_helper (void *args)
4506 {
4507 omp_target_memcpy_data *a = args;
4508 if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
4509 a->src_offset, a->dst_devicep, a->src_devicep))
4510 gomp_fatal ("omp_target_memcpy failed");
4511 }
4512
4513 int
4514 omp_target_memcpy_async (void *dst, const void *src, size_t length,
4515 size_t dst_offset, size_t src_offset,
4516 int dst_device_num, int src_device_num,
4517 int depobj_count, omp_depend_t *depobj_list)
4518 {
4519 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4520 unsigned int flags = 0;
4521 void *depend[depobj_count + 5];
4522 int i;
4523 int check = omp_target_memcpy_check (dst_device_num, src_device_num,
4524 &dst_devicep, &src_devicep);
4525
4526 omp_target_memcpy_data s = {
4527 .dst = dst,
4528 .src = src,
4529 .length = length,
4530 .dst_offset = dst_offset,
4531 .src_offset = src_offset,
4532 .dst_devicep = dst_devicep,
4533 .src_devicep = src_devicep
4534 };
4535
4536 if (check)
4537 return check;
4538
4539 if (depobj_count > 0 && depobj_list != NULL)
4540 {
4541 flags |= GOMP_TASK_FLAG_DEPEND;
4542 depend[0] = 0;
4543 depend[1] = (void *) (uintptr_t) depobj_count;
4544 depend[2] = depend[3] = depend[4] = 0;
4545 for (i = 0; i < depobj_count; ++i)
4546 depend[i + 5] = &depobj_list[i];
4547 }
4548
4549 GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
4550 __alignof__ (s), true, flags, depend, 0, NULL);
4551
4552 return 0;
4553 }
4554
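/* Editor's sketch (not part of libgomp): the asynchronous variant above
   is implemented as an OpenMP task (see the GOMP_task call), so when no
   depend objects are passed a taskwait is enough to synchronize with it:

     omp_target_memcpy_async (dev_buf, host_buf, nbytes, 0, 0,
                              dev, omp_initial_device, 0, NULL);
     #pragma omp taskwait

   'dev_buf', 'host_buf', 'nbytes' and 'dev' are placeholders.  */
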
4555 static int
4556 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
4557 int num_dims, const size_t *volume,
4558 const size_t *dst_offsets,
4559 const size_t *src_offsets,
4560 const size_t *dst_dimensions,
4561 const size_t *src_dimensions,
4562 struct gomp_device_descr *dst_devicep,
4563 struct gomp_device_descr *src_devicep,
4564 size_t *tmp_size, void **tmp)
4565 {
4566 size_t dst_slice = element_size;
4567 size_t src_slice = element_size;
4568 size_t j, dst_off, src_off, length;
4569 int i, ret;
4570
4571 if (num_dims == 1)
4572 {
4573 if (__builtin_mul_overflow (element_size, volume[0], &length)
4574 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
4575 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
4576 return EINVAL;
4577 if (dst_devicep == NULL && src_devicep == NULL)
4578 {
4579 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
4580 length);
4581 ret = 1;
4582 }
4583 else if (src_devicep == NULL)
4584 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4585 (char *) dst + dst_off,
4586 (const char *) src + src_off,
4587 length);
4588 else if (dst_devicep == NULL)
4589 ret = src_devicep->dev2host_func (src_devicep->target_id,
4590 (char *) dst + dst_off,
4591 (const char *) src + src_off,
4592 length);
4593 else if (src_devicep == dst_devicep)
4594 ret = src_devicep->dev2dev_func (src_devicep->target_id,
4595 (char *) dst + dst_off,
4596 (const char *) src + src_off,
4597 length);
4598 else
4599 {
4600 if (*tmp_size == 0)
4601 {
4602 *tmp_size = length;
4603 *tmp = malloc (length);
4604 if (*tmp == NULL)
4605 return ENOMEM;
4606 }
4607 else if (*tmp_size < length)
4608 {
4609 *tmp_size = length;
4610 free (*tmp);
4611 *tmp = malloc (length);
4612 if (*tmp == NULL)
4613 return ENOMEM;
4614 }
4615 ret = src_devicep->dev2host_func (src_devicep->target_id, *tmp,
4616 (const char *) src + src_off,
4617 length);
4618 if (ret == 1)
4619 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
4620 (char *) dst + dst_off, *tmp,
4621 length);
4622 }
4623 return ret ? 0 : EINVAL;
4624 }
4625
4626 /* host->device, device->host and intra device. */
4627 if (num_dims == 2
4628 && ((src_devicep
4629 && src_devicep == dst_devicep
4630 && src_devicep->memcpy2d_func)
4631 || (!src_devicep != !dst_devicep
4632 && ((src_devicep && src_devicep->memcpy2d_func)
4633 || (dst_devicep && dst_devicep->memcpy2d_func)))))
4634 {
4635 size_t vol_sz1, dst_sz1, src_sz1, dst_off_sz1, src_off_sz1;
4636 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4637 int src_id = src_devicep ? src_devicep->target_id : -1;
4638 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4639
4640 if (__builtin_mul_overflow (volume[1], element_size, &vol_sz1)
4641 || __builtin_mul_overflow (dst_dimensions[1], element_size, &dst_sz1)
4642 || __builtin_mul_overflow (src_dimensions[1], element_size, &src_sz1)
4643 || __builtin_mul_overflow (dst_offsets[1], element_size, &dst_off_sz1)
4644 || __builtin_mul_overflow (src_offsets[1], element_size,
4645 &src_off_sz1))
4646 return EINVAL;
4647 ret = devp->memcpy2d_func (dst_id, src_id, vol_sz1, volume[0],
4648 dst, dst_off_sz1, dst_offsets[0], dst_sz1,
4649 src, src_off_sz1, src_offsets[0], src_sz1);
4650 if (ret != -1)
4651 return ret ? 0 : EINVAL;
4652 }
4653 else if (num_dims == 3
4654 && ((src_devicep
4655 && src_devicep == dst_devicep
4656 && src_devicep->memcpy3d_func)
4657 || (!src_devicep != !dst_devicep
4658 && ((src_devicep && src_devicep->memcpy3d_func)
4659 || (dst_devicep && dst_devicep->memcpy3d_func)))))
4660 {
4661 size_t vol_sz2, dst_sz2, src_sz2, dst_off_sz2, src_off_sz2;
4662 int dst_id = dst_devicep ? dst_devicep->target_id : -1;
4663 int src_id = src_devicep ? src_devicep->target_id : -1;
4664 struct gomp_device_descr *devp = dst_devicep ? dst_devicep : src_devicep;
4665
4666 if (__builtin_mul_overflow (volume[2], element_size, &vol_sz2)
4667 || __builtin_mul_overflow (dst_dimensions[2], element_size, &dst_sz2)
4668 || __builtin_mul_overflow (src_dimensions[2], element_size, &src_sz2)
4669 || __builtin_mul_overflow (dst_offsets[2], element_size, &dst_off_sz2)
4670 || __builtin_mul_overflow (src_offsets[2], element_size,
4671 &src_off_sz2))
4672 return EINVAL;
4673 ret = devp->memcpy3d_func (dst_id, src_id, vol_sz2, volume[1], volume[0],
4674 dst, dst_off_sz2, dst_offsets[1],
4675 dst_offsets[0], dst_sz2, dst_dimensions[1],
4676 src, src_off_sz2, src_offsets[1],
4677 src_offsets[0], src_sz2, src_dimensions[1]);
4678 if (ret != -1)
4679 return ret ? 0 : EINVAL;
4680 }
4681
4682 for (i = 1; i < num_dims; i++)
4683 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
4684 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
4685 return EINVAL;
4686 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
4687 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
4688 return EINVAL;
4689 for (j = 0; j < volume[0]; j++)
4690 {
4691 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
4692 (const char *) src + src_off,
4693 element_size, num_dims - 1,
4694 volume + 1, dst_offsets + 1,
4695 src_offsets + 1, dst_dimensions + 1,
4696 src_dimensions + 1, dst_devicep,
4697 src_devicep, tmp_size, tmp);
4698 if (ret)
4699 return ret;
4700 dst_off += dst_slice;
4701 src_off += src_slice;
4702 }
4703 return 0;
4704 }
4705
4706 static int
4707 omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
4708 int src_device_num,
4709 struct gomp_device_descr **dst_devicep,
4710 struct gomp_device_descr **src_devicep)
4711 {
4712 if (!dst && !src)
4713 return INT_MAX;
4714
4715 int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
4716 dst_devicep, src_devicep);
4717 if (ret)
4718 return ret;
4719
4720 return 0;
4721 }
4722
4723 static int
4724 omp_target_memcpy_rect_copy (void *dst, const void *src,
4725 size_t element_size, int num_dims,
4726 const size_t *volume, const size_t *dst_offsets,
4727 const size_t *src_offsets,
4728 const size_t *dst_dimensions,
4729 const size_t *src_dimensions,
4730 struct gomp_device_descr *dst_devicep,
4731 struct gomp_device_descr *src_devicep)
4732 {
4733 size_t tmp_size = 0;
4734 void *tmp = NULL;
4735 bool lock_src;
4736 bool lock_dst;
4737
4738 lock_src = src_devicep != NULL;
4739 lock_dst = dst_devicep != NULL && src_devicep != dst_devicep;
4740 if (lock_src)
4741 gomp_mutex_lock (&src_devicep->lock);
4742 if (lock_dst)
4743 gomp_mutex_lock (&dst_devicep->lock);
4744 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
4745 volume, dst_offsets, src_offsets,
4746 dst_dimensions, src_dimensions,
4747 dst_devicep, src_devicep,
4748 &tmp_size, &tmp);
4749 if (lock_src)
4750 gomp_mutex_unlock (&src_devicep->lock);
4751 if (lock_dst)
4752 gomp_mutex_unlock (&dst_devicep->lock);
4753 if (tmp)
4754 free (tmp);
4755
4756 return ret;
4757 }
4758
4759 int
4760 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
4761 int num_dims, const size_t *volume,
4762 const size_t *dst_offsets,
4763 const size_t *src_offsets,
4764 const size_t *dst_dimensions,
4765 const size_t *src_dimensions,
4766 int dst_device_num, int src_device_num)
4767 {
4768 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4769
4770 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4771 src_device_num, &dst_devicep,
4772 &src_devicep);
4773
4774 if (check)
4775 return check;
4776
4777 int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
4778 volume, dst_offsets, src_offsets,
4779 dst_dimensions, src_dimensions,
4780 dst_devicep, src_devicep);
4781
4782 return ret;
4783 }
4784
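/* Editor's sketch (not part of libgomp): copying a 2x3 sub-block of a 4x5
   device matrix into the top-left corner of a 4x5 host matrix with the
   routine above.  Volume, offsets and dimensions are in elements,
   outermost dimension first; 'dev_m' and 'dev' are placeholders for a
   device allocation and its device number:

     double host_m[4][5];
     size_t volume[2]   = { 2, 3 };
     size_t dst_off[2]  = { 0, 0 };
     size_t src_off[2]  = { 1, 2 };
     size_t dst_dims[2] = { 4, 5 };
     size_t src_dims[2] = { 4, 5 };
     int err = omp_target_memcpy_rect (host_m, dev_m, sizeof (double), 2,
                                       volume, dst_off, src_off,
                                       dst_dims, src_dims,
                                       omp_initial_device, dev);
 */
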
4785 typedef struct
4786 {
4787 void *dst;
4788 const void *src;
4789 size_t element_size;
4790 const size_t *volume;
4791 const size_t *dst_offsets;
4792 const size_t *src_offsets;
4793 const size_t *dst_dimensions;
4794 const size_t *src_dimensions;
4795 struct gomp_device_descr *dst_devicep;
4796 struct gomp_device_descr *src_devicep;
4797 int num_dims;
4798 } omp_target_memcpy_rect_data;
4799
4800 static void
4801 omp_target_memcpy_rect_async_helper (void *args)
4802 {
4803 omp_target_memcpy_rect_data *a = args;
4804 int ret = omp_target_memcpy_rect_copy (a->dst, a->src, a->element_size,
4805 a->num_dims, a->volume, a->dst_offsets,
4806 a->src_offsets, a->dst_dimensions,
4807 a->src_dimensions, a->dst_devicep,
4808 a->src_devicep);
4809 if (ret)
4810 gomp_fatal ("omp_target_memcpy_rect failed");
4811 }
4812
4813 int
4814 omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
4815 int num_dims, const size_t *volume,
4816 const size_t *dst_offsets,
4817 const size_t *src_offsets,
4818 const size_t *dst_dimensions,
4819 const size_t *src_dimensions,
4820 int dst_device_num, int src_device_num,
4821 int depobj_count, omp_depend_t *depobj_list)
4822 {
4823 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
4824 unsigned flags = 0;
4825 int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
4826 src_device_num, &dst_devicep,
4827 &src_devicep);
4828 void *depend[depobj_count + 5];
4829 int i;
4830
4831 omp_target_memcpy_rect_data s = {
4832 .dst = dst,
4833 .src = src,
4834 .element_size = element_size,
4835 .num_dims = num_dims,
4836 .volume = volume,
4837 .dst_offsets = dst_offsets,
4838 .src_offsets = src_offsets,
4839 .dst_dimensions = dst_dimensions,
4840 .src_dimensions = src_dimensions,
4841 .dst_devicep = dst_devicep,
4842 .src_devicep = src_devicep
4843 };
4844
4845 if (check)
4846 return check;
4847
4848 if (depobj_count > 0 && depobj_list != NULL)
4849 {
4850 flags |= GOMP_TASK_FLAG_DEPEND;
4851 depend[0] = 0;
4852 depend[1] = (void *) (uintptr_t) depobj_count;
4853 depend[2] = depend[3] = depend[4] = 0;
4854 for (i = 0; i < depobj_count; ++i)
4855 depend[i + 5] = &depobj_list[i];
4856 }
4857
4858 GOMP_task (omp_target_memcpy_rect_async_helper, &s, NULL, sizeof (s),
4859 __alignof__ (s), true, flags, depend, 0, NULL);
4860
4861 return 0;
4862 }
4863
4864 int
4865 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
4866 size_t size, size_t device_offset, int device_num)
4867 {
4868 if (device_num == omp_initial_device
4869 || device_num == gomp_get_num_devices ())
4870 return EINVAL;
4871
4872 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4873 if (devicep == NULL)
4874 return EINVAL;
4875
4876 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4877 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4878 return EINVAL;
4879
4880 gomp_mutex_lock (&devicep->lock);
4881
4882 struct splay_tree_s *mem_map = &devicep->mem_map;
4883 struct splay_tree_key_s cur_node;
4884 int ret = EINVAL;
4885
4886 cur_node.host_start = (uintptr_t) host_ptr;
4887 cur_node.host_end = cur_node.host_start + size;
4888 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4889 if (n)
4890 {
4891 if (n->tgt->tgt_start + n->tgt_offset
4892 == (uintptr_t) device_ptr + device_offset
4893 && n->host_start <= cur_node.host_start
4894 && n->host_end >= cur_node.host_end)
4895 ret = 0;
4896 }
4897 else
4898 {
4899 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
4900 tgt->array = gomp_malloc (sizeof (*tgt->array));
4901 tgt->refcount = 1;
4902 tgt->tgt_start = 0;
4903 tgt->tgt_end = 0;
4904 tgt->to_free = NULL;
4905 tgt->prev = NULL;
4906 tgt->list_count = 0;
4907 tgt->device_descr = devicep;
4908 splay_tree_node array = tgt->array;
4909 splay_tree_key k = &array->key;
4910 k->host_start = cur_node.host_start;
4911 k->host_end = cur_node.host_end;
4912 k->tgt = tgt;
4913 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
4914 k->refcount = REFCOUNT_INFINITY;
4915 k->dynamic_refcount = 0;
4916 k->aux = NULL;
4917 array->left = NULL;
4918 array->right = NULL;
4919 splay_tree_insert (&devicep->mem_map, array);
4920 ret = 0;
4921 }
4922 gomp_mutex_unlock (&devicep->lock);
4923 return ret;
4924 }
4925
4926 int
4927 omp_target_disassociate_ptr (const void *ptr, int device_num)
4928 {
4929 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4930 if (devicep == NULL)
4931 return EINVAL;
4932
4933 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
4934 return EINVAL;
4935
4936 gomp_mutex_lock (&devicep->lock);
4937
4938 struct splay_tree_s *mem_map = &devicep->mem_map;
4939 struct splay_tree_key_s cur_node;
4940 int ret = EINVAL;
4941
4942 cur_node.host_start = (uintptr_t) ptr;
4943 cur_node.host_end = cur_node.host_start;
4944 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
4945 if (n
4946 && n->host_start == cur_node.host_start
4947 && n->refcount == REFCOUNT_INFINITY
4948 && n->tgt->tgt_start == 0
4949 && n->tgt->to_free == NULL
4950 && n->tgt->refcount == 1
4951 && n->tgt->list_count == 0)
4952 {
4953 splay_tree_remove (&devicep->mem_map, n);
4954 gomp_unmap_tgt (n->tgt);
4955 ret = 0;
4956 }
4957
4958 gomp_mutex_unlock (&devicep->lock);
4959 return ret;
4960 }
4961
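/* Editor's sketch (not part of libgomp): pairing the two routines above
   to put an existing device allocation under libgomp's address
   translation, so that later map clauses for HOST_P reuse it instead of
   allocating; 'host_p', 'n', 'nbytes' and 'dev' are placeholders:

     void *dev_p = omp_target_alloc (nbytes, dev);
     if (omp_target_associate_ptr (host_p, dev_p, nbytes, 0, dev) == 0)
       {
         #pragma omp target map(tofrom: host_p[0:n])
         host_p[0] += 1;
         omp_target_disassociate_ptr (host_p, dev);
       }
     omp_target_free (dev_p, dev);

   The association is entered with REFCOUNT_INFINITY, which is why the
   disassociate routine checks for exactly that refcount before removing
   the mapping.  */
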
4962 void *
4963 omp_get_mapped_ptr (const void *ptr, int device_num)
4964 {
4965 if (device_num == omp_initial_device
4966 || device_num == omp_get_initial_device ())
4967 return (void *) ptr;
4968
4969 struct gomp_device_descr *devicep = resolve_device (device_num, false);
4970 if (devicep == NULL)
4971 return NULL;
4972
4973 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
4974 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
4975 return (void *) ptr;
4976
4977 gomp_mutex_lock (&devicep->lock);
4978
4979 struct splay_tree_s *mem_map = &devicep->mem_map;
4980 struct splay_tree_key_s cur_node;
4981 void *ret = NULL;
4982
4983 cur_node.host_start = (uintptr_t) ptr;
4984 cur_node.host_end = cur_node.host_start;
4985 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
4986
4987 if (n)
4988 {
4989 uintptr_t offset = cur_node.host_start - n->host_start;
4990 ret = (void *) (n->tgt->tgt_start + n->tgt_offset + offset);
4991 }
4992
4993 gomp_mutex_unlock (&devicep->lock);
4994
4995 return ret;
4996 }
4997
4998 int
4999 omp_target_is_accessible (const void *ptr, size_t size, int device_num)
5000 {
5001 if (device_num == omp_initial_device
5002 || device_num == gomp_get_num_devices ())
5003 return true;
5004
5005 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5006 if (devicep == NULL)
5007 return false;
5008
5009 /* TODO: Unified shared memory must be handled when available. */
5010
5011 return devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM;
5012 }
5013
5014 int
5015 omp_pause_resource (omp_pause_resource_t kind, int device_num)
5016 {
5017 (void) kind;
5018 if (device_num == omp_initial_device
5019 || device_num == gomp_get_num_devices ())
5020 return gomp_pause_host ();
5021
5022 struct gomp_device_descr *devicep = resolve_device (device_num, false);
5023 if (devicep == NULL)
5024 return -1;
5025
5026 /* Do nothing for target devices for now. */
5027 return 0;
5028 }
5029
5030 int
5031 omp_pause_resource_all (omp_pause_resource_t kind)
5032 {
5033 (void) kind;
5034 if (gomp_pause_host ())
5035 return -1;
5036 /* Do nothing for target devices for now. */
5037 return 0;
5038 }
5039
5040 ialias (omp_pause_resource)
5041 ialias (omp_pause_resource_all)
5042
5043 #ifdef PLUGIN_SUPPORT
5044
5045 /* This function tries to load a plugin for DEVICE. The name of the plugin
5046 is passed in PLUGIN_NAME.
5047 The handles of the found functions are stored in the corresponding fields
5048 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5049
5050 static bool
5051 gomp_load_plugin_for_device (struct gomp_device_descr *device,
5052 const char *plugin_name)
5053 {
5054 const char *err = NULL, *last_missing = NULL;
5055
5056 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
5057 if (!plugin_handle)
5058 #if OFFLOAD_DEFAULTED
5059 return 0;
5060 #else
5061 goto dl_fail;
5062 #endif
5063
5064 /* Check if all required functions are available in the plugin and store
5065 their handles. None of the symbols can legitimately be NULL,
5066 so we don't need to check dlerror all the time. */
5067 #define DLSYM(f) \
5068 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5069 goto dl_fail
5070 /* Similar, but missing functions are not an error. Return false if
5071 failed, true otherwise. */
5072 #define DLSYM_OPT(f, n) \
5073 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5074 || (last_missing = #n, 0))
5075
5076 DLSYM (version);
5077 if (device->version_func () != GOMP_VERSION)
5078 {
5079 err = "plugin version mismatch";
5080 goto fail;
5081 }
5082
5083 DLSYM (get_name);
5084 DLSYM (get_caps);
5085 DLSYM (get_type);
5086 DLSYM (get_num_devices);
5087 DLSYM (init_device);
5088 DLSYM (fini_device);
5089 DLSYM (load_image);
5090 DLSYM (unload_image);
5091 DLSYM (alloc);
5092 DLSYM (free);
5093 DLSYM (dev2host);
5094 DLSYM (host2dev);
5095 DLSYM_OPT (memcpy2d, memcpy2d);
5096 DLSYM_OPT (memcpy3d, memcpy3d);
5097 device->capabilities = device->get_caps_func ();
5098 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5099 {
5100 DLSYM (run);
5101 DLSYM_OPT (async_run, async_run);
5102 DLSYM_OPT (can_run, can_run);
5103 DLSYM (dev2dev);
5104 }
5105 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5106 {
5107 if (!DLSYM_OPT (openacc.exec, openacc_exec)
5108 || !DLSYM_OPT (openacc.create_thread_data,
5109 openacc_create_thread_data)
5110 || !DLSYM_OPT (openacc.destroy_thread_data,
5111 openacc_destroy_thread_data)
5112 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
5113 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
5114 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
5115 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
5116 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
5117 || !DLSYM_OPT (openacc.async.queue_callback,
5118 openacc_async_queue_callback)
5119 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
5120 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
5121 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
5122 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
5123 {
5124 /* Require all the OpenACC handlers if we have
5125 GOMP_OFFLOAD_CAP_OPENACC_200. */
5126 err = "plugin missing OpenACC handler function";
5127 goto fail;
5128 }
5129
5130 unsigned cuda = 0;
5131 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
5132 openacc_cuda_get_current_device);
5133 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
5134 openacc_cuda_get_current_context);
5135 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
5136 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
5137 if (cuda && cuda != 4)
5138 {
5139 /* Make sure all the CUDA functions are there if any of them are. */
5140 err = "plugin missing OpenACC CUDA handler function";
5141 goto fail;
5142 }
5143 }
5144 #undef DLSYM
5145 #undef DLSYM_OPT
5146
5147 return 1;
5148
5149 dl_fail:
5150 err = dlerror ();
5151 fail:
5152 gomp_error ("while loading %s: %s", plugin_name, err);
5153 if (last_missing)
5154 gomp_error ("missing function was %s", last_missing);
5155 if (plugin_handle)
5156 dlclose (plugin_handle);
5157
5158 return 0;
5159 }
5160
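/* Editor's note: as the DLSYM/DLSYM_OPT lookups above show, a plugin is a
   shared object exporting entry points named "GOMP_OFFLOAD_" followed by
   the field name: GOMP_OFFLOAD_version, GOMP_OFFLOAD_get_name,
   GOMP_OFFLOAD_get_caps, GOMP_OFFLOAD_get_type,
   GOMP_OFFLOAD_get_num_devices, GOMP_OFFLOAD_init_device,
   GOMP_OFFLOAD_fini_device, GOMP_OFFLOAD_load_image,
   GOMP_OFFLOAD_unload_image, GOMP_OFFLOAD_alloc, GOMP_OFFLOAD_free,
   GOMP_OFFLOAD_dev2host and GOMP_OFFLOAD_host2dev, plus GOMP_OFFLOAD_run,
   GOMP_OFFLOAD_dev2dev and friends for OpenMP-capable devices and the
   GOMP_OFFLOAD_openacc_* family for OpenACC.  The authoritative
   prototypes live in libgomp-plugin.h; this list is only a reading of
   the lookups above.  */
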
5161 /* This function finalizes all initialized devices. */
5162
5163 static void
5164 gomp_target_fini (void)
5165 {
5166 int i;
5167 for (i = 0; i < num_devices; i++)
5168 {
5169 bool ret = true;
5170 struct gomp_device_descr *devicep = &devices[i];
5171 gomp_mutex_lock (&devicep->lock);
5172 if (devicep->state == GOMP_DEVICE_INITIALIZED)
5173 ret = gomp_fini_device (devicep);
5174 gomp_mutex_unlock (&devicep->lock);
5175 if (!ret)
5176 gomp_fatal ("device finalization failed");
5177 }
5178 }
5179
5180 /* This function initializes the runtime for offloading.
5181 It parses the list of offload plugins and tries to load them.
5182 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5183 will be set, and the array DEVICES initialized, containing descriptors for
5184 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
5185 followed by the others. */
5186
5187 static void
5188 gomp_target_init (void)
5189 {
5190 const char *prefix = "libgomp-plugin-";
5191 const char *suffix = SONAME_SUFFIX (1);
5192 const char *cur, *next;
5193 char *plugin_name;
5194 int i, new_num_devs;
5195 int num_devs = 0, num_devs_openmp;
5196 struct gomp_device_descr *devs = NULL;
5197
5198 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
5199 return;
5200
5201 cur = OFFLOAD_PLUGINS;
5202 if (*cur)
5203 do
5204 {
5205 struct gomp_device_descr current_device;
5206 size_t prefix_len, suffix_len, cur_len;
5207
5208 next = strchr (cur, ',');
5209
5210 prefix_len = strlen (prefix);
5211 cur_len = next ? next - cur : strlen (cur);
5212 suffix_len = strlen (suffix);
5213
5214 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
5215 if (!plugin_name)
5216 {
5217 num_devs = 0;
5218 break;
5219 }
5220
5221 memcpy (plugin_name, prefix, prefix_len);
5222 memcpy (plugin_name + prefix_len, cur, cur_len);
5223 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
5224
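/* Editor's note: for a plugin entry "nvptx" and a typical ELF
   SONAME_SUFFIX of ".so.1" (the real value comes from plugin-suffix.h),
   plugin_name now holds "libgomp-plugin-nvptx.so.1".  */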
5225 if (gomp_load_plugin_for_device (&current_device, plugin_name))
5226 {
5227 int omp_req = omp_requires_mask & ~GOMP_REQUIRES_TARGET_USED;
5228 new_num_devs = current_device.get_num_devices_func (omp_req);
5229 if (gomp_debug_var > 0 && new_num_devs < 0)
5230 {
5231 bool found = false;
5232 int type = current_device.get_type_func ();
5233 for (int img = 0; img < num_offload_images; img++)
5234 if (type == offload_images[img].type)
5235 found = true;
5236 if (found)
5237 {
5238 char buf[sizeof ("unified_address, unified_shared_memory, "
5239 "reverse_offload")];
5240 gomp_requires_to_name (buf, sizeof (buf), omp_req);
5241 char *name = (char *) malloc (cur_len + 1);
5242 memcpy (name, cur, cur_len);
5243 name[cur_len] = '\0';
5244 gomp_debug (1,
5245 "%s devices present but 'omp requires %s' "
5246 "cannot be fulfilled\n", name, buf);
5247 free (name);
5248 }
5249 }
5250 else if (new_num_devs >= 1)
5251 {
5252 /* Augment DEVICES and NUM_DEVICES. */
5253
5254 devs = realloc (devs, (num_devs + new_num_devs)
5255 * sizeof (struct gomp_device_descr));
5256 if (!devs)
5257 {
5258 num_devs = 0;
5259 free (plugin_name);
5260 break;
5261 }
5262
5263 current_device.name = current_device.get_name_func ();
5264 /* current_device.capabilities has already been set. */
5265 current_device.type = current_device.get_type_func ();
5266 current_device.mem_map.root = NULL;
5267 current_device.mem_map_rev.root = NULL;
5268 current_device.state = GOMP_DEVICE_UNINITIALIZED;
5269 for (i = 0; i < new_num_devs; i++)
5270 {
5271 current_device.target_id = i;
5272 devs[num_devs] = current_device;
5273 gomp_mutex_init (&devs[num_devs].lock);
5274 num_devs++;
5275 }
5276 }
5277 }
5278
5279 free (plugin_name);
5280 cur = next + 1;
5281 }
5282 while (next);
5283
5284 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5285 NUM_DEVICES_OPENMP. */
5286 struct gomp_device_descr *devs_s
5287 = malloc (num_devs * sizeof (struct gomp_device_descr));
5288 if (!devs_s)
5289 {
5290 num_devs = 0;
5291 free (devs);
5292 devs = NULL;
5293 }
5294 num_devs_openmp = 0;
5295 for (i = 0; i < num_devs; i++)
5296 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
5297 devs_s[num_devs_openmp++] = devs[i];
5298 int num_devs_after_openmp = num_devs_openmp;
5299 for (i = 0; i < num_devs; i++)
5300 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
5301 devs_s[num_devs_after_openmp++] = devs[i];
5302 free (devs);
5303 devs = devs_s;
5304
5305 for (i = 0; i < num_devs; i++)
5306 {
5307 /* The 'devices' array can be moved (by the realloc call) until we have
5308 found all the plugins, so registering with the OpenACC runtime (which
5309 takes a copy of the pointer argument) must be delayed until now. */
5310 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
5311 goacc_register (&devs[i]);
5312 }
5313 if (gomp_global_icv.default_device_var == INT_MIN)
5314 {
5315 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5316 struct gomp_icv_list *none;
5317 none = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX);
5318 gomp_global_icv.default_device_var = (num_devs_openmp
5319 ? 0 : omp_invalid_device);
5320 none->icvs.default_device_var = gomp_global_icv.default_device_var;
5321 }
5322
5323 num_devices = num_devs;
5324 num_devices_openmp = num_devs_openmp;
5325 devices = devs;
5326 if (atexit (gomp_target_fini) != 0)
5327 gomp_fatal ("atexit failed");
5328 }
5329
5330 #else /* PLUGIN_SUPPORT */
5331 /* If dlfcn.h is unavailable we always fall back to host execution.
5332 GOMP_target* routines are just stubs for this case. */
5333 static void
5334 gomp_target_init (void)
5335 {
5336 }
5337 #endif /* PLUGIN_SUPPORT */