1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <assert.h>
40 #include <errno.h>
41
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
46
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
48
49 static void gomp_target_init (void);
50
51 /* The whole initialization code for offloading plugins is only run once. */
52 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock;
56
57 /* This structure describes an offload image.
58 It contains the type of the target device, a pointer to the host table
59 descriptor, and a pointer to the target data. */
60 struct offload_image_descr {
61 unsigned version;
62 enum offload_target_type type;
63 const void *host_table;
64 const void *target_data;
65 };
66
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr *offload_images;
69
70 /* Total number of offload images. */
71 static int num_offload_images;
72
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr *devices;
75
76 /* Total number of available devices. */
77 static int num_devices;
78
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp;
81
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83
84 static void *
85 gomp_realloc_unlock (void *old, size_t size)
86 {
87 void *ret = realloc (old, size);
88 if (ret == NULL)
89 {
90 gomp_mutex_unlock (&register_lock);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92 }
93 return ret;
94 }
95
96 attribute_hidden void
97 gomp_init_targets_once (void)
98 {
99 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
100 }
101
102 attribute_hidden int
103 gomp_get_num_devices (void)
104 {
105 gomp_init_targets_once ();
106 return num_devices_openmp;
107 }
108
109 static struct gomp_device_descr *
110 resolve_device (int device_id)
111 {
112 if (device_id == GOMP_DEVICE_ICV)
113 {
114 struct gomp_task_icv *icv = gomp_icv (false);
115 device_id = icv->default_device_var;
116 }
117
118 if (device_id < 0 || device_id >= gomp_get_num_devices ())
119 return NULL;
120
121 gomp_mutex_lock (&devices[device_id].lock);
122 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
123 gomp_init_device (&devices[device_id]);
124 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
125 {
126 gomp_mutex_unlock (&devices[device_id].lock);
127 return NULL;
128 }
129 gomp_mutex_unlock (&devices[device_id].lock);
130
131 return &devices[device_id];
132 }
133
134
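/* Look up KEY in MEM_MAP. If KEY denotes a zero-length range
   (host_start == host_end), retry the lookup with the range extended one
   byte past the end, then one byte before the start, so that zero-length
   array sections and bare pointers still find an enclosing mapping. */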
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
137 {
138 if (key->host_start != key->host_end)
139 return splay_tree_lookup (mem_map, key);
140
141 key->host_end++;
142 splay_tree_key n = splay_tree_lookup (mem_map, key);
143 key->host_end--;
144 if (n)
145 return n;
146 key->host_start--;
147 n = splay_tree_lookup (mem_map, key);
148 key->host_start++;
149 if (n)
150 return n;
151 return splay_tree_lookup (mem_map, key);
152 }
153
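/* Like gomp_map_lookup, but for a zero-length KEY only extend the range one
   byte past the end before looking it up. */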
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
156 {
157 if (key->host_start != key->host_end)
158 return splay_tree_lookup (mem_map, key);
159
160 key->host_end++;
161 splay_tree_key n = splay_tree_lookup (mem_map, key);
162 key->host_end--;
163 return n;
164 }
165
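/* Copy SIZE bytes to DSTADDR from SRCADDR using the plugin hook COPY_FUNC of
   DEVICEP. DST and SRC only name the two sides in the error message; on
   failure the device lock is released and we abort via gomp_fatal. */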
166 static inline void
167 gomp_device_copy (struct gomp_device_descr *devicep,
168 bool (*copy_func) (int, void *, const void *, size_t),
169 const char *dst, void *dstaddr,
170 const char *src, const void *srcaddr,
171 size_t size)
172 {
173 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
174 {
175 gomp_mutex_unlock (&devicep->lock);
176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
178 }
179 }
180
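/* Asynchronous variant of gomp_device_copy: enqueue the copy of SIZE bytes
   from SRCADDR to DSTADDR on the async queue AQ via COPY_FUNC. */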
181 static inline void
182 goacc_device_copy_async (struct gomp_device_descr *devicep,
183 bool (*copy_func) (int, void *, const void *, size_t,
184 struct goacc_asyncqueue *),
185 const char *dst, void *dstaddr,
186 const char *src, const void *srcaddr,
187 size_t size, struct goacc_asyncqueue *aq)
188 {
189 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
190 {
191 gomp_mutex_unlock (&devicep->lock);
192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
194 }
195 }
196
197 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
198 host to device memory transfers. */
199
200 struct gomp_coalesce_chunk
201 {
202 /* The starting and ending point of a coalesced chunk of memory. */
203 size_t start, end;
204 };
205
206 struct gomp_coalesce_buf
207 {
208 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
209 it will be copied to the device. */
210 void *buf;
211 struct target_mem_desc *tgt;
212 /* Array of chunks; chunks[i].start and chunks[i].end are the starting and
213 ending offsets, relative to the tgt->tgt_start device address, of a chunk
214 which is to be copied to buf and later copied to the device. */
215 struct gomp_coalesce_chunk *chunks;
216 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
217 be performed. */
218 long chunk_cnt;
219 /* During construction of chunks array, how many memory regions are within
220 the last chunk. If there is just one memory region for a chunk, we copy
221 it directly to device rather than going through buf. */
222 long use_cnt;
223 };
224
225 /* Maximum size of memory region considered for coalescing. Larger copies
226 are performed directly. */
227 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
228
229 /* Maximum size of a gap in between regions to consider them being copied
230 within the same chunk. All the device offsets considered are within
231 newly allocated device memory, so it isn't fatal if we copy some padding
232 in between from host to device. The gaps come either from alignment
233 padding or from memory regions which are not supposed to be copied from
234 host to device (e.g. map(alloc:), map(from:) etc.). */
235 #define MAX_COALESCE_BUF_GAP (4 * 1024)
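
/* For example, with the limits above, two to-be-copied regions at device
   offsets [0, 1000) and [3000, 4000) end up as a single chunk [0, 4000):
   the 2000 byte gap is smaller than MAX_COALESCE_BUF_GAP, so both regions
   can be staged in the host-side buffer and flushed to the device with a
   single copy, padding included. */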
236
237 /* Add a region with tgt_start-relative offset START and length LEN to CBUF. */
238
239 static inline void
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
241 {
242 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
243 return;
244 if (cbuf->chunk_cnt)
245 {
246 if (cbuf->chunk_cnt < 0)
247 return;
248 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
249 {
250 cbuf->chunk_cnt = -1;
251 return;
252 }
253 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
254 {
255 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
256 cbuf->use_cnt++;
257 return;
258 }
259 /* If the last chunk is only used by one mapping, discard it,
260 as it will be one host to device copy anyway and
261 memcpying it around will only waste cycles. */
262 if (cbuf->use_cnt == 1)
263 cbuf->chunk_cnt--;
264 }
265 cbuf->chunks[cbuf->chunk_cnt].start = start;
266 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
267 cbuf->chunk_cnt++;
268 cbuf->use_cnt = 1;
269 }
270
271 /* Return true for mapping kinds which need to copy data from the
272 host to device for regions that weren't previously mapped. */
273
274 static inline bool
275 gomp_to_device_kind_p (int kind)
276 {
277 switch (kind)
278 {
279 case GOMP_MAP_ALLOC:
280 case GOMP_MAP_FROM:
281 case GOMP_MAP_FORCE_ALLOC:
282 case GOMP_MAP_FORCE_FROM:
283 case GOMP_MAP_ALWAYS_FROM:
284 return false;
285 default:
286 return true;
287 }
288 }
289
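/* Copy SZ bytes from host address H to device address D on DEVICEP. If a
   coalesce buffer CBUF is given and the destination offset falls inside one
   of its chunks, only memcpy the data into the host-side staging buffer;
   the whole chunk is flushed to the device later in one transfer.
   Otherwise copy directly, asynchronously when AQ is non-NULL. */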
290 attribute_hidden void
291 gomp_copy_host2dev (struct gomp_device_descr *devicep,
292 struct goacc_asyncqueue *aq,
293 void *d, const void *h, size_t sz,
294 struct gomp_coalesce_buf *cbuf)
295 {
296 if (cbuf)
297 {
298 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
299 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
300 {
301 long first = 0;
302 long last = cbuf->chunk_cnt - 1;
303 while (first <= last)
304 {
305 long middle = (first + last) >> 1;
306 if (cbuf->chunks[middle].end <= doff)
307 first = middle + 1;
308 else if (cbuf->chunks[middle].start <= doff)
309 {
310 if (doff + sz > cbuf->chunks[middle].end)
311 gomp_fatal ("internal libgomp cbuf error");
312 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
313 h, sz);
314 return;
315 }
316 else
317 last = middle - 1;
318 }
319 }
320 }
321 if (__builtin_expect (aq != NULL, 0))
322 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
323 "dev", d, "host", h, sz, aq);
324 else
325 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
326 }
327
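/* Copy SZ bytes from device address D on DEVICEP back to host address H,
   asynchronously when AQ is non-NULL. */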
328 attribute_hidden void
329 gomp_copy_dev2host (struct gomp_device_descr *devicep,
330 struct goacc_asyncqueue *aq,
331 void *h, const void *d, size_t sz)
332 {
333 if (__builtin_expect (aq != NULL, 0))
334 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
335 "host", h, "dev", d, sz, aq);
336 else
337 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
338 }
339
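/* Free the device memory block DEVPTR on DEVICEP via the plugin's free hook;
   release the device lock and abort on failure. */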
340 static void
341 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
342 {
343 if (!devicep->free_func (devicep->target_id, devptr))
344 {
345 gomp_mutex_unlock (&devicep->lock);
346 gomp_fatal ("error in freeing device memory block at %p", devptr);
347 }
348 }
349
350 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351 gomp_map_0len_lookup found oldn for newn.
352 Helper function of gomp_map_vars. */
353
354 static inline void
355 gomp_map_vars_existing (struct gomp_device_descr *devicep,
356 struct goacc_asyncqueue *aq, splay_tree_key oldn,
357 splay_tree_key newn, struct target_var_desc *tgt_var,
358 unsigned char kind, struct gomp_coalesce_buf *cbuf)
359 {
360 tgt_var->key = oldn;
361 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
362 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
363 tgt_var->do_detach = kind == GOMP_MAP_ATTACH;
364 tgt_var->offset = newn->host_start - oldn->host_start;
365 tgt_var->length = newn->host_end - newn->host_start;
366
367 if ((kind & GOMP_MAP_FLAG_FORCE)
368 || oldn->host_start > newn->host_start
369 || oldn->host_end < newn->host_end)
370 {
371 gomp_mutex_unlock (&devicep->lock);
372 gomp_fatal ("Trying to map into device [%p..%p) object when "
373 "[%p..%p) is already mapped",
374 (void *) newn->host_start, (void *) newn->host_end,
375 (void *) oldn->host_start, (void *) oldn->host_end);
376 }
377
378 if (GOMP_MAP_ALWAYS_TO_P (kind))
379 gomp_copy_host2dev (devicep, aq,
380 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
381 + newn->host_start - oldn->host_start),
382 (void *) newn->host_start,
383 newn->host_end - newn->host_start, cbuf);
384
385 if (oldn->refcount != REFCOUNT_INFINITY)
386 oldn->refcount++;
387 }
388
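/* Return the mapping kind at index IDX in KINDS. With SHORT_MAPKIND the
   entries are unsigned shorts whose low byte is the GOMP_MAP_* kind and
   whose upper bits hold the log2 of the alignment; otherwise they are
   unsigned chars with the kind in the low 3 bits. */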
389 static int
390 get_kind (bool short_mapkind, void *kinds, int idx)
391 {
392 return short_mapkind ? ((unsigned short *) kinds)[idx]
393 : ((unsigned char *) kinds)[idx];
394 }
395
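/* Initialize the pointer stored at device offset TARGET_OFFSET within TGT.
   HOST_PTR is the host pointer value; if it is NULL, store NULL. Otherwise
   add BIAS, look up the mapping containing the resulting address, and store
   the corresponding device address minus BIAS. Fatal error if the pointer
   target of the array section was never mapped. */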
396 static void
397 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
398 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
399 struct gomp_coalesce_buf *cbuf)
400 {
401 struct gomp_device_descr *devicep = tgt->device_descr;
402 struct splay_tree_s *mem_map = &devicep->mem_map;
403 struct splay_tree_key_s cur_node;
404
405 cur_node.host_start = host_ptr;
406 if (cur_node.host_start == (uintptr_t) NULL)
407 {
408 cur_node.tgt_offset = (uintptr_t) NULL;
409 gomp_copy_host2dev (devicep, aq,
410 (void *) (tgt->tgt_start + target_offset),
411 (void *) &cur_node.tgt_offset,
412 sizeof (void *), cbuf);
413 return;
414 }
415 /* Add bias to the pointer value. */
416 cur_node.host_start += bias;
417 cur_node.host_end = cur_node.host_start;
418 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
419 if (n == NULL)
420 {
421 gomp_mutex_unlock (&devicep->lock);
422 gomp_fatal ("Pointer target of array section wasn't mapped");
423 }
424 cur_node.host_start -= n->host_start;
425 cur_node.tgt_offset
426 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
427 /* At this point tgt_offset is the target address of the
428 array section. Now subtract bias to get what we want
429 to initialize the pointer with. */
430 cur_node.tgt_offset -= bias;
431 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
432 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
433 }
434
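/* Handle structure element I of a GOMP_MAP_STRUCT group starting at FIRST
   whose enclosing structure mapping N already exists: record the existing
   mapping in TGT->list[i], probing one byte before or after for zero-sized
   elements. Fatal error if the element was mapped separately from the rest
   of the structure. */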
435 static void
436 gomp_map_fields_existing (struct target_mem_desc *tgt,
437 struct goacc_asyncqueue *aq, splay_tree_key n,
438 size_t first, size_t i, void **hostaddrs,
439 size_t *sizes, void *kinds,
440 struct gomp_coalesce_buf *cbuf)
441 {
442 struct gomp_device_descr *devicep = tgt->device_descr;
443 struct splay_tree_s *mem_map = &devicep->mem_map;
444 struct splay_tree_key_s cur_node;
445 int kind;
446 const bool short_mapkind = true;
447 const int typemask = short_mapkind ? 0xff : 0x7;
448
449 cur_node.host_start = (uintptr_t) hostaddrs[i];
450 cur_node.host_end = cur_node.host_start + sizes[i];
451 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
452 kind = get_kind (short_mapkind, kinds, i);
453 if (n2
454 && n2->tgt == n->tgt
455 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
456 {
457 gomp_map_vars_existing (devicep, aq, n2, &cur_node,
458 &tgt->list[i], kind & typemask, cbuf);
459 return;
460 }
461 if (sizes[i] == 0)
462 {
463 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
464 {
465 cur_node.host_start--;
466 n2 = splay_tree_lookup (mem_map, &cur_node);
467 cur_node.host_start++;
468 if (n2
469 && n2->tgt == n->tgt
470 && n2->host_start - n->host_start
471 == n2->tgt_offset - n->tgt_offset)
472 {
473 gomp_map_vars_existing (devicep, aq, n2, &cur_node,
474 &tgt->list[i], kind & typemask, cbuf);
475 return;
476 }
477 }
478 cur_node.host_end++;
479 n2 = splay_tree_lookup (mem_map, &cur_node);
480 cur_node.host_end--;
481 if (n2
482 && n2->tgt == n->tgt
483 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
484 {
485 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
486 kind & typemask, cbuf);
487 return;
488 }
489 }
490 gomp_mutex_unlock (&devicep->lock);
491 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
492 "other mapped elements from the same structure weren't mapped "
493 "together with it", (void *) cur_node.host_start,
494 (void *) cur_node.host_end);
495 }
496
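/* Record an attachment of the pointer at host address ATTACH_TO inside the
   mapped region N: bump its attach count, and on the first attachment write
   the device address of the pointer's target (looked up in MEM_MAP, adjusted
   by BIAS) into the device copy of the pointer, staging through CBUFP if
   given. */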
497 attribute_hidden void
498 gomp_attach_pointer (struct gomp_device_descr *devicep,
499 struct goacc_asyncqueue *aq, splay_tree mem_map,
500 splay_tree_key n, uintptr_t attach_to, size_t bias,
501 struct gomp_coalesce_buf *cbufp)
502 {
503 struct splay_tree_key_s s;
504 size_t size, idx;
505
506 if (n == NULL)
507 {
508 gomp_mutex_unlock (&devicep->lock);
509 gomp_fatal ("enclosing struct not mapped for attach");
510 }
511
512 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
513 /* We might have a pointer in a packed struct: however we cannot have more
514 than one such pointer in each pointer-sized portion of the struct, so
515 this is safe. */
516 idx = (attach_to - n->host_start) / sizeof (void *);
517
518 if (!n->aux)
519 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
520
521 if (!n->aux->attach_count)
522 n->aux->attach_count
523 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
524
525 if (n->aux->attach_count[idx] < UINTPTR_MAX)
526 n->aux->attach_count[idx]++;
527 else
528 {
529 gomp_mutex_unlock (&devicep->lock);
530 gomp_fatal ("attach count overflow");
531 }
532
533 if (n->aux->attach_count[idx] == 1)
534 {
535 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
536 - n->host_start;
537 uintptr_t target = (uintptr_t) *(void **) attach_to;
538 splay_tree_key tn;
539 uintptr_t data;
540
541 if ((void *) target == NULL)
542 {
543 gomp_mutex_unlock (&devicep->lock);
544 gomp_fatal ("attempt to attach null pointer");
545 }
546
547 s.host_start = target + bias;
548 s.host_end = s.host_start + 1;
549 tn = splay_tree_lookup (mem_map, &s);
550
551 if (!tn)
552 {
553 gomp_mutex_unlock (&devicep->lock);
554 gomp_fatal ("pointer target not mapped for attach");
555 }
556
557 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
558
559 gomp_debug (1,
560 "%s: attaching host %p, target %p (struct base %p) to %p\n",
561 __FUNCTION__, (void *) attach_to, (void *) devptr,
562 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
563
564 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
565 sizeof (void *), cbufp);
566 }
567 else
568 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
569 (void *) attach_to, (int) n->aux->attach_count[idx]);
570 }
571
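/* Undo an attachment of the pointer at host address DETACH_FROM inside N:
   decrement its attach count (forcing it to drop to zero if FINALIZE), and
   once it reaches zero restore the device copy of the pointer to the host
   pointer value. */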
572 attribute_hidden void
573 gomp_detach_pointer (struct gomp_device_descr *devicep,
574 struct goacc_asyncqueue *aq, splay_tree_key n,
575 uintptr_t detach_from, bool finalize,
576 struct gomp_coalesce_buf *cbufp)
577 {
578 size_t idx;
579
580 if (n == NULL)
581 {
582 gomp_mutex_unlock (&devicep->lock);
583 gomp_fatal ("enclosing struct not mapped for detach");
584 }
585
586 idx = (detach_from - n->host_start) / sizeof (void *);
587
588 if (!n->aux || !n->aux->attach_count)
589 {
590 gomp_mutex_unlock (&devicep->lock);
591 gomp_fatal ("no attachment counters for struct");
592 }
593
594 if (finalize)
595 n->aux->attach_count[idx] = 1;
596
597 if (n->aux->attach_count[idx] == 0)
598 {
599 gomp_mutex_unlock (&devicep->lock);
600 gomp_fatal ("attach count underflow");
601 }
602 else
603 n->aux->attach_count[idx]--;
604
605 if (n->aux->attach_count[idx] == 0)
606 {
607 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
608 - n->host_start;
609 uintptr_t target = (uintptr_t) *(void **) detach_from;
610
611 gomp_debug (1,
612 "%s: detaching host %p, target %p (struct base %p) to %p\n",
613 __FUNCTION__, (void *) detach_from, (void *) devptr,
614 (void *) (n->tgt->tgt_start + n->tgt_offset),
615 (void *) target);
616
617 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
618 sizeof (void *), cbufp);
619 }
620 else
621 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
622 (void *) detach_from, (int) n->aux->attach_count[idx]);
623 }
624
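/* Return the device address to pass for the I-th mapping in TGT: either
   derived from its splay tree key, or, for entries without a key, from the
   special OFFSET_{INLINED,POINTER,STRUCT} encodings or from the newly
   allocated block at tgt->tgt_start. */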
625 attribute_hidden uintptr_t
626 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
627 {
628 if (tgt->list[i].key != NULL)
629 return tgt->list[i].key->tgt->tgt_start
630 + tgt->list[i].key->tgt_offset
631 + tgt->list[i].offset;
632
633 switch (tgt->list[i].offset)
634 {
635 case OFFSET_INLINED:
636 return (uintptr_t) hostaddrs[i];
637
638 case OFFSET_POINTER:
639 return 0;
640
641 case OFFSET_STRUCT:
642 return tgt->list[i + 1].key->tgt->tgt_start
643 + tgt->list[i + 1].key->tgt_offset
644 + tgt->list[i + 1].offset
645 + (uintptr_t) hostaddrs[i]
646 - (uintptr_t) hostaddrs[i + 1];
647
648 default:
649 return tgt->tgt_start + tgt->list[i].offset;
650 }
651 }
652
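/* Map MAPNUM variables described by HOSTADDRS/SIZES/KINDS onto DEVICEP,
   reusing existing mappings where possible, allocating one contiguous block
   of device memory for the new ones, and copying data to the device as the
   mapping kinds require (small adjacent copies are coalesced through a
   staging buffer). For GOMP_MAP_VARS_TARGET the device addresses of all
   mappings are additionally stored at the start of the block. Returns a
   target_mem_desc describing the mappings, or NULL if the device has been
   finalized. AQ, if non-NULL, is the async queue to use for the copies. */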
653 static inline __attribute__((always_inline)) struct target_mem_desc *
654 gomp_map_vars_internal (struct gomp_device_descr *devicep,
655 struct goacc_asyncqueue *aq, size_t mapnum,
656 void **hostaddrs, void **devaddrs, size_t *sizes,
657 void *kinds, bool short_mapkind,
658 enum gomp_map_vars_kind pragma_kind)
659 {
660 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
661 bool has_firstprivate = false;
662 const int rshift = short_mapkind ? 8 : 3;
663 const int typemask = short_mapkind ? 0xff : 0x7;
664 struct splay_tree_s *mem_map = &devicep->mem_map;
665 struct splay_tree_key_s cur_node;
666 struct target_mem_desc *tgt
667 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
668 tgt->list_count = mapnum;
669 tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
670 || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
671 tgt->device_descr = devicep;
672 tgt->prev = NULL;
673 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
674
675 if (mapnum == 0)
676 {
677 tgt->tgt_start = 0;
678 tgt->tgt_end = 0;
679 return tgt;
680 }
681
682 tgt_align = sizeof (void *);
683 tgt_size = 0;
684 cbuf.chunks = NULL;
685 cbuf.chunk_cnt = -1;
686 cbuf.use_cnt = 0;
687 cbuf.buf = NULL;
688 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
689 {
690 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
691 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
692 cbuf.chunk_cnt = 0;
693 }
694 if (pragma_kind == GOMP_MAP_VARS_TARGET)
695 {
696 size_t align = 4 * sizeof (void *);
697 tgt_align = align;
698 tgt_size = mapnum * sizeof (void *);
699 cbuf.chunk_cnt = 1;
700 cbuf.use_cnt = 1 + (mapnum > 1);
701 cbuf.chunks[0].start = 0;
702 cbuf.chunks[0].end = tgt_size;
703 }
704
705 gomp_mutex_lock (&devicep->lock);
706 if (devicep->state == GOMP_DEVICE_FINALIZED)
707 {
708 gomp_mutex_unlock (&devicep->lock);
709 free (tgt);
710 return NULL;
711 }
712
713 for (i = 0; i < mapnum; i++)
714 {
715 int kind = get_kind (short_mapkind, kinds, i);
716 if (hostaddrs[i] == NULL
717 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
718 {
719 tgt->list[i].key = NULL;
720 tgt->list[i].offset = OFFSET_INLINED;
721 continue;
722 }
723 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
724 {
725 tgt->list[i].key = NULL;
726 if (!not_found_cnt)
727 {
728 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
729 on a separate construct prior to using use_device_{addr,ptr}.
730 In OpenMP 5.0, map directives need to be ordered by the
731 middle-end before the use_device_* clauses. If
732 !not_found_cnt, all mappings requested (if any) are already
733 mapped, so use_device_{addr,ptr} can be resolved right away.
734 Otherwise, if not_found_cnt, gomp_map_lookup might fail
735 now but would succeed after performing the mappings in the
736 following loop. We can't defer this always to the second
737 loop, because it is not even invoked when !not_found_cnt
738 after the first loop. */
739 cur_node.host_start = (uintptr_t) hostaddrs[i];
740 cur_node.host_end = cur_node.host_start;
741 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
742 if (n == NULL)
743 {
744 gomp_mutex_unlock (&devicep->lock);
745 gomp_fatal ("use_device_ptr pointer wasn't mapped");
746 }
747 cur_node.host_start -= n->host_start;
748 hostaddrs[i]
749 = (void *) (n->tgt->tgt_start + n->tgt_offset
750 + cur_node.host_start);
751 tgt->list[i].offset = ~(uintptr_t) 0;
752 }
753 else
754 tgt->list[i].offset = 0;
755 continue;
756 }
757 else if ((kind & typemask) == GOMP_MAP_STRUCT)
758 {
759 size_t first = i + 1;
760 size_t last = i + sizes[i];
761 cur_node.host_start = (uintptr_t) hostaddrs[i];
762 cur_node.host_end = (uintptr_t) hostaddrs[last]
763 + sizes[last];
764 tgt->list[i].key = NULL;
765 tgt->list[i].offset = OFFSET_STRUCT;
766 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
767 if (n == NULL)
768 {
769 size_t align = (size_t) 1 << (kind >> rshift);
770 if (tgt_align < align)
771 tgt_align = align;
772 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
773 tgt_size = (tgt_size + align - 1) & ~(align - 1);
774 tgt_size += cur_node.host_end - cur_node.host_start;
775 not_found_cnt += last - i;
776 for (i = first; i <= last; i++)
777 {
778 tgt->list[i].key = NULL;
779 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
780 & typemask))
781 gomp_coalesce_buf_add (&cbuf,
782 tgt_size - cur_node.host_end
783 + (uintptr_t) hostaddrs[i],
784 sizes[i]);
785 }
786 i--;
787 continue;
788 }
789 for (i = first; i <= last; i++)
790 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
791 sizes, kinds, NULL);
792 i--;
793 continue;
794 }
795 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
796 {
797 tgt->list[i].key = NULL;
798 tgt->list[i].offset = OFFSET_POINTER;
799 has_firstprivate = true;
800 continue;
801 }
802 else if ((kind & typemask) == GOMP_MAP_ATTACH)
803 {
804 tgt->list[i].key = NULL;
805 has_firstprivate = true;
806 continue;
807 }
808 cur_node.host_start = (uintptr_t) hostaddrs[i];
809 if (!GOMP_MAP_POINTER_P (kind & typemask)
810 && (kind & typemask) != GOMP_MAP_ATTACH)
811 cur_node.host_end = cur_node.host_start + sizes[i];
812 else
813 cur_node.host_end = cur_node.host_start + sizeof (void *);
814 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
815 {
816 tgt->list[i].key = NULL;
817
818 size_t align = (size_t) 1 << (kind >> rshift);
819 if (tgt_align < align)
820 tgt_align = align;
821 tgt_size = (tgt_size + align - 1) & ~(align - 1);
822 gomp_coalesce_buf_add (&cbuf, tgt_size,
823 cur_node.host_end - cur_node.host_start);
824 tgt_size += cur_node.host_end - cur_node.host_start;
825 has_firstprivate = true;
826 continue;
827 }
828 splay_tree_key n;
829 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
830 {
831 n = gomp_map_0len_lookup (mem_map, &cur_node);
832 if (!n)
833 {
834 tgt->list[i].key = NULL;
835 tgt->list[i].offset = OFFSET_POINTER;
836 continue;
837 }
838 }
839 else
840 n = splay_tree_lookup (mem_map, &cur_node);
841 if (n && n->refcount != REFCOUNT_LINK)
842 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
843 kind & typemask, NULL);
844 else
845 {
846 tgt->list[i].key = NULL;
847
848 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
849 {
850 /* Not present, hence skip the entry, including its MAP_POINTER
851 if one exists. */
852 tgt->list[i].offset = OFFSET_POINTER;
853 if (i + 1 < mapnum
854 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
855 == GOMP_MAP_POINTER))
856 {
857 ++i;
858 tgt->list[i].key = NULL;
859 tgt->list[i].offset = 0;
860 }
861 continue;
862 }
863 size_t align = (size_t) 1 << (kind >> rshift);
864 not_found_cnt++;
865 if (tgt_align < align)
866 tgt_align = align;
867 tgt_size = (tgt_size + align - 1) & ~(align - 1);
868 if (gomp_to_device_kind_p (kind & typemask))
869 gomp_coalesce_buf_add (&cbuf, tgt_size,
870 cur_node.host_end - cur_node.host_start);
871 tgt_size += cur_node.host_end - cur_node.host_start;
872 if ((kind & typemask) == GOMP_MAP_TO_PSET)
873 {
874 size_t j;
875 for (j = i + 1; j < mapnum; j++)
876 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
877 & typemask))
878 break;
879 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
880 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
881 > cur_node.host_end))
882 break;
883 else
884 {
885 tgt->list[j].key = NULL;
886 i++;
887 }
888 }
889 }
890 }
891
892 if (devaddrs)
893 {
894 if (mapnum != 1)
895 {
896 gomp_mutex_unlock (&devicep->lock);
897 gomp_fatal ("unexpected aggregation");
898 }
899 tgt->to_free = devaddrs[0];
900 tgt->tgt_start = (uintptr_t) tgt->to_free;
901 tgt->tgt_end = tgt->tgt_start + sizes[0];
902 }
903 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
904 {
905 /* Allocate a block of tgt_size bytes aligned to tgt_align. */
906 /* FIXME: Perhaps change interface to allocate properly aligned
907 memory. */
908 tgt->to_free = devicep->alloc_func (devicep->target_id,
909 tgt_size + tgt_align - 1);
910 if (!tgt->to_free)
911 {
912 gomp_mutex_unlock (&devicep->lock);
913 gomp_fatal ("device memory allocation fail");
914 }
915
916 tgt->tgt_start = (uintptr_t) tgt->to_free;
917 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
918 tgt->tgt_end = tgt->tgt_start + tgt_size;
919
920 if (cbuf.use_cnt == 1)
921 cbuf.chunk_cnt--;
922 if (cbuf.chunk_cnt > 0)
923 {
924 cbuf.buf
925 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
926 if (cbuf.buf)
927 {
928 cbuf.tgt = tgt;
929 cbufp = &cbuf;
930 }
931 }
932 }
933 else
934 {
935 tgt->to_free = NULL;
936 tgt->tgt_start = 0;
937 tgt->tgt_end = 0;
938 }
939
940 tgt_size = 0;
941 if (pragma_kind == GOMP_MAP_VARS_TARGET)
942 tgt_size = mapnum * sizeof (void *);
943
944 tgt->array = NULL;
945 if (not_found_cnt || has_firstprivate)
946 {
947 if (not_found_cnt)
948 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
949 splay_tree_node array = tgt->array;
950 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
951 uintptr_t field_tgt_base = 0;
952
953 for (i = 0; i < mapnum; i++)
954 if (tgt->list[i].key == NULL)
955 {
956 int kind = get_kind (short_mapkind, kinds, i);
957 if (hostaddrs[i] == NULL)
958 continue;
959 switch (kind & typemask)
960 {
961 size_t align, len, first, last;
962 splay_tree_key n;
963 case GOMP_MAP_FIRSTPRIVATE:
964 align = (size_t) 1 << (kind >> rshift);
965 tgt_size = (tgt_size + align - 1) & ~(align - 1);
966 tgt->list[i].offset = tgt_size;
967 len = sizes[i];
968 gomp_copy_host2dev (devicep, aq,
969 (void *) (tgt->tgt_start + tgt_size),
970 (void *) hostaddrs[i], len, cbufp);
971 tgt_size += len;
972 continue;
973 case GOMP_MAP_FIRSTPRIVATE_INT:
974 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
975 continue;
976 case GOMP_MAP_USE_DEVICE_PTR:
977 if (tgt->list[i].offset == 0)
978 {
979 cur_node.host_start = (uintptr_t) hostaddrs[i];
980 cur_node.host_end = cur_node.host_start;
981 n = gomp_map_lookup (mem_map, &cur_node);
982 if (n == NULL)
983 {
984 gomp_mutex_unlock (&devicep->lock);
985 gomp_fatal ("use_device_ptr pointer wasn't mapped");
986 }
987 cur_node.host_start -= n->host_start;
988 hostaddrs[i]
989 = (void *) (n->tgt->tgt_start + n->tgt_offset
990 + cur_node.host_start);
991 tgt->list[i].offset = ~(uintptr_t) 0;
992 }
993 continue;
994 case GOMP_MAP_STRUCT:
995 first = i + 1;
996 last = i + sizes[i];
997 cur_node.host_start = (uintptr_t) hostaddrs[i];
998 cur_node.host_end = (uintptr_t) hostaddrs[last]
999 + sizes[last];
1000 if (tgt->list[first].key != NULL)
1001 continue;
1002 n = splay_tree_lookup (mem_map, &cur_node);
1003 if (n == NULL)
1004 {
1005 size_t align = (size_t) 1 << (kind >> rshift);
1006 tgt_size -= (uintptr_t) hostaddrs[first]
1007 - (uintptr_t) hostaddrs[i];
1008 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1009 tgt_size += (uintptr_t) hostaddrs[first]
1010 - (uintptr_t) hostaddrs[i];
1011 field_tgt_base = (uintptr_t) hostaddrs[first];
1012 field_tgt_offset = tgt_size;
1013 field_tgt_clear = last;
1014 tgt_size += cur_node.host_end
1015 - (uintptr_t) hostaddrs[first];
1016 continue;
1017 }
1018 for (i = first; i <= last; i++)
1019 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1020 sizes, kinds, cbufp);
1021 i--;
1022 continue;
1023 case GOMP_MAP_ALWAYS_POINTER:
1024 cur_node.host_start = (uintptr_t) hostaddrs[i];
1025 cur_node.host_end = cur_node.host_start + sizeof (void *);
1026 n = splay_tree_lookup (mem_map, &cur_node);
1027 if (n == NULL
1028 || n->host_start > cur_node.host_start
1029 || n->host_end < cur_node.host_end)
1030 {
1031 gomp_mutex_unlock (&devicep->lock);
1032 gomp_fatal ("always pointer not mapped");
1033 }
1034 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1035 != GOMP_MAP_ALWAYS_POINTER)
1036 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1037 if (cur_node.tgt_offset)
1038 cur_node.tgt_offset -= sizes[i];
1039 gomp_copy_host2dev (devicep, aq,
1040 (void *) (n->tgt->tgt_start
1041 + n->tgt_offset
1042 + cur_node.host_start
1043 - n->host_start),
1044 (void *) &cur_node.tgt_offset,
1045 sizeof (void *), cbufp);
1046 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1047 + cur_node.host_start - n->host_start;
1048 continue;
1049 case GOMP_MAP_IF_PRESENT:
1050 /* Not present - otherwise handled above. Skip over its
1051 MAP_POINTER as well. */
1052 if (i + 1 < mapnum
1053 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1054 == GOMP_MAP_POINTER))
1055 ++i;
1056 continue;
1057 case GOMP_MAP_ATTACH:
1058 {
1059 cur_node.host_start = (uintptr_t) hostaddrs[i];
1060 cur_node.host_end = cur_node.host_start + sizeof (void *);
1061 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1062 if (n != NULL)
1063 {
1064 tgt->list[i].key = n;
1065 tgt->list[i].offset = cur_node.host_start - n->host_start;
1066 tgt->list[i].length = n->host_end - n->host_start;
1067 tgt->list[i].copy_from = false;
1068 tgt->list[i].always_copy_from = false;
1069 tgt->list[i].do_detach
1070 = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
1071 n->refcount++;
1072 }
1073 else
1074 {
1075 gomp_mutex_unlock (&devicep->lock);
1076 gomp_fatal ("outer struct not mapped for attach");
1077 }
1078 gomp_attach_pointer (devicep, aq, mem_map, n,
1079 (uintptr_t) hostaddrs[i], sizes[i],
1080 cbufp);
1081 continue;
1082 }
1083 default:
1084 break;
1085 }
1086 splay_tree_key k = &array->key;
1087 k->host_start = (uintptr_t) hostaddrs[i];
1088 if (!GOMP_MAP_POINTER_P (kind & typemask))
1089 k->host_end = k->host_start + sizes[i];
1090 else
1091 k->host_end = k->host_start + sizeof (void *);
1092 splay_tree_key n = splay_tree_lookup (mem_map, k);
1093 if (n && n->refcount != REFCOUNT_LINK)
1094 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1095 kind & typemask, cbufp);
1096 else
1097 {
1098 k->aux = NULL;
1099 if (n && n->refcount == REFCOUNT_LINK)
1100 {
1101 /* Replace target address of the pointer with target address
1102 of mapped object in the splay tree. */
1103 splay_tree_remove (mem_map, n);
1104 k->aux
1105 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1106 k->aux->link_key = n;
1107 }
1108 size_t align = (size_t) 1 << (kind >> rshift);
1109 tgt->list[i].key = k;
1110 k->tgt = tgt;
1111 if (field_tgt_clear != FIELD_TGT_EMPTY)
1112 {
1113 k->tgt_offset = k->host_start - field_tgt_base
1114 + field_tgt_offset;
1115 if (i == field_tgt_clear)
1116 field_tgt_clear = FIELD_TGT_EMPTY;
1117 }
1118 else
1119 {
1120 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1121 k->tgt_offset = tgt_size;
1122 tgt_size += k->host_end - k->host_start;
1123 }
1124 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1125 tgt->list[i].always_copy_from
1126 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1127 tgt->list[i].do_detach = false;
1128 tgt->list[i].offset = 0;
1129 tgt->list[i].length = k->host_end - k->host_start;
1130 k->refcount = 1;
1131 k->virtual_refcount = 0;
1132 tgt->refcount++;
1133 array->left = NULL;
1134 array->right = NULL;
1135 splay_tree_insert (mem_map, array);
1136 switch (kind & typemask)
1137 {
1138 case GOMP_MAP_ALLOC:
1139 case GOMP_MAP_FROM:
1140 case GOMP_MAP_FORCE_ALLOC:
1141 case GOMP_MAP_FORCE_FROM:
1142 case GOMP_MAP_ALWAYS_FROM:
1143 break;
1144 case GOMP_MAP_TO:
1145 case GOMP_MAP_TOFROM:
1146 case GOMP_MAP_FORCE_TO:
1147 case GOMP_MAP_FORCE_TOFROM:
1148 case GOMP_MAP_ALWAYS_TO:
1149 case GOMP_MAP_ALWAYS_TOFROM:
1150 gomp_copy_host2dev (devicep, aq,
1151 (void *) (tgt->tgt_start
1152 + k->tgt_offset),
1153 (void *) k->host_start,
1154 k->host_end - k->host_start, cbufp);
1155 break;
1156 case GOMP_MAP_POINTER:
1157 gomp_map_pointer (tgt, aq,
1158 (uintptr_t) *(void **) k->host_start,
1159 k->tgt_offset, sizes[i], cbufp);
1160 break;
1161 case GOMP_MAP_TO_PSET:
1162 gomp_copy_host2dev (devicep, aq,
1163 (void *) (tgt->tgt_start
1164 + k->tgt_offset),
1165 (void *) k->host_start,
1166 k->host_end - k->host_start, cbufp);
1167
1168 for (j = i + 1; j < mapnum; j++)
1169 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
1170 j)
1171 & typemask))
1172 break;
1173 else if ((uintptr_t) hostaddrs[j] < k->host_start
1174 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1175 > k->host_end))
1176 break;
1177 else
1178 {
1179 tgt->list[j].key = k;
1180 tgt->list[j].copy_from = false;
1181 tgt->list[j].always_copy_from = false;
1182 tgt->list[j].do_detach = false;
1183 if (k->refcount != REFCOUNT_INFINITY)
1184 k->refcount++;
1185 gomp_map_pointer (tgt, aq,
1186 (uintptr_t) *(void **) hostaddrs[j],
1187 k->tgt_offset
1188 + ((uintptr_t) hostaddrs[j]
1189 - k->host_start),
1190 sizes[j], cbufp);
1191 i++;
1192 }
1193 break;
1194 case GOMP_MAP_FORCE_PRESENT:
1195 {
1196 /* We already looked up the memory region above and it
1197 was missing. */
1198 size_t size = k->host_end - k->host_start;
1199 gomp_mutex_unlock (&devicep->lock);
1200 #ifdef HAVE_INTTYPES_H
1201 gomp_fatal ("present clause: !acc_is_present (%p, "
1202 "%"PRIu64" (0x%"PRIx64"))",
1203 (void *) k->host_start,
1204 (uint64_t) size, (uint64_t) size);
1205 #else
1206 gomp_fatal ("present clause: !acc_is_present (%p, "
1207 "%lu (0x%lx))", (void *) k->host_start,
1208 (unsigned long) size, (unsigned long) size);
1209 #endif
1210 }
1211 break;
1212 case GOMP_MAP_FORCE_DEVICEPTR:
1213 assert (k->host_end - k->host_start == sizeof (void *));
1214 gomp_copy_host2dev (devicep, aq,
1215 (void *) (tgt->tgt_start
1216 + k->tgt_offset),
1217 (void *) k->host_start,
1218 sizeof (void *), cbufp);
1219 break;
1220 default:
1221 gomp_mutex_unlock (&devicep->lock);
1222 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1223 kind);
1224 }
1225
1226 if (k->aux && k->aux->link_key)
1227 {
1228 /* Set link pointer on target to the device address of the
1229 mapped object. */
1230 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1231 /* We intentionally do not use coalescing here, as it's not
1232 data allocated by the current call to this function. */
1233 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1234 &tgt_addr, sizeof (void *), NULL);
1235 }
1236 array++;
1237 }
1238 }
1239 }
1240
1241 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1242 {
1243 for (i = 0; i < mapnum; i++)
1244 {
1245 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1246 gomp_copy_host2dev (devicep, aq,
1247 (void *) (tgt->tgt_start + i * sizeof (void *)),
1248 (void *) &cur_node.tgt_offset, sizeof (void *),
1249 cbufp);
1250 }
1251 }
1252
1253 if (cbufp)
1254 {
1255 long c = 0;
1256 for (c = 0; c < cbuf.chunk_cnt; ++c)
1257 gomp_copy_host2dev (devicep, aq,
1258 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1259 (char *) cbuf.buf + (cbuf.chunks[c].start
1260 - cbuf.chunks[0].start),
1261 cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
1262 free (cbuf.buf);
1263 cbuf.buf = NULL;
1264 cbufp = NULL;
1265 }
1266
1267 /* If the variable from "omp target enter data" map-list was already mapped,
1268 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1269 gomp_exit_data. */
1270 if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
1271 || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
1272 && tgt->refcount == 0)
1273 {
1274 /* If we're about to discard a target_mem_desc with no "structural"
1275 references (tgt->refcount == 0), any splay keys linked in the tgt's
1276 list must have their virtual refcount incremented to represent that
1277 "lost" reference in order to implement the semantics of the OpenACC
1278 "present increment" operation properly. */
1279 if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
1280 for (i = 0; i < tgt->list_count; i++)
1281 if (tgt->list[i].key)
1282 tgt->list[i].key->virtual_refcount++;
1283
1284 free (tgt);
1285 tgt = NULL;
1286 }
1287
1288 gomp_mutex_unlock (&devicep->lock);
1289 return tgt;
1290 }
1291
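/* Synchronous wrapper around gomp_map_vars_internal; see there for the
   parameter description. */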
1292 attribute_hidden struct target_mem_desc *
1293 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1294 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1295 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
1296 {
1297 return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1298 sizes, kinds, short_mapkind, pragma_kind);
1299 }
1300
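/* Asynchronous variant of gomp_map_vars, performing the copies on the
   async queue AQ. */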
1301 attribute_hidden struct target_mem_desc *
1302 gomp_map_vars_async (struct gomp_device_descr *devicep,
1303 struct goacc_asyncqueue *aq, size_t mapnum,
1304 void **hostaddrs, void **devaddrs, size_t *sizes,
1305 void *kinds, bool short_mapkind,
1306 enum gomp_map_vars_kind pragma_kind)
1307 {
1308 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1309 sizes, kinds, short_mapkind, pragma_kind);
1310 }
1311
1312 static void
1313 gomp_unmap_tgt (struct target_mem_desc *tgt)
1314 {
1315 /* Deallocate the tgt->tgt_start .. tgt->tgt_end region on the target. */
1316 if (tgt->tgt_end)
1317 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1318
1319 free (tgt->array);
1320 free (tgt);
1321 }
1322
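/* Drop one reference from the target_mem_desc PTR; when the last reference
   goes away, unmap it (freeing the device memory block and the descriptor)
   and return true. */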
1323 static bool
1324 gomp_unref_tgt (void *ptr)
1325 {
1326 bool is_tgt_unmapped = false;
1327
1328 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1329
1330 if (tgt->refcount > 1)
1331 tgt->refcount--;
1332 else
1333 {
1334 gomp_unmap_tgt (tgt);
1335 is_tgt_unmapped = true;
1336 }
1337
1338 return is_tgt_unmapped;
1339 }
1340
1341 static void
1342 gomp_unref_tgt_void (void *ptr)
1343 {
1344 (void) gomp_unref_tgt (ptr);
1345 }
1346
1347 static inline __attribute__((always_inline)) bool
1348 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1349 struct goacc_asyncqueue *aq)
1350 {
1351 bool is_tgt_unmapped = false;
1352 splay_tree_remove (&devicep->mem_map, k);
1353 if (k->aux)
1354 {
1355 if (k->aux->link_key)
1356 splay_tree_insert (&devicep->mem_map,
1357 (splay_tree_node) k->aux->link_key);
1358 if (k->aux->attach_count)
1359 free (k->aux->attach_count);
1360 free (k->aux);
1361 k->aux = NULL;
1362 }
1363 if (aq)
1364 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1365 (void *) k->tgt);
1366 else
1367 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1368 return is_tgt_unmapped;
1369 }
1370
1371 attribute_hidden bool
1372 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1373 {
1374 return gomp_remove_var_internal (devicep, k, NULL);
1375 }
1376
1377 /* Remove a variable asynchronously. This actually removes the variable
1378 mapping immediately, but retains the linked target_mem_desc until the
1379 asynchronous operation has completed (as it may still refer to target
1380 memory). The device lock must be held before entry, and remains locked on
1381 exit. */
1382
1383 attribute_hidden void
1384 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1385 struct goacc_asyncqueue *aq)
1386 {
1387 (void) gomp_remove_var_internal (devicep, k, aq);
1388 }
1389
1390 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1391 variables back from device to host; if it is false, it is assumed that this
1392 has been done already. */
1393
1394 static inline __attribute__((always_inline)) void
1395 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1396 struct goacc_asyncqueue *aq)
1397 {
1398 struct gomp_device_descr *devicep = tgt->device_descr;
1399
1400 if (tgt->list_count == 0)
1401 {
1402 free (tgt);
1403 return;
1404 }
1405
1406 gomp_mutex_lock (&devicep->lock);
1407 if (devicep->state == GOMP_DEVICE_FINALIZED)
1408 {
1409 gomp_mutex_unlock (&devicep->lock);
1410 free (tgt->array);
1411 free (tgt);
1412 return;
1413 }
1414
1415 size_t i;
1416
1417 /* We must perform detachments before any copies back to the host. */
1418 for (i = 0; i < tgt->list_count; i++)
1419 {
1420 splay_tree_key k = tgt->list[i].key;
1421
1422 if (k != NULL && tgt->list[i].do_detach)
1423 gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1424 + tgt->list[i].offset,
1425 k->refcount == 1, NULL);
1426 }
1427
1428 for (i = 0; i < tgt->list_count; i++)
1429 {
1430 splay_tree_key k = tgt->list[i].key;
1431 if (k == NULL)
1432 continue;
1433
1434 bool do_unmap = false;
1435 if (k->tgt == tgt
1436 && k->virtual_refcount > 0
1437 && k->refcount != REFCOUNT_INFINITY)
1438 {
1439 k->virtual_refcount--;
1440 k->refcount--;
1441 }
1442 else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1443 k->refcount--;
1444 else if (k->refcount == 1)
1445 {
1446 k->refcount--;
1447 do_unmap = true;
1448 }
1449
1450 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1451 || tgt->list[i].always_copy_from)
1452 gomp_copy_dev2host (devicep, aq,
1453 (void *) (k->host_start + tgt->list[i].offset),
1454 (void *) (k->tgt->tgt_start + k->tgt_offset
1455 + tgt->list[i].offset),
1456 tgt->list[i].length);
1457 if (do_unmap)
1458 {
1459 struct target_mem_desc *k_tgt = k->tgt;
1460 bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1461 /* It would be bad if TGT got unmapped while we're still iterating
1462 over its LIST_COUNT, and also expect to use it in the following
1463 code. */
1464 assert (!is_tgt_unmapped
1465 || k_tgt != tgt);
1466 }
1467 }
1468
1469 if (aq)
1470 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1471 (void *) tgt);
1472 else
1473 gomp_unref_tgt ((void *) tgt);
1474
1475 gomp_mutex_unlock (&devicep->lock);
1476 }
1477
1478 attribute_hidden void
1479 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1480 {
1481 gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
1482 }
1483
1484 attribute_hidden void
1485 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
1486 struct goacc_asyncqueue *aq)
1487 {
1488 gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
1489 }
1490
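/* Implement the target update operation: for each of the MAPNUM entries with
   a nonzero size, find the existing device mapping and copy the data to
   and/or from the device as the mapping kind in KINDS requests. Fatal error
   if an entry is only partially mapped on the device. */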
1491 static void
1492 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1493 size_t *sizes, void *kinds, bool short_mapkind)
1494 {
1495 size_t i;
1496 struct splay_tree_key_s cur_node;
1497 const int typemask = short_mapkind ? 0xff : 0x7;
1498
1499 if (!devicep)
1500 return;
1501
1502 if (mapnum == 0)
1503 return;
1504
1505 gomp_mutex_lock (&devicep->lock);
1506 if (devicep->state == GOMP_DEVICE_FINALIZED)
1507 {
1508 gomp_mutex_unlock (&devicep->lock);
1509 return;
1510 }
1511
1512 for (i = 0; i < mapnum; i++)
1513 if (sizes[i])
1514 {
1515 cur_node.host_start = (uintptr_t) hostaddrs[i];
1516 cur_node.host_end = cur_node.host_start + sizes[i];
1517 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1518 if (n)
1519 {
1520 int kind = get_kind (short_mapkind, kinds, i);
1521 if (n->host_start > cur_node.host_start
1522 || n->host_end < cur_node.host_end)
1523 {
1524 gomp_mutex_unlock (&devicep->lock);
1525 gomp_fatal ("Trying to update [%p..%p) object when "
1526 "only [%p..%p) is mapped",
1527 (void *) cur_node.host_start,
1528 (void *) cur_node.host_end,
1529 (void *) n->host_start,
1530 (void *) n->host_end);
1531 }
1532
1533
1534 void *hostaddr = (void *) cur_node.host_start;
1535 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1536 + cur_node.host_start - n->host_start);
1537 size_t size = cur_node.host_end - cur_node.host_start;
1538
1539 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1540 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1541 NULL);
1542 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1543 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1544 }
1545 }
1546 gomp_mutex_unlock (&devicep->lock);
1547 }
1548
1549 /* Load the image pointed to by TARGET_DATA to the device specified by DEVICEP,
1550 and insert into the splay tree the mapping between addresses from HOST_TABLE
1551 and from the loaded target image. We rely on the host and device compilers
1552 emitting variables and functions in the same order. */
1553
1554 static void
1555 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1556 const void *host_table, const void *target_data,
1557 bool is_register_lock)
1558 {
1559 void **host_func_table = ((void ***) host_table)[0];
1560 void **host_funcs_end = ((void ***) host_table)[1];
1561 void **host_var_table = ((void ***) host_table)[2];
1562 void **host_vars_end = ((void ***) host_table)[3];
1563
1564 /* The func table contains only addresses, the var table contains addresses
1565 and corresponding sizes. */
1566 int num_funcs = host_funcs_end - host_func_table;
1567 int num_vars = (host_vars_end - host_var_table) / 2;
1568
1569 /* Load image to device and get target addresses for the image. */
1570 struct addr_pair *target_table = NULL;
1571 int i, num_target_entries;
1572
1573 num_target_entries
1574 = devicep->load_image_func (devicep->target_id, version,
1575 target_data, &target_table);
1576
1577 if (num_target_entries != num_funcs + num_vars)
1578 {
1579 gomp_mutex_unlock (&devicep->lock);
1580 if (is_register_lock)
1581 gomp_mutex_unlock (&register_lock);
1582 gomp_fatal ("Cannot map target functions or variables"
1583 " (expected %u, have %u)", num_funcs + num_vars,
1584 num_target_entries);
1585 }
1586
1587 /* Insert host-target address mapping into splay tree. */
1588 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1589 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1590 tgt->refcount = REFCOUNT_INFINITY;
1591 tgt->tgt_start = 0;
1592 tgt->tgt_end = 0;
1593 tgt->to_free = NULL;
1594 tgt->prev = NULL;
1595 tgt->list_count = 0;
1596 tgt->device_descr = devicep;
1597 splay_tree_node array = tgt->array;
1598
1599 for (i = 0; i < num_funcs; i++)
1600 {
1601 splay_tree_key k = &array->key;
1602 k->host_start = (uintptr_t) host_func_table[i];
1603 k->host_end = k->host_start + 1;
1604 k->tgt = tgt;
1605 k->tgt_offset = target_table[i].start;
1606 k->refcount = REFCOUNT_INFINITY;
1607 k->virtual_refcount = 0;
1608 k->aux = NULL;
1609 array->left = NULL;
1610 array->right = NULL;
1611 splay_tree_insert (&devicep->mem_map, array);
1612 array++;
1613 }
1614
1615 /* Most significant bit of the size in host and target tables marks
1616 "omp declare target link" variables. */
1617 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1618 const uintptr_t size_mask = ~link_bit;
1619
1620 for (i = 0; i < num_vars; i++)
1621 {
1622 struct addr_pair *target_var = &target_table[num_funcs + i];
1623 uintptr_t target_size = target_var->end - target_var->start;
1624
1625 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1626 {
1627 gomp_mutex_unlock (&devicep->lock);
1628 if (is_register_lock)
1629 gomp_mutex_unlock (&register_lock);
1630 gomp_fatal ("Cannot map target variables (size mismatch)");
1631 }
1632
1633 splay_tree_key k = &array->key;
1634 k->host_start = (uintptr_t) host_var_table[i * 2];
1635 k->host_end
1636 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1637 k->tgt = tgt;
1638 k->tgt_offset = target_var->start;
1639 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1640 k->virtual_refcount = 0;
1641 k->aux = NULL;
1642 array->left = NULL;
1643 array->right = NULL;
1644 splay_tree_insert (&devicep->mem_map, array);
1645 array++;
1646 }
1647
1648 free (target_table);
1649 }
1650
1651 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
1652 The device must be locked. */
1653
1654 static void
1655 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1656 unsigned version,
1657 const void *host_table, const void *target_data)
1658 {
1659 void **host_func_table = ((void ***) host_table)[0];
1660 void **host_funcs_end = ((void ***) host_table)[1];
1661 void **host_var_table = ((void ***) host_table)[2];
1662 void **host_vars_end = ((void ***) host_table)[3];
1663
1664 /* The func table contains only addresses, the var table contains addresses
1665 and corresponding sizes. */
1666 int num_funcs = host_funcs_end - host_func_table;
1667 int num_vars = (host_vars_end - host_var_table) / 2;
1668
1669 struct splay_tree_key_s k;
1670 splay_tree_key node = NULL;
1671
1672 /* Find the mapping at the start of the node array. */
1673 if (num_funcs || num_vars)
1674 {
1675 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1676 : (uintptr_t) host_var_table[0]);
1677 k.host_end = k.host_start + 1;
1678 node = splay_tree_lookup (&devicep->mem_map, &k);
1679 }
1680
1681 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1682 {
1683 gomp_mutex_unlock (&devicep->lock);
1684 gomp_fatal ("image unload fail");
1685 }
1686
1687 /* Remove mappings from splay tree. */
1688 int i;
1689 for (i = 0; i < num_funcs; i++)
1690 {
1691 k.host_start = (uintptr_t) host_func_table[i];
1692 k.host_end = k.host_start + 1;
1693 splay_tree_remove (&devicep->mem_map, &k);
1694 }
1695
1696 /* Most significant bit of the size in host and target tables marks
1697 "omp declare target link" variables. */
1698 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1699 const uintptr_t size_mask = ~link_bit;
1700 bool is_tgt_unmapped = false;
1701
1702 for (i = 0; i < num_vars; i++)
1703 {
1704 k.host_start = (uintptr_t) host_var_table[i * 2];
1705 k.host_end
1706 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1707
1708 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1709 splay_tree_remove (&devicep->mem_map, &k);
1710 else
1711 {
1712 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1713 is_tgt_unmapped = gomp_remove_var (devicep, n);
1714 }
1715 }
1716
1717 if (node && !is_tgt_unmapped)
1718 {
1719 free (node->tgt);
1720 free (node);
1721 }
1722 }
1723
1724 /* This function should be called from every offload image while it is being
1725 loaded. It gets the descriptor of the host func and var tables HOST_TABLE,
1726 the TYPE of the target, and the TARGET_DATA needed by the target plugin. */
1727
1728 void
1729 GOMP_offload_register_ver (unsigned version, const void *host_table,
1730 int target_type, const void *target_data)
1731 {
1732 int i;
1733
1734 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1735 gomp_fatal ("Library too old for offload (version %u < %u)",
1736 GOMP_VERSION, GOMP_VERSION_LIB (version));
1737
1738 gomp_mutex_lock (&register_lock);
1739
1740 /* Load image to all initialized devices. */
1741 for (i = 0; i < num_devices; i++)
1742 {
1743 struct gomp_device_descr *devicep = &devices[i];
1744 gomp_mutex_lock (&devicep->lock);
1745 if (devicep->type == target_type
1746 && devicep->state == GOMP_DEVICE_INITIALIZED)
1747 gomp_load_image_to_device (devicep, version,
1748 host_table, target_data, true);
1749 gomp_mutex_unlock (&devicep->lock);
1750 }
1751
1752 /* Insert the image into the array of pending images. */
1753 offload_images
1754 = gomp_realloc_unlock (offload_images,
1755 (num_offload_images + 1)
1756 * sizeof (struct offload_image_descr));
1757 offload_images[num_offload_images].version = version;
1758 offload_images[num_offload_images].type = target_type;
1759 offload_images[num_offload_images].host_table = host_table;
1760 offload_images[num_offload_images].target_data = target_data;
1761
1762 num_offload_images++;
1763 gomp_mutex_unlock (&register_lock);
1764 }
1765
1766 void
1767 GOMP_offload_register (const void *host_table, int target_type,
1768 const void *target_data)
1769 {
1770 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1771 }
1772
1773 /* This function should be called from every offload image while it is being
1774 unloaded. It gets the descriptor of the host func and var tables HOST_TABLE,
1775 the TYPE of the target, and the TARGET_DATA needed by the target plugin. */
1776
1777 void
1778 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1779 int target_type, const void *target_data)
1780 {
1781 int i;
1782
1783 gomp_mutex_lock (&register_lock);
1784
1785 /* Unload image from all initialized devices. */
1786 for (i = 0; i < num_devices; i++)
1787 {
1788 struct gomp_device_descr *devicep = &devices[i];
1789 gomp_mutex_lock (&devicep->lock);
1790 if (devicep->type == target_type
1791 && devicep->state == GOMP_DEVICE_INITIALIZED)
1792 gomp_unload_image_from_device (devicep, version,
1793 host_table, target_data);
1794 gomp_mutex_unlock (&devicep->lock);
1795 }
1796
1797 /* Remove image from array of pending images. */
1798 for (i = 0; i < num_offload_images; i++)
1799 if (offload_images[i].target_data == target_data)
1800 {
1801 offload_images[i] = offload_images[--num_offload_images];
1802 break;
1803 }
1804
1805 gomp_mutex_unlock (&register_lock);
1806 }
1807
1808 void
1809 GOMP_offload_unregister (const void *host_table, int target_type,
1810 const void *target_data)
1811 {
1812 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1813 }
1814
1815 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1816 must be locked on entry, and remains locked on return. */
1817
1818 attribute_hidden void
1819 gomp_init_device (struct gomp_device_descr *devicep)
1820 {
1821 int i;
1822 if (!devicep->init_device_func (devicep->target_id))
1823 {
1824 gomp_mutex_unlock (&devicep->lock);
1825 gomp_fatal ("device initialization failed");
1826 }
1827
1828 /* Load onto the device all images registered so far. */
1829 for (i = 0; i < num_offload_images; i++)
1830 {
1831 struct offload_image_descr *image = &offload_images[i];
1832 if (image->type == devicep->type)
1833 gomp_load_image_to_device (devicep, image->version,
1834 image->host_table, image->target_data,
1835 false);
1836 }
1837
1838 /* Initialize OpenACC asynchronous queues. */
1839 goacc_init_asyncqueues (devicep);
1840
1841 devicep->state = GOMP_DEVICE_INITIALIZED;
1842 }
1843
1844 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1845 must be locked on entry, and remains locked on return. */
1846
1847 attribute_hidden bool
1848 gomp_fini_device (struct gomp_device_descr *devicep)
1849 {
1850 bool ret = goacc_fini_asyncqueues (devicep);
1851 ret &= devicep->fini_device_func (devicep->target_id);
1852 devicep->state = GOMP_DEVICE_FINALIZED;
1853 return ret;
1854 }
1855
1856 attribute_hidden void
1857 gomp_unload_device (struct gomp_device_descr *devicep)
1858 {
1859 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1860 {
1861 unsigned i;
1862
1863 /* Unload from the device all images currently registered. */
1864 for (i = 0; i < num_offload_images; i++)
1865 {
1866 struct offload_image_descr *image = &offload_images[i];
1867 if (image->type == devicep->type)
1868 gomp_unload_image_from_device (devicep, image->version,
1869 image->host_table,
1870 image->target_data);
1871 }
1872 }
1873 }
1874
1875 /* Host fallback for GOMP_target{,_ext} routines. */
1876
1877 static void
1878 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1879 {
1880 struct gomp_thread old_thr, *thr = gomp_thread ();
1881 old_thr = *thr;
1882 memset (thr, '\0', sizeof (*thr));
1883 if (gomp_places_list)
1884 {
1885 thr->place = old_thr.place;
1886 thr->ts.place_partition_len = gomp_places_list_len;
1887 }
1888 fn (hostaddrs);
1889 gomp_free_thread (thr);
1890 *thr = old_thr;
1891 }
1892
1893 /* Calculate alignment and size requirements of a private copy of data shared
1894 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
1895
1896 static inline void
1897 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1898 unsigned short *kinds, size_t *tgt_align,
1899 size_t *tgt_size)
1900 {
1901 size_t i;
1902 for (i = 0; i < mapnum; i++)
1903 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1904 {
1905 size_t align = (size_t) 1 << (kinds[i] >> 8);
1906 if (*tgt_align < align)
1907 *tgt_align = align;
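/* Round the running size up to a multiple of ALIGN before reserving room for this object's copy. */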
1908 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1909 *tgt_size += sizes[i];
1910 }
1911 }
1912
1913 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE into the buffer TGT. */
1914
1915 static inline void
1916 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1917 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1918 size_t tgt_size)
1919 {
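/* Align the start of the destination buffer to TGT_ALIGN, then pack each firstprivate copy at its own required alignment, mirroring the layout computed by calculate_firstprivate_requirements. */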
1920 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1921 if (al)
1922 tgt += tgt_align - al;
1923 tgt_size = 0;
1924 size_t i;
1925 for (i = 0; i < mapnum; i++)
1926 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1927 {
1928 size_t align = (size_t) 1 << (kinds[i] >> 8);
1929 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1930 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1931 hostaddrs[i] = tgt + tgt_size;
1932 tgt_size = tgt_size + sizes[i];
1933 }
1934 }
1935
1936 /* Helper function of GOMP_target{,_ext} routines. */
1937
1938 static void *
1939 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1940 void (*host_fn) (void *))
1941 {
1942 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1943 return (void *) host_fn;
1944 else
1945 {
1946 gomp_mutex_lock (&devicep->lock);
1947 if (devicep->state == GOMP_DEVICE_FINALIZED)
1948 {
1949 gomp_mutex_unlock (&devicep->lock);
1950 return NULL;
1951 }
1952
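/* Offload functions are registered in the device's address map as one-byte ranges keyed by the host function's address; look the address up to obtain the device-side entry point. */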
1953 struct splay_tree_key_s k;
1954 k.host_start = (uintptr_t) host_fn;
1955 k.host_end = k.host_start + 1;
1956 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1957 gomp_mutex_unlock (&devicep->lock);
1958 if (tgt_fn == NULL)
1959 return NULL;
1960
1961 return (void *) tgt_fn->tgt_offset;
1962 }
1963 }
1964
1965 /* Called when encountering a target directive. If DEVICE
1966 is GOMP_DEVICE_ICV, use the device-var ICV. If it is
1967 GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last
1968 available hw device), use the host fallback.
1969 FN is the address of the host code; UNUSED is part of the current ABI,
1970 but we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1971 with MAPNUM entries, holding the addresses of the host objects, the
1972 sizes of the host objects (for pointer kinds, the pointer bias with an
1973 assumed size of sizeof (void *)) and the map kinds. */
1974
1975 void
1976 GOMP_target (int device, void (*fn) (void *), const void *unused,
1977 size_t mapnum, void **hostaddrs, size_t *sizes,
1978 unsigned char *kinds)
1979 {
1980 struct gomp_device_descr *devicep = resolve_device (device);
1981
1982 void *fn_addr;
1983 if (devicep == NULL
1984 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1985 /* All shared memory devices should use the GOMP_target_ext function. */
1986 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1987 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1988 return gomp_target_fallback (fn, hostaddrs);
1989
1990 struct target_mem_desc *tgt_vars
1991 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1992 GOMP_MAP_VARS_TARGET);
1993 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1994 NULL);
1995 gomp_unmap_vars (tgt_vars, true);
1996 }
1997
1998 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1999 and several arguments have been added:
2000 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2001 DEPEND is array of dependencies, see GOMP_task for details.
2002
2003 ARGS is a pointer to an array consisting of a variable number of both
2004 device-independent and device-specific arguments; each argument can take
2005 one or two elements, where the first specifies for which device it is
2006 intended, the type, and optionally also the value. If the value is not
2007 present in the first element, the whole second element holds the actual
2008 value. The last element of the array is a single NULL. Among the
2009 device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
2010
2011 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2012 that value, or 1 if the teams construct is not present, or 0 if the
2013 teams construct does not have a num_teams clause and so the choice is
2014 implementation defined, and -1 if it can't be determined on the host
2015 what value GOMP_teams will have on the device.
2016 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2017 body with that value, or 0 if the teams construct does not have a
2018 thread_limit clause or the teams construct is not present, or -1 if it
2019 can't be determined on the host what value GOMP_teams will have on the device. */
2020
2021 void
2022 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2023 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2024 unsigned int flags, void **depend, void **args)
2025 {
2026 struct gomp_device_descr *devicep = resolve_device (device);
2027 size_t tgt_align = 0, tgt_size = 0;
2028 bool fpc_done = false;
2029
2030 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2031 {
2032 struct gomp_thread *thr = gomp_thread ();
2033 /* Create a team if we don't have any around, as nowait
2034 target tasks make sense to run asynchronously even when
2035 outside of any parallel. */
2036 if (__builtin_expect (thr->ts.team == NULL, 0))
2037 {
2038 struct gomp_team *team = gomp_new_team (1);
2039 struct gomp_task *task = thr->task;
2040 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2041 team->prev_ts = thr->ts;
2042 thr->ts.team = team;
2043 thr->ts.team_id = 0;
2044 thr->ts.work_share = &team->work_shares[0];
2045 thr->ts.last_work_share = NULL;
2046 #ifdef HAVE_SYNC_BUILTINS
2047 thr->ts.single_count = 0;
2048 #endif
2049 thr->ts.static_trip = 0;
2050 thr->task = &team->implicit_task[0];
2051 gomp_init_task (thr->task, NULL, icv);
2052 if (task)
2053 {
2054 thr->task = task;
2055 gomp_end_task ();
2056 free (task);
2057 thr->task = &team->implicit_task[0];
2058 }
2059 else
2060 pthread_setspecific (gomp_thread_destructor, thr);
2061 }
2062 if (thr->ts.team
2063 && !thr->task->final_task)
2064 {
2065 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2066 sizes, kinds, flags, depend, args,
2067 GOMP_TARGET_TASK_BEFORE_MAP);
2068 return;
2069 }
2070 }
2071
2072 /* If there are depend clauses, but nowait is not present
2073 (or we are in a final task), block the parent task until the
2074 dependencies are resolved and then just continue with the rest
2075 of the function as if it is a merged task. */
2076 if (depend != NULL)
2077 {
2078 struct gomp_thread *thr = gomp_thread ();
2079 if (thr->task && thr->task->depend_hash)
2080 {
2081 /* If we might need to wait, copy firstprivate now. */
2082 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2083 &tgt_align, &tgt_size);
2084 if (tgt_align)
2085 {
2086 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2087 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2088 tgt_align, tgt_size);
2089 }
2090 fpc_done = true;
2091 gomp_task_maybe_wait_for_dependencies (depend);
2092 }
2093 }
2094
2095 void *fn_addr;
2096 if (devicep == NULL
2097 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2098 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2099 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2100 {
2101 if (!fpc_done)
2102 {
2103 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2104 &tgt_align, &tgt_size);
2105 if (tgt_align)
2106 {
2107 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2108 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2109 tgt_align, tgt_size);
2110 }
2111 }
2112 gomp_target_fallback (fn, hostaddrs);
2113 return;
2114 }
2115
2116 struct target_mem_desc *tgt_vars;
2117 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2118 {
2119 if (!fpc_done)
2120 {
2121 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2122 &tgt_align, &tgt_size);
2123 if (tgt_align)
2124 {
2125 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2126 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2127 tgt_align, tgt_size);
2128 }
2129 }
2130 tgt_vars = NULL;
2131 }
2132 else
2133 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2134 true, GOMP_MAP_VARS_TARGET);
2135 devicep->run_func (devicep->target_id, fn_addr,
2136 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2137 args);
2138 if (tgt_vars)
2139 gomp_unmap_vars (tgt_vars, true);
2140 }
2141
2142 /* Host fallback for GOMP_target_data{,_ext} routines. */
2143
2144 static void
2145 gomp_target_data_fallback (void)
2146 {
2147 struct gomp_task_icv *icv = gomp_icv (false);
2148 if (icv->target_data)
2149 {
2150 /* Even when doing a host fallback, if there are any active
2151 #pragma omp target data constructs, we need to remember the
2152 new #pragma omp target data, otherwise GOMP_target_end_data
2153 would get out of sync. */
2154 struct target_mem_desc *tgt
2155 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2156 GOMP_MAP_VARS_DATA);
2157 tgt->prev = icv->target_data;
2158 icv->target_data = tgt;
2159 }
2160 }
2161
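/* Called on entry to an OpenMP 'target data' construct (older ABI with 8-bit map kinds): map the listed variables and push the resulting descriptor onto the ICV's target_data chain for GOMP_target_end_data. */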
2162 void
2163 GOMP_target_data (int device, const void *unused, size_t mapnum,
2164 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2165 {
2166 struct gomp_device_descr *devicep = resolve_device (device);
2167
2168 if (devicep == NULL
2169 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2170 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2171 return gomp_target_data_fallback ();
2172
2173 struct target_mem_desc *tgt
2174 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2175 GOMP_MAP_VARS_DATA);
2176 struct gomp_task_icv *icv = gomp_icv (true);
2177 tgt->prev = icv->target_data;
2178 icv->target_data = tgt;
2179 }
2180
2181 void
2182 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2183 size_t *sizes, unsigned short *kinds)
2184 {
2185 struct gomp_device_descr *devicep = resolve_device (device);
2186
2187 if (devicep == NULL
2188 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2189 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2190 return gomp_target_data_fallback ();
2191
2192 struct target_mem_desc *tgt
2193 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2194 GOMP_MAP_VARS_DATA);
2195 struct gomp_task_icv *icv = gomp_icv (true);
2196 tgt->prev = icv->target_data;
2197 icv->target_data = tgt;
2198 }
2199
2200 void
2201 GOMP_target_end_data (void)
2202 {
2203 struct gomp_task_icv *icv = gomp_icv (false);
2204 if (icv->target_data)
2205 {
2206 struct target_mem_desc *tgt = icv->target_data;
2207 icv->target_data = tgt->prev;
2208 gomp_unmap_vars (tgt, true);
2209 }
2210 }
2211
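/* Called for an OpenMP 'target update' construct (older ABI with 8-bit map kinds). */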
2212 void
2213 GOMP_target_update (int device, const void *unused, size_t mapnum,
2214 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2215 {
2216 struct gomp_device_descr *devicep = resolve_device (device);
2217
2218 if (devicep == NULL
2219 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2220 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2221 return;
2222
2223 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2224 }
2225
2226 void
2227 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2228 size_t *sizes, unsigned short *kinds,
2229 unsigned int flags, void **depend)
2230 {
2231 struct gomp_device_descr *devicep = resolve_device (device);
2232
2233 /* If there are depend clauses, but nowait is not present,
2234 block the parent task until the dependencies are resolved
2235 and then just continue with the rest of the function as if it
2236 is a merged task. Until we are able to schedule tasks during
2237 variable mapping or unmapping, ignore nowait if depend clauses
2238 are not present. */
2239 if (depend != NULL)
2240 {
2241 struct gomp_thread *thr = gomp_thread ();
2242 if (thr->task && thr->task->depend_hash)
2243 {
2244 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2245 && thr->ts.team
2246 && !thr->task->final_task)
2247 {
2248 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2249 mapnum, hostaddrs, sizes, kinds,
2250 flags | GOMP_TARGET_FLAG_UPDATE,
2251 depend, NULL, GOMP_TARGET_TASK_DATA))
2252 return;
2253 }
2254 else
2255 {
2256 struct gomp_team *team = thr->ts.team;
2257 /* If parallel or taskgroup has been cancelled, don't start new
2258 tasks. */
2259 if (__builtin_expect (gomp_cancel_var, 0) && team)
2260 {
2261 if (gomp_team_barrier_cancelled (&team->barrier))
2262 return;
2263 if (thr->task->taskgroup)
2264 {
2265 if (thr->task->taskgroup->cancelled)
2266 return;
2267 if (thr->task->taskgroup->workshare
2268 && thr->task->taskgroup->prev
2269 && thr->task->taskgroup->prev->cancelled)
2270 return;
2271 }
2272 }
2273
2274 gomp_task_maybe_wait_for_dependencies (depend);
2275 }
2276 }
2277 }
2278
2279 if (devicep == NULL
2280 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2281 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2282 return;
2283
2284 struct gomp_thread *thr = gomp_thread ();
2285 struct gomp_team *team = thr->ts.team;
2286 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2287 if (__builtin_expect (gomp_cancel_var, 0) && team)
2288 {
2289 if (gomp_team_barrier_cancelled (&team->barrier))
2290 return;
2291 if (thr->task->taskgroup)
2292 {
2293 if (thr->task->taskgroup->cancelled)
2294 return;
2295 if (thr->task->taskgroup->workshare
2296 && thr->task->taskgroup->prev
2297 && thr->task->taskgroup->prev->cancelled)
2298 return;
2299 }
2300 }
2301
2302 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2303 }
2304
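/* Handle the mappings of a 'target exit data' construct on DEVICEP: copy back 'from' data where required, drop reference counts, and remove entries whose count reaches zero. */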
2305 static void
2306 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2307 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2308 {
2309 const int typemask = 0xff;
2310 size_t i;
2311 gomp_mutex_lock (&devicep->lock);
2312 if (devicep->state == GOMP_DEVICE_FINALIZED)
2313 {
2314 gomp_mutex_unlock (&devicep->lock);
2315 return;
2316 }
2317
2318 for (i = 0; i < mapnum; i++)
2319 {
2320 struct splay_tree_key_s cur_node;
2321 unsigned char kind = kinds[i] & typemask;
2322 switch (kind)
2323 {
2324 case GOMP_MAP_FROM:
2325 case GOMP_MAP_ALWAYS_FROM:
2326 case GOMP_MAP_DELETE:
2327 case GOMP_MAP_RELEASE:
2328 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2329 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2330 cur_node.host_start = (uintptr_t) hostaddrs[i];
2331 cur_node.host_end = cur_node.host_start + sizes[i];
2332 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2333 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2334 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2335 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2336 if (!k)
2337 continue;
2338
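/* Drop one reference unless the mapping is permanent (REFCOUNT_INFINITY); 'delete' kinds force the count to zero so the entry is removed below. */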
2339 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2340 k->refcount--;
2341 if ((kind == GOMP_MAP_DELETE
2342 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2343 && k->refcount != REFCOUNT_INFINITY)
2344 k->refcount = 0;
2345
2346 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2347 || kind == GOMP_MAP_ALWAYS_FROM)
2348 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2349 (void *) (k->tgt->tgt_start + k->tgt_offset
2350 + cur_node.host_start
2351 - k->host_start),
2352 cur_node.host_end - cur_node.host_start);
2353 if (k->refcount == 0)
2354 gomp_remove_var (devicep, k);
2355
2356 break;
2357 default:
2358 gomp_mutex_unlock (&devicep->lock);
2359 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2360 kind);
2361 }
2362 }
2363
2364 gomp_mutex_unlock (&devicep->lock);
2365 }
2366
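/* Called for OpenMP 'target enter data' and 'target exit data' constructs; GOMP_TARGET_FLAG_EXIT_DATA in FLAGS selects the exit-data path. */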
2367 void
2368 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2369 size_t *sizes, unsigned short *kinds,
2370 unsigned int flags, void **depend)
2371 {
2372 struct gomp_device_descr *devicep = resolve_device (device);
2373
2374 /* If there are depend clauses, but nowait is not present,
2375 block the parent task until the dependencies are resolved
2376 and then just continue with the rest of the function as if it
2377 is a merged task. Until we are able to schedule tasks during
2378 variable mapping or unmapping, ignore nowait if depend clauses
2379 are not present. */
2380 if (depend != NULL)
2381 {
2382 struct gomp_thread *thr = gomp_thread ();
2383 if (thr->task && thr->task->depend_hash)
2384 {
2385 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2386 && thr->ts.team
2387 && !thr->task->final_task)
2388 {
2389 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2390 mapnum, hostaddrs, sizes, kinds,
2391 flags, depend, NULL,
2392 GOMP_TARGET_TASK_DATA))
2393 return;
2394 }
2395 else
2396 {
2397 struct gomp_team *team = thr->ts.team;
2398 /* If parallel or taskgroup has been cancelled, don't start new
2399 tasks. */
2400 if (__builtin_expect (gomp_cancel_var, 0) && team)
2401 {
2402 if (gomp_team_barrier_cancelled (&team->barrier))
2403 return;
2404 if (thr->task->taskgroup)
2405 {
2406 if (thr->task->taskgroup->cancelled)
2407 return;
2408 if (thr->task->taskgroup->workshare
2409 && thr->task->taskgroup->prev
2410 && thr->task->taskgroup->prev->cancelled)
2411 return;
2412 }
2413 }
2414
2415 gomp_task_maybe_wait_for_dependencies (depend);
2416 }
2417 }
2418 }
2419
2420 if (devicep == NULL
2421 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2422 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2423 return;
2424
2425 struct gomp_thread *thr = gomp_thread ();
2426 struct gomp_team *team = thr->ts.team;
2427 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2428 if (__builtin_expect (gomp_cancel_var, 0) && team)
2429 {
2430 if (gomp_team_barrier_cancelled (&team->barrier))
2431 return;
2432 if (thr->task->taskgroup)
2433 {
2434 if (thr->task->taskgroup->cancelled)
2435 return;
2436 if (thr->task->taskgroup->workshare
2437 && thr->task->taskgroup->prev
2438 && thr->task->taskgroup->prev->cancelled)
2439 return;
2440 }
2441 }
2442
2443 size_t i;
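/* Enter data: map each entry; a GOMP_MAP_STRUCT entry carries in sizes[i] the number of following entries that describe its members, so they are mapped together and then skipped. */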
2444 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2445 for (i = 0; i < mapnum; i++)
2446 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2447 {
2448 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2449 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2450 i += sizes[i];
2451 }
2452 else
2453 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2454 true, GOMP_MAP_VARS_ENTER_DATA);
2455 else
2456 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2457 }
2458
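/* Body of a target task, used for 'nowait' target regions and data constructs. Returns true if the work was launched asynchronously on the device and completion will be signalled via the plugin callback, false if it is already finished. */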
2459 bool
2460 gomp_target_task_fn (void *data)
2461 {
2462 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2463 struct gomp_device_descr *devicep = ttask->devicep;
2464
2465 if (ttask->fn != NULL)
2466 {
2467 void *fn_addr;
2468 if (devicep == NULL
2469 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2470 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2471 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2472 {
2473 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2474 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2475 return false;
2476 }
2477
2478 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2479 {
2480 if (ttask->tgt)
2481 gomp_unmap_vars (ttask->tgt, true);
2482 return false;
2483 }
2484
2485 void *actual_arguments;
2486 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2487 {
2488 ttask->tgt = NULL;
2489 actual_arguments = ttask->hostaddrs;
2490 }
2491 else
2492 {
2493 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2494 NULL, ttask->sizes, ttask->kinds, true,
2495 GOMP_MAP_VARS_TARGET);
2496 actual_arguments = (void *) ttask->tgt->tgt_start;
2497 }
2498 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2499
2500 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2501 ttask->args, (void *) ttask);
2502 return true;
2503 }
2504 else if (devicep == NULL
2505 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2506 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2507 return false;
2508
2509 size_t i;
2510 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2511 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2512 ttask->kinds, true);
2513 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2514 for (i = 0; i < ttask->mapnum; i++)
2515 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2516 {
2517 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2518 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2519 GOMP_MAP_VARS_ENTER_DATA);
2520 i += ttask->sizes[i];
2521 }
2522 else
2523 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2524 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2525 else
2526 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2527 ttask->kinds);
2528 return false;
2529 }
2530
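/* Host/fallback entry point for the 'teams' construct: record THREAD_LIMIT in the thread-limit ICV; NUM_TEAMS is unused here. */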
2531 void
2532 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2533 {
2534 if (thread_limit)
2535 {
2536 struct gomp_task_icv *icv = gomp_icv (true);
2537 icv->thread_limit_var
2538 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2539 }
2540 (void) num_teams;
2541 }
2542
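/* omp_target_alloc: allocate SIZE bytes in the data environment of device DEVICE_NUM; returns NULL on failure. The host fallback and shared-memory devices simply use malloc. */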
2543 void *
2544 omp_target_alloc (size_t size, int device_num)
2545 {
2546 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2547 return malloc (size);
2548
2549 if (device_num < 0)
2550 return NULL;
2551
2552 struct gomp_device_descr *devicep = resolve_device (device_num);
2553 if (devicep == NULL)
2554 return NULL;
2555
2556 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2557 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2558 return malloc (size);
2559
2560 gomp_mutex_lock (&devicep->lock);
2561 void *ret = devicep->alloc_func (devicep->target_id, size);
2562 gomp_mutex_unlock (&devicep->lock);
2563 return ret;
2564 }
2565
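/* omp_target_free: release memory obtained from omp_target_alloc on device DEVICE_NUM; a NULL DEVICE_PTR is a no-op. */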
2566 void
2567 omp_target_free (void *device_ptr, int device_num)
2568 {
2569 if (device_ptr == NULL)
2570 return;
2571
2572 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2573 {
2574 free (device_ptr);
2575 return;
2576 }
2577
2578 if (device_num < 0)
2579 return;
2580
2581 struct gomp_device_descr *devicep = resolve_device (device_num);
2582 if (devicep == NULL)
2583 return;
2584
2585 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2586 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2587 {
2588 free (device_ptr);
2589 return;
2590 }
2591
2592 gomp_mutex_lock (&devicep->lock);
2593 gomp_free_device_memory (devicep, device_ptr);
2594 gomp_mutex_unlock (&devicep->lock);
2595 }
2596
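/* omp_target_is_present: return nonzero if PTR is mapped in (or accessible from) the data environment of device DEVICE_NUM. */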
2597 int
2598 omp_target_is_present (const void *ptr, int device_num)
2599 {
2600 if (ptr == NULL)
2601 return 1;
2602
2603 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2604 return 1;
2605
2606 if (device_num < 0)
2607 return 0;
2608
2609 struct gomp_device_descr *devicep = resolve_device (device_num);
2610 if (devicep == NULL)
2611 return 0;
2612
2613 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2614 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2615 return 1;
2616
2617 gomp_mutex_lock (&devicep->lock);
2618 struct splay_tree_s *mem_map = &devicep->mem_map;
2619 struct splay_tree_key_s cur_node;
2620
2621 cur_node.host_start = (uintptr_t) ptr;
2622 cur_node.host_end = cur_node.host_start;
2623 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2624 int ret = n != NULL;
2625 gomp_mutex_unlock (&devicep->lock);
2626 return ret;
2627 }
2628
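/* omp_target_memcpy: copy LENGTH bytes from SRC + SRC_OFFSET on device SRC_DEVICE_NUM to DST + DST_OFFSET on device DST_DEVICE_NUM. Returns 0 on success and EINVAL on failure; copies between two distinct non-host devices are not supported here. */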
2629 int
2630 omp_target_memcpy (void *dst, const void *src, size_t length,
2631 size_t dst_offset, size_t src_offset, int dst_device_num,
2632 int src_device_num)
2633 {
2634 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2635 bool ret;
2636
2637 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2638 {
2639 if (dst_device_num < 0)
2640 return EINVAL;
2641
2642 dst_devicep = resolve_device (dst_device_num);
2643 if (dst_devicep == NULL)
2644 return EINVAL;
2645
2646 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2647 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2648 dst_devicep = NULL;
2649 }
2650 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2651 {
2652 if (src_device_num < 0)
2653 return EINVAL;
2654
2655 src_devicep = resolve_device (src_device_num);
2656 if (src_devicep == NULL)
2657 return EINVAL;
2658
2659 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2660 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2661 src_devicep = NULL;
2662 }
2663 if (src_devicep == NULL && dst_devicep == NULL)
2664 {
2665 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2666 return 0;
2667 }
2668 if (src_devicep == NULL)
2669 {
2670 gomp_mutex_lock (&dst_devicep->lock);
2671 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2672 (char *) dst + dst_offset,
2673 (char *) src + src_offset, length);
2674 gomp_mutex_unlock (&dst_devicep->lock);
2675 return (ret ? 0 : EINVAL);
2676 }
2677 if (dst_devicep == NULL)
2678 {
2679 gomp_mutex_lock (&src_devicep->lock);
2680 ret = src_devicep->dev2host_func (src_devicep->target_id,
2681 (char *) dst + dst_offset,
2682 (char *) src + src_offset, length);
2683 gomp_mutex_unlock (&src_devicep->lock);
2684 return (ret ? 0 : EINVAL);
2685 }
2686 if (src_devicep == dst_devicep)
2687 {
2688 gomp_mutex_lock (&src_devicep->lock);
2689 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2690 (char *) dst + dst_offset,
2691 (char *) src + src_offset, length);
2692 gomp_mutex_unlock (&src_devicep->lock);
2693 return (ret ? 0 : EINVAL);
2694 }
2695 return EINVAL;
2696 }
2697
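/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional rectangular subvolume, dimension 0 being the outermost. */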
2698 static int
2699 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2700 int num_dims, const size_t *volume,
2701 const size_t *dst_offsets,
2702 const size_t *src_offsets,
2703 const size_t *dst_dimensions,
2704 const size_t *src_dimensions,
2705 struct gomp_device_descr *dst_devicep,
2706 struct gomp_device_descr *src_devicep)
2707 {
2708 size_t dst_slice = element_size;
2709 size_t src_slice = element_size;
2710 size_t j, dst_off, src_off, length;
2711 int i, ret;
2712
2713 if (num_dims == 1)
2714 {
2715 if (__builtin_mul_overflow (element_size, volume[0], &length)
2716 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2717 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2718 return EINVAL;
2719 if (dst_devicep == NULL && src_devicep == NULL)
2720 {
2721 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2722 length);
2723 ret = 1;
2724 }
2725 else if (src_devicep == NULL)
2726 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2727 (char *) dst + dst_off,
2728 (const char *) src + src_off,
2729 length);
2730 else if (dst_devicep == NULL)
2731 ret = src_devicep->dev2host_func (src_devicep->target_id,
2732 (char *) dst + dst_off,
2733 (const char *) src + src_off,
2734 length);
2735 else if (src_devicep == dst_devicep)
2736 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2737 (char *) dst + dst_off,
2738 (const char *) src + src_off,
2739 length);
2740 else
2741 ret = 0;
2742 return ret ? 0 : EINVAL;
2743 }
2744
2745 /* FIXME: it would be nice to have some plugin function to handle
2746 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2747 be handled in the generic recursion below, and for host-host it
2748 should be used even for any num_dims >= 2. */
2749
2750 for (i = 1; i < num_dims; i++)
2751 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2752 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2753 return EINVAL;
2754 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2755 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2756 return EINVAL;
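/* Walk the outermost dimension, advancing the destination and source by one full slice per iteration and recursing over the remaining dimensions. */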
2757 for (j = 0; j < volume[0]; j++)
2758 {
2759 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2760 (const char *) src + src_off,
2761 element_size, num_dims - 1,
2762 volume + 1, dst_offsets + 1,
2763 src_offsets + 1, dst_dimensions + 1,
2764 src_dimensions + 1, dst_devicep,
2765 src_devicep);
2766 if (ret)
2767 return ret;
2768 dst_off += dst_slice;
2769 src_off += src_slice;
2770 }
2771 return 0;
2772 }
2773
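/* omp_target_memcpy_rect: copy a rectangular subvolume between host and/or device memory. If both DST and SRC are NULL, return the maximum number of dimensions supported (INT_MAX here). As an illustration, copying a 2x3 block of doubles out of a 10x10 host matrix into the corner of an 8x8 device matrix would use element_size == sizeof (double), num_dims == 2, volume == {2, 3}, dst_dimensions == {8, 8}, src_dimensions == {10, 10} and per-dimension element offsets. */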
2774 int
2775 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2776 int num_dims, const size_t *volume,
2777 const size_t *dst_offsets,
2778 const size_t *src_offsets,
2779 const size_t *dst_dimensions,
2780 const size_t *src_dimensions,
2781 int dst_device_num, int src_device_num)
2782 {
2783 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2784
2785 if (!dst && !src)
2786 return INT_MAX;
2787
2788 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2789 {
2790 if (dst_device_num < 0)
2791 return EINVAL;
2792
2793 dst_devicep = resolve_device (dst_device_num);
2794 if (dst_devicep == NULL)
2795 return EINVAL;
2796
2797 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2798 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2799 dst_devicep = NULL;
2800 }
2801 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2802 {
2803 if (src_device_num < 0)
2804 return EINVAL;
2805
2806 src_devicep = resolve_device (src_device_num);
2807 if (src_devicep == NULL)
2808 return EINVAL;
2809
2810 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2811 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2812 src_devicep = NULL;
2813 }
2814
2815 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2816 return EINVAL;
2817
2818 if (src_devicep)
2819 gomp_mutex_lock (&src_devicep->lock);
2820 else if (dst_devicep)
2821 gomp_mutex_lock (&dst_devicep->lock);
2822 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2823 volume, dst_offsets, src_offsets,
2824 dst_dimensions, src_dimensions,
2825 dst_devicep, src_devicep);
2826 if (src_devicep)
2827 gomp_mutex_unlock (&src_devicep->lock);
2828 else if (dst_devicep)
2829 gomp_mutex_unlock (&dst_devicep->lock);
2830 return ret;
2831 }
2832
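/* omp_target_associate_ptr: record that DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM corresponds to HOST_PTR for SIZE bytes, so that subsequent map clauses reuse the association instead of allocating device memory. Returns 0 on success, EINVAL otherwise. */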
2833 int
2834 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2835 size_t size, size_t device_offset, int device_num)
2836 {
2837 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2838 return EINVAL;
2839
2840 if (device_num < 0)
2841 return EINVAL;
2842
2843 struct gomp_device_descr *devicep = resolve_device (device_num);
2844 if (devicep == NULL)
2845 return EINVAL;
2846
2847 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2848 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2849 return EINVAL;
2850
2851 gomp_mutex_lock (&devicep->lock);
2852
2853 struct splay_tree_s *mem_map = &devicep->mem_map;
2854 struct splay_tree_key_s cur_node;
2855 int ret = EINVAL;
2856
2857 cur_node.host_start = (uintptr_t) host_ptr;
2858 cur_node.host_end = cur_node.host_start + size;
2859 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2860 if (n)
2861 {
2862 if (n->tgt->tgt_start + n->tgt_offset
2863 == (uintptr_t) device_ptr + device_offset
2864 && n->host_start <= cur_node.host_start
2865 && n->host_end >= cur_node.host_end)
2866 ret = 0;
2867 }
2868 else
2869 {
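/* No existing mapping: insert a standalone entry whose refcount is REFCOUNT_INFINITY so it is never unmapped implicitly; it can only be removed again by omp_target_disassociate_ptr. */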
2870 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2871 tgt->array = gomp_malloc (sizeof (*tgt->array));
2872 tgt->refcount = 1;
2873 tgt->tgt_start = 0;
2874 tgt->tgt_end = 0;
2875 tgt->to_free = NULL;
2876 tgt->prev = NULL;
2877 tgt->list_count = 0;
2878 tgt->device_descr = devicep;
2879 splay_tree_node array = tgt->array;
2880 splay_tree_key k = &array->key;
2881 k->host_start = cur_node.host_start;
2882 k->host_end = cur_node.host_end;
2883 k->tgt = tgt;
2884 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2885 k->refcount = REFCOUNT_INFINITY;
2886 k->virtual_refcount = 0;
2887 k->aux = NULL;
2888 array->left = NULL;
2889 array->right = NULL;
2890 splay_tree_insert (&devicep->mem_map, array);
2891 ret = 0;
2892 }
2893 gomp_mutex_unlock (&devicep->lock);
2894 return ret;
2895 }
2896
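/* omp_target_disassociate_ptr: remove an association previously created by omp_target_associate_ptr; only such entries (not ordinary mappings) are removed. Returns 0 on success, EINVAL otherwise. */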
2897 int
2898 omp_target_disassociate_ptr (const void *ptr, int device_num)
2899 {
2900 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2901 return EINVAL;
2902
2903 if (device_num < 0)
2904 return EINVAL;
2905
2906 struct gomp_device_descr *devicep = resolve_device (device_num);
2907 if (devicep == NULL)
2908 return EINVAL;
2909
2910 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2911 return EINVAL;
2912
2913 gomp_mutex_lock (&devicep->lock);
2914
2915 struct splay_tree_s *mem_map = &devicep->mem_map;
2916 struct splay_tree_key_s cur_node;
2917 int ret = EINVAL;
2918
2919 cur_node.host_start = (uintptr_t) ptr;
2920 cur_node.host_end = cur_node.host_start;
2921 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2922 if (n
2923 && n->host_start == cur_node.host_start
2924 && n->refcount == REFCOUNT_INFINITY
2925 && n->tgt->tgt_start == 0
2926 && n->tgt->to_free == NULL
2927 && n->tgt->refcount == 1
2928 && n->tgt->list_count == 0)
2929 {
2930 splay_tree_remove (&devicep->mem_map, n);
2931 gomp_unmap_tgt (n->tgt);
2932 ret = 0;
2933 }
2934
2935 gomp_mutex_unlock (&devicep->lock);
2936 return ret;
2937 }
2938
2939 int
2940 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2941 {
2942 (void) kind;
2943 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2944 return gomp_pause_host ();
2945 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2946 return -1;
2947 /* Do nothing for target devices for now. */
2948 return 0;
2949 }
2950
2951 int
2952 omp_pause_resource_all (omp_pause_resource_t kind)
2953 {
2954 (void) kind;
2955 if (gomp_pause_host ())
2956 return -1;
2957 /* Do nothing for target devices for now. */
2958 return 0;
2959 }
2960
2961 ialias (omp_pause_resource)
2962 ialias (omp_pause_resource_all)
2963
2964 #ifdef PLUGIN_SUPPORT
2965
2966 /* This function tries to load a plugin for DEVICE. The name of the plugin is
2967 passed in PLUGIN_NAME.
2968 The handles of the found functions are stored in the corresponding fields
2969 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2970
2971 static bool
2972 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2973 const char *plugin_name)
2974 {
2975 const char *err = NULL, *last_missing = NULL;
2976
2977 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2978 if (!plugin_handle)
2979 goto dl_fail;
2980
2981 /* Check if all required functions are available in the plugin and store
2982 their handles. None of the symbols can legitimately be NULL,
2983 so we don't need to check dlerror all the time. */
2984 #define DLSYM(f) \
2985 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2986 goto dl_fail
2987 /* Similar, but missing functions are not an error. Return false if
2988 failed, true otherwise. */
2989 #define DLSYM_OPT(f, n) \
2990 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2991 || (last_missing = #n, 0))
2992
2993 DLSYM (version);
2994 if (device->version_func () != GOMP_VERSION)
2995 {
2996 err = "plugin version mismatch";
2997 goto fail;
2998 }
2999
3000 DLSYM (get_name);
3001 DLSYM (get_caps);
3002 DLSYM (get_type);
3003 DLSYM (get_num_devices);
3004 DLSYM (init_device);
3005 DLSYM (fini_device);
3006 DLSYM (load_image);
3007 DLSYM (unload_image);
3008 DLSYM (alloc);
3009 DLSYM (free);
3010 DLSYM (dev2host);
3011 DLSYM (host2dev);
3012 device->capabilities = device->get_caps_func ();
3013 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3014 {
3015 DLSYM (run);
3016 DLSYM (async_run);
3017 DLSYM_OPT (can_run, can_run);
3018 DLSYM (dev2dev);
3019 }
3020 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3021 {
3022 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3023 || !DLSYM_OPT (openacc.create_thread_data,
3024 openacc_create_thread_data)
3025 || !DLSYM_OPT (openacc.destroy_thread_data,
3026 openacc_destroy_thread_data)
3027 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3028 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3029 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3030 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3031 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3032 || !DLSYM_OPT (openacc.async.queue_callback,
3033 openacc_async_queue_callback)
3034 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3035 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3036 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
3037 {
3038 /* Require all the OpenACC handlers if we have
3039 GOMP_OFFLOAD_CAP_OPENACC_200. */
3040 err = "plugin missing OpenACC handler function";
3041 goto fail;
3042 }
3043
3044 unsigned cuda = 0;
3045 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3046 openacc_cuda_get_current_device);
3047 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3048 openacc_cuda_get_current_context);
3049 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3050 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3051 if (cuda && cuda != 4)
3052 {
3053 /* Make sure all the CUDA functions are there if any of them are. */
3054 err = "plugin missing OpenACC CUDA handler function";
3055 goto fail;
3056 }
3057 }
3058 #undef DLSYM
3059 #undef DLSYM_OPT
3060
3061 return 1;
3062
3063 dl_fail:
3064 err = dlerror ();
3065 fail:
3066 gomp_error ("while loading %s: %s", plugin_name, err);
3067 if (last_missing)
3068 gomp_error ("missing function was %s", last_missing);
3069 if (plugin_handle)
3070 dlclose (plugin_handle);
3071
3072 return 0;
3073 }
3074
3075 /* This function finalizes all initialized devices. */
3076
3077 static void
3078 gomp_target_fini (void)
3079 {
3080 int i;
3081 for (i = 0; i < num_devices; i++)
3082 {
3083 bool ret = true;
3084 struct gomp_device_descr *devicep = &devices[i];
3085 gomp_mutex_lock (&devicep->lock);
3086 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3087 ret = gomp_fini_device (devicep);
3088 gomp_mutex_unlock (&devicep->lock);
3089 if (!ret)
3090 gomp_fatal ("device finalization failed");
3091 }
3092 }
3093
3094 /* This function initializes the runtime for offloading.
3095 It parses the list of offload plugins and tries to load them.
3096 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3097 will be set, and the array DEVICES initialized, containing descriptors for
3098 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3099 followed by the others. */
3100
3101 static void
3102 gomp_target_init (void)
3103 {
3104 const char *prefix = "libgomp-plugin-";
3105 const char *suffix = SONAME_SUFFIX (1);
3106 const char *cur, *next;
3107 char *plugin_name;
3108 int i, new_num_devices;
3109
3110 num_devices = 0;
3111 devices = NULL;
3112
3113 cur = OFFLOAD_PLUGINS;
3114 if (*cur)
3115 do
3116 {
3117 struct gomp_device_descr current_device;
3118 size_t prefix_len, suffix_len, cur_len;
3119
3120 next = strchr (cur, ',');
3121
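/* Assemble the plugin file name "libgomp-plugin-" <entry> SONAME_SUFFIX for the next comma-separated entry of OFFLOAD_PLUGINS. */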
3122 prefix_len = strlen (prefix);
3123 cur_len = next ? next - cur : strlen (cur);
3124 suffix_len = strlen (suffix);
3125
3126 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3127 if (!plugin_name)
3128 {
3129 num_devices = 0;
3130 break;
3131 }
3132
3133 memcpy (plugin_name, prefix, prefix_len);
3134 memcpy (plugin_name + prefix_len, cur, cur_len);
3135 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3136
3137 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3138 {
3139 new_num_devices = current_device.get_num_devices_func ();
3140 if (new_num_devices >= 1)
3141 {
3142 /* Augment DEVICES and NUM_DEVICES. */
3143
3144 devices = realloc (devices, (num_devices + new_num_devices)
3145 * sizeof (struct gomp_device_descr));
3146 if (!devices)
3147 {
3148 num_devices = 0;
3149 free (plugin_name);
3150 break;
3151 }
3152
3153 current_device.name = current_device.get_name_func ();
3154 /* current_device.capabilities has already been set. */
3155 current_device.type = current_device.get_type_func ();
3156 current_device.mem_map.root = NULL;
3157 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3158 for (i = 0; i < new_num_devices; i++)
3159 {
3160 current_device.target_id = i;
3161 devices[num_devices] = current_device;
3162 gomp_mutex_init (&devices[num_devices].lock);
3163 num_devices++;
3164 }
3165 }
3166 }
3167
3168 free (plugin_name);
3169 cur = next + 1;
3170 }
3171 while (next);
3172
3173 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3174 NUM_DEVICES_OPENMP. */
3175 struct gomp_device_descr *devices_s
3176 = malloc (num_devices * sizeof (struct gomp_device_descr));
3177 if (!devices_s)
3178 {
3179 num_devices = 0;
3180 free (devices);
3181 devices = NULL;
3182 }
3183 num_devices_openmp = 0;
3184 for (i = 0; i < num_devices; i++)
3185 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3186 devices_s[num_devices_openmp++] = devices[i];
3187 int num_devices_after_openmp = num_devices_openmp;
3188 for (i = 0; i < num_devices; i++)
3189 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3190 devices_s[num_devices_after_openmp++] = devices[i];
3191 free (devices);
3192 devices = devices_s;
3193
3194 for (i = 0; i < num_devices; i++)
3195 {
3196 /* The 'devices' array can be moved (by the realloc call) until we have
3197 found all the plugins, so registering with the OpenACC runtime (which
3198 takes a copy of the pointer argument) must be delayed until now. */
3199 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3200 goacc_register (&devices[i]);
3201 }
3202
3203 if (atexit (gomp_target_fini) != 0)
3204 gomp_fatal ("atexit failed");
3205 }
3206
3207 #else /* PLUGIN_SUPPORT */
3208 /* If dlfcn.h is unavailable we always fall back to host execution.
3209 GOMP_target* routines are just stubs for this case. */
3210 static void
3211 gomp_target_init (void)
3212 {
3213 }
3214 #endif /* PLUGIN_SUPPORT */