Source: gitweb view of thirdparty/gcc.git (git.ipfire.org) — blob libgomp/target.c, at commit "OpenACC 'acc_get_property' cleanup".  Note: each code line below carries a leading line number inserted by the gitweb rendering; those numbers are extraction artifacts, not part of the original source file.
1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file contains the support of offloading. */
27
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
37 #endif
38 #include <string.h>
39 #include <assert.h>
40 #include <errno.h>
41
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
46
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
48
49 static void gomp_target_init (void);
50
51 /* The whole initialization code for offloading plugins is only run once. */
52 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53
54 /* Mutex for offload image registration. */
55 static gomp_mutex_t register_lock;
56
57 /* This structure describes an offload image.
58 It contains type of the target device, pointer to host table descriptor, and
59 pointer to target data. */
60 struct offload_image_descr {
61 unsigned version;
62 enum offload_target_type type;
63 const void *host_table;
64 const void *target_data;
65 };
66
67 /* Array of descriptors of offload images. */
68 static struct offload_image_descr *offload_images;
69
70 /* Total number of offload images. */
71 static int num_offload_images;
72
73 /* Array of descriptors for all available devices. */
74 static struct gomp_device_descr *devices;
75
76 /* Total number of available devices. */
77 static int num_devices;
78
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
80 static int num_devices_openmp;
81
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
83
84 static void *
85 gomp_realloc_unlock (void *old, size_t size)
86 {
87 void *ret = realloc (old, size);
88 if (ret == NULL)
89 {
90 gomp_mutex_unlock (&register_lock);
91 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92 }
93 return ret;
94 }
95
96 attribute_hidden void
97 gomp_init_targets_once (void)
98 {
99 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
100 }
101
102 attribute_hidden int
103 gomp_get_num_devices (void)
104 {
105 gomp_init_targets_once ();
106 return num_devices_openmp;
107 }
108
109 static struct gomp_device_descr *
110 resolve_device (int device_id)
111 {
112 if (device_id == GOMP_DEVICE_ICV)
113 {
114 struct gomp_task_icv *icv = gomp_icv (false);
115 device_id = icv->default_device_var;
116 }
117
118 if (device_id < 0 || device_id >= gomp_get_num_devices ())
119 return NULL;
120
121 gomp_mutex_lock (&devices[device_id].lock);
122 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
123 gomp_init_device (&devices[device_id]);
124 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
125 {
126 gomp_mutex_unlock (&devices[device_id].lock);
127 return NULL;
128 }
129 gomp_mutex_unlock (&devices[device_id].lock);
130
131 return &devices[device_id];
132 }
133
134
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
137 {
138 if (key->host_start != key->host_end)
139 return splay_tree_lookup (mem_map, key);
140
141 key->host_end++;
142 splay_tree_key n = splay_tree_lookup (mem_map, key);
143 key->host_end--;
144 if (n)
145 return n;
146 key->host_start--;
147 n = splay_tree_lookup (mem_map, key);
148 key->host_start++;
149 if (n)
150 return n;
151 return splay_tree_lookup (mem_map, key);
152 }
153
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
156 {
157 if (key->host_start != key->host_end)
158 return splay_tree_lookup (mem_map, key);
159
160 key->host_end++;
161 splay_tree_key n = splay_tree_lookup (mem_map, key);
162 key->host_end--;
163 return n;
164 }
165
166 static inline void
167 gomp_device_copy (struct gomp_device_descr *devicep,
168 bool (*copy_func) (int, void *, const void *, size_t),
169 const char *dst, void *dstaddr,
170 const char *src, const void *srcaddr,
171 size_t size)
172 {
173 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
174 {
175 gomp_mutex_unlock (&devicep->lock);
176 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
178 }
179 }
180
181 static inline void
182 goacc_device_copy_async (struct gomp_device_descr *devicep,
183 bool (*copy_func) (int, void *, const void *, size_t,
184 struct goacc_asyncqueue *),
185 const char *dst, void *dstaddr,
186 const char *src, const void *srcaddr,
187 size_t size, struct goacc_asyncqueue *aq)
188 {
189 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
190 {
191 gomp_mutex_unlock (&devicep->lock);
192 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
194 }
195 }
196
/* Machinery for batching host-to-device transfers whose device addresses
   are adjacent (or nearly adjacent) into a single larger copy.  */

/* One contiguous range, stored as offsets relative to the enclosing
   target_mem_desc's tgt_start device address.  */
struct gomp_coalesce_chunk
{
  size_t start, end;
};
205
struct gomp_coalesce_buf
{
  /* Host staging buffer: gomp_copy_host2dev memcpys data here, and the
     accumulated contents are later transferred to the device in one go.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* chunks[i].start / chunks[i].end are offsets relative to the
     tgt->tgt_start device address of ranges that will be staged through
     BUF before being copied to the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of entries in CHUNKS, or -1 when coalescing is disabled.  */
  long chunk_cnt;
  /* While CHUNKS is being built: how many memory regions landed in the
     last chunk.  A chunk used by only one region is copied directly to
     the device rather than staged through BUF.  */
  long use_cnt;
};
224
225 /* Maximum size of memory region considered for coalescing. Larger copies
226 are performed directly. */
227 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
228
229 /* Maximum size of a gap in between regions to consider them being copied
230 within the same chunk. All the device offsets considered are within
231 newly allocated device memory, so it isn't fatal if we copy some padding
232 in between from host to device. The gaps come either from alignment
233 padding or from memory regions which are not supposed to be copied from
234 host to device (e.g. map(alloc:), map(from:) etc.). */
235 #define MAX_COALESCE_BUF_GAP (4 * 1024)
236
237 /* Add region with device tgt_start relative offset and length to CBUF. */
238
239 static inline void
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
241 {
242 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
243 return;
244 if (cbuf->chunk_cnt)
245 {
246 if (cbuf->chunk_cnt < 0)
247 return;
248 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
249 {
250 cbuf->chunk_cnt = -1;
251 return;
252 }
253 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
254 {
255 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
256 cbuf->use_cnt++;
257 return;
258 }
259 /* If the last chunk is only used by one mapping, discard it,
260 as it will be one host to device copy anyway and
261 memcpying it around will only waste cycles. */
262 if (cbuf->use_cnt == 1)
263 cbuf->chunk_cnt--;
264 }
265 cbuf->chunks[cbuf->chunk_cnt].start = start;
266 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
267 cbuf->chunk_cnt++;
268 cbuf->use_cnt = 1;
269 }
270
271 /* Return true for mapping kinds which need to copy data from the
272 host to device for regions that weren't previously mapped. */
273
274 static inline bool
275 gomp_to_device_kind_p (int kind)
276 {
277 switch (kind)
278 {
279 case GOMP_MAP_ALLOC:
280 case GOMP_MAP_FROM:
281 case GOMP_MAP_FORCE_ALLOC:
282 case GOMP_MAP_FORCE_FROM:
283 case GOMP_MAP_ALWAYS_FROM:
284 return false;
285 default:
286 return true;
287 }
288 }
289
290 attribute_hidden void
291 gomp_copy_host2dev (struct gomp_device_descr *devicep,
292 struct goacc_asyncqueue *aq,
293 void *d, const void *h, size_t sz,
294 struct gomp_coalesce_buf *cbuf)
295 {
296 if (cbuf)
297 {
298 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
299 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
300 {
301 long first = 0;
302 long last = cbuf->chunk_cnt - 1;
303 while (first <= last)
304 {
305 long middle = (first + last) >> 1;
306 if (cbuf->chunks[middle].end <= doff)
307 first = middle + 1;
308 else if (cbuf->chunks[middle].start <= doff)
309 {
310 if (doff + sz > cbuf->chunks[middle].end)
311 gomp_fatal ("internal libgomp cbuf error");
312 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
313 h, sz);
314 return;
315 }
316 else
317 last = middle - 1;
318 }
319 }
320 }
321 if (__builtin_expect (aq != NULL, 0))
322 goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
323 "dev", d, "host", h, sz, aq);
324 else
325 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
326 }
327
328 attribute_hidden void
329 gomp_copy_dev2host (struct gomp_device_descr *devicep,
330 struct goacc_asyncqueue *aq,
331 void *h, const void *d, size_t sz)
332 {
333 if (__builtin_expect (aq != NULL, 0))
334 goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
335 "host", h, "dev", d, sz, aq);
336 else
337 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
338 }
339
340 static void
341 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
342 {
343 if (!devicep->free_func (devicep->target_id, devptr))
344 {
345 gomp_mutex_unlock (&devicep->lock);
346 gomp_fatal ("error in freeing device memory block at %p", devptr);
347 }
348 }
349
350 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351 gomp_map_0len_lookup found oldn for newn.
352 Helper function of gomp_map_vars. */
353
354 static inline void
355 gomp_map_vars_existing (struct gomp_device_descr *devicep,
356 struct goacc_asyncqueue *aq, splay_tree_key oldn,
357 splay_tree_key newn, struct target_var_desc *tgt_var,
358 unsigned char kind, struct gomp_coalesce_buf *cbuf)
359 {
360 tgt_var->key = oldn;
361 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
362 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
363 tgt_var->do_detach = kind == GOMP_MAP_ATTACH;
364 tgt_var->offset = newn->host_start - oldn->host_start;
365 tgt_var->length = newn->host_end - newn->host_start;
366
367 if ((kind & GOMP_MAP_FLAG_FORCE)
368 || oldn->host_start > newn->host_start
369 || oldn->host_end < newn->host_end)
370 {
371 gomp_mutex_unlock (&devicep->lock);
372 gomp_fatal ("Trying to map into device [%p..%p) object when "
373 "[%p..%p) is already mapped",
374 (void *) newn->host_start, (void *) newn->host_end,
375 (void *) oldn->host_start, (void *) oldn->host_end);
376 }
377
378 if (GOMP_MAP_ALWAYS_TO_P (kind))
379 gomp_copy_host2dev (devicep, aq,
380 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
381 + newn->host_start - oldn->host_start),
382 (void *) newn->host_start,
383 newn->host_end - newn->host_start, cbuf);
384
385 if (oldn->refcount != REFCOUNT_INFINITY)
386 oldn->refcount++;
387 }
388
/* Fetch the mapping kind at index IDX from KINDS, which holds 16-bit
   entries when SHORT_MAPKIND is true and 8-bit entries otherwise.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
395
396 static void
397 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
398 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
399 struct gomp_coalesce_buf *cbuf)
400 {
401 struct gomp_device_descr *devicep = tgt->device_descr;
402 struct splay_tree_s *mem_map = &devicep->mem_map;
403 struct splay_tree_key_s cur_node;
404
405 cur_node.host_start = host_ptr;
406 if (cur_node.host_start == (uintptr_t) NULL)
407 {
408 cur_node.tgt_offset = (uintptr_t) NULL;
409 gomp_copy_host2dev (devicep, aq,
410 (void *) (tgt->tgt_start + target_offset),
411 (void *) &cur_node.tgt_offset,
412 sizeof (void *), cbuf);
413 return;
414 }
415 /* Add bias to the pointer value. */
416 cur_node.host_start += bias;
417 cur_node.host_end = cur_node.host_start;
418 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
419 if (n == NULL)
420 {
421 gomp_mutex_unlock (&devicep->lock);
422 gomp_fatal ("Pointer target of array section wasn't mapped");
423 }
424 cur_node.host_start -= n->host_start;
425 cur_node.tgt_offset
426 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
427 /* At this point tgt_offset is target address of the
428 array section. Now subtract bias to get what we want
429 to initialize the pointer with. */
430 cur_node.tgt_offset -= bias;
431 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
432 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
433 }
434
/* Handle mapping of structure element I (elements of the struct start at
   index FIRST) when the structure itself, N, is already mapped.  The
   element must be covered by an existing mapping belonging to the same
   target_mem_desc as N at the matching offset, otherwise this is a fatal
   error.  Zero-sized elements additionally probe one byte before and a
   one-byte range at the element's address, to catch mappings they abut.  */
435 static void
436 gomp_map_fields_existing (struct target_mem_desc *tgt,
437 struct goacc_asyncqueue *aq, splay_tree_key n,
438 size_t first, size_t i, void **hostaddrs,
439 size_t *sizes, void *kinds,
440 struct gomp_coalesce_buf *cbuf)
441 {
442 struct gomp_device_descr *devicep = tgt->device_descr;
443 struct splay_tree_s *mem_map = &devicep->mem_map;
444 struct splay_tree_key_s cur_node;
445 int kind;
446 const bool short_mapkind = true;
447 const int typemask = short_mapkind ? 0xff : 0x7;
448
/* First attempt: an existing mapping exactly covering element I, in the
   same target_mem_desc as N and at the consistent device offset.  */
449 cur_node.host_start = (uintptr_t) hostaddrs[i];
450 cur_node.host_end = cur_node.host_start + sizes[i];
451 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
452 kind = get_kind (short_mapkind, kinds, i);
453 if (n2
454 && n2->tgt == n->tgt
455 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
456 {
457 gomp_map_vars_existing (devicep, aq, n2, &cur_node,
458 &tgt->list[i], kind & typemask, cbuf);
459 return;
460 }
/* Zero-sized element: also accept mappings immediately adjacent to it.  */
461 if (sizes[i] == 0)
462 {
/* Probe the byte just before the element, but only if that byte is still
   past the previous element's address.  */
463 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
464 {
465 cur_node.host_start--;
466 n2 = splay_tree_lookup (mem_map, &cur_node);
467 cur_node.host_start++;
468 if (n2
469 && n2->tgt == n->tgt
470 && n2->host_start - n->host_start
471 == n2->tgt_offset - n->tgt_offset)
472 {
473 gomp_map_vars_existing (devicep, aq, n2, &cur_node,
474 &tgt->list[i], kind & typemask, cbuf);
475 return;
476 }
477 }
/* Probe a one-byte range starting at the element's address.  */
478 cur_node.host_end++;
479 n2 = splay_tree_lookup (mem_map, &cur_node);
480 cur_node.host_end--;
481 if (n2
482 && n2->tgt == n->tgt
483 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
484 {
485 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
486 kind & typemask, cbuf);
487 return;
488 }
489 }
/* No compatible mapping found: fatal, with the device lock dropped.  */
490 gomp_mutex_unlock (&devicep->lock);
491 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
492 "other mapped elements from the same structure weren't mapped "
493 "together with it", (void *) cur_node.host_start,
494 (void *) cur_node.host_end);
495 }
496
/* OpenACC 'attach' support: make the device copy of the pointer stored at
   host address ATTACH_TO (which lies inside the already-mapped block N)
   point at the device mapping of its target.  Per-pointer attachment
   counts are kept in N->aux; only the 0 -> 1 transition actually writes
   the device pointer, later attaches just bump the count.  BIAS is added
   to the host pointer value before looking up the target's mapping.
   Fatal (with DEVICEP's lock dropped) if N is NULL, the counter would
   overflow, the pointer is NULL, or its target is not mapped.  */
497 attribute_hidden void
498 gomp_attach_pointer (struct gomp_device_descr *devicep,
499 struct goacc_asyncqueue *aq, splay_tree mem_map,
500 splay_tree_key n, uintptr_t attach_to, size_t bias,
501 struct gomp_coalesce_buf *cbufp)
502 {
503 struct splay_tree_key_s s;
504 size_t size, idx;
505
506 if (n == NULL)
507 {
508 gomp_mutex_unlock (&devicep->lock);
509 gomp_fatal ("enclosing struct not mapped for attach");
510 }
511
/* One attach counter per pointer-sized slot of the mapped block.  */
512 size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
513 /* We might have a pointer in a packed struct: however we cannot have more
514 than one such pointer in each pointer-sized portion of the struct, so
515 this is safe. */
516 idx = (attach_to - n->host_start) / sizeof (void *);
517
/* Lazily allocate the aux data and the (zero-initialized) counters.  */
518 if (!n->aux)
519 n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
520
521 if (!n->aux->attach_count)
522 n->aux->attach_count
523 = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
524
525 if (n->aux->attach_count[idx] < UINTPTR_MAX)
526 n->aux->attach_count[idx]++;
527 else
528 {
529 gomp_mutex_unlock (&devicep->lock);
530 gomp_fatal ("attach count overflow");
531 }
532
/* First attach for this slot: compute and write the device pointer.  */
533 if (n->aux->attach_count[idx] == 1)
534 {
/* Device address of the pointer slot itself.  */
535 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
536 - n->host_start;
/* Host value currently stored in the pointer.  */
537 uintptr_t target = (uintptr_t) *(void **) attach_to;
538 splay_tree_key tn;
539 uintptr_t data;
540
541 if ((void *) target == NULL)
542 {
543 gomp_mutex_unlock (&devicep->lock);
544 gomp_fatal ("attempt to attach null pointer");
545 }
546
/* Find the mapping containing the (biased) pointer target.  */
547 s.host_start = target + bias;
548 s.host_end = s.host_start + 1;
549 tn = splay_tree_lookup (mem_map, &s);
550
551 if (!tn)
552 {
553 gomp_mutex_unlock (&devicep->lock);
554 gomp_fatal ("pointer target not mapped for attach");
555 }
556
/* Device address the pointer should hold.  */
557 data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
558
559 gomp_debug (1,
560 "%s: attaching host %p, target %p (struct base %p) to %p\n",
561 __FUNCTION__, (void *) attach_to, (void *) devptr,
562 (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
563
564 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
565 sizeof (void *), cbufp);
566 }
567 else
568 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
569 (void *) attach_to, (int) n->aux->attach_count[idx]);
570 }
571
/* OpenACC 'detach' support: decrement the attachment count for the
   pointer stored at host address DETACH_FROM inside mapped block N; when
   the count reaches zero, restore the device copy of the pointer to the
   current host value.  FINALIZE forces the count to 1 first, so the
   decrement below drops it to zero and the restore always happens.
   Fatal (with DEVICEP's lock dropped) if N is NULL, no counters exist,
   or the count would underflow.  */
572 attribute_hidden void
573 gomp_detach_pointer (struct gomp_device_descr *devicep,
574 struct goacc_asyncqueue *aq, splay_tree_key n,
575 uintptr_t detach_from, bool finalize,
576 struct gomp_coalesce_buf *cbufp)
577 {
578 size_t idx;
579
580 if (n == NULL)
581 {
582 gomp_mutex_unlock (&devicep->lock);
583 gomp_fatal ("enclosing struct not mapped for detach");
584 }
585
/* Counter slot: same pointer-sized indexing as gomp_attach_pointer.  */
586 idx = (detach_from - n->host_start) / sizeof (void *);
587
588 if (!n->aux || !n->aux->attach_count)
589 {
590 gomp_mutex_unlock (&devicep->lock);
591 gomp_fatal ("no attachment counters for struct");
592 }
593
/* A finalizing detach behaves as if this were the last attachment.  */
594 if (finalize)
595 n->aux->attach_count[idx] = 1;
596
597 if (n->aux->attach_count[idx] == 0)
598 {
599 gomp_mutex_unlock (&devicep->lock);
600 gomp_fatal ("attach count underflow");
601 }
602 else
603 n->aux->attach_count[idx]--;
604
/* Last detach: write the host pointer value back to the device copy.  */
605 if (n->aux->attach_count[idx] == 0)
606 {
607 uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
608 - n->host_start;
609 uintptr_t target = (uintptr_t) *(void **) detach_from;
610
611 gomp_debug (1,
612 "%s: detaching host %p, target %p (struct base %p) to %p\n",
613 __FUNCTION__, (void *) detach_from, (void *) devptr,
614 (void *) (n->tgt->tgt_start + n->tgt_offset),
615 (void *) target);
616
617 gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
618 sizeof (void *), cbufp);
619 }
620 else
621 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
622 (void *) detach_from, (int) n->aux->attach_count[idx]);
623 }
624
625 attribute_hidden uintptr_t
626 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
627 {
628 if (tgt->list[i].key != NULL)
629 return tgt->list[i].key->tgt->tgt_start
630 + tgt->list[i].key->tgt_offset
631 + tgt->list[i].offset;
632
633 switch (tgt->list[i].offset)
634 {
635 case OFFSET_INLINED:
636 return (uintptr_t) hostaddrs[i];
637
638 case OFFSET_POINTER:
639 return 0;
640
641 case OFFSET_STRUCT:
642 return tgt->list[i + 1].key->tgt->tgt_start
643 + tgt->list[i + 1].key->tgt_offset
644 + tgt->list[i + 1].offset
645 + (uintptr_t) hostaddrs[i]
646 - (uintptr_t) hostaddrs[i + 1];
647
648 default:
649 return tgt->tgt_start + tgt->list[i].offset;
650 }
651 }
652
653 static inline __attribute__((always_inline)) struct target_mem_desc *
654 gomp_map_vars_internal (struct gomp_device_descr *devicep,
655 struct goacc_asyncqueue *aq, size_t mapnum,
656 void **hostaddrs, void **devaddrs, size_t *sizes,
657 void *kinds, bool short_mapkind,
658 enum gomp_map_vars_kind pragma_kind)
659 {
660 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
661 bool has_firstprivate = false;
662 const int rshift = short_mapkind ? 8 : 3;
663 const int typemask = short_mapkind ? 0xff : 0x7;
664 struct splay_tree_s *mem_map = &devicep->mem_map;
665 struct splay_tree_key_s cur_node;
666 struct target_mem_desc *tgt
667 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
668 tgt->list_count = mapnum;
669 tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
670 || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
671 tgt->device_descr = devicep;
672 tgt->prev = NULL;
673 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
674
675 if (mapnum == 0)
676 {
677 tgt->tgt_start = 0;
678 tgt->tgt_end = 0;
679 return tgt;
680 }
681
682 tgt_align = sizeof (void *);
683 tgt_size = 0;
684 cbuf.chunks = NULL;
685 cbuf.chunk_cnt = -1;
686 cbuf.use_cnt = 0;
687 cbuf.buf = NULL;
688 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
689 {
690 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
691 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
692 cbuf.chunk_cnt = 0;
693 }
694 if (pragma_kind == GOMP_MAP_VARS_TARGET)
695 {
696 size_t align = 4 * sizeof (void *);
697 tgt_align = align;
698 tgt_size = mapnum * sizeof (void *);
699 cbuf.chunk_cnt = 1;
700 cbuf.use_cnt = 1 + (mapnum > 1);
701 cbuf.chunks[0].start = 0;
702 cbuf.chunks[0].end = tgt_size;
703 }
704
705 gomp_mutex_lock (&devicep->lock);
706 if (devicep->state == GOMP_DEVICE_FINALIZED)
707 {
708 gomp_mutex_unlock (&devicep->lock);
709 free (tgt);
710 return NULL;
711 }
712
713 for (i = 0; i < mapnum; i++)
714 {
715 int kind = get_kind (short_mapkind, kinds, i);
716 if (hostaddrs[i] == NULL
717 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
718 {
719 tgt->list[i].key = NULL;
720 tgt->list[i].offset = OFFSET_INLINED;
721 continue;
722 }
723 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
724 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
725 {
726 tgt->list[i].key = NULL;
727 if (!not_found_cnt)
728 {
729 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
730 on a separate construct prior to using use_device_{addr,ptr}.
731 In OpenMP 5.0, map directives need to be ordered by the
732 middle-end before the use_device_* clauses. If
733 !not_found_cnt, all mappings requested (if any) are already
734 mapped, so use_device_{addr,ptr} can be resolved right away.
735 Otherwise, if not_found_cnt, gomp_map_lookup might fail
736 now but would succeed after performing the mappings in the
737 following loop. We can't defer this always to the second
738 loop, because it is not even invoked when !not_found_cnt
739 after the first loop. */
740 cur_node.host_start = (uintptr_t) hostaddrs[i];
741 cur_node.host_end = cur_node.host_start;
742 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
743 if (n != NULL)
744 {
745 cur_node.host_start -= n->host_start;
746 hostaddrs[i]
747 = (void *) (n->tgt->tgt_start + n->tgt_offset
748 + cur_node.host_start);
749 }
750 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
751 {
752 gomp_mutex_unlock (&devicep->lock);
753 gomp_fatal ("use_device_ptr pointer wasn't mapped");
754 }
755 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
756 /* If not present, continue using the host address. */
757 ;
758 else
759 __builtin_unreachable ();
760 tgt->list[i].offset = OFFSET_INLINED;
761 }
762 else
763 tgt->list[i].offset = 0;
764 continue;
765 }
766 else if ((kind & typemask) == GOMP_MAP_STRUCT)
767 {
768 size_t first = i + 1;
769 size_t last = i + sizes[i];
770 cur_node.host_start = (uintptr_t) hostaddrs[i];
771 cur_node.host_end = (uintptr_t) hostaddrs[last]
772 + sizes[last];
773 tgt->list[i].key = NULL;
774 tgt->list[i].offset = OFFSET_STRUCT;
775 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
776 if (n == NULL)
777 {
778 size_t align = (size_t) 1 << (kind >> rshift);
779 if (tgt_align < align)
780 tgt_align = align;
781 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
782 tgt_size = (tgt_size + align - 1) & ~(align - 1);
783 tgt_size += cur_node.host_end - cur_node.host_start;
784 not_found_cnt += last - i;
785 for (i = first; i <= last; i++)
786 {
787 tgt->list[i].key = NULL;
788 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
789 & typemask))
790 gomp_coalesce_buf_add (&cbuf,
791 tgt_size - cur_node.host_end
792 + (uintptr_t) hostaddrs[i],
793 sizes[i]);
794 }
795 i--;
796 continue;
797 }
798 for (i = first; i <= last; i++)
799 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
800 sizes, kinds, NULL);
801 i--;
802 continue;
803 }
804 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
805 {
806 tgt->list[i].key = NULL;
807 tgt->list[i].offset = OFFSET_POINTER;
808 has_firstprivate = true;
809 continue;
810 }
811 else if ((kind & typemask) == GOMP_MAP_ATTACH)
812 {
813 tgt->list[i].key = NULL;
814 has_firstprivate = true;
815 continue;
816 }
817 cur_node.host_start = (uintptr_t) hostaddrs[i];
818 if (!GOMP_MAP_POINTER_P (kind & typemask)
819 && (kind & typemask) != GOMP_MAP_ATTACH)
820 cur_node.host_end = cur_node.host_start + sizes[i];
821 else
822 cur_node.host_end = cur_node.host_start + sizeof (void *);
823 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
824 {
825 tgt->list[i].key = NULL;
826
827 size_t align = (size_t) 1 << (kind >> rshift);
828 if (tgt_align < align)
829 tgt_align = align;
830 tgt_size = (tgt_size + align - 1) & ~(align - 1);
831 gomp_coalesce_buf_add (&cbuf, tgt_size,
832 cur_node.host_end - cur_node.host_start);
833 tgt_size += cur_node.host_end - cur_node.host_start;
834 has_firstprivate = true;
835 continue;
836 }
837 splay_tree_key n;
838 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
839 {
840 n = gomp_map_0len_lookup (mem_map, &cur_node);
841 if (!n)
842 {
843 tgt->list[i].key = NULL;
844 tgt->list[i].offset = OFFSET_POINTER;
845 continue;
846 }
847 }
848 else
849 n = splay_tree_lookup (mem_map, &cur_node);
850 if (n && n->refcount != REFCOUNT_LINK)
851 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
852 kind & typemask, NULL);
853 else
854 {
855 tgt->list[i].key = NULL;
856
857 if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
858 {
859 /* Not present, hence, skip entry - including its MAP_POINTER,
860 when existing. */
861 tgt->list[i].offset = OFFSET_POINTER;
862 if (i + 1 < mapnum
863 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
864 == GOMP_MAP_POINTER))
865 {
866 ++i;
867 tgt->list[i].key = NULL;
868 tgt->list[i].offset = 0;
869 }
870 continue;
871 }
872 size_t align = (size_t) 1 << (kind >> rshift);
873 not_found_cnt++;
874 if (tgt_align < align)
875 tgt_align = align;
876 tgt_size = (tgt_size + align - 1) & ~(align - 1);
877 if (gomp_to_device_kind_p (kind & typemask))
878 gomp_coalesce_buf_add (&cbuf, tgt_size,
879 cur_node.host_end - cur_node.host_start);
880 tgt_size += cur_node.host_end - cur_node.host_start;
881 if ((kind & typemask) == GOMP_MAP_TO_PSET)
882 {
883 size_t j;
884 for (j = i + 1; j < mapnum; j++)
885 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
886 & typemask))
887 break;
888 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
889 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
890 > cur_node.host_end))
891 break;
892 else
893 {
894 tgt->list[j].key = NULL;
895 i++;
896 }
897 }
898 }
899 }
900
901 if (devaddrs)
902 {
903 if (mapnum != 1)
904 {
905 gomp_mutex_unlock (&devicep->lock);
906 gomp_fatal ("unexpected aggregation");
907 }
908 tgt->to_free = devaddrs[0];
909 tgt->tgt_start = (uintptr_t) tgt->to_free;
910 tgt->tgt_end = tgt->tgt_start + sizes[0];
911 }
912 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
913 {
914 /* Allocate tgt_align aligned tgt_size block of memory. */
915 /* FIXME: Perhaps change interface to allocate properly aligned
916 memory. */
917 tgt->to_free = devicep->alloc_func (devicep->target_id,
918 tgt_size + tgt_align - 1);
919 if (!tgt->to_free)
920 {
921 gomp_mutex_unlock (&devicep->lock);
922 gomp_fatal ("device memory allocation fail");
923 }
924
925 tgt->tgt_start = (uintptr_t) tgt->to_free;
926 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
927 tgt->tgt_end = tgt->tgt_start + tgt_size;
928
929 if (cbuf.use_cnt == 1)
930 cbuf.chunk_cnt--;
931 if (cbuf.chunk_cnt > 0)
932 {
933 cbuf.buf
934 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
935 if (cbuf.buf)
936 {
937 cbuf.tgt = tgt;
938 cbufp = &cbuf;
939 }
940 }
941 }
942 else
943 {
944 tgt->to_free = NULL;
945 tgt->tgt_start = 0;
946 tgt->tgt_end = 0;
947 }
948
949 tgt_size = 0;
950 if (pragma_kind == GOMP_MAP_VARS_TARGET)
951 tgt_size = mapnum * sizeof (void *);
952
953 tgt->array = NULL;
954 if (not_found_cnt || has_firstprivate)
955 {
956 if (not_found_cnt)
957 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
958 splay_tree_node array = tgt->array;
959 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
960 uintptr_t field_tgt_base = 0;
961
962 for (i = 0; i < mapnum; i++)
963 if (tgt->list[i].key == NULL)
964 {
965 int kind = get_kind (short_mapkind, kinds, i);
966 if (hostaddrs[i] == NULL)
967 continue;
968 switch (kind & typemask)
969 {
970 size_t align, len, first, last;
971 splay_tree_key n;
972 case GOMP_MAP_FIRSTPRIVATE:
973 align = (size_t) 1 << (kind >> rshift);
974 tgt_size = (tgt_size + align - 1) & ~(align - 1);
975 tgt->list[i].offset = tgt_size;
976 len = sizes[i];
977 gomp_copy_host2dev (devicep, aq,
978 (void *) (tgt->tgt_start + tgt_size),
979 (void *) hostaddrs[i], len, cbufp);
980 tgt_size += len;
981 continue;
982 case GOMP_MAP_FIRSTPRIVATE_INT:
983 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
984 continue;
985 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
986 /* The OpenACC 'host_data' construct only allows 'use_device'
987 "mapping" clauses, so in the first loop, 'not_found_cnt'
988 must always have been zero, so all OpenACC 'use_device'
989 clauses have already been handled. (We can only easily test
990 'use_device' with 'if_present' clause here.) */
991 assert (tgt->list[i].offset == OFFSET_INLINED);
992 /* Nevertheless, FALLTHRU to the normal handling, to keep the
993 code conceptually simple, similar to the first loop. */
994 case GOMP_MAP_USE_DEVICE_PTR:
995 if (tgt->list[i].offset == 0)
996 {
997 cur_node.host_start = (uintptr_t) hostaddrs[i];
998 cur_node.host_end = cur_node.host_start;
999 n = gomp_map_lookup (mem_map, &cur_node);
1000 if (n != NULL)
1001 {
1002 cur_node.host_start -= n->host_start;
1003 hostaddrs[i]
1004 = (void *) (n->tgt->tgt_start + n->tgt_offset
1005 + cur_node.host_start);
1006 }
1007 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1008 {
1009 gomp_mutex_unlock (&devicep->lock);
1010 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1011 }
1012 else if ((kind & typemask)
1013 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1014 /* If not present, continue using the host address. */
1015 ;
1016 else
1017 __builtin_unreachable ();
1018 tgt->list[i].offset = OFFSET_INLINED;
1019 }
1020 continue;
1021 case GOMP_MAP_STRUCT:
1022 first = i + 1;
1023 last = i + sizes[i];
1024 cur_node.host_start = (uintptr_t) hostaddrs[i];
1025 cur_node.host_end = (uintptr_t) hostaddrs[last]
1026 + sizes[last];
1027 if (tgt->list[first].key != NULL)
1028 continue;
1029 n = splay_tree_lookup (mem_map, &cur_node);
1030 if (n == NULL)
1031 {
1032 size_t align = (size_t) 1 << (kind >> rshift);
1033 tgt_size -= (uintptr_t) hostaddrs[first]
1034 - (uintptr_t) hostaddrs[i];
1035 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1036 tgt_size += (uintptr_t) hostaddrs[first]
1037 - (uintptr_t) hostaddrs[i];
1038 field_tgt_base = (uintptr_t) hostaddrs[first];
1039 field_tgt_offset = tgt_size;
1040 field_tgt_clear = last;
1041 tgt_size += cur_node.host_end
1042 - (uintptr_t) hostaddrs[first];
1043 continue;
1044 }
1045 for (i = first; i <= last; i++)
1046 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1047 sizes, kinds, cbufp);
1048 i--;
1049 continue;
1050 case GOMP_MAP_ALWAYS_POINTER:
1051 cur_node.host_start = (uintptr_t) hostaddrs[i];
1052 cur_node.host_end = cur_node.host_start + sizeof (void *);
1053 n = splay_tree_lookup (mem_map, &cur_node);
1054 if (n == NULL
1055 || n->host_start > cur_node.host_start
1056 || n->host_end < cur_node.host_end)
1057 {
1058 gomp_mutex_unlock (&devicep->lock);
1059 gomp_fatal ("always pointer not mapped");
1060 }
1061 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1062 != GOMP_MAP_ALWAYS_POINTER)
1063 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1064 if (cur_node.tgt_offset)
1065 cur_node.tgt_offset -= sizes[i];
1066 gomp_copy_host2dev (devicep, aq,
1067 (void *) (n->tgt->tgt_start
1068 + n->tgt_offset
1069 + cur_node.host_start
1070 - n->host_start),
1071 (void *) &cur_node.tgt_offset,
1072 sizeof (void *), cbufp);
1073 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1074 + cur_node.host_start - n->host_start;
1075 continue;
1076 case GOMP_MAP_IF_PRESENT:
1077 /* Not present - otherwise handled above. Skip over its
1078 MAP_POINTER as well. */
1079 if (i + 1 < mapnum
1080 && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1081 == GOMP_MAP_POINTER))
1082 ++i;
1083 continue;
1084 case GOMP_MAP_ATTACH:
1085 {
1086 cur_node.host_start = (uintptr_t) hostaddrs[i];
1087 cur_node.host_end = cur_node.host_start + sizeof (void *);
1088 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1089 if (n != NULL)
1090 {
1091 tgt->list[i].key = n;
1092 tgt->list[i].offset = cur_node.host_start - n->host_start;
1093 tgt->list[i].length = n->host_end - n->host_start;
1094 tgt->list[i].copy_from = false;
1095 tgt->list[i].always_copy_from = false;
1096 tgt->list[i].do_detach
1097 = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
1098 n->refcount++;
1099 }
1100 else
1101 {
1102 gomp_mutex_unlock (&devicep->lock);
1103 gomp_fatal ("outer struct not mapped for attach");
1104 }
1105 gomp_attach_pointer (devicep, aq, mem_map, n,
1106 (uintptr_t) hostaddrs[i], sizes[i],
1107 cbufp);
1108 continue;
1109 }
1110 default:
1111 break;
1112 }
1113 splay_tree_key k = &array->key;
1114 k->host_start = (uintptr_t) hostaddrs[i];
1115 if (!GOMP_MAP_POINTER_P (kind & typemask))
1116 k->host_end = k->host_start + sizes[i];
1117 else
1118 k->host_end = k->host_start + sizeof (void *);
1119 splay_tree_key n = splay_tree_lookup (mem_map, k);
1120 if (n && n->refcount != REFCOUNT_LINK)
1121 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1122 kind & typemask, cbufp);
1123 else
1124 {
1125 k->aux = NULL;
1126 if (n && n->refcount == REFCOUNT_LINK)
1127 {
1128 /* Replace target address of the pointer with target address
1129 of mapped object in the splay tree. */
1130 splay_tree_remove (mem_map, n);
1131 k->aux
1132 = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1133 k->aux->link_key = n;
1134 }
1135 size_t align = (size_t) 1 << (kind >> rshift);
1136 tgt->list[i].key = k;
1137 k->tgt = tgt;
1138 if (field_tgt_clear != FIELD_TGT_EMPTY)
1139 {
1140 k->tgt_offset = k->host_start - field_tgt_base
1141 + field_tgt_offset;
1142 if (i == field_tgt_clear)
1143 field_tgt_clear = FIELD_TGT_EMPTY;
1144 }
1145 else
1146 {
1147 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1148 k->tgt_offset = tgt_size;
1149 tgt_size += k->host_end - k->host_start;
1150 }
1151 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1152 tgt->list[i].always_copy_from
1153 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1154 tgt->list[i].do_detach = false;
1155 tgt->list[i].offset = 0;
1156 tgt->list[i].length = k->host_end - k->host_start;
1157 k->refcount = 1;
1158 k->virtual_refcount = 0;
1159 tgt->refcount++;
1160 array->left = NULL;
1161 array->right = NULL;
1162 splay_tree_insert (mem_map, array);
1163 switch (kind & typemask)
1164 {
1165 case GOMP_MAP_ALLOC:
1166 case GOMP_MAP_FROM:
1167 case GOMP_MAP_FORCE_ALLOC:
1168 case GOMP_MAP_FORCE_FROM:
1169 case GOMP_MAP_ALWAYS_FROM:
1170 break;
1171 case GOMP_MAP_TO:
1172 case GOMP_MAP_TOFROM:
1173 case GOMP_MAP_FORCE_TO:
1174 case GOMP_MAP_FORCE_TOFROM:
1175 case GOMP_MAP_ALWAYS_TO:
1176 case GOMP_MAP_ALWAYS_TOFROM:
1177 gomp_copy_host2dev (devicep, aq,
1178 (void *) (tgt->tgt_start
1179 + k->tgt_offset),
1180 (void *) k->host_start,
1181 k->host_end - k->host_start, cbufp);
1182 break;
1183 case GOMP_MAP_POINTER:
1184 gomp_map_pointer (tgt, aq,
1185 (uintptr_t) *(void **) k->host_start,
1186 k->tgt_offset, sizes[i], cbufp);
1187 break;
1188 case GOMP_MAP_TO_PSET:
1189 gomp_copy_host2dev (devicep, aq,
1190 (void *) (tgt->tgt_start
1191 + k->tgt_offset),
1192 (void *) k->host_start,
1193 k->host_end - k->host_start, cbufp);
1194
1195 for (j = i + 1; j < mapnum; j++)
1196 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
1197 j)
1198 & typemask))
1199 break;
1200 else if ((uintptr_t) hostaddrs[j] < k->host_start
1201 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1202 > k->host_end))
1203 break;
1204 else
1205 {
1206 tgt->list[j].key = k;
1207 tgt->list[j].copy_from = false;
1208 tgt->list[j].always_copy_from = false;
1209 tgt->list[j].do_detach = false;
1210 if (k->refcount != REFCOUNT_INFINITY)
1211 k->refcount++;
1212 gomp_map_pointer (tgt, aq,
1213 (uintptr_t) *(void **) hostaddrs[j],
1214 k->tgt_offset
1215 + ((uintptr_t) hostaddrs[j]
1216 - k->host_start),
1217 sizes[j], cbufp);
1218 i++;
1219 }
1220 break;
1221 case GOMP_MAP_FORCE_PRESENT:
1222 {
1223 /* We already looked up the memory region above and it
1224 was missing. */
1225 size_t size = k->host_end - k->host_start;
1226 gomp_mutex_unlock (&devicep->lock);
1227 #ifdef HAVE_INTTYPES_H
1228 gomp_fatal ("present clause: !acc_is_present (%p, "
1229 "%"PRIu64" (0x%"PRIx64"))",
1230 (void *) k->host_start,
1231 (uint64_t) size, (uint64_t) size);
1232 #else
1233 gomp_fatal ("present clause: !acc_is_present (%p, "
1234 "%lu (0x%lx))", (void *) k->host_start,
1235 (unsigned long) size, (unsigned long) size);
1236 #endif
1237 }
1238 break;
1239 case GOMP_MAP_FORCE_DEVICEPTR:
1240 assert (k->host_end - k->host_start == sizeof (void *));
1241 gomp_copy_host2dev (devicep, aq,
1242 (void *) (tgt->tgt_start
1243 + k->tgt_offset),
1244 (void *) k->host_start,
1245 sizeof (void *), cbufp);
1246 break;
1247 default:
1248 gomp_mutex_unlock (&devicep->lock);
1249 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1250 kind);
1251 }
1252
1253 if (k->aux && k->aux->link_key)
1254 {
1255 /* Set link pointer on target to the device address of the
1256 mapped object. */
1257 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1258 /* We intentionally do not use coalescing here, as it's not
1259 data allocated by the current call to this function. */
1260 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1261 &tgt_addr, sizeof (void *), NULL);
1262 }
1263 array++;
1264 }
1265 }
1266 }
1267
1268 if (pragma_kind == GOMP_MAP_VARS_TARGET)
1269 {
1270 for (i = 0; i < mapnum; i++)
1271 {
1272 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1273 gomp_copy_host2dev (devicep, aq,
1274 (void *) (tgt->tgt_start + i * sizeof (void *)),
1275 (void *) &cur_node.tgt_offset, sizeof (void *),
1276 cbufp);
1277 }
1278 }
1279
1280 if (cbufp)
1281 {
1282 long c = 0;
1283 for (c = 0; c < cbuf.chunk_cnt; ++c)
1284 gomp_copy_host2dev (devicep, aq,
1285 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1286 (char *) cbuf.buf + (cbuf.chunks[c].start
1287 - cbuf.chunks[0].start),
1288 cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
1289 free (cbuf.buf);
1290 cbuf.buf = NULL;
1291 cbufp = NULL;
1292 }
1293
1294 /* If the variable from "omp target enter data" map-list was already mapped,
1295 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1296 gomp_exit_data. */
1297 if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
1298 || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
1299 && tgt->refcount == 0)
1300 {
1301 /* If we're about to discard a target_mem_desc with no "structural"
1302 references (tgt->refcount == 0), any splay keys linked in the tgt's
1303 list must have their virtual refcount incremented to represent that
1304 "lost" reference in order to implement the semantics of the OpenACC
1305 "present increment" operation properly. */
1306 if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
1307 for (i = 0; i < tgt->list_count; i++)
1308 if (tgt->list[i].key)
1309 tgt->list[i].key->virtual_refcount++;
1310
1311 free (tgt);
1312 tgt = NULL;
1313 }
1314
1315 gomp_mutex_unlock (&devicep->lock);
1316 return tgt;
1317 }
1318
1319 attribute_hidden struct target_mem_desc *
1320 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1321 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1322 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
1323 {
1324 return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1325 sizes, kinds, short_mapkind, pragma_kind);
1326 }
1327
1328 attribute_hidden struct target_mem_desc *
1329 gomp_map_vars_async (struct gomp_device_descr *devicep,
1330 struct goacc_asyncqueue *aq, size_t mapnum,
1331 void **hostaddrs, void **devaddrs, size_t *sizes,
1332 void *kinds, bool short_mapkind,
1333 enum gomp_map_vars_kind pragma_kind)
1334 {
1335 return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1336 sizes, kinds, short_mapkind, pragma_kind);
1337 }
1338
1339 static void
1340 gomp_unmap_tgt (struct target_mem_desc *tgt)
1341 {
1342 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1343 if (tgt->tgt_end)
1344 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1345
1346 free (tgt->array);
1347 free (tgt);
1348 }
1349
1350 static bool
1351 gomp_unref_tgt (void *ptr)
1352 {
1353 bool is_tgt_unmapped = false;
1354
1355 struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1356
1357 if (tgt->refcount > 1)
1358 tgt->refcount--;
1359 else
1360 {
1361 gomp_unmap_tgt (tgt);
1362 is_tgt_unmapped = true;
1363 }
1364
1365 return is_tgt_unmapped;
1366 }
1367
/* Wrapper around gomp_unref_tgt with a void result, suitable for use as an
   asynchronous-queue callback where the boolean "was it unmapped" result
   is of no interest.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1373
1374 static inline __attribute__((always_inline)) bool
1375 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1376 struct goacc_asyncqueue *aq)
1377 {
1378 bool is_tgt_unmapped = false;
1379 splay_tree_remove (&devicep->mem_map, k);
1380 if (k->aux)
1381 {
1382 if (k->aux->link_key)
1383 splay_tree_insert (&devicep->mem_map,
1384 (splay_tree_node) k->aux->link_key);
1385 if (k->aux->attach_count)
1386 free (k->aux->attach_count);
1387 free (k->aux);
1388 k->aux = NULL;
1389 }
1390 if (aq)
1391 devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1392 (void *) k->tgt);
1393 else
1394 is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1395 return is_tgt_unmapped;
1396 }
1397
/* Synchronous counterpart of gomp_remove_var_async: remove the mapping for
   variable K from DEVICEP immediately.  Returns true iff the underlying
   target_mem_desc was unmapped as a result.  */

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}
1403
/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  /* The boolean result is meaningless here: the target_mem_desc unmap is
     deferred to a callback on AQ, so it is never unmapped synchronously.  */
  (void) gomp_remove_var_internal (devicep, k, aq);
}
1416
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  If AQ is non-NULL, device-side transfers are queued
   on that OpenACC asynchronous queue and the final unreference of TGT is
   deferred to a queue callback.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* An empty list means nothing was mapped; just drop the descriptor.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already torn down: no device-side work possible, only free
	 the host-side bookkeeping.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].do_detach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     k->refcount == 1, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* Drop one reference from K: prefer a "virtual" reference (OpenACC
	 present-increment bookkeeping on this very descriptor), else a
	 plain one; schedule the actual unmap when the last structural
	 reference goes away.  */
      bool do_unmap = false;
      if (k->tgt == tgt
	  && k->virtual_refcount > 0
	  && k->refcount != REFCOUNT_INFINITY)
	{
	  k->virtual_refcount--;
	  k->refcount--;
	}
      else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      /* Copy back either when this was the last reference and the map kind
	 requested it, or unconditionally for "always from" entries.  */
      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  /* Finally drop the structural reference this call owns on TGT itself;
     with a queue, defer it until all queued transfers above are done.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}
1504
/* Synchronously unmap the variables described by TGT; see
   gomp_unmap_vars_internal for the DO_COPYFROM semantics.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}
1510
/* Asynchronous variant of gomp_unmap_vars: device-side transfers and the
   final unreference of TGT are queued on AQ.  */

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
1517
1518 static void
1519 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1520 size_t *sizes, void *kinds, bool short_mapkind)
1521 {
1522 size_t i;
1523 struct splay_tree_key_s cur_node;
1524 const int typemask = short_mapkind ? 0xff : 0x7;
1525
1526 if (!devicep)
1527 return;
1528
1529 if (mapnum == 0)
1530 return;
1531
1532 gomp_mutex_lock (&devicep->lock);
1533 if (devicep->state == GOMP_DEVICE_FINALIZED)
1534 {
1535 gomp_mutex_unlock (&devicep->lock);
1536 return;
1537 }
1538
1539 for (i = 0; i < mapnum; i++)
1540 if (sizes[i])
1541 {
1542 cur_node.host_start = (uintptr_t) hostaddrs[i];
1543 cur_node.host_end = cur_node.host_start + sizes[i];
1544 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1545 if (n)
1546 {
1547 int kind = get_kind (short_mapkind, kinds, i);
1548 if (n->host_start > cur_node.host_start
1549 || n->host_end < cur_node.host_end)
1550 {
1551 gomp_mutex_unlock (&devicep->lock);
1552 gomp_fatal ("Trying to update [%p..%p) object when "
1553 "only [%p..%p) is mapped",
1554 (void *) cur_node.host_start,
1555 (void *) cur_node.host_end,
1556 (void *) n->host_start,
1557 (void *) n->host_end);
1558 }
1559
1560
1561 void *hostaddr = (void *) cur_node.host_start;
1562 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1563 + cur_node.host_start - n->host_start);
1564 size_t size = cur_node.host_end - cur_node.host_start;
1565
1566 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1567 gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1568 NULL);
1569 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1570 gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1571 }
1572 }
1573 gomp_mutex_unlock (&devicep->lock);
1574 }
1575
1576 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1577 And insert to splay tree the mapping between addresses from HOST_TABLE and
1578 from loaded target image. We rely in the host and device compiler
1579 emitting variable and functions in the same order. */
1580
1581 static void
1582 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1583 const void *host_table, const void *target_data,
1584 bool is_register_lock)
1585 {
1586 void **host_func_table = ((void ***) host_table)[0];
1587 void **host_funcs_end = ((void ***) host_table)[1];
1588 void **host_var_table = ((void ***) host_table)[2];
1589 void **host_vars_end = ((void ***) host_table)[3];
1590
1591 /* The func table contains only addresses, the var table contains addresses
1592 and corresponding sizes. */
1593 int num_funcs = host_funcs_end - host_func_table;
1594 int num_vars = (host_vars_end - host_var_table) / 2;
1595
1596 /* Load image to device and get target addresses for the image. */
1597 struct addr_pair *target_table = NULL;
1598 int i, num_target_entries;
1599
1600 num_target_entries
1601 = devicep->load_image_func (devicep->target_id, version,
1602 target_data, &target_table);
1603
1604 if (num_target_entries != num_funcs + num_vars)
1605 {
1606 gomp_mutex_unlock (&devicep->lock);
1607 if (is_register_lock)
1608 gomp_mutex_unlock (&register_lock);
1609 gomp_fatal ("Cannot map target functions or variables"
1610 " (expected %u, have %u)", num_funcs + num_vars,
1611 num_target_entries);
1612 }
1613
1614 /* Insert host-target address mapping into splay tree. */
1615 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1616 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1617 tgt->refcount = REFCOUNT_INFINITY;
1618 tgt->tgt_start = 0;
1619 tgt->tgt_end = 0;
1620 tgt->to_free = NULL;
1621 tgt->prev = NULL;
1622 tgt->list_count = 0;
1623 tgt->device_descr = devicep;
1624 splay_tree_node array = tgt->array;
1625
1626 for (i = 0; i < num_funcs; i++)
1627 {
1628 splay_tree_key k = &array->key;
1629 k->host_start = (uintptr_t) host_func_table[i];
1630 k->host_end = k->host_start + 1;
1631 k->tgt = tgt;
1632 k->tgt_offset = target_table[i].start;
1633 k->refcount = REFCOUNT_INFINITY;
1634 k->virtual_refcount = 0;
1635 k->aux = NULL;
1636 array->left = NULL;
1637 array->right = NULL;
1638 splay_tree_insert (&devicep->mem_map, array);
1639 array++;
1640 }
1641
1642 /* Most significant bit of the size in host and target tables marks
1643 "omp declare target link" variables. */
1644 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1645 const uintptr_t size_mask = ~link_bit;
1646
1647 for (i = 0; i < num_vars; i++)
1648 {
1649 struct addr_pair *target_var = &target_table[num_funcs + i];
1650 uintptr_t target_size = target_var->end - target_var->start;
1651
1652 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1653 {
1654 gomp_mutex_unlock (&devicep->lock);
1655 if (is_register_lock)
1656 gomp_mutex_unlock (&register_lock);
1657 gomp_fatal ("Cannot map target variables (size mismatch)");
1658 }
1659
1660 splay_tree_key k = &array->key;
1661 k->host_start = (uintptr_t) host_var_table[i * 2];
1662 k->host_end
1663 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1664 k->tgt = tgt;
1665 k->tgt_offset = target_var->start;
1666 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1667 k->virtual_refcount = 0;
1668 k->aux = NULL;
1669 array->left = NULL;
1670 array->right = NULL;
1671 splay_tree_insert (&devicep->mem_map, array);
1672 array++;
1673 }
1674
1675 free (target_table);
1676 }
1677
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* HOST_TABLE layout matches gomp_load_image_to_device: func table
     start/end, var table start/end.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array: its tgt is the descriptor created
     by gomp_load_image_to_device, which may need freeing below.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      /* Ordinary image variables are simply removed; link variables go
	 through gomp_remove_var so a saved link key is restored.  */
      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  /* If nothing above already released the image's target_mem_desc, free it
     together with its node array here.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
1750
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  VERSION carries the
   libgomp ABI version the image was built against.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  /* Refuse images built against a newer libgomp ABI than this runtime.  */
  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images, so devices initialized later
     pick it up in gomp_init_device.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}
1792
/* Legacy entry point: register an offload image with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1799
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  Order is irrelevant, so the
     last element simply takes the removed slot.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}
1834
/* Legacy entry point: unregister an offload image with version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1841
/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return (except on the fatal
   error path, where it is released before aborting).  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      /* Release the lock before terminating so it is not left held.  */
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}
1870
1871 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1872 must be locked on entry, and remains locked on return. */
1873
1874 attribute_hidden bool
1875 gomp_fini_device (struct gomp_device_descr *devicep)
1876 {
1877 bool ret = goacc_fini_asyncqueues (devicep);
1878 ret &= devicep->fini_device_func (devicep->target_id);
1879 devicep->state = GOMP_DEVICE_FINALIZED;
1880 return ret;
1881 }
1882
1883 attribute_hidden void
1884 gomp_unload_device (struct gomp_device_descr *devicep)
1885 {
1886 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1887 {
1888 unsigned i;
1889
1890 /* Unload from device all images registered at the moment. */
1891 for (i = 0; i < num_offload_images; i++)
1892 {
1893 struct offload_image_descr *image = &offload_images[i];
1894 if (image->type == devicep->type)
1895 gomp_unload_image_from_device (devicep, image->version,
1896 image->host_table,
1897 image->target_data);
1898 }
1899 }
1900 }
1901
/* Host fallback for GOMP_target{,_ext} routines: run FN (HOSTADDRS) on the
   host when no device is available or usable.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  /* Save the encountering thread's state and run FN from a zeroed one, so
     the target region starts from a clean thread context.  */
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      /* Preserve the place binding of the encountering thread.  */
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  /* Release any state FN created, then restore the saved thread state.  */
  gomp_free_thread (thr);
  *thr = old_thr;
}
1919
1920 /* Calculate alignment and size requirements of a private copy of data shared
1921 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1922
1923 static inline void
1924 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1925 unsigned short *kinds, size_t *tgt_align,
1926 size_t *tgt_size)
1927 {
1928 size_t i;
1929 for (i = 0; i < mapnum; i++)
1930 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1931 {
1932 size_t align = (size_t) 1 << (kinds[i] >> 8);
1933 if (*tgt_align < align)
1934 *tgt_align = align;
1935 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1936 *tgt_size += sizes[i];
1937 }
1938 }
1939
1940 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1941
1942 static inline void
1943 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1944 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1945 size_t tgt_size)
1946 {
1947 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1948 if (al)
1949 tgt += tgt_align - al;
1950 tgt_size = 0;
1951 size_t i;
1952 for (i = 0; i < mapnum; i++)
1953 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1954 {
1955 size_t align = (size_t) 1 << (kinds[i] >> 8);
1956 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1957 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1958 hostaddrs[i] = tgt + tgt_size;
1959 tgt_size = tgt_size + sizes[i];
1960 }
1961 }
1962
1963 /* Helper function of GOMP_target{,_ext} routines. */
1964
1965 static void *
1966 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1967 void (*host_fn) (void *))
1968 {
1969 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1970 return (void *) host_fn;
1971 else
1972 {
1973 gomp_mutex_lock (&devicep->lock);
1974 if (devicep->state == GOMP_DEVICE_FINALIZED)
1975 {
1976 gomp_mutex_unlock (&devicep->lock);
1977 return NULL;
1978 }
1979
1980 struct splay_tree_key_s k;
1981 k.host_start = (uintptr_t) host_fn;
1982 k.host_end = k.host_start + 1;
1983 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1984 gomp_mutex_unlock (&devicep->lock);
1985 if (tgt_fn == NULL)
1986 return NULL;
1987
1988 return (void *) tgt_fn->tgt_offset;
1989 }
1990 }
1991
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  /* Fall back to host execution if there is no usable offloading device or
     FN has no device-side counterpart.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  /* Map the variables, run the region on the device, then copy back and
     unmap.  */
  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}
2024
2025 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2026 and several arguments have been added:
2027 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2028 DEPEND is array of dependencies, see GOMP_task for details.
2029
   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first element, the whole second element contains the actual value.
   The last element of the array is a single NULL.  Among the
   device-independent arguments can be for example NUM_TEAMS and
   THREAD_LIMIT.
2037
2038 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2039 that value, or 1 if teams construct is not present, or 0, if
2040 teams construct does not have num_teams clause and so the choice is
2041 implementation defined, and -1 if it can't be determined on the host
2042 what value will GOMP_teams have on the device.
2043 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2044 body with that value, or 0, if teams construct does not have thread_limit
2045 clause or the teams construct is not present, or -1 if it can't be
2046 determined on the host what value will GOMP_teams have on the device. */
2047
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  /* Alignment/size requirements of the firstprivate block, filled in by
     calculate_firstprivate_requirements.  */
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once the firstprivate data has been copied, so it is done at most
     once on the paths below.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  /* The thread becomes the implicit task of the new one-thread
	     team; the previous task, if any, is wound down first.  */
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      /* Defer the whole region as a target task unless we are inside a
	 final task (which must execute everything immediately).  */
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      /* NOTE(review): the firstprivate block is alloca'd in this
		 frame so it stays live until the device launch below —
		 presumably copy_firstprivate_data redirects hostaddrs[]
		 entries into it; confirm in that helper.  */
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  /* Fall back to host execution if no device, no offload support, no
     device translation of FN, or the device declines to run it.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory device: no mapping needed, but firstprivate copies
	 still have to be made if not done above.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  /* Shared-memory devices get the host addresses directly; otherwise pass
     the start of the mapped block.  */
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
2168
2169 /* Host fallback for GOMP_target_data{,_ext} routines. */
2170
2171 static void
2172 gomp_target_data_fallback (void)
2173 {
2174 struct gomp_task_icv *icv = gomp_icv (false);
2175 if (icv->target_data)
2176 {
2177 /* Even when doing a host fallback, if there are any active
2178 #pragma omp target data constructs, need to remember the
2179 new #pragma omp target data, otherwise GOMP_target_end_data
2180 would get out of sync. */
2181 struct target_mem_desc *tgt
2182 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2183 GOMP_MAP_VARS_DATA);
2184 tgt->prev = icv->target_data;
2185 icv->target_data = tgt;
2186 }
2187 }
2188
2189 void
2190 GOMP_target_data (int device, const void *unused, size_t mapnum,
2191 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2192 {
2193 struct gomp_device_descr *devicep = resolve_device (device);
2194
2195 if (devicep == NULL
2196 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2197 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2198 return gomp_target_data_fallback ();
2199
2200 struct target_mem_desc *tgt
2201 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2202 GOMP_MAP_VARS_DATA);
2203 struct gomp_task_icv *icv = gomp_icv (true);
2204 tgt->prev = icv->target_data;
2205 icv->target_data = tgt;
2206 }
2207
2208 void
2209 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2210 size_t *sizes, unsigned short *kinds)
2211 {
2212 struct gomp_device_descr *devicep = resolve_device (device);
2213
2214 if (devicep == NULL
2215 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2217 return gomp_target_data_fallback ();
2218
2219 struct target_mem_desc *tgt
2220 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2221 GOMP_MAP_VARS_DATA);
2222 struct gomp_task_icv *icv = gomp_icv (true);
2223 tgt->prev = icv->target_data;
2224 icv->target_data = tgt;
2225 }
2226
2227 void
2228 GOMP_target_end_data (void)
2229 {
2230 struct gomp_task_icv *icv = gomp_icv (false);
2231 if (icv->target_data)
2232 {
2233 struct target_mem_desc *tgt = icv->target_data;
2234 icv->target_data = tgt->prev;
2235 gomp_unmap_vars (tgt, true);
2236 }
2237 }
2238
2239 void
2240 GOMP_target_update (int device, const void *unused, size_t mapnum,
2241 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2242 {
2243 struct gomp_device_descr *devicep = resolve_device (device);
2244
2245 if (devicep == NULL
2246 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2247 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2248 return;
2249
2250 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2251 }
2252
/* Like GOMP_target_update, but with 16-bit KINDS and additional FLAGS
   (GOMP_TARGET_FLAG_*) and DEPEND (see GOMP_task) arguments.  */
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* nowait inside a team and not in a final task: try to run
		 the update as a deferred target task.  If that succeeds
		 we are done here.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to update for host fallback or shared-memory devices.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
2331
/* Implement the "exit data" direction of GOMP_target_enter_exit_data: for
   each mapping listed in HOSTADDRS/SIZES/KINDS drop one reference on
   DEVICEP, copy device data back to the host for "from" kinds, and remove
   mappings whose reference count reaches zero.  */
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Only the low byte of a kind encodes the map type.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down; nothing left to unmap.  */
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length array sections need the lookup variant that also
	     matches empty ranges.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  /* Drop one reference; infinite refcounts (e.g. mappings created
	     by omp_target_associate_ptr) are never decremented.  */
	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* "delete" removes the mapping regardless of how many references
	     remain.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy back on the last "from", or unconditionally for
	     "always from".  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    gomp_remove_var (devicep, k);

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
2393
/* Entry point for #pragma omp target {enter,exit} data; the direction is
   selected by GOMP_TARGET_FLAG_EXIT_DATA in FLAGS.  DEPEND is handled as
   in GOMP_target_ext.  */
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Deferred execution as a target task; if the task was
		 queued we are done here.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to map/unmap for host fallback or shared-memory devices.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data: map each entry; a GOMP_MAP_STRUCT entry covers itself
       plus sizes[i] following member entries, which are mapped together
       and then skipped.  */
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
2485
/* Execution callback for a deferred target task created by
   gomp_create_target_task.  DATA is the struct gomp_target_task.
   Returns true when an asynchronous device launch has been started (the
   task will be completed later), false when the work is already done.  */
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      /* A target region (as opposed to update/enter/exit data).  */
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* No usable device: run synchronously on the host.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation, after the async launch completed: just
	     clean up the mappings.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      /* First invocation: map variables (unless shared memory) and kick
	 off the asynchronous device execution.  */
      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  /* FN == NULL: this is an update or enter/exit data task.  Nothing to do
     for host fallback or shared-memory devices.  */
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data; GOMP_MAP_STRUCT covers sizes[i] trailing member entries
       which are mapped together and skipped (see
       GOMP_target_enter_exit_data).  */
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
2557
2558 void
2559 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2560 {
2561 if (thread_limit)
2562 {
2563 struct gomp_task_icv *icv = gomp_icv (true);
2564 icv->thread_limit_var
2565 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2566 }
2567 (void) num_teams;
2568 }
2569
2570 void *
2571 omp_target_alloc (size_t size, int device_num)
2572 {
2573 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2574 return malloc (size);
2575
2576 if (device_num < 0)
2577 return NULL;
2578
2579 struct gomp_device_descr *devicep = resolve_device (device_num);
2580 if (devicep == NULL)
2581 return NULL;
2582
2583 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2584 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2585 return malloc (size);
2586
2587 gomp_mutex_lock (&devicep->lock);
2588 void *ret = devicep->alloc_func (devicep->target_id, size);
2589 gomp_mutex_unlock (&devicep->lock);
2590 return ret;
2591 }
2592
2593 void
2594 omp_target_free (void *device_ptr, int device_num)
2595 {
2596 if (device_ptr == NULL)
2597 return;
2598
2599 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2600 {
2601 free (device_ptr);
2602 return;
2603 }
2604
2605 if (device_num < 0)
2606 return;
2607
2608 struct gomp_device_descr *devicep = resolve_device (device_num);
2609 if (devicep == NULL)
2610 return;
2611
2612 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2613 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2614 {
2615 free (device_ptr);
2616 return;
2617 }
2618
2619 gomp_mutex_lock (&devicep->lock);
2620 gomp_free_device_memory (devicep, device_ptr);
2621 gomp_mutex_unlock (&devicep->lock);
2622 }
2623
2624 int
2625 omp_target_is_present (const void *ptr, int device_num)
2626 {
2627 if (ptr == NULL)
2628 return 1;
2629
2630 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2631 return 1;
2632
2633 if (device_num < 0)
2634 return 0;
2635
2636 struct gomp_device_descr *devicep = resolve_device (device_num);
2637 if (devicep == NULL)
2638 return 0;
2639
2640 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2641 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2642 return 1;
2643
2644 gomp_mutex_lock (&devicep->lock);
2645 struct splay_tree_s *mem_map = &devicep->mem_map;
2646 struct splay_tree_key_s cur_node;
2647
2648 cur_node.host_start = (uintptr_t) ptr;
2649 cur_node.host_end = cur_node.host_start;
2650 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2651 int ret = n != NULL;
2652 gomp_mutex_unlock (&devicep->lock);
2653 return ret;
2654 }
2655
2656 int
2657 omp_target_memcpy (void *dst, const void *src, size_t length,
2658 size_t dst_offset, size_t src_offset, int dst_device_num,
2659 int src_device_num)
2660 {
2661 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2662 bool ret;
2663
2664 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2665 {
2666 if (dst_device_num < 0)
2667 return EINVAL;
2668
2669 dst_devicep = resolve_device (dst_device_num);
2670 if (dst_devicep == NULL)
2671 return EINVAL;
2672
2673 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2674 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2675 dst_devicep = NULL;
2676 }
2677 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2678 {
2679 if (src_device_num < 0)
2680 return EINVAL;
2681
2682 src_devicep = resolve_device (src_device_num);
2683 if (src_devicep == NULL)
2684 return EINVAL;
2685
2686 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2687 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2688 src_devicep = NULL;
2689 }
2690 if (src_devicep == NULL && dst_devicep == NULL)
2691 {
2692 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2693 return 0;
2694 }
2695 if (src_devicep == NULL)
2696 {
2697 gomp_mutex_lock (&dst_devicep->lock);
2698 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2699 (char *) dst + dst_offset,
2700 (char *) src + src_offset, length);
2701 gomp_mutex_unlock (&dst_devicep->lock);
2702 return (ret ? 0 : EINVAL);
2703 }
2704 if (dst_devicep == NULL)
2705 {
2706 gomp_mutex_lock (&src_devicep->lock);
2707 ret = src_devicep->dev2host_func (src_devicep->target_id,
2708 (char *) dst + dst_offset,
2709 (char *) src + src_offset, length);
2710 gomp_mutex_unlock (&src_devicep->lock);
2711 return (ret ? 0 : EINVAL);
2712 }
2713 if (src_devicep == dst_devicep)
2714 {
2715 gomp_mutex_lock (&src_devicep->lock);
2716 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2717 (char *) dst + dst_offset,
2718 (char *) src + src_offset, length);
2719 gomp_mutex_unlock (&src_devicep->lock);
2720 return (ret ? 0 : EINVAL);
2721 }
2722 return EINVAL;
2723 }
2724
/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume of VOLUME[] elements of ELEMENT_SIZE bytes each
   from SRC (on SRC_DEVICEP, NULL meaning host memory) to DST (on
   DST_DEVICEP).  Returns 0 on success, EINVAL on size overflow, an
   unsupported device combination, or a failed transfer.  */
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      /* Base case: one contiguous run.  All byte counts are computed with
	 overflow checking.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	/* Two distinct devices: unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* Size in bytes of one slice of the outermost dimension, on each side.  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over the outermost dimension, advancing one slice at a time.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2800
2801 int
2802 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2803 int num_dims, const size_t *volume,
2804 const size_t *dst_offsets,
2805 const size_t *src_offsets,
2806 const size_t *dst_dimensions,
2807 const size_t *src_dimensions,
2808 int dst_device_num, int src_device_num)
2809 {
2810 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2811
2812 if (!dst && !src)
2813 return INT_MAX;
2814
2815 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2816 {
2817 if (dst_device_num < 0)
2818 return EINVAL;
2819
2820 dst_devicep = resolve_device (dst_device_num);
2821 if (dst_devicep == NULL)
2822 return EINVAL;
2823
2824 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2825 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2826 dst_devicep = NULL;
2827 }
2828 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2829 {
2830 if (src_device_num < 0)
2831 return EINVAL;
2832
2833 src_devicep = resolve_device (src_device_num);
2834 if (src_devicep == NULL)
2835 return EINVAL;
2836
2837 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2838 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2839 src_devicep = NULL;
2840 }
2841
2842 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2843 return EINVAL;
2844
2845 if (src_devicep)
2846 gomp_mutex_lock (&src_devicep->lock);
2847 else if (dst_devicep)
2848 gomp_mutex_lock (&dst_devicep->lock);
2849 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2850 volume, dst_offsets, src_offsets,
2851 dst_dimensions, src_dimensions,
2852 dst_devicep, src_devicep);
2853 if (src_devicep)
2854 gomp_mutex_unlock (&src_devicep->lock);
2855 else if (dst_devicep)
2856 gomp_mutex_unlock (&dst_devicep->lock);
2857 return ret;
2858 }
2859
/* OpenMP omp_target_associate_ptr: associate the host range
   [HOST_PTR, HOST_PTR + SIZE) with device address DEVICE_PTR +
   DEVICE_OFFSET on device DEVICE_NUM.  Returns 0 on success, EINVAL
   otherwise.  */
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  /* Associating host memory with itself is rejected.  */
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  /* Shared-memory and non-OpenMP devices have no separate mapping table.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An existing mapping is acceptable only if it already covers the
	 requested range at exactly the requested device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Create a synthetic mapping descriptor that owns no device memory
	 (tgt_start == 0, to_free == NULL); omp_target_disassociate_ptr
	 recognizes mappings by these field values.  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* Infinite refcount: never removed by ordinary unmapping, only by
	 omp_target_disassociate_ptr.  */
      k->refcount = REFCOUNT_INFINITY;
      k->virtual_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2923
/* OpenMP omp_target_disassociate_ptr: remove an association previously
   created by omp_target_associate_ptr for PTR on device DEVICE_NUM.
   Returns 0 on success, EINVAL otherwise.  */
int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  /* Empty-range probe at PTR.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  /* Only remove mappings that look exactly like the ones
     omp_target_associate_ptr creates (infinite refcount, no owned device
     memory, not part of any mapping list) and that start at PTR.  */
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2965
2966 int
2967 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2968 {
2969 (void) kind;
2970 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2971 return gomp_pause_host ();
2972 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2973 return -1;
2974 /* Do nothing for target devices for now. */
2975 return 0;
2976 }
2977
2978 int
2979 omp_pause_resource_all (omp_pause_resource_t kind)
2980 {
2981 (void) kind;
2982 if (gomp_pause_host ())
2983 return -1;
2984 /* Do nothing for target devices for now. */
2985 return 0;
2986 }
2987
/* NOTE(review): ialias presumably emits libgomp-internal alias symbols for
   these public entry points (macro defined elsewhere in libgomp) — confirm
   against libgomp.h.  */
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
2990
2991 #ifdef PLUGIN_SUPPORT
2992
2993 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2994 in PLUGIN_NAME.
2995 The handles of the found functions are stored in the corresponding fields
2996 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2997
static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  /* ERR is the message reported on failure; LAST_MISSING remembers the
     name of the most recently missing optional symbol, for diagnostics.  */
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handlers.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  /* The plugin must have been built against the same plugin interface
     version as this libgomp.  */
  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  /* Entry points every plugin must provide.  */
  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      /* OpenMP offloading entry points; can_run alone is optional.  */
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data)
	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
	  || !DLSYM_OPT (openacc.async.queue_callback,
			 openacc_async_queue_callback)
	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      /* The CUDA-specific handlers are all-or-nothing: each DLSYM_OPT
	 contributes 1 when the symbol is present, so CUDA must end up
	 either 0 or 4.  */
      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  /* dlopen/dlsym failed: pick up the human-readable reason before any
     further dl* call can clear it.  */
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  /* PLUGIN_HANDLE is NULL when we arrive here via a dlopen failure.  */
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
3102
3103 /* This function finalizes all initialized devices. */
3104
3105 static void
3106 gomp_target_fini (void)
3107 {
3108 int i;
3109 for (i = 0; i < num_devices; i++)
3110 {
3111 bool ret = true;
3112 struct gomp_device_descr *devicep = &devices[i];
3113 gomp_mutex_lock (&devicep->lock);
3114 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3115 ret = gomp_fini_device (devicep);
3116 gomp_mutex_unlock (&devicep->lock);
3117 if (!ret)
3118 gomp_fatal ("device finalization failed");
3119 }
3120 }
3121
3122 /* This function initializes the runtime for offloading.
3123 It parses the list of offload plugins, and tries to load these.
3124 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3125 will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */
3128
3129 static void
3130 gomp_target_init (void)
3131 {
3132 const char *prefix ="libgomp-plugin-";
3133 const char *suffix = SONAME_SUFFIX (1);
3134 const char *cur, *next;
3135 char *plugin_name;
3136 int i, new_num_devices;
3137
3138 num_devices = 0;
3139 devices = NULL;
3140
3141 cur = OFFLOAD_PLUGINS;
3142 if (*cur)
3143 do
3144 {
3145 struct gomp_device_descr current_device;
3146 size_t prefix_len, suffix_len, cur_len;
3147
3148 next = strchr (cur, ',');
3149
3150 prefix_len = strlen (prefix);
3151 cur_len = next ? next - cur : strlen (cur);
3152 suffix_len = strlen (suffix);
3153
3154 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3155 if (!plugin_name)
3156 {
3157 num_devices = 0;
3158 break;
3159 }
3160
3161 memcpy (plugin_name, prefix, prefix_len);
3162 memcpy (plugin_name + prefix_len, cur, cur_len);
3163 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3164
3165 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3166 {
3167 new_num_devices = current_device.get_num_devices_func ();
3168 if (new_num_devices >= 1)
3169 {
3170 /* Augment DEVICES and NUM_DEVICES. */
3171
3172 devices = realloc (devices, (num_devices + new_num_devices)
3173 * sizeof (struct gomp_device_descr));
3174 if (!devices)
3175 {
3176 num_devices = 0;
3177 free (plugin_name);
3178 break;
3179 }
3180
3181 current_device.name = current_device.get_name_func ();
3182 /* current_device.capabilities has already been set. */
3183 current_device.type = current_device.get_type_func ();
3184 current_device.mem_map.root = NULL;
3185 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3186 for (i = 0; i < new_num_devices; i++)
3187 {
3188 current_device.target_id = i;
3189 devices[num_devices] = current_device;
3190 gomp_mutex_init (&devices[num_devices].lock);
3191 num_devices++;
3192 }
3193 }
3194 }
3195
3196 free (plugin_name);
3197 cur = next + 1;
3198 }
3199 while (next);
3200
3201 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3202 NUM_DEVICES_OPENMP. */
3203 struct gomp_device_descr *devices_s
3204 = malloc (num_devices * sizeof (struct gomp_device_descr));
3205 if (!devices_s)
3206 {
3207 num_devices = 0;
3208 free (devices);
3209 devices = NULL;
3210 }
3211 num_devices_openmp = 0;
3212 for (i = 0; i < num_devices; i++)
3213 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3214 devices_s[num_devices_openmp++] = devices[i];
3215 int num_devices_after_openmp = num_devices_openmp;
3216 for (i = 0; i < num_devices; i++)
3217 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3218 devices_s[num_devices_after_openmp++] = devices[i];
3219 free (devices);
3220 devices = devices_s;
3221
3222 for (i = 0; i < num_devices; i++)
3223 {
3224 /* The 'devices' array can be moved (by the realloc call) until we have
3225 found all the plugins, so registering with the OpenACC runtime (which
3226 takes a copy of the pointer argument) must be delayed until now. */
3227 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3228 goacc_register (&devices[i]);
3229 }
3230
3231 if (atexit (gomp_target_fini) != 0)
3232 gomp_fatal ("atexit failed");
3233 }
3234
3235 #else /* PLUGIN_SUPPORT */
3236 /* If dlfcn.h is unavailable we always fallback to host execution.
3237 GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* Host-fallback build (no dlfcn.h): no plugins, nothing to initialize.  */
}
3242 #endif /* PLUGIN_SUPPORT */