/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

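/* Translate DEVICE_ID, substituting the default-device ICV when
   GOMP_DEVICE_ICV is passed, into a device descriptor, initializing the
   device on first use.  Return NULL for host fallback or when the device
   is out of range or already finalized.  */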
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
          && device_id != GOMP_DEVICE_HOST_FALLBACK
          && device_id != num_devices_openmp)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
        gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
                    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

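/* Look KEY up in MEM_MAP.  Unlike plain splay_tree_lookup, a zero-size KEY
   (host_start == host_end) is also matched against mappings it merely
   touches: the key is temporarily widened by one byte past its end and
   then by one byte before its start, so that e.g. a zero-length array
   section at the very end of a mapped buffer is still found.  */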
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

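/* Like gomp_map_lookup, but a zero-size KEY is only widened one byte past
   its end, as needed for zero-length array sections.  */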
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
                  bool (*copy_func) (int, void *, const void *, size_t),
                  const char *dst, void *dstaddr,
                  const char *src, const void *srcaddr,
                  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
                         bool (*copy_func) (int, void *, const void *, size_t,
                                            struct goacc_asyncqueue *),
                         const char *dst, void *dstaddr,
                         const char *src, const void *srcaddr,
                         size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
                  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host-to-device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets: chunks[i].start is the starting offset and
     chunks[i].end the ending offset, relative to the tgt->tgt_start device
     address, of chunks which are to be copied to buf and later copied to
     the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in the chunks array, or -1 if coalesce buffering
     should not be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, how many memory regions are
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of a memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)

/* Maximum size of a gap in between regions for them to be copied within the
   same chunk.  All the device offsets considered are within newly allocated
   device memory, so it isn't fatal if we copy some padding in between from
   host to device.  The gaps come either from alignment padding or from
   memory regions which are not supposed to be copied from host to device
   (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP (4 * 1024)
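
/* A worked example of the two thresholds above (sizes are illustrative,
   not from the original sources): two 1 KiB to-device regions separated by
   a 2 KiB alignment gap are merged into one 4 KiB chunk and staged through
   the buffer, since the gap is below MAX_COALESCE_BUF_GAP; a 40 KiB region
   exceeds MAX_COALESCE_BUF_SIZE and is always transferred directly.  */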

/* Add a region with device tgt_start-relative offset START and length LEN
   to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
        return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          cbuf->chunk_cnt = -1;
          return;
        }
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
        {
          cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
          cbuf->use_cnt++;
          return;
        }
      /* If the last chunk is only used by one mapping, discard it,
         as it will be one host to device copy anyway and
         memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
        cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

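/* Copy SZ bytes from host address H to device address D on DEVICEP.  If
   CBUF is non-NULL and D falls within one of its chunks, the data is only
   memcpy'd into the staging buffer; the actual transfer happens when the
   chunks are flushed to the device.  Otherwise the copy goes straight to
   the device, asynchronously via AQ when that is non-NULL.  */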
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *d, const void *h, size_t sz,
                    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
        {
          long first = 0;
          long last = cbuf->chunk_cnt - 1;
          while (first <= last)
            {
              long middle = (first + last) >> 1;
              if (cbuf->chunks[middle].end <= doff)
                first = middle + 1;
              else if (cbuf->chunks[middle].start <= doff)
                {
                  if (doff + sz > cbuf->chunks[middle].end)
                    gomp_fatal ("internal libgomp cbuf error");
                  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
                          h, sz);
                  return;
                }
              else
                last = middle - 1;
            }
        }
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
                             "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

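/* Copy SZ bytes from device address D on DEVICEP back to host address H,
   asynchronously via AQ when that is non-NULL.  */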
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
                    struct goacc_asyncqueue *aq,
                    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
                             "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
                        unsigned char kind, bool always_to_flag,
                        struct gomp_coalesce_buf *cbuf)
{
  assert (kind != GOMP_MAP_ATTACH);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    gomp_copy_host2dev (devicep, aq,
                        (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
                                  + newn->host_start - oldn->host_start),
                        (void *) newn->host_start,
                        newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
                       : ((unsigned char *) kinds)[idx];
}

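/* Write into the device word at TGT->tgt_start + TARGET_OFFSET the device
   address corresponding to the host pointer HOST_PTR, adjusted by BIAS.  A
   NULL host pointer is written through as NULL; a pointer whose target was
   never mapped is a fatal error.  */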
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
                  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
                  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
                          (void *) (tgt->tgt_start + target_offset),
                          (void *) &cur_node.tgt_offset,
                          sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is the target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
                      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

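/* Handle mapping of structure element I when other parts of the enclosing
   structure, keyed by N, are already mapped.  Helper function of
   gomp_map_vars for GOMP_MAP_STRUCT.  */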
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
                          struct goacc_asyncqueue *aq, splay_tree_key n,
                          size_t first, size_t i, void **hostaddrs,
                          size_t *sizes, void *kinds,
                          struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                              kind & typemask, false, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
        {
          cur_node.host_start--;
          n2 = splay_tree_lookup (mem_map, &cur_node);
          cur_node.host_start++;
          if (n2
              && n2->tgt == n->tgt
              && n2->host_start - n->host_start
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                      kind & typemask, false, cbuf);
              return;
            }
        }
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
          && n2->tgt == n->tgt
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
                                  kind & typemask, false, cbuf);
          return;
        }
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
              "other mapped elements from the same structure weren't mapped "
              "together with it", (void *) cur_node.host_start,
              (void *) cur_node.host_end);
}

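/* Increment the attachment count for the pointer at ATTACH_TO within the
   mapped block N and, on the 0 -> 1 transition, write the device address
   of the (BIAS-adjusted) pointer target into the corresponding device
   word.  */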
attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree mem_map,
                     splay_tree_key n, uintptr_t attach_to, size_t bias,
                     struct gomp_coalesce_buf *cbufp)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("attempt to attach null pointer");
        }

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("pointer target not mapped for attach");
        }

      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
                  "%s: attaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) attach_to, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) attach_to, (int) n->aux->attach_count[idx]);
}

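/* Decrement the attachment count for the pointer at DETACH_FROM within the
   mapped block N, forcing it to zero when FINALIZE, and on the final
   detach restore the original host pointer value into the device word.  */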
attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, splay_tree_key n,
                     uintptr_t detach_from, bool finalize,
                     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
                         - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
                  "%s: detaching host %p, target %p (struct base %p) to %p\n",
                  __FUNCTION__, (void *) detach_from, (void *) devptr,
                  (void *) (n->tgt->tgt_start + n->tgt_offset),
                  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
                          sizeof (void *), cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
                (void *) detach_from, (int) n->aux->attach_count[idx]);
}

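/* Return the device address to use for map entry I of TGT: either computed
   from the splay-tree key recorded in TGT->list[i], or, for entries
   without a key, derived from the OFFSET_* marker stored while mapping.  */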
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
           + tgt->list[i].key->tgt_offset
           + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
             + tgt->list[i + 1].key->tgt_offset
             + tgt->list[i + 1].offset
             + (uintptr_t) hostaddrs[i]
             - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, size_t mapnum,
                        void **hostaddrs, void **devaddrs, size_t *sizes,
                        void *kinds, bool short_mapkind,
                        enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_INLINED;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
               || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
        {
          tgt->list[i].key = NULL;
          if (!not_found_cnt)
            {
              /* In OpenMP < 5.0 and OpenACC the mapping has to be done
                 on a separate construct prior to using use_device_{addr,ptr}.
                 In OpenMP 5.0, map directives need to be ordered by the
                 middle-end before the use_device_* clauses.  If
                 !not_found_cnt, all mappings requested (if any) are already
                 mapped, so use_device_{addr,ptr} can be resolved right away.
                 Otherwise, if not_found_cnt, gomp_map_lookup might fail
                 now but would succeed after performing the mappings in the
                 following loop.  We can't defer this always to the second
                 loop, because it is not even invoked when !not_found_cnt
                 after the first loop.  */
              cur_node.host_start = (uintptr_t) hostaddrs[i];
              cur_node.host_end = cur_node.host_start;
              splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
              if (n != NULL)
                {
                  cur_node.host_start -= n->host_start;
                  hostaddrs[i]
                    = (void *) (n->tgt->tgt_start + n->tgt_offset
                                + cur_node.host_start);
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                {
                  gomp_mutex_unlock (&devicep->lock);
                  gomp_fatal ("use_device_ptr pointer wasn't mapped");
                }
              else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                /* If not present, continue using the host address.  */
                ;
              else
                __builtin_unreachable ();
              tgt->list[i].offset = OFFSET_INLINED;
            }
          else
            tgt->list[i].offset = 0;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
        {
          size_t first = i + 1;
          size_t last = i + sizes[i];
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = (uintptr_t) hostaddrs[last]
                              + sizes[last];
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_STRUCT;
          splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
          if (n == NULL)
            {
              size_t align = (size_t) 1 << (kind >> rshift);
              if (tgt_align < align)
                tgt_align = align;
              tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
              tgt_size = (tgt_size + align - 1) & ~(align - 1);
              tgt_size += cur_node.host_end - cur_node.host_start;
              not_found_cnt += last - i;
              for (i = first; i <= last; i++)
                {
                  tgt->list[i].key = NULL;
                  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
                                             & typemask))
                    gomp_coalesce_buf_add (&cbuf,
                                           tgt_size - cur_node.host_end
                                           + (uintptr_t) hostaddrs[i],
                                           sizes[i]);
                }
              i--;
              continue;
            }
          for (i = first; i <= last; i++)
            gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                      sizes, kinds, NULL);
          i--;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
        {
          tgt->list[i].key = NULL;
          tgt->list[i].offset = OFFSET_POINTER;
          has_firstprivate = true;
          continue;
        }
      else if ((kind & typemask) == GOMP_MAP_ATTACH)
        {
          tgt->list[i].key = NULL;
          has_firstprivate = true;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
        {
          tgt->list[i].key = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          gomp_coalesce_buf_add (&cbuf, tgt_size,
                                 cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          has_firstprivate = true;
          continue;
        }
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
        {
          n = gomp_map_0len_lookup (mem_map, &cur_node);
          if (!n)
            {
              tgt->list[i].key = NULL;
              tgt->list[i].offset = OFFSET_POINTER;
              continue;
            }
        }
      else
        n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
        {
          int always_to_cnt = 0;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              bool has_nullptr = false;
              size_t j;
              for (j = 0; j < n->tgt->list_count; j++)
                if (n->tgt->list[j].key == n)
                  {
                    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
                    break;
                  }
              if (n->tgt->list_count == 0)
                {
                  /* 'declare target'; assume has_nullptr; it could also be
                     a statically assigned pointer, but then it should point
                     to the equivalent variable on the host.  */
                  assert (n->refcount == REFCOUNT_INFINITY);
                  has_nullptr = true;
                }
              else
                assert (j < n->tgt->list_count);
              /* Re-map the data if there is an 'always' modifier, or if a
                 null pointer was there before and a nonnull one has been
                 found since; that permits transparent re-mapping for
                 Fortran array descriptors which were previously mapped
                 unallocated.  */
              for (j = i + 1; j < mapnum; j++)
                {
                  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                      && (!has_nullptr
                          || !GOMP_MAP_POINTER_P (ptr_kind)
                          || *(void **) hostaddrs[j] == NULL))
                    break;
                  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                           || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                               > cur_node.host_end))
                    break;
                  else
                    {
                      has_always_ptrset = true;
                      ++always_to_cnt;
                    }
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
                                  kind & typemask, always_to_cnt > 0, NULL);
          i += always_to_cnt;
        }
      else
        {
          tgt->list[i].key = NULL;

          if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
            {
              /* Not present; hence, skip the entry, including its
                 MAP_POINTER, if it exists.  */
              tgt->list[i].offset = OFFSET_POINTER;
              if (i + 1 < mapnum
                  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                      == GOMP_MAP_POINTER))
                {
                  ++i;
                  tgt->list[i].key = NULL;
                  tgt->list[i].offset = 0;
                }
              continue;
            }
          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          if (gomp_to_device_kind_p (kind & typemask))
            gomp_coalesce_buf_add (&cbuf, tgt_size,
                                   cur_node.host_end - cur_node.host_start);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              size_t j;
              int kind;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
                                          kinds, j)) & typemask))
                    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j].key = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate a tgt_align-aligned block of tgt_size bytes.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      if (!tgt->to_free)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("device memory allocation failed");
        }

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
        cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
        {
          cbuf.buf
            = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
          if (cbuf.buf)
            {
              cbuf.tgt = tgt;
              cbufp = &cbuf;
            }
        }
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate || has_always_ptrset)
    {
      if (not_found_cnt)
        tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
        if (has_always_ptrset
            && tgt->list[i].key
            && (get_kind (short_mapkind, kinds, i) & typemask)
               == GOMP_MAP_TO_PSET)
          {
            splay_tree_key k = tgt->list[i].key;
            bool has_nullptr = false;
            size_t j;
            for (j = 0; j < k->tgt->list_count; j++)
              if (k->tgt->list[j].key == k)
                {
                  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
                  break;
                }
            if (k->tgt->list_count == 0)
              has_nullptr = true;
            else
              assert (j < k->tgt->list_count);

            tgt->list[i].has_null_ptr_assoc = false;
            for (j = i + 1; j < mapnum; j++)
              {
                int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
                if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
                    && (!has_nullptr
                        || !GOMP_MAP_POINTER_P (ptr_kind)
                        || *(void **) hostaddrs[j] == NULL))
                  break;
                else if ((uintptr_t) hostaddrs[j] < k->host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > k->host_end))
                  break;
                else
                  {
                    if (*(void **) hostaddrs[j] == NULL)
                      tgt->list[i].has_null_ptr_assoc = true;
                    tgt->list[j].key = k;
                    tgt->list[j].copy_from = false;
                    tgt->list[j].always_copy_from = false;
                    tgt->list[j].is_attach = false;
                    if (k->refcount != REFCOUNT_INFINITY)
                      k->refcount++;
                    gomp_map_pointer (k->tgt, aq,
                                      (uintptr_t) *(void **) hostaddrs[j],
                                      k->tgt_offset + ((uintptr_t) hostaddrs[j]
                                                       - k->host_start),
                                      sizes[j], cbufp);
                  }
              }
            i = j - 1;
          }
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
              {
                size_t align, len, first, last;
                splay_tree_key n;
              case GOMP_MAP_FIRSTPRIVATE:
                align = (size_t) 1 << (kind >> rshift);
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                tgt->list[i].offset = tgt_size;
                len = sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (tgt->tgt_start + tgt_size),
                                    (void *) hostaddrs[i], len, cbufp);
                tgt_size += len;
                continue;
              case GOMP_MAP_FIRSTPRIVATE_INT:
              case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
                continue;
              case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
                /* The OpenACC 'host_data' construct only allows 'use_device'
                   "mapping" clauses, so in the first loop, 'not_found_cnt'
                   must always have been zero, so all OpenACC 'use_device'
                   clauses have already been handled.  (We can only easily
                   test 'use_device' with 'if_present' clause here.)  */
                assert (tgt->list[i].offset == OFFSET_INLINED);
                /* Nevertheless, FALLTHRU to the normal handling, to keep the
                   code conceptually simple, similar to the first loop.  */
              case GOMP_MAP_USE_DEVICE_PTR:
                if (tgt->list[i].offset == 0)
                  {
                    cur_node.host_start = (uintptr_t) hostaddrs[i];
                    cur_node.host_end = cur_node.host_start;
                    n = gomp_map_lookup (mem_map, &cur_node);
                    if (n != NULL)
                      {
                        cur_node.host_start -= n->host_start;
                        hostaddrs[i]
                          = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start);
                      }
                    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("use_device_ptr pointer wasn't mapped");
                      }
                    else if ((kind & typemask)
                             == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
                      /* If not present, continue using the host address.  */
                      ;
                    else
                      __builtin_unreachable ();
                    tgt->list[i].offset = OFFSET_INLINED;
                  }
                continue;
              case GOMP_MAP_STRUCT:
                first = i + 1;
                last = i + sizes[i];
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = (uintptr_t) hostaddrs[last]
                                    + sizes[last];
                if (tgt->list[first].key != NULL)
                  continue;
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL)
                  {
                    size_t align = (size_t) 1 << (kind >> rshift);
                    tgt_size -= (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    tgt_size += (uintptr_t) hostaddrs[first]
                                - (uintptr_t) hostaddrs[i];
                    field_tgt_base = (uintptr_t) hostaddrs[first];
                    field_tgt_offset = tgt_size;
                    field_tgt_clear = last;
                    tgt_size += cur_node.host_end
                                - (uintptr_t) hostaddrs[first];
                    continue;
                  }
                for (i = first; i <= last; i++)
                  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
                                            sizes, kinds, cbufp);
                i--;
                continue;
              case GOMP_MAP_ALWAYS_POINTER:
                cur_node.host_start = (uintptr_t) hostaddrs[i];
                cur_node.host_end = cur_node.host_start + sizeof (void *);
                n = splay_tree_lookup (mem_map, &cur_node);
                if (n == NULL
                    || n->host_start > cur_node.host_start
                    || n->host_end < cur_node.host_end)
                  {
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("always pointer not mapped");
                  }
                if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
                    != GOMP_MAP_ALWAYS_POINTER)
                  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
                if (cur_node.tgt_offset)
                  cur_node.tgt_offset -= sizes[i];
                gomp_copy_host2dev (devicep, aq,
                                    (void *) (n->tgt->tgt_start
                                              + n->tgt_offset
                                              + cur_node.host_start
                                              - n->host_start),
                                    (void *) &cur_node.tgt_offset,
                                    sizeof (void *), cbufp);
                cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start;
                continue;
              case GOMP_MAP_IF_PRESENT:
                /* Not present - otherwise handled above.  Skip over its
                   MAP_POINTER as well.  */
                if (i + 1 < mapnum
                    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
                        == GOMP_MAP_POINTER))
                  ++i;
                continue;
              case GOMP_MAP_ATTACH:
                {
                  cur_node.host_start = (uintptr_t) hostaddrs[i];
                  cur_node.host_end = cur_node.host_start + sizeof (void *);
                  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
                  if (n != NULL)
                    {
                      tgt->list[i].key = n;
                      tgt->list[i].offset = cur_node.host_start - n->host_start;
                      tgt->list[i].length = n->host_end - n->host_start;
                      tgt->list[i].copy_from = false;
                      tgt->list[i].always_copy_from = false;
                      tgt->list[i].is_attach = true;
                      /* OpenACC 'attach'/'detach' doesn't affect
                         structured/dynamic reference counts ('n->refcount',
                         'n->dynamic_refcount').  */

                      gomp_attach_pointer (devicep, aq, mem_map, n,
                                           (uintptr_t) hostaddrs[i], sizes[i],
                                           cbufp);
                    }
                  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
                    {
                      gomp_mutex_unlock (&devicep->lock);
                      gomp_fatal ("outer struct not mapped for attach");
                    }
                  continue;
                }
              default:
                break;
              }
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
                                      kind & typemask, false, cbufp);
            else
              {
                k->aux = NULL;
                if (n && n->refcount == REFCOUNT_LINK)
                  {
                    /* Replace target address of the pointer with target
                       address of mapped object in the splay tree.  */
                    splay_tree_remove (mem_map, n);
                    k->aux
                      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
                    k->aux->link_key = n;
                  }
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i].key = k;
                k->tgt = tgt;
                if (field_tgt_clear != FIELD_TGT_EMPTY)
                  {
                    k->tgt_offset = k->host_start - field_tgt_base
                                    + field_tgt_offset;
                    if (i == field_tgt_clear)
                      field_tgt_clear = FIELD_TGT_EMPTY;
                  }
                else
                  {
                    tgt_size = (tgt_size + align - 1) & ~(align - 1);
                    k->tgt_offset = tgt_size;
                    tgt_size += k->host_end - k->host_start;
                  }
                tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                tgt->list[i].always_copy_from
                  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
                tgt->list[i].is_attach = false;
                tgt->list[i].offset = 0;
                tgt->list[i].length = k->host_end - k->host_start;
                k->refcount = 1;
                k->dynamic_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                  case GOMP_MAP_ALWAYS_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                  case GOMP_MAP_ALWAYS_TO:
                  case GOMP_MAP_ALWAYS_TOFROM:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    break;
                  case GOMP_MAP_POINTER:
                    gomp_map_pointer (tgt, aq,
                                      (uintptr_t) *(void **) k->host_start,
                                      k->tgt_offset, sizes[i], cbufp);
                    break;
                  case GOMP_MAP_TO_PSET:
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        k->host_end - k->host_start, cbufp);
                    tgt->list[i].has_null_ptr_assoc = false;

                    for (j = i + 1; j < mapnum; j++)
                      {
                        int ptr_kind = (get_kind (short_mapkind, kinds, j)
                                        & typemask);
                        if (!GOMP_MAP_POINTER_P (ptr_kind)
                            && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
                          break;
                        else if ((uintptr_t) hostaddrs[j] < k->host_start
                                 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                     > k->host_end))
                          break;
                        else
                          {
                            tgt->list[j].key = k;
                            tgt->list[j].copy_from = false;
                            tgt->list[j].always_copy_from = false;
                            tgt->list[j].is_attach = false;
                            tgt->list[i].has_null_ptr_assoc
                              |= !(*(void **) hostaddrs[j]);
                            if (k->refcount != REFCOUNT_INFINITY)
                              k->refcount++;
                            gomp_map_pointer (tgt, aq,
                                              (uintptr_t) *(void **) hostaddrs[j],
                                              k->tgt_offset
                                              + ((uintptr_t) hostaddrs[j]
                                                 - k->host_start),
                                              sizes[j], cbufp);
                          }
                      }
                    i = j - 1;
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));
                    gomp_copy_host2dev (devicep, aq,
                                        (void *) (tgt->tgt_start
                                                  + k->tgt_offset),
                                        (void *) k->host_start,
                                        sizeof (void *), cbufp);
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }

                if (k->aux && k->aux->link_key)
                  {
                    /* Set link pointer on target to the device address of the
                       mapped object.  */
                    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
                    /* We intentionally do not use coalescing here, as it's not
                       data allocated by the current call to this function.  */
                    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
                                        &tgt_addr, sizeof (void *), NULL);
                  }
                array++;
              }
          }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
        {
          cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
          gomp_copy_host2dev (devicep, aq,
                              (void *) (tgt->tgt_start + i * sizeof (void *)),
                              (void *) &cur_node.tgt_offset, sizeof (void *),
                              cbufp);
        }
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
        gomp_copy_host2dev (devicep, aq,
                            (void *) (tgt->tgt_start + cbuf.chunks[c].start),
                            (char *) cbuf.buf + (cbuf.chunks[c].start
                                                 - cbuf.chunks[0].start),
                            cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
                     struct goacc_asyncqueue *aq, size_t mapnum,
                     void **hostaddrs, void **devaddrs, size_t *sizes,
                     void *kinds, bool short_mapkind,
                     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
                                 sizes, kinds, short_mapkind, pragma_kind);
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
                          struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->aux)
    {
      if (k->aux->link_key)
        splay_tree_insert (&devicep->mem_map,
                           (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
        free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
                       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
                          struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.  */
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
        gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
                             + tgt->list[i].offset,
                             false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
        continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
         counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
        continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
        k->refcount--;
      else if (k->refcount == 1)
        {
          k->refcount--;
          do_unmap = true;
        }

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
          || tgt->list[i].always_copy_from)
        gomp_copy_dev2host (devicep, aq,
                            (void *) (k->host_start + tgt->list[i].offset),
                            (void *) (k->tgt->tgt_start + k->tgt_offset
                                      + tgt->list[i].offset),
                            tgt->list[i].length);
      if (do_unmap)
        {
          struct target_mem_desc *k_tgt = k->tgt;
          bool is_tgt_unmapped = gomp_remove_var (devicep, k);
          /* It would be bad if TGT got unmapped while we're still iterating
             over its LIST_COUNT, as we also expect to use it in the
             following code.  */
          assert (!is_tgt_unmapped
                  || k_tgt != tgt);
        }
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
                                                (void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
                       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (short_mapkind, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }

            void *hostaddr = (void *) cur_node.host_start;
            void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
                                      + cur_node.host_start - n->host_start);
            size_t size = cur_node.host_end - cur_node.host_start;

            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
                                  NULL);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
                           const void *host_table, const void *target_data,
                           bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
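
  /* For example (illustrative values, not from the original sources): on a
     64-bit host, a size word of 0x8000000000000010 in the host var table
     denotes an "omp declare target link" variable of size 16, since
     link_bit is 1 << 63 and size_mask recovers the low 63 bits.  */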

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device must be locked.  */

1790 static void
1791 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1792 unsigned version,
1793 const void *host_table, const void *target_data)
1794 {
1795 void **host_func_table = ((void ***) host_table)[0];
1796 void **host_funcs_end = ((void ***) host_table)[1];
1797 void **host_var_table = ((void ***) host_table)[2];
1798 void **host_vars_end = ((void ***) host_table)[3];
1799
1800 /* The func table contains only addresses, the var table contains addresses
1801 and corresponding sizes. */
1802 int num_funcs = host_funcs_end - host_func_table;
1803 int num_vars = (host_vars_end - host_var_table) / 2;
1804
1805 struct splay_tree_key_s k;
1806 splay_tree_key node = NULL;
1807
1808 /* Find mapping at start of node array. */
1809 if (num_funcs || num_vars)
1810 {
1811 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1812 : (uintptr_t) host_var_table[0]);
1813 k.host_end = k.host_start + 1;
1814 node = splay_tree_lookup (&devicep->mem_map, &k);
1815 }
1816
1817 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1818 {
1819 gomp_mutex_unlock (&devicep->lock);
1820 gomp_fatal ("image unload failed");
1821 }
1822
1823 /* Remove mappings from splay tree. */
1824 int i;
1825 for (i = 0; i < num_funcs; i++)
1826 {
1827 k.host_start = (uintptr_t) host_func_table[i];
1828 k.host_end = k.host_start + 1;
1829 splay_tree_remove (&devicep->mem_map, &k);
1830 }
1831
1832 /* Most significant bit of the size in host and target tables marks
1833 "omp declare target link" variables. */
1834 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1835 const uintptr_t size_mask = ~link_bit;
1836 bool is_tgt_unmapped = false;
1837
1838 for (i = 0; i < num_vars; i++)
1839 {
1840 k.host_start = (uintptr_t) host_var_table[i * 2];
1841 k.host_end
1842 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1843
1844 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1845 splay_tree_remove (&devicep->mem_map, &k);
1846 else
1847 {
1848 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1849 is_tgt_unmapped = gomp_remove_var (devicep, n);
1850 }
1851 }
1852
1853 if (node && !is_tgt_unmapped)
1854 {
1855 free (node->tgt);
1856 free (node);
1857 }
1858 }
1859
1860 /* This function should be called from every offload image while loading.
1861 It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE
1862 of the target, and the TARGET_DATA needed by the target plugin. */
1863
1864 void
1865 GOMP_offload_register_ver (unsigned version, const void *host_table,
1866 int target_type, const void *target_data)
1867 {
1868 int i;
1869
1870 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1871 gomp_fatal ("Library too old for offload (version %u < %u)",
1872 GOMP_VERSION, GOMP_VERSION_LIB (version));
1873
1874 gomp_mutex_lock (&register_lock);
1875
1876 /* Load image to all initialized devices. */
1877 for (i = 0; i < num_devices; i++)
1878 {
1879 struct gomp_device_descr *devicep = &devices[i];
1880 gomp_mutex_lock (&devicep->lock);
1881 if (devicep->type == target_type
1882 && devicep->state == GOMP_DEVICE_INITIALIZED)
1883 gomp_load_image_to_device (devicep, version,
1884 host_table, target_data, true);
1885 gomp_mutex_unlock (&devicep->lock);
1886 }
1887
1888 /* Insert image to array of pending images. */
1889 offload_images
1890 = gomp_realloc_unlock (offload_images,
1891 (num_offload_images + 1)
1892 * sizeof (struct offload_image_descr));
1893 offload_images[num_offload_images].version = version;
1894 offload_images[num_offload_images].type = target_type;
1895 offload_images[num_offload_images].host_table = host_table;
1896 offload_images[num_offload_images].target_data = target_data;
1897
1898 num_offload_images++;
1899 gomp_mutex_unlock (&register_lock);
1900 }
1901
1902 void
1903 GOMP_offload_register (const void *host_table, int target_type,
1904 const void *target_data)
1905 {
1906 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1907 }
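/* A minimal sketch of how a compiler-generated image constructor might
   invoke the registration entry points above.  The table and image symbols
   below are hypothetical; the real calls and symbols are emitted by the
   compiler and the offload linker:

     extern const void *const __offload_func_table[];
     extern const void *const __offload_funcs_end[];
     extern const void *const __offload_var_table[];
     extern const void *const __offload_vars_end[];
     static const void *host_table[] = { __offload_func_table,
                                         __offload_funcs_end,
                                         __offload_var_table,
                                         __offload_vars_end };
     extern const void *target_image;

     __attribute__ ((constructor)) static void
     init_offload (void)
     {
       GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION,
                                                     GOMP_VERSION_NVIDIA_PTX),
                                  host_table, GOMP_DEVICE_NVIDIA_PTX,
                                  &target_image);
     }  */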
1908
1909 /* This function should be called from every offload image while unloading.
1910 It gets the descriptor of the host func and var tables HOST_TABLE, the TYPE
1911 of the target, and the TARGET_DATA needed by the target plugin. */
1912
1913 void
1914 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1915 int target_type, const void *target_data)
1916 {
1917 int i;
1918
1919 gomp_mutex_lock (&register_lock);
1920
1921 /* Unload image from all initialized devices. */
1922 for (i = 0; i < num_devices; i++)
1923 {
1924 struct gomp_device_descr *devicep = &devices[i];
1925 gomp_mutex_lock (&devicep->lock);
1926 if (devicep->type == target_type
1927 && devicep->state == GOMP_DEVICE_INITIALIZED)
1928 gomp_unload_image_from_device (devicep, version,
1929 host_table, target_data);
1930 gomp_mutex_unlock (&devicep->lock);
1931 }
1932
1933 /* Remove image from array of pending images. */
1934 for (i = 0; i < num_offload_images; i++)
1935 if (offload_images[i].target_data == target_data)
1936 {
1937 offload_images[i] = offload_images[--num_offload_images];
1938 break;
1939 }
1940
1941 gomp_mutex_unlock (&register_lock);
1942 }
1943
1944 void
1945 GOMP_offload_unregister (const void *host_table, int target_type,
1946 const void *target_data)
1947 {
1948 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1949 }
1950
1951 /* This function initializes the target device, specified by DEVICEP. DEVICEP
1952 must be locked on entry, and remains locked on return. */
1953
1954 attribute_hidden void
1955 gomp_init_device (struct gomp_device_descr *devicep)
1956 {
1957 int i;
1958 if (!devicep->init_device_func (devicep->target_id))
1959 {
1960 gomp_mutex_unlock (&devicep->lock);
1961 gomp_fatal ("device initialization failed");
1962 }
1963
1964 /* Load to the device all images registered so far. */
1965 for (i = 0; i < num_offload_images; i++)
1966 {
1967 struct offload_image_descr *image = &offload_images[i];
1968 if (image->type == devicep->type)
1969 gomp_load_image_to_device (devicep, image->version,
1970 image->host_table, image->target_data,
1971 false);
1972 }
1973
1974 /* Initialize OpenACC asynchronous queues. */
1975 goacc_init_asyncqueues (devicep);
1976
1977 devicep->state = GOMP_DEVICE_INITIALIZED;
1978 }
1979
1980 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
1981 must be locked on entry, and remains locked on return. */
1982
1983 attribute_hidden bool
1984 gomp_fini_device (struct gomp_device_descr *devicep)
1985 {
1986 bool ret = goacc_fini_asyncqueues (devicep);
1987 ret &= devicep->fini_device_func (devicep->target_id);
1988 devicep->state = GOMP_DEVICE_FINALIZED;
1989 return ret;
1990 }
1991
1992 attribute_hidden void
1993 gomp_unload_device (struct gomp_device_descr *devicep)
1994 {
1995 if (devicep->state == GOMP_DEVICE_INITIALIZED)
1996 {
1997 unsigned i;
1998
1999 /* Unload from the device all images registered so far. */
2000 for (i = 0; i < num_offload_images; i++)
2001 {
2002 struct offload_image_descr *image = &offload_images[i];
2003 if (image->type == devicep->type)
2004 gomp_unload_image_from_device (devicep, image->version,
2005 image->host_table,
2006 image->target_data);
2007 }
2008 }
2009 }
2010
2011 /* Host fallback for GOMP_target{,_ext} routines. */
2012
2013 static void
2014 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2015 struct gomp_device_descr *devicep)
2016 {
2017 struct gomp_thread old_thr, *thr = gomp_thread ();
2018
2019 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2020 && devicep != NULL)
2021 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2022 "be used for offloading");
2023
2024 old_thr = *thr;
2025 memset (thr, '\0', sizeof (*thr));
2026 if (gomp_places_list)
2027 {
2028 thr->place = old_thr.place;
2029 thr->ts.place_partition_len = gomp_places_list_len;
2030 }
2031 fn (hostaddrs);
2032 gomp_free_thread (thr);
2033 *thr = old_thr;
2034 }
2035
2036 /* Calculate alignment and size requirements of a private copy of data shared
2037 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
2038
2039 static inline void
2040 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2041 unsigned short *kinds, size_t *tgt_align,
2042 size_t *tgt_size)
2043 {
2044 size_t i;
2045 for (i = 0; i < mapnum; i++)
2046 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2047 {
2048 size_t align = (size_t) 1 << (kinds[i] >> 8);
2049 if (*tgt_align < align)
2050 *tgt_align = align;
2051 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2052 *tgt_size += sizes[i];
2053 }
2054 }
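/* For example, with two firstprivate mappings of sizes 4 (alignment 4) and
   8 (alignment 8), the loop above computes *tgt_align == 8 and
   *tgt_size == 16: 0 + 4 = 4 for the first copy, which is then rounded up
   to 8 and followed by + 8 = 16 for the second. */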
2055
2056 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2057
2058 static inline void
2059 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2060 size_t *sizes, unsigned short *kinds, size_t tgt_align,
2061 size_t tgt_size)
2062 {
2063 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2064 if (al)
2065 tgt += tgt_align - al;
2066 tgt_size = 0;
2067 size_t i;
2068 for (i = 0; i < mapnum; i++)
2069 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2070 {
2071 size_t align = (size_t) 1 << (kinds[i] >> 8);
2072 tgt_size = (tgt_size + align - 1) & ~(align - 1);
2073 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2074 hostaddrs[i] = tgt + tgt_size;
2075 tgt_size = tgt_size + sizes[i];
2076 }
2077 }
2078
2079 /* Helper function of GOMP_target{,_ext} routines. */
2080
2081 static void *
2082 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2083 void (*host_fn) (void *))
2084 {
2085 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2086 return (void *) host_fn;
2087 else
2088 {
2089 gomp_mutex_lock (&devicep->lock);
2090 if (devicep->state == GOMP_DEVICE_FINALIZED)
2091 {
2092 gomp_mutex_unlock (&devicep->lock);
2093 return NULL;
2094 }
2095
2096 struct splay_tree_key_s k;
2097 k.host_start = (uintptr_t) host_fn;
2098 k.host_end = k.host_start + 1;
2099 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2100 gomp_mutex_unlock (&devicep->lock);
2101 if (tgt_fn == NULL)
2102 return NULL;
2103
2104 return (void *) tgt_fn->tgt_offset;
2105 }
2106 }
2107
2108 /* Called when encountering a target directive. If DEVICE
2109 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
2110 GOMP_DEVICE_HOST_FALLBACK (or any value
2111 larger than the last available hw device), use host fallback.
2112 FN is the address of the host code; UNUSED is part of the current ABI,
2113 but we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2114 with MAPNUM entries, holding the addresses of the host objects, the
2115 sizes of the host objects (resp., for pointer kinds, the pointer bias
2116 and an assumed size of sizeof (void *)) and the map kinds. */
2117
2118 void
2119 GOMP_target (int device, void (*fn) (void *), const void *unused,
2120 size_t mapnum, void **hostaddrs, size_t *sizes,
2121 unsigned char *kinds)
2122 {
2123 struct gomp_device_descr *devicep = resolve_device (device);
2124
2125 void *fn_addr;
2126 if (devicep == NULL
2127 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2128 /* All shared memory devices should use the GOMP_target_ext function. */
2129 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2130 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2131 return gomp_target_fallback (fn, hostaddrs, devicep);
2132
2133 struct target_mem_desc *tgt_vars
2134 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2135 GOMP_MAP_VARS_TARGET);
2136 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2137 NULL);
2138 gomp_unmap_vars (tgt_vars, true);
2139 }
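/* A sketch of the lowering this entry point serves (hypothetical; the
   actual calls are emitted by the compiler).  For
     #pragma omp target map(tofrom: x)
       x++;
   older GCC releases emit roughly
     GOMP_target (GOMP_DEVICE_ICV, fn.0, NULL, 1, hostaddrs, sizes, kinds);
   with hostaddrs[0] == &x, sizes[0] == sizeof (x) and kinds[0] ==
   GOMP_MAP_TOFROM, while fn.0 is the outlined region that receives the
   (host or device) address of x. */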
2140
2141 static inline unsigned int
2142 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2143 {
2144 /* If we cannot run asynchronously, simply ignore nowait. */
2145 if (devicep != NULL && devicep->async_run_func == NULL)
2146 flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2147
2148 return flags;
2149 }
2150
2151 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2152 and several arguments have been added:
2153 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2154 DEPEND is array of dependencies, see GOMP_task for details.
2155
2156 ARGS is a pointer to an array consisting of a variable number of both
2157 device-independent and device-specific arguments. Each argument can take
2158 one or two elements, where the first specifies for which device it is
2159 intended, the type and optionally also the value. If the value is not
2160 present in the first element, the whole second element is the actual
2161 value. The last element of the array is a single NULL. Among the
2162 device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
2163
2164 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2165 that value, 1 if the teams construct is not present, 0 if the teams
2166 construct does not have a num_teams clause (so the choice is
2167 implementation defined), and -1 if it can't be determined on the host
2168 what value GOMP_teams will get on the device.
2169 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2170 body with that value, 0 if the teams construct does not have a
2171 thread_limit clause or the teams construct is not present, and -1 if
2172 it can't be determined on the host what value GOMP_teams will get. */
2173
2174 void
2175 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2176 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2177 unsigned int flags, void **depend, void **args)
2178 {
2179 struct gomp_device_descr *devicep = resolve_device (device);
2180 size_t tgt_align = 0, tgt_size = 0;
2181 bool fpc_done = false;
2182
2183 flags = clear_unsupported_flags (devicep, flags);
2184
2185 if (flags & GOMP_TARGET_FLAG_NOWAIT)
2186 {
2187 struct gomp_thread *thr = gomp_thread ();
2188 /* Create a team if we don't have any around, as nowait
2189 target tasks make sense to run asynchronously even when
2190 outside of any parallel. */
2191 if (__builtin_expect (thr->ts.team == NULL, 0))
2192 {
2193 struct gomp_team *team = gomp_new_team (1);
2194 struct gomp_task *task = thr->task;
2195 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2196 team->prev_ts = thr->ts;
2197 thr->ts.team = team;
2198 thr->ts.team_id = 0;
2199 thr->ts.work_share = &team->work_shares[0];
2200 thr->ts.last_work_share = NULL;
2201 #ifdef HAVE_SYNC_BUILTINS
2202 thr->ts.single_count = 0;
2203 #endif
2204 thr->ts.static_trip = 0;
2205 thr->task = &team->implicit_task[0];
2206 gomp_init_task (thr->task, NULL, icv);
2207 if (task)
2208 {
2209 thr->task = task;
2210 gomp_end_task ();
2211 free (task);
2212 thr->task = &team->implicit_task[0];
2213 }
2214 else
2215 pthread_setspecific (gomp_thread_destructor, thr);
2216 }
2217 if (thr->ts.team
2218 && !thr->task->final_task)
2219 {
2220 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2221 sizes, kinds, flags, depend, args,
2222 GOMP_TARGET_TASK_BEFORE_MAP);
2223 return;
2224 }
2225 }
2226
2227 /* If there are depend clauses, but nowait is not present
2228 (or we are in a final task), block the parent task until the
2229 dependencies are resolved and then just continue with the rest
2230 of the function as if it were a merged task. */
2231 if (depend != NULL)
2232 {
2233 struct gomp_thread *thr = gomp_thread ();
2234 if (thr->task && thr->task->depend_hash)
2235 {
2236 /* If we might need to wait, copy firstprivate now. */
2237 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2238 &tgt_align, &tgt_size);
2239 if (tgt_align)
2240 {
2241 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2242 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2243 tgt_align, tgt_size);
2244 }
2245 fpc_done = true;
2246 gomp_task_maybe_wait_for_dependencies (depend);
2247 }
2248 }
2249
2250 void *fn_addr;
2251 if (devicep == NULL
2252 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2253 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2254 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2255 {
2256 if (!fpc_done)
2257 {
2258 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2259 &tgt_align, &tgt_size);
2260 if (tgt_align)
2261 {
2262 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2263 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2264 tgt_align, tgt_size);
2265 }
2266 }
2267 gomp_target_fallback (fn, hostaddrs, devicep);
2268 return;
2269 }
2270
2271 struct target_mem_desc *tgt_vars;
2272 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2273 {
2274 if (!fpc_done)
2275 {
2276 calculate_firstprivate_requirements (mapnum, sizes, kinds,
2277 &tgt_align, &tgt_size);
2278 if (tgt_align)
2279 {
2280 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2281 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2282 tgt_align, tgt_size);
2283 }
2284 }
2285 tgt_vars = NULL;
2286 }
2287 else
2288 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2289 true, GOMP_MAP_VARS_TARGET);
2290 devicep->run_func (devicep->target_id, fn_addr,
2291 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2292 args);
2293 if (tgt_vars)
2294 gomp_unmap_vars (tgt_vars, true);
2295 }
2296
2297 /* Host fallback for GOMP_target_data{,_ext} routines. */
2298
2299 static void
2300 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2301 {
2302 struct gomp_task_icv *icv = gomp_icv (false);
2303
2304 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2305 && devicep != NULL)
2306 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2307 "be used for offloading");
2308
2309 if (icv->target_data)
2310 {
2311 /* Even when doing a host fallback, if there are any active
2312 #pragma omp target data constructs, we need to remember the
2313 new #pragma omp target data; otherwise GOMP_target_end_data
2314 would get out of sync. */
2315 struct target_mem_desc *tgt
2316 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2317 GOMP_MAP_VARS_DATA);
2318 tgt->prev = icv->target_data;
2319 icv->target_data = tgt;
2320 }
2321 }
2322
2323 void
2324 GOMP_target_data (int device, const void *unused, size_t mapnum,
2325 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2326 {
2327 struct gomp_device_descr *devicep = resolve_device (device);
2328
2329 if (devicep == NULL
2330 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2331 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2332 return gomp_target_data_fallback (devicep);
2333
2334 struct target_mem_desc *tgt
2335 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2336 GOMP_MAP_VARS_DATA);
2337 struct gomp_task_icv *icv = gomp_icv (true);
2338 tgt->prev = icv->target_data;
2339 icv->target_data = tgt;
2340 }
2341
2342 void
2343 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2344 size_t *sizes, unsigned short *kinds)
2345 {
2346 struct gomp_device_descr *devicep = resolve_device (device);
2347
2348 if (devicep == NULL
2349 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2350 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2351 return gomp_target_data_fallback (devicep);
2352
2353 struct target_mem_desc *tgt
2354 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2355 GOMP_MAP_VARS_DATA);
2356 struct gomp_task_icv *icv = gomp_icv (true);
2357 tgt->prev = icv->target_data;
2358 icv->target_data = tgt;
2359 }
2360
2361 void
2362 GOMP_target_end_data (void)
2363 {
2364 struct gomp_task_icv *icv = gomp_icv (false);
2365 if (icv->target_data)
2366 {
2367 struct target_mem_desc *tgt = icv->target_data;
2368 icv->target_data = tgt->prev;
2369 gomp_unmap_vars (tgt, true);
2370 }
2371 }
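/* A sketch of how the entry points above bracket a data region
   (hypothetical; the real calls are emitted by the compiler).  For
     #pragma omp target data map(to: a[0:n])
       { ... }
   GCC emits roughly
     GOMP_target_data_ext (device, 1, hostaddrs, sizes, kinds);
     ...body...
     GOMP_target_end_data ();
   so each GOMP_target_end_data call pops the most recent mapping pushed
   onto the ICV's target_data chain. */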
2372
2373 void
2374 GOMP_target_update (int device, const void *unused, size_t mapnum,
2375 void **hostaddrs, size_t *sizes, unsigned char *kinds)
2376 {
2377 struct gomp_device_descr *devicep = resolve_device (device);
2378
2379 if (devicep == NULL
2380 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2381 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2382 return;
2383
2384 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2385 }
2386
2387 void
2388 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2389 size_t *sizes, unsigned short *kinds,
2390 unsigned int flags, void **depend)
2391 {
2392 struct gomp_device_descr *devicep = resolve_device (device);
2393
2394 /* If there are depend clauses, but nowait is not present,
2395 block the parent task until the dependencies are resolved
2396 and then just continue with the rest of the function as if it
2397 were a merged task. Until we are able to schedule tasks during
2398 variable mapping or unmapping, ignore nowait if depend clauses
2399 are not present. */
2400 if (depend != NULL)
2401 {
2402 struct gomp_thread *thr = gomp_thread ();
2403 if (thr->task && thr->task->depend_hash)
2404 {
2405 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2406 && thr->ts.team
2407 && !thr->task->final_task)
2408 {
2409 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2410 mapnum, hostaddrs, sizes, kinds,
2411 flags | GOMP_TARGET_FLAG_UPDATE,
2412 depend, NULL, GOMP_TARGET_TASK_DATA))
2413 return;
2414 }
2415 else
2416 {
2417 struct gomp_team *team = thr->ts.team;
2418 /* If parallel or taskgroup has been cancelled, don't start new
2419 tasks. */
2420 if (__builtin_expect (gomp_cancel_var, 0) && team)
2421 {
2422 if (gomp_team_barrier_cancelled (&team->barrier))
2423 return;
2424 if (thr->task->taskgroup)
2425 {
2426 if (thr->task->taskgroup->cancelled)
2427 return;
2428 if (thr->task->taskgroup->workshare
2429 && thr->task->taskgroup->prev
2430 && thr->task->taskgroup->prev->cancelled)
2431 return;
2432 }
2433 }
2434
2435 gomp_task_maybe_wait_for_dependencies (depend);
2436 }
2437 }
2438 }
2439
2440 if (devicep == NULL
2441 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2442 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2443 return;
2444
2445 struct gomp_thread *thr = gomp_thread ();
2446 struct gomp_team *team = thr->ts.team;
2447 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2448 if (__builtin_expect (gomp_cancel_var, 0) && team)
2449 {
2450 if (gomp_team_barrier_cancelled (&team->barrier))
2451 return;
2452 if (thr->task->taskgroup)
2453 {
2454 if (thr->task->taskgroup->cancelled)
2455 return;
2456 if (thr->task->taskgroup->workshare
2457 && thr->task->taskgroup->prev
2458 && thr->task->taskgroup->prev->cancelled)
2459 return;
2460 }
2461 }
2462
2463 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2464 }
2465
2466 static void
2467 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2468 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2469 {
2470 const int typemask = 0xff;
2471 size_t i;
2472 gomp_mutex_lock (&devicep->lock);
2473 if (devicep->state == GOMP_DEVICE_FINALIZED)
2474 {
2475 gomp_mutex_unlock (&devicep->lock);
2476 return;
2477 }
2478
2479 for (i = 0; i < mapnum; i++)
2480 if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2481 {
2482 struct splay_tree_key_s cur_node;
2483 cur_node.host_start = (uintptr_t) hostaddrs[i];
2484 cur_node.host_end = cur_node.host_start + sizeof (void *);
2485 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2486
2487 if (n)
2488 gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2489 false, NULL);
2490 }
2491
2492 for (i = 0; i < mapnum; i++)
2493 {
2494 struct splay_tree_key_s cur_node;
2495 unsigned char kind = kinds[i] & typemask;
2496 switch (kind)
2497 {
2498 case GOMP_MAP_FROM:
2499 case GOMP_MAP_ALWAYS_FROM:
2500 case GOMP_MAP_DELETE:
2501 case GOMP_MAP_RELEASE:
2502 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2503 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2504 cur_node.host_start = (uintptr_t) hostaddrs[i];
2505 cur_node.host_end = cur_node.host_start + sizes[i];
2506 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2507 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2508 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2509 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2510 if (!k)
2511 continue;
2512
2513 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2514 k->refcount--;
2515 if ((kind == GOMP_MAP_DELETE
2516 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2517 && k->refcount != REFCOUNT_INFINITY)
2518 k->refcount = 0;
2519
2520 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2521 || kind == GOMP_MAP_ALWAYS_FROM)
2522 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2523 (void *) (k->tgt->tgt_start + k->tgt_offset
2524 + cur_node.host_start
2525 - k->host_start),
2526 cur_node.host_end - cur_node.host_start);
2527 if (k->refcount == 0)
2528 gomp_remove_var (devicep, k);
2529 break;
2530
2531 case GOMP_MAP_DETACH:
2532 break;
2533 default:
2534 gomp_mutex_unlock (&devicep->lock);
2535 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2536 kind);
2537 }
2538 }
2539
2540 gomp_mutex_unlock (&devicep->lock);
2541 }
2542
2543 void
2544 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2545 size_t *sizes, unsigned short *kinds,
2546 unsigned int flags, void **depend)
2547 {
2548 struct gomp_device_descr *devicep = resolve_device (device);
2549
2550 /* If there are depend clauses, but nowait is not present,
2551 block the parent task until the dependencies are resolved
2552 and then just continue with the rest of the function as if it
2553 were a merged task. Until we are able to schedule tasks during
2554 variable mapping or unmapping, ignore nowait if depend clauses
2555 are not present. */
2556 if (depend != NULL)
2557 {
2558 struct gomp_thread *thr = gomp_thread ();
2559 if (thr->task && thr->task->depend_hash)
2560 {
2561 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2562 && thr->ts.team
2563 && !thr->task->final_task)
2564 {
2565 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2566 mapnum, hostaddrs, sizes, kinds,
2567 flags, depend, NULL,
2568 GOMP_TARGET_TASK_DATA))
2569 return;
2570 }
2571 else
2572 {
2573 struct gomp_team *team = thr->ts.team;
2574 /* If parallel or taskgroup has been cancelled, don't start new
2575 tasks. */
2576 if (__builtin_expect (gomp_cancel_var, 0) && team)
2577 {
2578 if (gomp_team_barrier_cancelled (&team->barrier))
2579 return;
2580 if (thr->task->taskgroup)
2581 {
2582 if (thr->task->taskgroup->cancelled)
2583 return;
2584 if (thr->task->taskgroup->workshare
2585 && thr->task->taskgroup->prev
2586 && thr->task->taskgroup->prev->cancelled)
2587 return;
2588 }
2589 }
2590
2591 gomp_task_maybe_wait_for_dependencies (depend);
2592 }
2593 }
2594 }
2595
2596 if (devicep == NULL
2597 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2598 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2599 return;
2600
2601 struct gomp_thread *thr = gomp_thread ();
2602 struct gomp_team *team = thr->ts.team;
2603 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2604 if (__builtin_expect (gomp_cancel_var, 0) && team)
2605 {
2606 if (gomp_team_barrier_cancelled (&team->barrier))
2607 return;
2608 if (thr->task->taskgroup)
2609 {
2610 if (thr->task->taskgroup->cancelled)
2611 return;
2612 if (thr->task->taskgroup->workshare
2613 && thr->task->taskgroup->prev
2614 && thr->task->taskgroup->prev->cancelled)
2615 return;
2616 }
2617 }
2618
2619 /* The variables are mapped separately such that they can be released
2620 independently. */
2621 size_t i, j;
2622 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2623 for (i = 0; i < mapnum; i++)
2624 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2625 {
2626 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2627 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2628 i += sizes[i];
2629 }
2630 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2631 {
2632 for (j = i + 1; j < mapnum; j++)
2633 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
2634 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
2635 break;
2636 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2637 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2638 i += j - i - 1;
2639 }
2640 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
2641 {
2642 /* An attach operation must be processed together with the mapped
2643 base-pointer list item. */
2644 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2645 true, GOMP_MAP_VARS_ENTER_DATA);
2646 i += 1;
2647 }
2648 else
2649 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2650 true, GOMP_MAP_VARS_ENTER_DATA);
2651 else
2652 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2653 }
2654
2655 bool
2656 gomp_target_task_fn (void *data)
2657 {
2658 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2659 struct gomp_device_descr *devicep = ttask->devicep;
2660
2661 if (ttask->fn != NULL)
2662 {
2663 void *fn_addr;
2664 if (devicep == NULL
2665 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2666 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2667 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2668 {
2669 ttask->state = GOMP_TARGET_TASK_FALLBACK;
2670 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
2671 return false;
2672 }
2673
2674 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2675 {
2676 if (ttask->tgt)
2677 gomp_unmap_vars (ttask->tgt, true);
2678 return false;
2679 }
2680
2681 void *actual_arguments;
2682 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2683 {
2684 ttask->tgt = NULL;
2685 actual_arguments = ttask->hostaddrs;
2686 }
2687 else
2688 {
2689 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2690 NULL, ttask->sizes, ttask->kinds, true,
2691 GOMP_MAP_VARS_TARGET);
2692 actual_arguments = (void *) ttask->tgt->tgt_start;
2693 }
2694 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2695
2696 assert (devicep->async_run_func);
2697 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2698 ttask->args, (void *) ttask);
2699 return true;
2700 }
2701 else if (devicep == NULL
2702 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2703 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2704 return false;
2705
2706 size_t i;
2707 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2708 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2709 ttask->kinds, true);
2710 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2711 for (i = 0; i < ttask->mapnum; i++)
2712 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2713 {
2714 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2715 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2716 GOMP_MAP_VARS_ENTER_DATA);
2717 i += ttask->sizes[i];
2718 }
2719 else
2720 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2721 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2722 else
2723 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2724 ttask->kinds);
2725 return false;
2726 }
2727
2728 void
2729 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2730 {
2731 if (thread_limit)
2732 {
2733 struct gomp_task_icv *icv = gomp_icv (true);
2734 icv->thread_limit_var
2735 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2736 }
2737 (void) num_teams;
2738 }
2739
2740 void *
2741 omp_target_alloc (size_t size, int device_num)
2742 {
2743 if (device_num == gomp_get_num_devices ())
2744 return malloc (size);
2745
2746 if (device_num < 0)
2747 return NULL;
2748
2749 struct gomp_device_descr *devicep = resolve_device (device_num);
2750 if (devicep == NULL)
2751 return NULL;
2752
2753 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2754 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2755 return malloc (size);
2756
2757 gomp_mutex_lock (&devicep->lock);
2758 void *ret = devicep->alloc_func (devicep->target_id, size);
2759 gomp_mutex_unlock (&devicep->lock);
2760 return ret;
2761 }
2762
2763 void
2764 omp_target_free (void *device_ptr, int device_num)
2765 {
2766 if (device_ptr == NULL)
2767 return;
2768
2769 if (device_num == gomp_get_num_devices ())
2770 {
2771 free (device_ptr);
2772 return;
2773 }
2774
2775 if (device_num < 0)
2776 return;
2777
2778 struct gomp_device_descr *devicep = resolve_device (device_num);
2779 if (devicep == NULL)
2780 return;
2781
2782 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2783 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2784 {
2785 free (device_ptr);
2786 return;
2787 }
2788
2789 gomp_mutex_lock (&devicep->lock);
2790 gomp_free_device_memory (devicep, device_ptr);
2791 gomp_mutex_unlock (&devicep->lock);
2792 }
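/* Example usage of the two routines above (a sketch; handling of a NULL
   return is up to the caller):

     int dev = omp_get_default_device ();
     void *p = omp_target_alloc (1024, dev);
     if (p != NULL)
       {
         ...use p in target regions via is_device_ptr...
         omp_target_free (p, dev);
       }  */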
2793
2794 int
2795 omp_target_is_present (const void *ptr, int device_num)
2796 {
2797 if (ptr == NULL)
2798 return 1;
2799
2800 if (device_num == gomp_get_num_devices ())
2801 return 1;
2802
2803 if (device_num < 0)
2804 return 0;
2805
2806 struct gomp_device_descr *devicep = resolve_device (device_num);
2807 if (devicep == NULL)
2808 return 0;
2809
2810 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2811 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2812 return 1;
2813
2814 gomp_mutex_lock (&devicep->lock);
2815 struct splay_tree_s *mem_map = &devicep->mem_map;
2816 struct splay_tree_key_s cur_node;
2817
2818 cur_node.host_start = (uintptr_t) ptr;
2819 cur_node.host_end = cur_node.host_start;
2820 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2821 int ret = n != NULL;
2822 gomp_mutex_unlock (&devicep->lock);
2823 return ret;
2824 }
2825
2826 int
2827 omp_target_memcpy (void *dst, const void *src, size_t length,
2828 size_t dst_offset, size_t src_offset, int dst_device_num,
2829 int src_device_num)
2830 {
2831 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2832 bool ret;
2833
2834 if (dst_device_num != gomp_get_num_devices ())
2835 {
2836 if (dst_device_num < 0)
2837 return EINVAL;
2838
2839 dst_devicep = resolve_device (dst_device_num);
2840 if (dst_devicep == NULL)
2841 return EINVAL;
2842
2843 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2844 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2845 dst_devicep = NULL;
2846 }
2847 if (src_device_num != num_devices_openmp)
2848 {
2849 if (src_device_num < 0)
2850 return EINVAL;
2851
2852 src_devicep = resolve_device (src_device_num);
2853 if (src_devicep == NULL)
2854 return EINVAL;
2855
2856 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2857 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2858 src_devicep = NULL;
2859 }
2860 if (src_devicep == NULL && dst_devicep == NULL)
2861 {
2862 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2863 return 0;
2864 }
2865 if (src_devicep == NULL)
2866 {
2867 gomp_mutex_lock (&dst_devicep->lock);
2868 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2869 (char *) dst + dst_offset,
2870 (char *) src + src_offset, length);
2871 gomp_mutex_unlock (&dst_devicep->lock);
2872 return (ret ? 0 : EINVAL);
2873 }
2874 if (dst_devicep == NULL)
2875 {
2876 gomp_mutex_lock (&src_devicep->lock);
2877 ret = src_devicep->dev2host_func (src_devicep->target_id,
2878 (char *) dst + dst_offset,
2879 (char *) src + src_offset, length);
2880 gomp_mutex_unlock (&src_devicep->lock);
2881 return (ret ? 0 : EINVAL);
2882 }
2883 if (src_devicep == dst_devicep)
2884 {
2885 gomp_mutex_lock (&src_devicep->lock);
2886 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2887 (char *) dst + dst_offset,
2888 (char *) src + src_offset, length);
2889 gomp_mutex_unlock (&src_devicep->lock);
2890 return (ret ? 0 : EINVAL);
2891 }
2892 return EINVAL;
2893 }
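/* Example usage (a sketch; SRC and N are placeholders): copy N bytes from
   a host buffer to a device allocation, checking the errno-style return
   convention used above:

     int dev = omp_get_default_device ();
     int host_dev = omp_get_initial_device ();
     void *dst = omp_target_alloc (n, dev);
     if (dst == NULL
         || omp_target_memcpy (dst, src, n, 0, 0, dev, host_dev) != 0)
       ...handle failure...  */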
2894
2895 static int
2896 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2897 int num_dims, const size_t *volume,
2898 const size_t *dst_offsets,
2899 const size_t *src_offsets,
2900 const size_t *dst_dimensions,
2901 const size_t *src_dimensions,
2902 struct gomp_device_descr *dst_devicep,
2903 struct gomp_device_descr *src_devicep)
2904 {
2905 size_t dst_slice = element_size;
2906 size_t src_slice = element_size;
2907 size_t j, dst_off, src_off, length;
2908 int i, ret;
2909
2910 if (num_dims == 1)
2911 {
2912 if (__builtin_mul_overflow (element_size, volume[0], &length)
2913 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2914 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2915 return EINVAL;
2916 if (dst_devicep == NULL && src_devicep == NULL)
2917 {
2918 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2919 length);
2920 ret = 1;
2921 }
2922 else if (src_devicep == NULL)
2923 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2924 (char *) dst + dst_off,
2925 (const char *) src + src_off,
2926 length);
2927 else if (dst_devicep == NULL)
2928 ret = src_devicep->dev2host_func (src_devicep->target_id,
2929 (char *) dst + dst_off,
2930 (const char *) src + src_off,
2931 length);
2932 else if (src_devicep == dst_devicep)
2933 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2934 (char *) dst + dst_off,
2935 (const char *) src + src_off,
2936 length);
2937 else
2938 ret = 0;
2939 return ret ? 0 : EINVAL;
2940 }
2941
2942 /* FIXME: it would be nice to have some plugin function to handle
2943 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
2944 be handled in the generic recursion below, and for host-host it
2945 should be used even for any num_dims >= 2. */
2946
2947 for (i = 1; i < num_dims; i++)
2948 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2949 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2950 return EINVAL;
2951 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2952 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2953 return EINVAL;
2954 for (j = 0; j < volume[0]; j++)
2955 {
2956 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2957 (const char *) src + src_off,
2958 element_size, num_dims - 1,
2959 volume + 1, dst_offsets + 1,
2960 src_offsets + 1, dst_dimensions + 1,
2961 src_dimensions + 1, dst_devicep,
2962 src_devicep);
2963 if (ret)
2964 return ret;
2965 dst_off += dst_slice;
2966 src_off += src_slice;
2967 }
2968 return 0;
2969 }
2970
2971 int
2972 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2973 int num_dims, const size_t *volume,
2974 const size_t *dst_offsets,
2975 const size_t *src_offsets,
2976 const size_t *dst_dimensions,
2977 const size_t *src_dimensions,
2978 int dst_device_num, int src_device_num)
2979 {
2980 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2981
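/* If both DST and SRC are NULL, the OpenMP spec says to return the number
   of dimensions this routine supports; the generic recursion below has no
   fixed limit, hence INT_MAX. */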
2982 if (!dst && !src)
2983 return INT_MAX;
2984
2985 if (dst_device_num != gomp_get_num_devices ())
2986 {
2987 if (dst_device_num < 0)
2988 return EINVAL;
2989
2990 dst_devicep = resolve_device (dst_device_num);
2991 if (dst_devicep == NULL)
2992 return EINVAL;
2993
2994 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2995 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2996 dst_devicep = NULL;
2997 }
2998 if (src_device_num != num_devices_openmp)
2999 {
3000 if (src_device_num < 0)
3001 return EINVAL;
3002
3003 src_devicep = resolve_device (src_device_num);
3004 if (src_devicep == NULL)
3005 return EINVAL;
3006
3007 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3008 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3009 src_devicep = NULL;
3010 }
3011
3012 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3013 return EINVAL;
3014
3015 if (src_devicep)
3016 gomp_mutex_lock (&src_devicep->lock);
3017 else if (dst_devicep)
3018 gomp_mutex_lock (&dst_devicep->lock);
3019 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3020 volume, dst_offsets, src_offsets,
3021 dst_dimensions, src_dimensions,
3022 dst_devicep, src_devicep);
3023 if (src_devicep)
3024 gomp_mutex_unlock (&src_devicep->lock);
3025 else if (dst_devicep)
3026 gomp_mutex_unlock (&dst_devicep->lock);
3027 return ret;
3028 }
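/* Worked example (host-to-host, a sketch): copy a 2x3 block of ints from
   SRC (a 4x5 array) starting at row 0, column 1 into DST (a 3x4 array)
   starting at row 1, column 0:

     int src[4][5], dst[3][4];
     size_t volume[2] = { 2, 3 };
     size_t dst_offsets[2] = { 1, 0 };
     size_t src_offsets[2] = { 0, 1 };
     size_t dst_dimensions[2] = { 3, 4 };
     size_t src_dimensions[2] = { 4, 5 };
     int host = omp_get_initial_device ();
     int err = omp_target_memcpy_rect (dst, src, sizeof (int), 2, volume,
                                       dst_offsets, src_offsets,
                                       dst_dimensions, src_dimensions,
                                       host, host);  */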
3029
3030 int
3031 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3032 size_t size, size_t device_offset, int device_num)
3033 {
3034 if (device_num == gomp_get_num_devices ())
3035 return EINVAL;
3036
3037 if (device_num < 0)
3038 return EINVAL;
3039
3040 struct gomp_device_descr *devicep = resolve_device (device_num);
3041 if (devicep == NULL)
3042 return EINVAL;
3043
3044 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3045 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3046 return EINVAL;
3047
3048 gomp_mutex_lock (&devicep->lock);
3049
3050 struct splay_tree_s *mem_map = &devicep->mem_map;
3051 struct splay_tree_key_s cur_node;
3052 int ret = EINVAL;
3053
3054 cur_node.host_start = (uintptr_t) host_ptr;
3055 cur_node.host_end = cur_node.host_start + size;
3056 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3057 if (n)
3058 {
3059 if (n->tgt->tgt_start + n->tgt_offset
3060 == (uintptr_t) device_ptr + device_offset
3061 && n->host_start <= cur_node.host_start
3062 && n->host_end >= cur_node.host_end)
3063 ret = 0;
3064 }
3065 else
3066 {
3067 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3068 tgt->array = gomp_malloc (sizeof (*tgt->array));
3069 tgt->refcount = 1;
3070 tgt->tgt_start = 0;
3071 tgt->tgt_end = 0;
3072 tgt->to_free = NULL;
3073 tgt->prev = NULL;
3074 tgt->list_count = 0;
3075 tgt->device_descr = devicep;
3076 splay_tree_node array = tgt->array;
3077 splay_tree_key k = &array->key;
3078 k->host_start = cur_node.host_start;
3079 k->host_end = cur_node.host_end;
3080 k->tgt = tgt;
3081 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3082 k->refcount = REFCOUNT_INFINITY;
3083 k->dynamic_refcount = 0;
3084 k->aux = NULL;
3085 array->left = NULL;
3086 array->right = NULL;
3087 splay_tree_insert (&devicep->mem_map, array);
3088 ret = 0;
3089 }
3090 gomp_mutex_unlock (&devicep->lock);
3091 return ret;
3092 }
3093
3094 int
3095 omp_target_disassociate_ptr (const void *ptr, int device_num)
3096 {
3097 if (device_num == gomp_get_num_devices ())
3098 return EINVAL;
3099
3100 if (device_num < 0)
3101 return EINVAL;
3102
3103 struct gomp_device_descr *devicep = resolve_device (device_num);
3104 if (devicep == NULL)
3105 return EINVAL;
3106
3107 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3108 return EINVAL;
3109
3110 gomp_mutex_lock (&devicep->lock);
3111
3112 struct splay_tree_s *mem_map = &devicep->mem_map;
3113 struct splay_tree_key_s cur_node;
3114 int ret = EINVAL;
3115
3116 cur_node.host_start = (uintptr_t) ptr;
3117 cur_node.host_end = cur_node.host_start;
3118 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3119 if (n
3120 && n->host_start == cur_node.host_start
3121 && n->refcount == REFCOUNT_INFINITY
3122 && n->tgt->tgt_start == 0
3123 && n->tgt->to_free == NULL
3124 && n->tgt->refcount == 1
3125 && n->tgt->list_count == 0)
3126 {
3127 splay_tree_remove (&devicep->mem_map, n);
3128 gomp_unmap_tgt (n->tgt);
3129 ret = 0;
3130 }
3131
3132 gomp_mutex_unlock (&devicep->lock);
3133 return ret;
3134 }
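/* Example usage of the association routines above (a sketch; N is a
   placeholder):

     int dev = omp_get_default_device ();
     int buf[N];
     void *dptr = omp_target_alloc (sizeof (buf), dev);
     if (dptr != NULL
         && omp_target_associate_ptr (buf, dptr, sizeof (buf), 0, dev) == 0)
       {
         ...omp_target_is_present (buf, dev) now returns nonzero...
         omp_target_disassociate_ptr (buf, dev);
       }
     omp_target_free (dptr, dev);  */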
3135
3136 int
3137 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3138 {
3139 (void) kind;
3140 if (device_num == gomp_get_num_devices ())
3141 return gomp_pause_host ();
3142 if (device_num < 0 || device_num >= num_devices_openmp)
3143 return -1;
3144 /* Do nothing for target devices for now. */
3145 return 0;
3146 }
3147
3148 int
3149 omp_pause_resource_all (omp_pause_resource_t kind)
3150 {
3151 (void) kind;
3152 if (gomp_pause_host ())
3153 return -1;
3154 /* Do nothing for target devices for now. */
3155 return 0;
3156 }
3157
3158 ialias (omp_pause_resource)
3159 ialias (omp_pause_resource_all)
3160
3161 #ifdef PLUGIN_SUPPORT
3162
3163 /* This function tries to load a plugin for DEVICE. The name of the
3164 plugin is passed in PLUGIN_NAME.
3165 The handles of the found functions are stored in the corresponding fields
3166 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
3167
3168 static bool
3169 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3170 const char *plugin_name)
3171 {
3172 const char *err = NULL, *last_missing = NULL;
3173
3174 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3175 if (!plugin_handle)
3176 #if OFFLOAD_DEFAULTED
3177 return 0;
3178 #else
3179 goto dl_fail;
3180 #endif
3181
3182 /* Check if all required functions are available in the plugin and store
3183 their handles. None of the symbols can legitimately be NULL,
3184 so we don't need to check dlerror all the time. */
3185 #define DLSYM(f) \
3186 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
3187 goto dl_fail
3188 /* Similar, but missing functions are not an error. Return false if
3189 failed, true otherwise. */
3190 #define DLSYM_OPT(f, n) \
3191 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
3192 || (last_missing = #n, 0))
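/* For instance, DLSYM (alloc) expands to
     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;
   i.e. each plugin entry point is looked up under its GOMP_OFFLOAD_ name
   and stored in the matching *_func field of DEVICE. */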
3193
3194 DLSYM (version);
3195 if (device->version_func () != GOMP_VERSION)
3196 {
3197 err = "plugin version mismatch";
3198 goto fail;
3199 }
3200
3201 DLSYM (get_name);
3202 DLSYM (get_caps);
3203 DLSYM (get_type);
3204 DLSYM (get_num_devices);
3205 DLSYM (init_device);
3206 DLSYM (fini_device);
3207 DLSYM (load_image);
3208 DLSYM (unload_image);
3209 DLSYM (alloc);
3210 DLSYM (free);
3211 DLSYM (dev2host);
3212 DLSYM (host2dev);
3213 device->capabilities = device->get_caps_func ();
3214 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3215 {
3216 DLSYM (run);
3217 DLSYM_OPT (async_run, async_run);
3218 DLSYM_OPT (can_run, can_run);
3219 DLSYM (dev2dev);
3220 }
3221 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3222 {
3223 if (!DLSYM_OPT (openacc.exec, openacc_exec)
3224 || !DLSYM_OPT (openacc.create_thread_data,
3225 openacc_create_thread_data)
3226 || !DLSYM_OPT (openacc.destroy_thread_data,
3227 openacc_destroy_thread_data)
3228 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3229 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3230 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3231 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3232 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3233 || !DLSYM_OPT (openacc.async.queue_callback,
3234 openacc_async_queue_callback)
3235 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3236 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3237 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3238 || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3239 {
3240 /* Require all the OpenACC handlers if we have
3241 GOMP_OFFLOAD_CAP_OPENACC_200. */
3242 err = "plugin missing OpenACC handler function";
3243 goto fail;
3244 }
3245
3246 unsigned cuda = 0;
3247 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3248 openacc_cuda_get_current_device);
3249 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3250 openacc_cuda_get_current_context);
3251 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3252 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3253 if (cuda && cuda != 4)
3254 {
3255 /* Make sure all the CUDA functions are there if any of them are. */
3256 err = "plugin missing OpenACC CUDA handler function";
3257 goto fail;
3258 }
3259 }
3260 #undef DLSYM
3261 #undef DLSYM_OPT
3262
3263 return 1;
3264
3265 dl_fail:
3266 err = dlerror ();
3267 fail:
3268 gomp_error ("while loading %s: %s", plugin_name, err);
3269 if (last_missing)
3270 gomp_error ("missing function was %s", last_missing);
3271 if (plugin_handle)
3272 dlclose (plugin_handle);
3273
3274 return 0;
3275 }
3276
3277 /* This function finalizes all initialized devices. */
3278
3279 static void
3280 gomp_target_fini (void)
3281 {
3282 int i;
3283 for (i = 0; i < num_devices; i++)
3284 {
3285 bool ret = true;
3286 struct gomp_device_descr *devicep = &devices[i];
3287 gomp_mutex_lock (&devicep->lock);
3288 if (devicep->state == GOMP_DEVICE_INITIALIZED)
3289 ret = gomp_fini_device (devicep);
3290 gomp_mutex_unlock (&devicep->lock);
3291 if (!ret)
3292 gomp_fatal ("device finalization failed");
3293 }
3294 }
3295
3296 /* This function initializes the runtime for offloading.
3297 It parses the list of offload plugins and tries to load them.
3298 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3299 will be set, and the array DEVICES initialized, containing descriptors for
3300 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3301 followed by the others. */
3302
3303 static void
3304 gomp_target_init (void)
3305 {
3306 const char *prefix = "libgomp-plugin-";
3307 const char *suffix = SONAME_SUFFIX (1);
3308 const char *cur, *next;
3309 char *plugin_name;
3310 int i, new_num_devs;
3311 int num_devs = 0, num_devs_openmp;
3312 struct gomp_device_descr *devs = NULL;
3313
3314 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3315 return;
3316
3317 cur = OFFLOAD_PLUGINS;
3318 if (*cur)
3319 do
3320 {
3321 struct gomp_device_descr current_device;
3322 size_t prefix_len, suffix_len, cur_len;
3323
3324 next = strchr (cur, ',');
3325
3326 prefix_len = strlen (prefix);
3327 cur_len = next ? next - cur : strlen (cur);
3328 suffix_len = strlen (suffix);
3329
3330 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3331 if (!plugin_name)
3332 {
3333 num_devs = 0;
3334 break;
3335 }
3336
3337 memcpy (plugin_name, prefix, prefix_len);
3338 memcpy (plugin_name + prefix_len, cur, cur_len);
3339 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
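/* E.g. for an OFFLOAD_PLUGINS entry "nvptx" this builds
   "libgomp-plugin-nvptx" SONAME_SUFFIX (1), which on typical ELF
   targets is "libgomp-plugin-nvptx.so.1". */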
3340
3341 if (gomp_load_plugin_for_device (&current_device, plugin_name))
3342 {
3343 new_num_devs = current_device.get_num_devices_func ();
3344 if (new_num_devs >= 1)
3345 {
3346 /* Augment DEVICES and NUM_DEVICES. */
3347
3348 devs = realloc (devs, (num_devs + new_num_devs)
3349 * sizeof (struct gomp_device_descr));
3350 if (!devs)
3351 {
3352 num_devs = 0;
3353 free (plugin_name);
3354 break;
3355 }
3356
3357 current_device.name = current_device.get_name_func ();
3358 /* current_device.capabilities has already been set. */
3359 current_device.type = current_device.get_type_func ();
3360 current_device.mem_map.root = NULL;
3361 current_device.state = GOMP_DEVICE_UNINITIALIZED;
3362 for (i = 0; i < new_num_devs; i++)
3363 {
3364 current_device.target_id = i;
3365 devs[num_devs] = current_device;
3366 gomp_mutex_init (&devs[num_devs].lock);
3367 num_devs++;
3368 }
3369 }
3370 }
3371
3372 free (plugin_name);
3373 cur = next + 1;
3374 }
3375 while (next);
3376
3377 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3378 NUM_DEVICES_OPENMP. */
3379 struct gomp_device_descr *devs_s
3380 = malloc (num_devs * sizeof (struct gomp_device_descr));
3381 if (!devs_s)
3382 {
3383 num_devs = 0;
3384 free (devs);
3385 devs = NULL;
3386 }
3387 num_devs_openmp = 0;
3388 for (i = 0; i < num_devs; i++)
3389 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3390 devs_s[num_devs_openmp++] = devs[i];
3391 int num_devs_after_openmp = num_devs_openmp;
3392 for (i = 0; i < num_devs; i++)
3393 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3394 devs_s[num_devs_after_openmp++] = devs[i];
3395 free (devs);
3396 devs = devs_s;
3397
3398 for (i = 0; i < num_devs; i++)
3399 {
3400 /* The 'devices' array can be moved (by the realloc call) until we have
3401 found all the plugins, so registering with the OpenACC runtime (which
3402 takes a copy of the pointer argument) must be delayed until now. */
3403 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3404 goacc_register (&devs[i]);
3405 }
3406
3407 num_devices = num_devs;
3408 num_devices_openmp = num_devs_openmp;
3409 devices = devs;
3410 if (atexit (gomp_target_fini) != 0)
3411 gomp_fatal ("atexit failed");
3412 }
3413
3414 #else /* PLUGIN_SUPPORT */
3415 /* If dlfcn.h is unavailable we always fall back to host execution.
3416 GOMP_target* routines are just stubs for this case. */
3417 static void
3418 gomp_target_init (void)
3419 {
3420 }
3421 #endif /* PLUGIN_SUPPORT */