/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support for offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}

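/* Look up KEY in MEM_MAP.  A zero-length KEY (host_start == host_end) can
   never match a tree entry directly, so retry the lookup with the key grown
   to cover the byte at the address, then the byte just before it, before
   falling back to the exact lookup.  This lets zero-length array sections
   resolve against a mapping that contains or ends at the address.  */
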
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

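/* Like gomp_map_lookup, but a zero-length KEY is only grown to cover the
   byte at its address: it must fall inside an existing mapping rather than
   merely border one.  */
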
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

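/* Copy SIZE bytes from SRCADDR to DSTADDR through the plugin callback
   COPY_FUNC, releasing DEVICEP's lock and aborting via gomp_fatal on
   failure.  DST and SRC name the destination and source address spaces
   for the diagnostic.  */
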
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

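/* As gomp_device_copy, but for the asynchronous OpenACC copy callbacks,
   which additionally take the goacc_asyncqueue AQ the transfer is enqueued
   on.  */
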
static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
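
/* For illustration: with MAX_COALESCE_BUF_GAP of 4 KiB, three regions
   added in device-offset order behave as follows:

     gomp_coalesce_buf_add (&cbuf, 0, 256);     chunk 0 becomes [0, 256)
     gomp_coalesce_buf_add (&cbuf, 320, 128);   gap of 64 <= 4 KiB, chunk 0
                                                grows to [0, 448), use_cnt 2
     gomp_coalesce_buf_add (&cbuf, 8192, 512);  gap > 4 KiB, chunk 1 becomes
                                                [8192, 8704)

   The first two regions are later transferred in a single host to device
   copy; had chunk 0 covered only one region, it would have been dropped
   again and that region copied directly.  */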

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
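	  /* Binary search the chunk array for a chunk containing DOFF.  */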
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			     "dev", d, "host", h, sz, aq);
  else
    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep, aq,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

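/* Return the map kind at index IDX from KINDS, which holds unsigned shorts
   when SHORT_MAPKIND and unsigned chars otherwise.  */
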
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

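/* Write, at device address TGT->tgt_start + TARGET_OFFSET, the device
   pointer corresponding to the host pointer HOST_PTR: NULL stays NULL,
   otherwise HOST_PTR plus BIAS must fall inside an existing mapping and
   the stored value is the matching device address minus BIAS.  */
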
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

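/* Handle element I of a GOMP_MAP_STRUCT mapping whose containing structure
   is already mapped as N: the element (looked up also with the zero-size
   adjustments) must belong to the same existing mapping, otherwise mapping
   it is an error.  */
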
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
				      &tgt->list[i], kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

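/* Return the effective device address for entry I of TGT.  For entries
   without a splay tree key, tgt->list[i].offset encodes the answer:
   ~(uintptr_t) 0 means pass hostaddrs[i] through unchanged, ~(uintptr_t) 1
   means NULL, ~(uintptr_t) 2 means the value is derived from the following
   entry (GOMP_MAP_ALWAYS_POINTER); any other offset is relative to
   tgt->tgt_start.  */
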
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep, aq,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, aq,
				      (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt, aq,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep, aq,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep, aq,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep, aq,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, size_t mapnum,
		     void **hostaddrs, void **devaddrs, size_t *sizes,
		     void *kinds, bool short_mapkind,
		     enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, pragma_kind);
}

attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

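/* Remove the splay tree key K from DEVICEP's address map, re-inserting a
   saved "declare target link" key, if any, and drop the reference K's
   target_mem_desc holds; return true if that unmapped the descriptor
   itself.  */
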
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}

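/* Decrease the reference count of the target_mem_desc pointed to by PTR,
   unmapping it when the count drops to zero.  Takes a void * so it can
   double as an OpenACC async queue callback.  */
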
static void
gomp_unref_tgt (void *ptr)
{
  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}

attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
		       struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				  NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mapping between addresses from
   HOST_TABLE and from the loaded target image.  We rely on the host and
   device compilers emitting variables and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
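  /* For example, on a 64-bit host link_bit is 0x8000000000000000, so a
     host table size of 0x8000000000000010 describes a 16-byte "declare
     target link" variable.  */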

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find the mapping at the start of the node array.  */
1358 if (num_funcs || num_vars)
1359 {
1360 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1361 : (uintptr_t) host_var_table[0]);
1362 k.host_end = k.host_start + 1;
1363 node = splay_tree_lookup (&devicep->mem_map, &k);
1364 }
d3d8e632 1365
9b50ad1d 1366 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1367 {
1368 gomp_mutex_unlock (&devicep->lock);
1369 gomp_fatal ("image unload fail");
1370 }
7de5731e 1371
1372 /* Remove mappings from splay tree. */
c0998828 1373 int i;
1374 for (i = 0; i < num_funcs; i++)
7de5731e 1375 {
c0998828 1376 k.host_start = (uintptr_t) host_func_table[i];
7de5731e 1377 k.host_end = k.host_start + 1;
1378 splay_tree_remove (&devicep->mem_map, &k);
1379 }
1380
c0998828 1381 /* Most significant bit of the size in host and target tables marks
1382 "omp declare target link" variables. */
1383 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1384 const uintptr_t size_mask = ~link_bit;
1385 bool is_tgt_unmapped = false;
1386
1387 for (i = 0; i < num_vars; i++)
7de5731e 1388 {
c0998828 1389 k.host_start = (uintptr_t) host_var_table[i * 2];
1390 k.host_end
1391 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1392
1393 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1394 splay_tree_remove (&devicep->mem_map, &k);
1395 else
1396 {
1397 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
737cc978 1398 is_tgt_unmapped = gomp_remove_var (devicep, n);
c0998828 1399 }
7de5731e 1400 }
1401
c0998828 1402 if (node && !is_tgt_unmapped)
7de5731e 1403 {
1404 free (node->tgt);
1405 free (node);
1406 }
1407}
1408
0d8c703d 1409/* This function should be called from every offload image while loading.
995b27b9 1410 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1411 the target, and TARGET_DATA needed by target plugin. */
1412
1413void
d3d8e632 1414GOMP_offload_register_ver (unsigned version, const void *host_table,
1415 int target_type, const void *target_data)
995b27b9 1416{
0d8c703d 1417 int i;
d3d8e632 1418
1419 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1420 gomp_fatal ("Library too old for offload (version %u < %u)",
1421 GOMP_VERSION, GOMP_VERSION_LIB (version));
1422
0d8c703d 1423 gomp_mutex_lock (&register_lock);
1424
1425 /* Load image to all initialized devices. */
1426 for (i = 0; i < num_devices; i++)
1427 {
1428 struct gomp_device_descr *devicep = &devices[i];
1429 gomp_mutex_lock (&devicep->lock);
f87b2900 1430 if (devicep->type == target_type
1431 && devicep->state == GOMP_DEVICE_INITIALIZED)
d3d8e632 1432 gomp_load_image_to_device (devicep, version,
1433 host_table, target_data, true);
0d8c703d 1434 gomp_mutex_unlock (&devicep->lock);
1435 }
995b27b9 1436
0d8c703d 1437 /* Insert image to array of pending images. */
1438 offload_images
1439 = gomp_realloc_unlock (offload_images,
1440 (num_offload_images + 1)
1441 * sizeof (struct offload_image_descr));
d3d8e632 1442 offload_images[num_offload_images].version = version;
995b27b9 1443 offload_images[num_offload_images].type = target_type;
1444 offload_images[num_offload_images].host_table = host_table;
1445 offload_images[num_offload_images].target_data = target_data;
1446
1447 num_offload_images++;
0d8c703d 1448 gomp_mutex_unlock (&register_lock);
995b27b9 1449}
1450
d3d8e632 1451void
1452GOMP_offload_register (const void *host_table, int target_type,
1453 const void *target_data)
1454{
1455 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1456}
1457
0d8c703d 1458/* This function should be called from every offload image while unloading.
1459 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1460 the target, and TARGET_DATA needed by target plugin. */
995b27b9 1461
0d8c703d 1462void
d3d8e632 1463GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1464 int target_type, const void *target_data)
995b27b9 1465{
0d8c703d 1466 int i;
1467
0d8c703d 1468 gomp_mutex_lock (&register_lock);
1469
1470 /* Unload image from all initialized devices. */
1471 for (i = 0; i < num_devices; i++)
1472 {
0d8c703d 1473 struct gomp_device_descr *devicep = &devices[i];
1474 gomp_mutex_lock (&devicep->lock);
f87b2900 1475 if (devicep->type == target_type
1476 && devicep->state == GOMP_DEVICE_INITIALIZED)
d3d8e632 1477 gomp_unload_image_from_device (devicep, version,
1478 host_table, target_data);
0d8c703d 1479 gomp_mutex_unlock (&devicep->lock);
1480 }
1481
1482 /* Remove image from array of pending images. */
1483 for (i = 0; i < num_offload_images; i++)
1484 if (offload_images[i].target_data == target_data)
1485 {
1486 offload_images[i] = offload_images[--num_offload_images];
1487 break;
1488 }
1489
1490 gomp_mutex_unlock (&register_lock);
ca4c3545 1491}
995b27b9 1492
d3d8e632 1493void
1494GOMP_offload_unregister (const void *host_table, int target_type,
1495 const void *target_data)
1496{
1497 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1498}
1499
0d8c703d 1500/* This function initializes the target device, specified by DEVICEP. DEVICEP
1501 must be locked on entry, and remains locked on return. */
ca4c3545 1502
1503attribute_hidden void
0d8c703d 1504gomp_init_device (struct gomp_device_descr *devicep)
ca4c3545 1505{
995b27b9 1506 int i;
9b50ad1d 1507 if (!devicep->init_device_func (devicep->target_id))
1508 {
1509 gomp_mutex_unlock (&devicep->lock);
1510 gomp_fatal ("device initialization failed");
1511 }
0d8c703d 1512
1513 /* Load to device all images registered by the moment. */
1514 for (i = 0; i < num_offload_images; i++)
995b27b9 1515 {
0d8c703d 1516 struct offload_image_descr *image = &offload_images[i];
1517 if (image->type == devicep->type)
d3d8e632 1518 gomp_load_image_to_device (devicep, image->version,
1519 image->host_table, image->target_data,
1520 false);
995b27b9 1521 }
1522
534b5e00 1523 /* Initialize OpenACC asynchronous queues. */
1524 goacc_init_asyncqueues (devicep);
1525
f87b2900 1526 devicep->state = GOMP_DEVICE_INITIALIZED;
ca4c3545 1527}
1528
534b5e00 1529/* This function finalizes the target device, specified by DEVICEP. DEVICEP
1530 must be locked on entry, and remains locked on return. */
1531
1532attribute_hidden bool
1533gomp_fini_device (struct gomp_device_descr *devicep)
1534{
1535 bool ret = goacc_fini_asyncqueues (devicep);
1536 ret &= devicep->fini_device_func (devicep->target_id);
1537 devicep->state = GOMP_DEVICE_FINALIZED;
1538 return ret;
1539}
1540
7de5731e 1541attribute_hidden void
1542gomp_unload_device (struct gomp_device_descr *devicep)
1543{
f87b2900 1544 if (devicep->state == GOMP_DEVICE_INITIALIZED)
7de5731e 1545 {
1546 unsigned i;
1547
1548 /* Unload from device all images registered at the moment. */
1549 for (i = 0; i < num_offload_images; i++)
1550 {
1551 struct offload_image_descr *image = &offload_images[i];
1552 if (image->type == devicep->type)
d3d8e632 1553 gomp_unload_image_from_device (devicep, image->version,
1554 image->host_table,
7de5731e 1555 image->target_data);
1556 }
1557 }
1558}
1559
ca4c3545 1560/* Free address mapping tables. MM must be locked on entry, and remains locked
1561 on return. */
1562
1563attribute_hidden void
0d8c703d 1564gomp_free_memmap (struct splay_tree_s *mem_map)
ca4c3545 1565{
0d8c703d 1566 while (mem_map->root)
ca4c3545 1567 {
0d8c703d 1568 struct target_mem_desc *tgt = mem_map->root->key.tgt;
ca4c3545 1569
0d8c703d 1570 splay_tree_remove (mem_map, &mem_map->root->key);
ca4c3545 1571 free (tgt->array);
1572 free (tgt);
1573 }
ca4c3545 1574}
1575
9561765e 1576/* Host fallback for GOMP_target{,_ext} routines. */
43895be5 1577
1578static void
1579gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1580{
1581 struct gomp_thread old_thr, *thr = gomp_thread ();
1582 old_thr = *thr;
1583 memset (thr, '\0', sizeof (*thr));
1584 if (gomp_places_list)
1585 {
1586 thr->place = old_thr.place;
1587 thr->ts.place_partition_len = gomp_places_list_len;
1588 }
1589 fn (hostaddrs);
1590 gomp_free_thread (thr);
1591 *thr = old_thr;
1592}
1593
56686608 1594/* Calculate alignment and size requirements of a private copy of data shared
1595 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
9561765e 1596
56686608 1597static inline void
1598calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1599 unsigned short *kinds, size_t *tgt_align,
1600 size_t *tgt_size)
9561765e 1601{
56686608 1602 size_t i;
1603 for (i = 0; i < mapnum; i++)
1604 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1605 {
1606 size_t align = (size_t) 1 << (kinds[i] >> 8);
1607 if (*tgt_align < align)
1608 *tgt_align = align;
1609 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1610 *tgt_size += sizes[i];
1611 }
1612}
1613
1614/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1615
1616static inline void
1617copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1618 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1619 size_t tgt_size)
1620{
1621 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1622 if (al)
1623 tgt += tgt_align - al;
1624 tgt_size = 0;
1625 size_t i;
9561765e 1626 for (i = 0; i < mapnum; i++)
1627 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1628 {
1629 size_t align = (size_t) 1 << (kinds[i] >> 8);
9561765e 1630 tgt_size = (tgt_size + align - 1) & ~(align - 1);
56686608 1631 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1632 hostaddrs[i] = tgt + tgt_size;
1633 tgt_size = tgt_size + sizes[i];
9561765e 1634 }
56686608 1635}
1636
9561765e 1637/* Helper function of GOMP_target{,_ext} routines. */
43895be5 1638
1639static void *
1640gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1641 void (*host_fn) (void *))
1642{
1643 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1644 return (void *) host_fn;
1645 else
1646 {
1647 gomp_mutex_lock (&devicep->lock);
f87b2900 1648 if (devicep->state == GOMP_DEVICE_FINALIZED)
1649 {
1650 gomp_mutex_unlock (&devicep->lock);
1651 return NULL;
1652 }
1653
43895be5 1654 struct splay_tree_key_s k;
1655 k.host_start = (uintptr_t) host_fn;
1656 k.host_end = k.host_start + 1;
1657 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1658 gomp_mutex_unlock (&devicep->lock);
1659 if (tgt_fn == NULL)
bc6edebb 1660 return NULL;
43895be5 1661
1662 return (void *) tgt_fn->tgt_offset;
1663 }
1664}
1665
bc7bff74 1666/* Called when encountering a target directive. If DEVICE
ca4c3545 1667 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1668 GOMP_DEVICE_HOST_FALLBACK (or any value
1669 larger than last available hw device), use host fallback.
dc19c8fd 1670 FN is address of host code, UNUSED is part of the current ABI, but
1671 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
bc7bff74 1672 with MAPNUM entries, with addresses of the host objects,
1673 sizes of the host objects (resp. for pointer kind pointer bias
1674 and assumed sizeof (void *) size) and kinds. */
1675
1676void
dc19c8fd 1677GOMP_target (int device, void (*fn) (void *), const void *unused,
bc7bff74 1678 size_t mapnum, void **hostaddrs, size_t *sizes,
1679 unsigned char *kinds)
1680{
995b27b9 1681 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1682
f87b2900 1683 void *fn_addr;
ca4c3545 1684 if (devicep == NULL
f87b2900 1685 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 1686 /* All shared memory devices should use the GOMP_target_ext function. */
1687 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
f87b2900 1688 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
43895be5 1689 return gomp_target_fallback (fn, hostaddrs);
1690
43895be5 1691 struct target_mem_desc *tgt_vars
1692 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1693 GOMP_MAP_VARS_TARGET);
56686608 1694 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1695 NULL);
43895be5 1696 gomp_unmap_vars (tgt_vars, true);
1697}
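
/* A hand-written sketch, not verbatim compiler output, of how a call to
   GOMP_target for "#pragma omp target map(tofrom: x)" could look; the
   outlined function EXAMPLE_TARGET_FN and the wrapper are hypothetical,
   while GOMP_DEVICE_ICV and GOMP_MAP_TOFROM come from gomp-constants.h.
   FN receives an array of pointers to the mapped objects (HOSTADDRS on
   the host fallback path, the device copies otherwise).  */
#if 0
extern int x;

static void
example_target_fn (void *data)
{
  int *xp = (int *) ((void **) data)[0];
  *xp += 1;
}

static void
example_launch (void)
{
  void *hostaddrs[1] = { &x };
  size_t sizes[1] = { sizeof (int) };
  unsigned char kinds[1] = { GOMP_MAP_TOFROM };
  GOMP_target (GOMP_DEVICE_ICV, example_target_fn, NULL, 1, hostaddrs,
	       sizes, kinds);
}
#endif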
995b27b9 1698
9561765e 1699/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
 1700   and several arguments have been added:
 1701   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
 1702   DEPEND is an array of dependencies, see GOMP_task for details.
56686608 1703
 1704   ARGS is a pointer to an array consisting of a variable number of both
 1705   device-independent and device-specific arguments.  Each argument takes
 1706   one or two elements: the first specifies which device it is intended
 1707   for, its type, and optionally also its value; if the value does not
 1708   fit into the first element, the whole second element holds the actual
 1709   value.  The last element of the array is a single NULL.  The
 1710   device-independent arguments include for example NUM_TEAMS and
 1711   THREAD_LIMIT; a sketch of this encoding follows the function below.
 1712
9561765e 1713   NUM_TEAMS is positive if GOMP_teams will be called in the body with
 1714   that value, or 1 if the teams construct is not present, or 0 if the
 1715   teams construct does not have a num_teams clause (so the choice is
 1716   implementation defined), and -1 if it can't be determined on the host
 1717   what value GOMP_teams will have on the device.
 1718   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
 1719   body with that value, or 0 if the teams construct does not have a
 1720   thread_limit clause or the teams construct is not present, or -1 if it
 1721   can't be determined on the host what value GOMP_teams will have on the
 1722   device. */
1721
43895be5 1722void
9561765e 1723GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1724 void **hostaddrs, size_t *sizes, unsigned short *kinds,
56686608 1725 unsigned int flags, void **depend, void **args)
43895be5 1726{
1727 struct gomp_device_descr *devicep = resolve_device (device);
84217e9d 1728 size_t tgt_align = 0, tgt_size = 0;
1729 bool fpc_done = false;
ca4c3545 1730
a9833286 1731 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1732 {
1733 struct gomp_thread *thr = gomp_thread ();
1734 /* Create a team if we don't have any around, as nowait
1735 target tasks make sense to run asynchronously even when
1736 outside of any parallel. */
1737 if (__builtin_expect (thr->ts.team == NULL, 0))
1738 {
1739 struct gomp_team *team = gomp_new_team (1);
1740 struct gomp_task *task = thr->task;
1741 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1742 team->prev_ts = thr->ts;
1743 thr->ts.team = team;
1744 thr->ts.team_id = 0;
1745 thr->ts.work_share = &team->work_shares[0];
1746 thr->ts.last_work_share = NULL;
1747#ifdef HAVE_SYNC_BUILTINS
1748 thr->ts.single_count = 0;
1749#endif
1750 thr->ts.static_trip = 0;
1751 thr->task = &team->implicit_task[0];
1752 gomp_init_task (thr->task, NULL, icv);
1753 if (task)
1754 {
1755 thr->task = task;
1756 gomp_end_task ();
1757 free (task);
1758 thr->task = &team->implicit_task[0];
1759 }
1760 else
1761 pthread_setspecific (gomp_thread_destructor, thr);
1762 }
1763 if (thr->ts.team
1764 && !thr->task->final_task)
1765 {
1766 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
56686608 1767 sizes, kinds, flags, depend, args,
a9833286 1768 GOMP_TARGET_TASK_BEFORE_MAP);
1769 return;
1770 }
1771 }
1772
1773 /* If there are depend clauses, but nowait is not present
1774 (or we are in a final task), block the parent task until the
1775 dependencies are resolved and then just continue with the rest
1776 of the function as if it is a merged task. */
43895be5 1777 if (depend != NULL)
1778 {
1779 struct gomp_thread *thr = gomp_thread ();
1780 if (thr->task && thr->task->depend_hash)
84217e9d 1781 {
1782 /* If we might need to wait, copy firstprivate now. */
1783 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1784 &tgt_align, &tgt_size);
1785 if (tgt_align)
1786 {
1787 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1788 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1789 tgt_align, tgt_size);
1790 }
1791 fpc_done = true;
1792 gomp_task_maybe_wait_for_dependencies (depend);
1793 }
43895be5 1794 }
ca4c3545 1795
f87b2900 1796 void *fn_addr;
43895be5 1797 if (devicep == NULL
f87b2900 1798 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 1799 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1800 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
ca4c3545 1801 {
84217e9d 1802 if (!fpc_done)
1803 {
1804 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1805 &tgt_align, &tgt_size);
1806 if (tgt_align)
1807 {
1808 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1809 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1810 tgt_align, tgt_size);
1811 }
1812 }
1813 gomp_target_fallback (fn, hostaddrs);
43895be5 1814 return;
ca4c3545 1815 }
995b27b9 1816
56686608 1817 struct target_mem_desc *tgt_vars;
56686608 1818 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1819 {
84217e9d 1820 if (!fpc_done)
1821 {
1822 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1823 &tgt_align, &tgt_size);
1824 if (tgt_align)
1825 {
1826 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1827 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1828 tgt_align, tgt_size);
1829 }
1830 }
56686608 1831 tgt_vars = NULL;
1832 }
1833 else
1834 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1835 true, GOMP_MAP_VARS_TARGET);
1836 devicep->run_func (devicep->target_id, fn_addr,
1837 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1838 args);
1839 if (tgt_vars)
1840 gomp_unmap_vars (tgt_vars, true);
bc7bff74 1841}
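
/* A sketch of the ARGS encoding described above, assuming the
   GOMP_TARGET_ARG_* macros from gomp-constants.h: small values are packed
   into a single element by shifting them up by GOMP_TARGET_ARG_VALUE_SHIFT
   and or-ing in the device selector and argument identifier.  This example
   requests NUM_TEAMS = 4 and THREAD_LIMIT = 32 for all devices.  */
#if 0
void *example_args[] = {
  (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			| GOMP_TARGET_ARG_NUM_TEAMS
			| (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
  (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			| GOMP_TARGET_ARG_THREAD_LIMIT
			| (32 << GOMP_TARGET_ARG_VALUE_SHIFT)),
  NULL
};
#endif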
1842
9561765e 1843/* Host fallback for GOMP_target_data{,_ext} routines. */
43895be5 1844
1845static void
1846gomp_target_data_fallback (void)
1847{
1848 struct gomp_task_icv *icv = gomp_icv (false);
1849 if (icv->target_data)
1850 {
 1851	  /* Even when doing a host fallback, if there are any active
 1852	     #pragma omp target data constructs, we need to remember the
 1853	     new #pragma omp target data, otherwise GOMP_target_end_data
 1854	     would get out of sync. */
1855 struct target_mem_desc *tgt
1856 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1857 GOMP_MAP_VARS_DATA);
1858 tgt->prev = icv->target_data;
1859 icv->target_data = tgt;
1860 }
1861}
1862
bc7bff74 1863void
dc19c8fd 1864GOMP_target_data (int device, const void *unused, size_t mapnum,
bc7bff74 1865 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1866{
995b27b9 1867 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1868
1869 if (devicep == NULL
56686608 1870 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1871 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
43895be5 1872 return gomp_target_data_fallback ();
ca4c3545 1873
995b27b9 1874 struct target_mem_desc *tgt
ca4c3545 1875 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
43895be5 1876 GOMP_MAP_VARS_DATA);
1877 struct gomp_task_icv *icv = gomp_icv (true);
1878 tgt->prev = icv->target_data;
1879 icv->target_data = tgt;
1880}
1881
1882void
9561765e 1883GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1884 size_t *sizes, unsigned short *kinds)
43895be5 1885{
1886 struct gomp_device_descr *devicep = resolve_device (device);
1887
1888 if (devicep == NULL
56686608 1889 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1890 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 1891 return gomp_target_data_fallback ();
1892
1893 struct target_mem_desc *tgt
1894 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1895 GOMP_MAP_VARS_DATA);
995b27b9 1896 struct gomp_task_icv *icv = gomp_icv (true);
1897 tgt->prev = icv->target_data;
1898 icv->target_data = tgt;
bc7bff74 1899}
1900
1901void
1902GOMP_target_end_data (void)
1903{
995b27b9 1904 struct gomp_task_icv *icv = gomp_icv (false);
1905 if (icv->target_data)
1906 {
1907 struct target_mem_desc *tgt = icv->target_data;
1908 icv->target_data = tgt->prev;
ca4c3545 1909 gomp_unmap_vars (tgt, true);
995b27b9 1910 }
bc7bff74 1911}
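
/* An illustrative, hand-written pairing for
   "#pragma omp target data map(tofrom: buf[0:n])": GOMP_target_data_ext
   maps the buffer on entry to the region and GOMP_target_end_data unmaps
   it (copying it back) on exit.  GOMP_MAP_TOFROM comes from
   gomp-constants.h; the wrapper itself is hypothetical.  */
#if 0
static void
example_data_region (int device, char *buf, size_t n)
{
  void *hostaddrs[1] = { buf };
  size_t sizes[1] = { n };
  unsigned short kinds[1] = { GOMP_MAP_TOFROM };
  GOMP_target_data_ext (device, 1, hostaddrs, sizes, kinds);
  /* ... body of the target data region ... */
  GOMP_target_end_data ();
}
#endif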
1912
1913void
dc19c8fd 1914GOMP_target_update (int device, const void *unused, size_t mapnum,
bc7bff74 1915 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1916{
995b27b9 1917 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1918
1919 if (devicep == NULL
56686608 1920 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1921 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
995b27b9 1922 return;
1923
43895be5 1924 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1925}
1926
1927void
9561765e 1928GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1929 size_t *sizes, unsigned short *kinds,
1930 unsigned int flags, void **depend)
43895be5 1931{
1932 struct gomp_device_descr *devicep = resolve_device (device);
1933
1934 /* If there are depend clauses, but nowait is not present,
1935 block the parent task until the dependencies are resolved
1936 and then just continue with the rest of the function as if it
1937 is a merged task. Until we are able to schedule task during
1938 variable mapping or unmapping, ignore nowait if depend clauses
1939 are not present. */
1940 if (depend != NULL)
1941 {
1942 struct gomp_thread *thr = gomp_thread ();
1943 if (thr->task && thr->task->depend_hash)
1944 {
1945 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1946 && thr->ts.team
1947 && !thr->task->final_task)
1948 {
a9833286 1949 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1950 mapnum, hostaddrs, sizes, kinds,
1951 flags | GOMP_TARGET_FLAG_UPDATE,
56686608 1952 depend, NULL, GOMP_TARGET_TASK_DATA))
a9833286 1953 return;
1954 }
1955 else
1956 {
1957 struct gomp_team *team = thr->ts.team;
1958 /* If parallel or taskgroup has been cancelled, don't start new
1959 tasks. */
7e5a76c8 1960 if (__builtin_expect (gomp_cancel_var, 0) && team)
1961 {
1962 if (gomp_team_barrier_cancelled (&team->barrier))
1963 return;
1964 if (thr->task->taskgroup)
1965 {
1966 if (thr->task->taskgroup->cancelled)
1967 return;
1968 if (thr->task->taskgroup->workshare
1969 && thr->task->taskgroup->prev
1970 && thr->task->taskgroup->prev->cancelled)
1971 return;
1972 }
1973 }
a9833286 1974
1975 gomp_task_maybe_wait_for_dependencies (depend);
43895be5 1976 }
43895be5 1977 }
1978 }
1979
1980 if (devicep == NULL
56686608 1981 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1982 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 1983 return;
1984
1985 struct gomp_thread *thr = gomp_thread ();
1986 struct gomp_team *team = thr->ts.team;
1987 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
7e5a76c8 1988 if (__builtin_expect (gomp_cancel_var, 0) && team)
1989 {
1990 if (gomp_team_barrier_cancelled (&team->barrier))
1991 return;
1992 if (thr->task->taskgroup)
1993 {
1994 if (thr->task->taskgroup->cancelled)
1995 return;
1996 if (thr->task->taskgroup->workshare
1997 && thr->task->taskgroup->prev
1998 && thr->task->taskgroup->prev->cancelled)
1999 return;
2000 }
2001 }
43895be5 2002
2003 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2004}
2005
2006static void
2007gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2008 void **hostaddrs, size_t *sizes, unsigned short *kinds)
2009{
2010 const int typemask = 0xff;
2011 size_t i;
ca4c3545 2012 gomp_mutex_lock (&devicep->lock);
f87b2900 2013 if (devicep->state == GOMP_DEVICE_FINALIZED)
2014 {
2015 gomp_mutex_unlock (&devicep->lock);
2016 return;
2017 }
2018
43895be5 2019 for (i = 0; i < mapnum; i++)
2020 {
2021 struct splay_tree_key_s cur_node;
2022 unsigned char kind = kinds[i] & typemask;
2023 switch (kind)
2024 {
2025 case GOMP_MAP_FROM:
2026 case GOMP_MAP_ALWAYS_FROM:
2027 case GOMP_MAP_DELETE:
2028 case GOMP_MAP_RELEASE:
2029 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2030 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2031 cur_node.host_start = (uintptr_t) hostaddrs[i];
2032 cur_node.host_end = cur_node.host_start + sizes[i];
2033 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2034 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
9561765e 2035 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
43895be5 2036 : splay_tree_lookup (&devicep->mem_map, &cur_node);
2037 if (!k)
2038 continue;
2039
2040 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2041 k->refcount--;
2042 if ((kind == GOMP_MAP_DELETE
2043 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2044 && k->refcount != REFCOUNT_INFINITY)
2045 k->refcount = 0;
2046
2047 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2048 || kind == GOMP_MAP_ALWAYS_FROM)
534b5e00 2049 gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
9b50ad1d 2050 (void *) (k->tgt->tgt_start + k->tgt_offset
2051 + cur_node.host_start
2052 - k->host_start),
2053 cur_node.host_end - cur_node.host_start);
43895be5 2054 if (k->refcount == 0)
2055 {
2056 splay_tree_remove (&devicep->mem_map, k);
c0998828 2057 if (k->link_key)
2058 splay_tree_insert (&devicep->mem_map,
2059 (splay_tree_node) k->link_key);
43895be5 2060 if (k->tgt->refcount > 1)
2061 k->tgt->refcount--;
2062 else
2063 gomp_unmap_tgt (k->tgt);
2064 }
2065
2066 break;
2067 default:
2068 gomp_mutex_unlock (&devicep->lock);
2069 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2070 kind);
2071 }
2072 }
2073
ca4c3545 2074 gomp_mutex_unlock (&devicep->lock);
43895be5 2075}
ca4c3545 2076
43895be5 2077void
2078GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2079 size_t *sizes, unsigned short *kinds,
2080 unsigned int flags, void **depend)
2081{
2082 struct gomp_device_descr *devicep = resolve_device (device);
2083
2084 /* If there are depend clauses, but nowait is not present,
2085 block the parent task until the dependencies are resolved
2086 and then just continue with the rest of the function as if it
2087 is a merged task. Until we are able to schedule task during
2088 variable mapping or unmapping, ignore nowait if depend clauses
2089 are not present. */
2090 if (depend != NULL)
2091 {
2092 struct gomp_thread *thr = gomp_thread ();
2093 if (thr->task && thr->task->depend_hash)
2094 {
2095 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2096 && thr->ts.team
2097 && !thr->task->final_task)
2098 {
a9833286 2099 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2100 mapnum, hostaddrs, sizes, kinds,
56686608 2101 flags, depend, NULL,
a9833286 2102 GOMP_TARGET_TASK_DATA))
2103 return;
2104 }
2105 else
2106 {
2107 struct gomp_team *team = thr->ts.team;
2108 /* If parallel or taskgroup has been cancelled, don't start new
2109 tasks. */
7e5a76c8 2110 if (__builtin_expect (gomp_cancel_var, 0) && team)
2111 {
2112 if (gomp_team_barrier_cancelled (&team->barrier))
2113 return;
2114 if (thr->task->taskgroup)
2115 {
2116 if (thr->task->taskgroup->cancelled)
2117 return;
2118 if (thr->task->taskgroup->workshare
2119 && thr->task->taskgroup->prev
2120 && thr->task->taskgroup->prev->cancelled)
2121 return;
2122 }
2123 }
a9833286 2124
2125 gomp_task_maybe_wait_for_dependencies (depend);
43895be5 2126 }
43895be5 2127 }
2128 }
2129
2130 if (devicep == NULL
56686608 2131 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2132 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2133 return;
2134
2135 struct gomp_thread *thr = gomp_thread ();
2136 struct gomp_team *team = thr->ts.team;
2137 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
7e5a76c8 2138 if (__builtin_expect (gomp_cancel_var, 0) && team)
2139 {
2140 if (gomp_team_barrier_cancelled (&team->barrier))
2141 return;
2142 if (thr->task->taskgroup)
2143 {
2144 if (thr->task->taskgroup->cancelled)
2145 return;
2146 if (thr->task->taskgroup->workshare
2147 && thr->task->taskgroup->prev
2148 && thr->task->taskgroup->prev->cancelled)
2149 return;
2150 }
2151 }
43895be5 2152
2153 size_t i;
2154 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2155 for (i = 0; i < mapnum; i++)
2156 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2157 {
2158 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2159 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2160 i += sizes[i];
2161 }
2162 else
2163 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2164 true, GOMP_MAP_VARS_ENTER_DATA);
2165 else
2166 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2167}
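
/* An illustrative call corresponding to
   "#pragma omp target exit data map(from: buf[0:n])": with
   GOMP_TARGET_FLAG_EXIT_DATA set, the request goes through gomp_exit_data,
   which drops the refcount and copies the data back once it reaches zero.
   The wrapper is hypothetical; GOMP_MAP_FROM is from gomp-constants.h.  */
#if 0
static void
example_exit_data (int device, char *buf, size_t n)
{
  void *hostaddrs[1] = { buf };
  size_t sizes[1] = { n };
  unsigned short kinds[1] = { GOMP_MAP_FROM };
  GOMP_target_enter_exit_data (device, 1, hostaddrs, sizes, kinds,
			       GOMP_TARGET_FLAG_EXIT_DATA, NULL);
}
#endif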
2168
a9833286 2169bool
43895be5 2170gomp_target_task_fn (void *data)
2171{
2172 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
a9833286 2173 struct gomp_device_descr *devicep = ttask->devicep;
2174
43895be5 2175 if (ttask->fn != NULL)
2176 {
f87b2900 2177 void *fn_addr;
a9833286 2178 if (devicep == NULL
f87b2900 2179 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 2180 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2181 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
a9833286 2182 {
2183 ttask->state = GOMP_TARGET_TASK_FALLBACK;
84217e9d 2184 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
a9833286 2185 return false;
2186 }
2187
2188 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2189 {
56686608 2190 if (ttask->tgt)
2191 gomp_unmap_vars (ttask->tgt, true);
a9833286 2192 return false;
2193 }
2194
56686608 2195 void *actual_arguments;
2196 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2197 {
2198 ttask->tgt = NULL;
56686608 2199 actual_arguments = ttask->hostaddrs;
2200 }
2201 else
2202 {
2203 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2204 NULL, ttask->sizes, ttask->kinds, true,
2205 GOMP_MAP_VARS_TARGET);
2206 actual_arguments = (void *) ttask->tgt->tgt_start;
2207 }
a9833286 2208 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2209
56686608 2210 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2211 ttask->args, (void *) ttask);
a9833286 2212 return true;
43895be5 2213 }
a9833286 2214 else if (devicep == NULL
56686608 2215 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
a9833286 2217 return false;
43895be5 2218
2219 size_t i;
2220 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
a9833286 2221 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
43895be5 2222 ttask->kinds, true);
2223 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2224 for (i = 0; i < ttask->mapnum; i++)
2225 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2226 {
a9833286 2227 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2228 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2229 GOMP_MAP_VARS_ENTER_DATA);
43895be5 2230 i += ttask->sizes[i];
2231 }
2232 else
a9833286 2233 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2234 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
43895be5 2235 else
a9833286 2236 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2237 ttask->kinds);
2238 return false;
bc7bff74 2239}
2240
2241void
2242GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2243{
2244 if (thread_limit)
2245 {
2246 struct gomp_task_icv *icv = gomp_icv (true);
2247 icv->thread_limit_var
2248 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2249 }
2250 (void) num_teams;
2251}
995b27b9 2252
43895be5 2253void *
2254omp_target_alloc (size_t size, int device_num)
2255{
2256 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2257 return malloc (size);
2258
2259 if (device_num < 0)
2260 return NULL;
2261
2262 struct gomp_device_descr *devicep = resolve_device (device_num);
2263 if (devicep == NULL)
2264 return NULL;
2265
56686608 2266 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2267 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2268 return malloc (size);
2269
2270 gomp_mutex_lock (&devicep->lock);
2271 void *ret = devicep->alloc_func (devicep->target_id, size);
2272 gomp_mutex_unlock (&devicep->lock);
2273 return ret;
2274}
2275
2276void
2277omp_target_free (void *device_ptr, int device_num)
2278{
2279 if (device_ptr == NULL)
2280 return;
2281
2282 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2283 {
2284 free (device_ptr);
2285 return;
2286 }
2287
2288 if (device_num < 0)
2289 return;
2290
2291 struct gomp_device_descr *devicep = resolve_device (device_num);
2292 if (devicep == NULL)
2293 return;
2294
56686608 2295 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2296 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2297 {
2298 free (device_ptr);
2299 return;
2300 }
2301
2302 gomp_mutex_lock (&devicep->lock);
9b50ad1d 2303 gomp_free_device_memory (devicep, device_ptr);
43895be5 2304 gomp_mutex_unlock (&devicep->lock);
2305}
2306
2307int
7e5a76c8 2308omp_target_is_present (const void *ptr, int device_num)
43895be5 2309{
2310 if (ptr == NULL)
2311 return 1;
2312
2313 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2314 return 1;
2315
2316 if (device_num < 0)
2317 return 0;
2318
2319 struct gomp_device_descr *devicep = resolve_device (device_num);
2320 if (devicep == NULL)
2321 return 0;
2322
56686608 2323 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2324 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2325 return 1;
2326
2327 gomp_mutex_lock (&devicep->lock);
2328 struct splay_tree_s *mem_map = &devicep->mem_map;
2329 struct splay_tree_key_s cur_node;
2330
2331 cur_node.host_start = (uintptr_t) ptr;
2332 cur_node.host_end = cur_node.host_start;
9561765e 2333 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
43895be5 2334 int ret = n != NULL;
2335 gomp_mutex_unlock (&devicep->lock);
2336 return ret;
2337}
2338
2339int
7e5a76c8 2340omp_target_memcpy (void *dst, const void *src, size_t length,
2341 size_t dst_offset, size_t src_offset, int dst_device_num,
2342 int src_device_num)
43895be5 2343{
2344 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
9b50ad1d 2345 bool ret;
43895be5 2346
2347 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2348 {
2349 if (dst_device_num < 0)
2350 return EINVAL;
2351
2352 dst_devicep = resolve_device (dst_device_num);
2353 if (dst_devicep == NULL)
2354 return EINVAL;
2355
56686608 2356 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2357 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2358 dst_devicep = NULL;
2359 }
2360 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2361 {
2362 if (src_device_num < 0)
2363 return EINVAL;
2364
2365 src_devicep = resolve_device (src_device_num);
2366 if (src_devicep == NULL)
2367 return EINVAL;
2368
56686608 2369 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2370 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2371 src_devicep = NULL;
2372 }
2373 if (src_devicep == NULL && dst_devicep == NULL)
2374 {
2375 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2376 return 0;
2377 }
2378 if (src_devicep == NULL)
2379 {
2380 gomp_mutex_lock (&dst_devicep->lock);
9b50ad1d 2381 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2382 (char *) dst + dst_offset,
2383 (char *) src + src_offset, length);
43895be5 2384 gomp_mutex_unlock (&dst_devicep->lock);
9b50ad1d 2385 return (ret ? 0 : EINVAL);
43895be5 2386 }
2387 if (dst_devicep == NULL)
2388 {
2389 gomp_mutex_lock (&src_devicep->lock);
9b50ad1d 2390 ret = src_devicep->dev2host_func (src_devicep->target_id,
2391 (char *) dst + dst_offset,
2392 (char *) src + src_offset, length);
43895be5 2393 gomp_mutex_unlock (&src_devicep->lock);
9b50ad1d 2394 return (ret ? 0 : EINVAL);
43895be5 2395 }
2396 if (src_devicep == dst_devicep)
2397 {
2398 gomp_mutex_lock (&src_devicep->lock);
9b50ad1d 2399 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2400 (char *) dst + dst_offset,
2401 (char *) src + src_offset, length);
43895be5 2402 gomp_mutex_unlock (&src_devicep->lock);
9b50ad1d 2403 return (ret ? 0 : EINVAL);
43895be5 2404 }
2405 return EINVAL;
2406}
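
/* A minimal usage sketch (user code, not part of libgomp) combining the
   routines above: allocate device memory, copy host data to it and back,
   then free it.  omp_get_initial_device () names the host.  */
#if 0
#include <omp.h>

static int
example_roundtrip (int dev)
{
  char host_buf[64] = "payload";
  int host_dev = omp_get_initial_device ();
  void *dev_buf = omp_target_alloc (sizeof host_buf, dev);
  if (dev_buf == NULL)
    return -1;
  if (omp_target_memcpy (dev_buf, host_buf, sizeof host_buf, 0, 0,
			 dev, host_dev) != 0
      || omp_target_memcpy (host_buf, dev_buf, sizeof host_buf, 0, 0,
			    host_dev, dev) != 0)
    {
      omp_target_free (dev_buf, dev);
      return -1;
    }
  omp_target_free (dev_buf, dev);
  return 0;
}
#endif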
2407
2408static int
7e5a76c8 2409omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
43895be5 2410 int num_dims, const size_t *volume,
2411 const size_t *dst_offsets,
2412 const size_t *src_offsets,
2413 const size_t *dst_dimensions,
2414 const size_t *src_dimensions,
2415 struct gomp_device_descr *dst_devicep,
2416 struct gomp_device_descr *src_devicep)
2417{
2418 size_t dst_slice = element_size;
2419 size_t src_slice = element_size;
2420 size_t j, dst_off, src_off, length;
2421 int i, ret;
2422
2423 if (num_dims == 1)
2424 {
2425 if (__builtin_mul_overflow (element_size, volume[0], &length)
2426 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2427 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2428 return EINVAL;
2429 if (dst_devicep == NULL && src_devicep == NULL)
9b50ad1d 2430 {
7e5a76c8 2431 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2432 length);
9b50ad1d 2433 ret = 1;
2434 }
43895be5 2435 else if (src_devicep == NULL)
9b50ad1d 2436 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2437 (char *) dst + dst_off,
7e5a76c8 2438 (const char *) src + src_off,
2439 length);
43895be5 2440 else if (dst_devicep == NULL)
9b50ad1d 2441 ret = src_devicep->dev2host_func (src_devicep->target_id,
2442 (char *) dst + dst_off,
7e5a76c8 2443 (const char *) src + src_off,
2444 length);
43895be5 2445 else if (src_devicep == dst_devicep)
9b50ad1d 2446 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2447 (char *) dst + dst_off,
7e5a76c8 2448 (const char *) src + src_off,
2449 length);
43895be5 2450 else
9b50ad1d 2451 ret = 0;
2452 return ret ? 0 : EINVAL;
43895be5 2453 }
2454
 2455  /* FIXME: it would be nice to have some plugin function to handle
 2456     num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
 2457     counts can be handled by the generic recursion below; for host-host
 2458     copies the recursion should be used even for num_dims >= 2. */
2459
2460 for (i = 1; i < num_dims; i++)
2461 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2462 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2463 return EINVAL;
2464 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2465 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2466 return EINVAL;
2467 for (j = 0; j < volume[0]; j++)
2468 {
2469 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
7e5a76c8 2470 (const char *) src + src_off,
43895be5 2471 element_size, num_dims - 1,
2472 volume + 1, dst_offsets + 1,
2473 src_offsets + 1, dst_dimensions + 1,
2474 src_dimensions + 1, dst_devicep,
2475 src_devicep);
2476 if (ret)
2477 return ret;
2478 dst_off += dst_slice;
2479 src_off += src_slice;
2480 }
2481 return 0;
2482}
2483
2484int
7e5a76c8 2485omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
43895be5 2486 int num_dims, const size_t *volume,
2487 const size_t *dst_offsets,
2488 const size_t *src_offsets,
2489 const size_t *dst_dimensions,
2490 const size_t *src_dimensions,
2491 int dst_device_num, int src_device_num)
2492{
2493 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2494
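  /* Per the OpenMP spec, a call with both pointers NULL queries the
     maximum number of dimensions supported; this implementation imposes
     no practical limit.  */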
2495 if (!dst && !src)
2496 return INT_MAX;
2497
2498 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2499 {
2500 if (dst_device_num < 0)
2501 return EINVAL;
2502
2503 dst_devicep = resolve_device (dst_device_num);
2504 if (dst_devicep == NULL)
2505 return EINVAL;
2506
56686608 2507 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2508 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2509 dst_devicep = NULL;
2510 }
2511 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2512 {
2513 if (src_device_num < 0)
2514 return EINVAL;
2515
2516 src_devicep = resolve_device (src_device_num);
2517 if (src_devicep == NULL)
2518 return EINVAL;
2519
56686608 2520 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2521 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2522 src_devicep = NULL;
2523 }
2524
2525 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2526 return EINVAL;
2527
2528 if (src_devicep)
2529 gomp_mutex_lock (&src_devicep->lock);
2530 else if (dst_devicep)
2531 gomp_mutex_lock (&dst_devicep->lock);
2532 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2533 volume, dst_offsets, src_offsets,
2534 dst_dimensions, src_dimensions,
2535 dst_devicep, src_devicep);
2536 if (src_devicep)
2537 gomp_mutex_unlock (&src_devicep->lock);
2538 else if (dst_devicep)
2539 gomp_mutex_unlock (&dst_devicep->lock);
2540 return ret;
2541}
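
/* A host-to-host usage sketch for omp_target_memcpy_rect: copy a 2x3
   sub-rectangle out of a 4x5 matrix, starting at row 1, column 2 of the
   source, into the top-left corner of the destination.  */
#if 0
#include <omp.h>

static int
example_rect_copy (int src_arr[4][5], int dst_arr[2][3])
{
  size_t volume[2] = { 2, 3 };
  size_t dst_offsets[2] = { 0, 0 };
  size_t src_offsets[2] = { 1, 2 };
  size_t dst_dims[2] = { 2, 3 };
  size_t src_dims[2] = { 4, 5 };
  int host_dev = omp_get_initial_device ();
  return omp_target_memcpy_rect (dst_arr, src_arr, sizeof (int), 2, volume,
				 dst_offsets, src_offsets, dst_dims,
				 src_dims, host_dev, host_dev);
}
#endif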
2542
2543int
7e5a76c8 2544omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2545 size_t size, size_t device_offset, int device_num)
43895be5 2546{
2547 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2548 return EINVAL;
2549
2550 if (device_num < 0)
2551 return EINVAL;
2552
2553 struct gomp_device_descr *devicep = resolve_device (device_num);
2554 if (devicep == NULL)
2555 return EINVAL;
2556
56686608 2557 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2558 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2559 return EINVAL;
2560
2561 gomp_mutex_lock (&devicep->lock);
2562
2563 struct splay_tree_s *mem_map = &devicep->mem_map;
2564 struct splay_tree_key_s cur_node;
2565 int ret = EINVAL;
2566
2567 cur_node.host_start = (uintptr_t) host_ptr;
2568 cur_node.host_end = cur_node.host_start + size;
2569 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2570 if (n)
2571 {
2572 if (n->tgt->tgt_start + n->tgt_offset
2573 == (uintptr_t) device_ptr + device_offset
2574 && n->host_start <= cur_node.host_start
2575 && n->host_end >= cur_node.host_end)
2576 ret = 0;
2577 }
2578 else
2579 {
2580 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2581 tgt->array = gomp_malloc (sizeof (*tgt->array));
2582 tgt->refcount = 1;
2583 tgt->tgt_start = 0;
2584 tgt->tgt_end = 0;
2585 tgt->to_free = NULL;
2586 tgt->prev = NULL;
2587 tgt->list_count = 0;
2588 tgt->device_descr = devicep;
2589 splay_tree_node array = tgt->array;
2590 splay_tree_key k = &array->key;
2591 k->host_start = cur_node.host_start;
2592 k->host_end = cur_node.host_end;
2593 k->tgt = tgt;
2594 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2595 k->refcount = REFCOUNT_INFINITY;
43895be5 2596 array->left = NULL;
2597 array->right = NULL;
2598 splay_tree_insert (&devicep->mem_map, array);
2599 ret = 0;
2600 }
2601 gomp_mutex_unlock (&devicep->lock);
2602 return ret;
2603}
2604
2605int
7e5a76c8 2606omp_target_disassociate_ptr (const void *ptr, int device_num)
43895be5 2607{
2608 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2609 return EINVAL;
2610
2611 if (device_num < 0)
2612 return EINVAL;
2613
2614 struct gomp_device_descr *devicep = resolve_device (device_num);
2615 if (devicep == NULL)
2616 return EINVAL;
2617
2618 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2619 return EINVAL;
2620
2621 gomp_mutex_lock (&devicep->lock);
2622
2623 struct splay_tree_s *mem_map = &devicep->mem_map;
2624 struct splay_tree_key_s cur_node;
2625 int ret = EINVAL;
2626
2627 cur_node.host_start = (uintptr_t) ptr;
2628 cur_node.host_end = cur_node.host_start;
2629 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2630 if (n
2631 && n->host_start == cur_node.host_start
2632 && n->refcount == REFCOUNT_INFINITY
2633 && n->tgt->tgt_start == 0
2634 && n->tgt->to_free == NULL
2635 && n->tgt->refcount == 1
2636 && n->tgt->list_count == 0)
2637 {
2638 splay_tree_remove (&devicep->mem_map, n);
2639 gomp_unmap_tgt (n->tgt);
2640 ret = 0;
2641 }
2642
2643 gomp_mutex_unlock (&devicep->lock);
2644 return ret;
2645}
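
/* A usage sketch for the association routines above: bind a buffer
   obtained from omp_target_alloc to a host object, so that mapping
   constructs reuse it instead of allocating, then sever the binding
   before freeing.  */
#if 0
#include <omp.h>

static int
example_associate (int dev)
{
  static double host_buf[256];
  void *dev_buf = omp_target_alloc (sizeof host_buf, dev);
  if (dev_buf == NULL)
    return -1;
  if (omp_target_associate_ptr (host_buf, dev_buf, sizeof host_buf,
				0 /* device_offset */, dev) != 0)
    {
      omp_target_free (dev_buf, dev);
      return -1;
    }
  /* ... use host_buf in target regions executing on DEV ... */
  omp_target_disassociate_ptr (host_buf, dev);
  omp_target_free (dev_buf, dev);
  return 0;
}
#endif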
2646
7e5a76c8 2647int
2648omp_pause_resource (omp_pause_resource_t kind, int device_num)
2649{
2650 (void) kind;
2651 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2652 return gomp_pause_host ();
2653 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2654 return -1;
2655 /* Do nothing for target devices for now. */
2656 return 0;
2657}
2658
2659int
2660omp_pause_resource_all (omp_pause_resource_t kind)
2661{
2662 (void) kind;
2663 if (gomp_pause_host ())
2664 return -1;
2665 /* Do nothing for target devices for now. */
2666 return 0;
2667}
2668
2669ialias (omp_pause_resource)
2670ialias (omp_pause_resource_all)
2671
995b27b9 2672#ifdef PLUGIN_SUPPORT
2673
 2674/* This function tries to load a plugin for DEVICE.  The name of the
 2675   plugin is passed in PLUGIN_NAME.
 2676   The handles of the found functions are stored in the corresponding fields
 2677   of DEVICE.  The function returns TRUE on success and FALSE otherwise. */
2678
2679static bool
2680gomp_load_plugin_for_device (struct gomp_device_descr *device,
2681 const char *plugin_name)
2682{
c6aa02c6 2683 const char *err = NULL, *last_missing = NULL;
ca4c3545 2684
995b27b9 2685 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2686 if (!plugin_handle)
d3d8e632 2687 goto dl_fail;
995b27b9 2688
2689 /* Check if all required functions are available in the plugin and store
d3d8e632 2690   their handles.  None of the symbols can legitimately be NULL,
2691 so we don't need to check dlerror all the time. */
ca4c3545 2692#define DLSYM(f) \
d3d8e632 2693 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2694 goto dl_fail
 2695  /* Similar, but missing functions are not an error; the macro evaluates
 2696     to false if the function is missing, true otherwise. */
2697#define DLSYM_OPT(f, n) \
2698 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2699 || (last_missing = #n, 0))
2700
2701 DLSYM (version);
2702 if (device->version_func () != GOMP_VERSION)
2703 {
2704 err = "plugin version mismatch";
2705 goto fail;
2706 }
ca4c3545 2707
2708 DLSYM (get_name);
2709 DLSYM (get_caps);
995b27b9 2710 DLSYM (get_type);
2711 DLSYM (get_num_devices);
995b27b9 2712 DLSYM (init_device);
ca4c3545 2713 DLSYM (fini_device);
0d8c703d 2714 DLSYM (load_image);
2715 DLSYM (unload_image);
995b27b9 2716 DLSYM (alloc);
2717 DLSYM (free);
2718 DLSYM (dev2host);
2719 DLSYM (host2dev);
ca4c3545 2720 device->capabilities = device->get_caps_func ();
2721 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
43895be5 2722 {
2723 DLSYM (run);
a9833286 2724 DLSYM (async_run);
56686608 2725 DLSYM_OPT (can_run, can_run);
43895be5 2726 DLSYM (dev2dev);
2727 }
ca4c3545 2728 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2729 {
65caa53b 2730 if (!DLSYM_OPT (openacc.exec, openacc_exec)
d3d8e632 2731 || !DLSYM_OPT (openacc.create_thread_data,
2732 openacc_create_thread_data)
2733 || !DLSYM_OPT (openacc.destroy_thread_data,
534b5e00 2734 openacc_destroy_thread_data)
2735 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
2736 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
2737 || !DLSYM_OPT (openacc.async.test, openacc_async_test)
2738 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
2739 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
2740 || !DLSYM_OPT (openacc.async.queue_callback,
2741 openacc_async_queue_callback)
2742 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
2743 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
2744 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
ca4c3545 2745 {
d3d8e632 2746 /* Require all the OpenACC handlers if we have
2747 GOMP_OFFLOAD_CAP_OPENACC_200. */
ca4c3545 2748 err = "plugin missing OpenACC handler function";
d3d8e632 2749 goto fail;
ca4c3545 2750 }
d3d8e632 2751
2752 unsigned cuda = 0;
2753 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
65caa53b 2754 openacc_cuda_get_current_device);
d3d8e632 2755 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
65caa53b 2756 openacc_cuda_get_current_context);
2757 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2758 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
d3d8e632 2759 if (cuda && cuda != 4)
ca4c3545 2760 {
d3d8e632 2761 /* Make sure all the CUDA functions are there if any of them are. */
ca4c3545 2762 err = "plugin missing OpenACC CUDA handler function";
d3d8e632 2763 goto fail;
ca4c3545 2764 }
2765 }
995b27b9 2766#undef DLSYM
ca4c3545 2767#undef DLSYM_OPT
995b27b9 2768
d3d8e632 2769 return 1;
2770
2771 dl_fail:
2772 err = dlerror ();
2773 fail:
2774 gomp_error ("while loading %s: %s", plugin_name, err);
2775 if (last_missing)
2776 gomp_error ("missing function was %s", last_missing);
2777 if (plugin_handle)
2778 dlclose (plugin_handle);
2779
2780 return 0;
995b27b9 2781}
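
/* A skeletal sketch of what a plugin exports; the DLSYM/DLSYM_OPT lists
   above are authoritative, and the signatures here follow libgomp-plugin.h.
   Only the query hooks are spelled out; a real plugin must also export
   init_device, fini_device, load_image, unload_image, alloc, free,
   dev2host, host2dev and, for GOMP_OFFLOAD_CAP_OPENMP_400, run, async_run
   and dev2dev.  */
#if 0
unsigned int
GOMP_OFFLOAD_version (void)
{
  return GOMP_VERSION;	/* Must match the libgomp loading the plugin.  */
}

const char *
GOMP_OFFLOAD_get_name (void)
{
  return "example";
}

unsigned int
GOMP_OFFLOAD_get_caps (void)
{
  return GOMP_OFFLOAD_CAP_OPENMP_400;
}

int
GOMP_OFFLOAD_get_type (void)
{
  return GOMP_DEVICE_NOT_HOST;
}

int
GOMP_OFFLOAD_get_num_devices (void)
{
  return 0;	/* Advertising no devices makes libgomp skip the plugin.  */
}
#endif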
2782
f87b2900 2783/* This function finalizes all initialized devices. */
2784
2785static void
2786gomp_target_fini (void)
2787{
2788 int i;
2789 for (i = 0; i < num_devices; i++)
2790 {
9b50ad1d 2791 bool ret = true;
f87b2900 2792 struct gomp_device_descr *devicep = &devices[i];
2793 gomp_mutex_lock (&devicep->lock);
2794 if (devicep->state == GOMP_DEVICE_INITIALIZED)
534b5e00 2795 ret = gomp_fini_device (devicep);
f87b2900 2796 gomp_mutex_unlock (&devicep->lock);
9b50ad1d 2797 if (!ret)
2798 gomp_fatal ("device finalization failed");
f87b2900 2799 }
2800}
2801
3a590203 2802/* This function initializes the runtime for offloading.
 2803   It parses the list of offload plugins and tries to load them.
 2804   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
ca4c3545 2805   will be set, and the array DEVICES initialized, containing descriptors
 2806   for the corresponding devices: first the GOMP_OFFLOAD_CAP_OPENMP_400
 2807   ones, followed by the others. */
995b27b9 2808
2809static void
2810gomp_target_init (void)
2811{
 2812  const char *prefix = "libgomp-plugin-";
4fda895a 2813 const char *suffix = SONAME_SUFFIX (1);
995b27b9 2814 const char *cur, *next;
2815 char *plugin_name;
2816 int i, new_num_devices;
2817
2818 num_devices = 0;
2819 devices = NULL;
2820
3a590203 2821 cur = OFFLOAD_PLUGINS;
995b27b9 2822 if (*cur)
2823 do
2824 {
2825 struct gomp_device_descr current_device;
02b7bb30 2826 size_t prefix_len, suffix_len, cur_len;
995b27b9 2827
2828 next = strchr (cur, ',');
2829
02b7bb30 2830 prefix_len = strlen (prefix);
2831 cur_len = next ? next - cur : strlen (cur);
2832 suffix_len = strlen (suffix);
2833
2834 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
995b27b9 2835 if (!plugin_name)
2836 {
2837 num_devices = 0;
2838 break;
2839 }
2840
02b7bb30 2841 memcpy (plugin_name, prefix, prefix_len);
2842 memcpy (plugin_name + prefix_len, cur, cur_len);
2843 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
995b27b9 2844
2845 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2846 {
2847 new_num_devices = current_device.get_num_devices_func ();
2848 if (new_num_devices >= 1)
2849 {
ca4c3545 2850 /* Augment DEVICES and NUM_DEVICES. */
2851
995b27b9 2852 devices = realloc (devices, (num_devices + new_num_devices)
2853 * sizeof (struct gomp_device_descr));
2854 if (!devices)
2855 {
2856 num_devices = 0;
2857 free (plugin_name);
2858 break;
2859 }
2860
ca4c3545 2861 current_device.name = current_device.get_name_func ();
2862 /* current_device.capabilities has already been set. */
995b27b9 2863 current_device.type = current_device.get_type_func ();
0d8c703d 2864 current_device.mem_map.root = NULL;
f87b2900 2865 current_device.state = GOMP_DEVICE_UNINITIALIZED;
ca4c3545 2866 current_device.openacc.data_environ = NULL;
995b27b9 2867 for (i = 0; i < new_num_devices; i++)
2868 {
995b27b9 2869 current_device.target_id = i;
2870 devices[num_devices] = current_device;
ca4c3545 2871 gomp_mutex_init (&devices[num_devices].lock);
995b27b9 2872 num_devices++;
2873 }
2874 }
2875 }
2876
2877 free (plugin_name);
2878 cur = next + 1;
2879 }
2880 while (next);
2881
ca4c3545 2882 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2883 NUM_DEVICES_OPENMP. */
2884 struct gomp_device_descr *devices_s
2885 = malloc (num_devices * sizeof (struct gomp_device_descr));
2886 if (!devices_s)
2887 {
2888 num_devices = 0;
2889 free (devices);
2890 devices = NULL;
2891 }
2892 num_devices_openmp = 0;
2893 for (i = 0; i < num_devices; i++)
2894 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2895 devices_s[num_devices_openmp++] = devices[i];
2896 int num_devices_after_openmp = num_devices_openmp;
2897 for (i = 0; i < num_devices; i++)
2898 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2899 devices_s[num_devices_after_openmp++] = devices[i];
2900 free (devices);
2901 devices = devices_s;
2902
2903 for (i = 0; i < num_devices; i++)
2904 {
ca4c3545 2905 /* The 'devices' array can be moved (by the realloc call) until we have
2906 found all the plugins, so registering with the OpenACC runtime (which
2907 takes a copy of the pointer argument) must be delayed until now. */
2908 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2909 goacc_register (&devices[i]);
2910 }
f87b2900 2911
2912 if (atexit (gomp_target_fini) != 0)
2913 gomp_fatal ("atexit failed");
995b27b9 2914}
2915
2916#else /* PLUGIN_SUPPORT */
 2917/* If dlfcn.h is unavailable we always fall back to host execution.
 2918   The GOMP_target* routines are just stubs in this case. */
2919static void
2920gomp_target_init (void)
2921{
2922}
2923#endif /* PLUGIN_SUPPORT */