8e8f6434 1/* Copyright (C) 2013-2018 Free Software Foundation, Inc.
bc7bff74 2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
c35c9a62 4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
bc7bff74 6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
995b27b9 26/* This file contains the support for offloading. */
bc7bff74 27
995b27b9 28#include "config.h"
bc7bff74 29#include "libgomp.h"
ca4c3545 30#include "oacc-plugin.h"
31#include "oacc-int.h"
32#include "gomp-constants.h"
bc7bff74 33#include <limits.h>
34#include <stdbool.h>
35#include <stdlib.h>
2634aed9 36#ifdef HAVE_INTTYPES_H
37# include <inttypes.h> /* For PRIu64. */
38#endif
bc7bff74 39#include <string.h>
ca4c3545 40#include <assert.h>
43895be5 41#include <errno.h>
bc7bff74 42
995b27b9 43#ifdef PLUGIN_SUPPORT
44#include <dlfcn.h>
4fda895a 45#include "plugin-suffix.h"
995b27b9 46#endif
47
48static void gomp_target_init (void);
49
ca4c3545 50/* The whole initialization code for offloading plugins is only run once. */
995b27b9 51static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52
0d8c703d 53/* Mutex for offload image registration. */
54static gomp_mutex_t register_lock;
55
995b27b9 56/* This structure describes an offload image.
 57 It contains the type of the target device, a pointer to the host table
 58 descriptor, and a pointer to the target data. */
59struct offload_image_descr {
d3d8e632 60 unsigned version;
995b27b9 61 enum offload_target_type type;
4e985e0f 62 const void *host_table;
70046055 63 const void *target_data;
995b27b9 64};
65
66/* Array of descriptors of offload images. */
67static struct offload_image_descr *offload_images;
68
69/* Total number of offload images. */
70static int num_offload_images;
71
72/* Array of descriptors for all available devices. */
73static struct gomp_device_descr *devices;
74
75/* Total number of available devices. */
76static int num_devices;
77
ca4c3545 78/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79static int num_devices_openmp;
80
0d8c703d 81/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82
83static void *
84gomp_realloc_unlock (void *old, size_t size)
85{
86 void *ret = realloc (old, size);
87 if (ret == NULL)
88 {
89 gomp_mutex_unlock (&register_lock);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91 }
92 return ret;
93}
94
ca4c3545 95attribute_hidden void
96gomp_init_targets_once (void)
995b27b9 97{
ca4c3545 98 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
99}
995b27b9 100
bc7bff74 101attribute_hidden int
102gomp_get_num_devices (void)
103{
ca4c3545 104 gomp_init_targets_once ();
105 return num_devices_openmp;
995b27b9 106}
107
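/* Resolve DEVICE_ID (or the default-device ICV when GOMP_DEVICE_ICV is
   passed) to a device descriptor, initializing the device on first use.
   Returns NULL if the ID is out of range or the device has already been
   finalized, in which case callers fall back to the host.  */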
108static struct gomp_device_descr *
109resolve_device (int device_id)
110{
ca4c3545 111 if (device_id == GOMP_DEVICE_ICV)
995b27b9 112 {
113 struct gomp_task_icv *icv = gomp_icv (false);
114 device_id = icv->default_device_var;
115 }
116
117 if (device_id < 0 || device_id >= gomp_get_num_devices ())
118 return NULL;
119
43895be5 120 gomp_mutex_lock (&devices[device_id].lock);
f87b2900 121 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
43895be5 122 gomp_init_device (&devices[device_id]);
f87b2900 123 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124 {
125 gomp_mutex_unlock (&devices[device_id].lock);
126 return NULL;
127 }
43895be5 128 gomp_mutex_unlock (&devices[device_id].lock);
129
995b27b9 130 return &devices[device_id];
131}
132
133
43895be5 134static inline splay_tree_key
135gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
136{
137 if (key->host_start != key->host_end)
138 return splay_tree_lookup (mem_map, key);
139
140 key->host_end++;
141 splay_tree_key n = splay_tree_lookup (mem_map, key);
142 key->host_end--;
143 if (n)
144 return n;
145 key->host_start--;
146 n = splay_tree_lookup (mem_map, key);
147 key->host_start++;
148 if (n)
149 return n;
150 return splay_tree_lookup (mem_map, key);
151}
152
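/* Like gomp_map_lookup, but for zero-length keys only try the one-byte
   extension past the end, never before the start.  */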
9561765e 153static inline splay_tree_key
154gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155{
156 if (key->host_start != key->host_end)
157 return splay_tree_lookup (mem_map, key);
158
159 key->host_end++;
160 splay_tree_key n = splay_tree_lookup (mem_map, key);
161 key->host_end--;
162 return n;
163}
164
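/* Copy SIZE bytes between host and device through the plugin callback
   COPY_FUNC; DST and SRC merely label the destination and source ("host"
   or "dev") in the error message.  On failure, release the device lock
   and abort.  */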
9b50ad1d 165static inline void
166gomp_device_copy (struct gomp_device_descr *devicep,
167 bool (*copy_func) (int, void *, const void *, size_t),
168 const char *dst, void *dstaddr,
169 const char *src, const void *srcaddr,
170 size_t size)
171{
172 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173 {
174 gomp_mutex_unlock (&devicep->lock);
175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177 }
178}
179
5af416bd 180/* Infrastructure for coalescing host-to-device memory transfers that are
 181 adjacent or nearly adjacent in device address space. */
182
183struct gomp_coalesce_buf
184{
185 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
186 it will be copied to the device. */
187 void *buf;
188 struct target_mem_desc *tgt;
 189 /* Array of offsets: chunks[2 * i] is the starting offset and
 190 chunks[2 * i + 1] the ending offset, relative to the tgt->tgt_start
 191 device address, of chunks to be copied to buf and later to the device. */
192 size_t *chunks;
193 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
194 be performed. */
195 long chunk_cnt;
196 /* During construction of chunks array, how many memory regions are within
197 the last chunk. If there is just one memory region for a chunk, we copy
198 it directly to device rather than going through buf. */
199 long use_cnt;
200};
201
202/* Maximum size of memory region considered for coalescing. Larger copies
203 are performed directly. */
204#define MAX_COALESCE_BUF_SIZE (32 * 1024)
205
 206/* Maximum size of a gap between regions for them to still be copied
 207 within the same chunk. All the device offsets considered are within
208 newly allocated device memory, so it isn't fatal if we copy some padding
209 in between from host to device. The gaps come either from alignment
210 padding or from memory regions which are not supposed to be copied from
211 host to device (e.g. map(alloc:), map(from:) etc.). */
212#define MAX_COALESCE_BUF_GAP (4 * 1024)
213
 214/* Add a region with tgt_start-relative device offset START and length LEN
 to CBUF. */
215
216static inline void
217gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
218{
219 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
220 return;
221 if (cbuf->chunk_cnt)
222 {
223 if (cbuf->chunk_cnt < 0)
224 return;
225 if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
226 {
227 cbuf->chunk_cnt = -1;
228 return;
229 }
230 if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
231 {
232 cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
233 cbuf->use_cnt++;
234 return;
235 }
236 /* If the last chunk is only used by one mapping, discard it,
237 as it will be one host to device copy anyway and
238 memcpying it around will only waste cycles. */
239 if (cbuf->use_cnt == 1)
240 cbuf->chunk_cnt--;
241 }
242 cbuf->chunks[2 * cbuf->chunk_cnt] = start;
243 cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
244 cbuf->chunk_cnt++;
245 cbuf->use_cnt = 1;
246}
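/* Illustrative sketch (editor's addition, not part of libgomp): how the
   bookkeeping above coalesces two regions separated by a small gap.  The
   offsets are hypothetical tgt_start-relative values.  */
#if 0
static void
example_coalesce_two_regions (void)
{
  size_t chunks[4];
  struct gomp_coalesce_buf cbuf;
  cbuf.chunks = chunks;
  cbuf.chunk_cnt = 0;
  cbuf.use_cnt = 0;

  /* First region, offsets [0, 256): starts a new chunk.  */
  gomp_coalesce_buf_add (&cbuf, 0, 256);
  /* Second region, offsets [512, 768): the 256-byte gap is below
     MAX_COALESCE_BUF_GAP, so the existing chunk is extended to [0, 768)
     and use_cnt becomes 2; both regions will later reach the device in
     a single host-to-device transfer.  */
  gomp_coalesce_buf_add (&cbuf, 512, 256);
}
#endif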
247
248/* Return true for mapping kinds which need to copy data from the
249 host to device for regions that weren't previously mapped. */
250
251static inline bool
252gomp_to_device_kind_p (int kind)
253{
254 switch (kind)
255 {
256 case GOMP_MAP_ALLOC:
257 case GOMP_MAP_FROM:
258 case GOMP_MAP_FORCE_ALLOC:
259 case GOMP_MAP_ALWAYS_FROM:
260 return false;
261 default:
262 return true;
263 }
264}
265
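/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls inside one of its recorded chunks (found by binary
   search), stage the data into the coalescing buffer instead; it will be
   flushed to the device in one larger transfer later.  */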
9b50ad1d 266static void
267gomp_copy_host2dev (struct gomp_device_descr *devicep,
5af416bd 268 void *d, const void *h, size_t sz,
269 struct gomp_coalesce_buf *cbuf)
9b50ad1d 270{
5af416bd 271 if (cbuf)
272 {
273 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
274 if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
275 {
276 long first = 0;
277 long last = cbuf->chunk_cnt - 1;
278 while (first <= last)
279 {
280 long middle = (first + last) >> 1;
281 if (cbuf->chunks[2 * middle + 1] <= doff)
282 first = middle + 1;
283 else if (cbuf->chunks[2 * middle] <= doff)
284 {
285 if (doff + sz > cbuf->chunks[2 * middle + 1])
286 gomp_fatal ("internal libgomp cbuf error");
287 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
288 h, sz);
289 return;
290 }
291 else
292 last = middle - 1;
293 }
294 }
295 }
9b50ad1d 296 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
297}
298
299static void
300gomp_copy_dev2host (struct gomp_device_descr *devicep,
301 void *h, const void *d, size_t sz)
302{
303 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
304}
305
306static void
307gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
308{
309 if (!devicep->free_func (devicep->target_id, devptr))
310 {
311 gomp_mutex_unlock (&devicep->lock);
312 gomp_fatal ("error in freeing device memory block at %p", devptr);
313 }
314}
315
9561765e 316/* Handle the case where gomp_map_lookup, splay_tree_lookup or
317 gomp_map_0len_lookup found oldn for newn.
995b27b9 318 Helper function of gomp_map_vars. */
319
320static inline void
0d8c703d 321gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
43895be5 322 splay_tree_key newn, struct target_var_desc *tgt_var,
5af416bd 323 unsigned char kind, struct gomp_coalesce_buf *cbuf)
995b27b9 324{
43895be5 325 tgt_var->key = oldn;
326 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
327 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
328 tgt_var->offset = newn->host_start - oldn->host_start;
329 tgt_var->length = newn->host_end - newn->host_start;
330
ca4c3545 331 if ((kind & GOMP_MAP_FLAG_FORCE)
332 || oldn->host_start > newn->host_start
995b27b9 333 || oldn->host_end < newn->host_end)
0d8c703d 334 {
335 gomp_mutex_unlock (&devicep->lock);
336 gomp_fatal ("Trying to map into device [%p..%p) object when "
337 "[%p..%p) is already mapped",
338 (void *) newn->host_start, (void *) newn->host_end,
339 (void *) oldn->host_start, (void *) oldn->host_end);
340 }
43895be5 341
342 if (GOMP_MAP_ALWAYS_TO_P (kind))
9b50ad1d 343 gomp_copy_host2dev (devicep,
344 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
345 + newn->host_start - oldn->host_start),
346 (void *) newn->host_start,
5af416bd 347 newn->host_end - newn->host_start, cbuf);
9b50ad1d 348
43895be5 349 if (oldn->refcount != REFCOUNT_INFINITY)
350 oldn->refcount++;
995b27b9 351}
352
ca4c3545 353static int
43895be5 354get_kind (bool short_mapkind, void *kinds, int idx)
ca4c3545 355{
43895be5 356 return short_mapkind ? ((unsigned short *) kinds)[idx]
357 : ((unsigned char *) kinds)[idx];
ca4c3545 358}
359
84f53106 360static void
361gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
5af416bd 362 uintptr_t target_offset, uintptr_t bias,
363 struct gomp_coalesce_buf *cbuf)
84f53106 364{
365 struct gomp_device_descr *devicep = tgt->device_descr;
366 struct splay_tree_s *mem_map = &devicep->mem_map;
367 struct splay_tree_key_s cur_node;
368
369 cur_node.host_start = host_ptr;
370 if (cur_node.host_start == (uintptr_t) NULL)
371 {
372 cur_node.tgt_offset = (uintptr_t) NULL;
9b50ad1d 373 gomp_copy_host2dev (devicep,
374 (void *) (tgt->tgt_start + target_offset),
375 (void *) &cur_node.tgt_offset,
5af416bd 376 sizeof (void *), cbuf);
84f53106 377 return;
378 }
379 /* Add bias to the pointer value. */
380 cur_node.host_start += bias;
43895be5 381 cur_node.host_end = cur_node.host_start;
382 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
84f53106 383 if (n == NULL)
384 {
385 gomp_mutex_unlock (&devicep->lock);
386 gomp_fatal ("Pointer target of array section wasn't mapped");
387 }
388 cur_node.host_start -= n->host_start;
389 cur_node.tgt_offset
390 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
391 /* At this point tgt_offset is target address of the
392 array section. Now subtract bias to get what we want
393 to initialize the pointer with. */
394 cur_node.tgt_offset -= bias;
9b50ad1d 395 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
5af416bd 396 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
84f53106 397}
398
43895be5 399static void
400gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
401 size_t first, size_t i, void **hostaddrs,
5af416bd 402 size_t *sizes, void *kinds,
403 struct gomp_coalesce_buf *cbuf)
43895be5 404{
405 struct gomp_device_descr *devicep = tgt->device_descr;
406 struct splay_tree_s *mem_map = &devicep->mem_map;
407 struct splay_tree_key_s cur_node;
408 int kind;
409 const bool short_mapkind = true;
410 const int typemask = short_mapkind ? 0xff : 0x7;
411
412 cur_node.host_start = (uintptr_t) hostaddrs[i];
413 cur_node.host_end = cur_node.host_start + sizes[i];
414 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
415 kind = get_kind (short_mapkind, kinds, i);
416 if (n2
417 && n2->tgt == n->tgt
418 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
419 {
420 gomp_map_vars_existing (devicep, n2, &cur_node,
5af416bd 421 &tgt->list[i], kind & typemask, cbuf);
43895be5 422 return;
423 }
424 if (sizes[i] == 0)
425 {
426 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
427 {
428 cur_node.host_start--;
429 n2 = splay_tree_lookup (mem_map, &cur_node);
430 cur_node.host_start++;
431 if (n2
432 && n2->tgt == n->tgt
433 && n2->host_start - n->host_start
434 == n2->tgt_offset - n->tgt_offset)
435 {
436 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
5af416bd 437 kind & typemask, cbuf);
43895be5 438 return;
439 }
440 }
441 cur_node.host_end++;
442 n2 = splay_tree_lookup (mem_map, &cur_node);
443 cur_node.host_end--;
444 if (n2
445 && n2->tgt == n->tgt
446 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
447 {
448 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
5af416bd 449 kind & typemask, cbuf);
43895be5 450 return;
451 }
452 }
453 gomp_mutex_unlock (&devicep->lock);
454 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
455 "other mapped elements from the same structure weren't mapped "
456 "together with it", (void *) cur_node.host_start,
457 (void *) cur_node.host_end);
458}
459
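/* Return the device address to which the I-th mapping resolves.  Entries
   without a splay-tree key use tgt->list[i].offset as a sentinel: ~0 passes
   the host address through unchanged, ~1 yields a null pointer, and ~2
   makes the value relative to the following list entry (used for
   GOMP_MAP_STRUCT).  */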
9561765e 460static inline uintptr_t
461gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
462{
463 if (tgt->list[i].key != NULL)
464 return tgt->list[i].key->tgt->tgt_start
465 + tgt->list[i].key->tgt_offset
466 + tgt->list[i].offset;
467 if (tgt->list[i].offset == ~(uintptr_t) 0)
468 return (uintptr_t) hostaddrs[i];
469 if (tgt->list[i].offset == ~(uintptr_t) 1)
470 return 0;
471 if (tgt->list[i].offset == ~(uintptr_t) 2)
472 return tgt->list[i + 1].key->tgt->tgt_start
473 + tgt->list[i + 1].key->tgt_offset
474 + tgt->list[i + 1].offset
475 + (uintptr_t) hostaddrs[i]
476 - (uintptr_t) hostaddrs[i + 1];
477 return tgt->tgt_start + tgt->list[i].offset;
478}
479
ca4c3545 480attribute_hidden struct target_mem_desc *
995b27b9 481gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
ca4c3545 482 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
43895be5 483 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
995b27b9 484{
485 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
43895be5 486 bool has_firstprivate = false;
487 const int rshift = short_mapkind ? 8 : 3;
488 const int typemask = short_mapkind ? 0xff : 0x7;
0d8c703d 489 struct splay_tree_s *mem_map = &devicep->mem_map;
995b27b9 490 struct splay_tree_key_s cur_node;
491 struct target_mem_desc *tgt
492 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
493 tgt->list_count = mapnum;
43895be5 494 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
995b27b9 495 tgt->device_descr = devicep;
5af416bd 496 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
995b27b9 497
498 if (mapnum == 0)
3bdac239 499 {
500 tgt->tgt_start = 0;
501 tgt->tgt_end = 0;
502 return tgt;
503 }
995b27b9 504
505 tgt_align = sizeof (void *);
506 tgt_size = 0;
5af416bd 507 cbuf.chunks = NULL;
508 cbuf.chunk_cnt = -1;
509 cbuf.use_cnt = 0;
510 cbuf.buf = NULL;
511 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
512 {
513 cbuf.chunks
514 = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
515 cbuf.chunk_cnt = 0;
516 }
43895be5 517 if (pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 518 {
519 size_t align = 4 * sizeof (void *);
520 tgt_align = align;
521 tgt_size = mapnum * sizeof (void *);
5af416bd 522 cbuf.chunk_cnt = 1;
523 cbuf.use_cnt = 1 + (mapnum > 1);
524 cbuf.chunks[0] = 0;
525 cbuf.chunks[1] = tgt_size;
995b27b9 526 }
527
0d8c703d 528 gomp_mutex_lock (&devicep->lock);
f87b2900 529 if (devicep->state == GOMP_DEVICE_FINALIZED)
530 {
531 gomp_mutex_unlock (&devicep->lock);
532 free (tgt);
533 return NULL;
534 }
ca4c3545 535
995b27b9 536 for (i = 0; i < mapnum; i++)
537 {
43895be5 538 int kind = get_kind (short_mapkind, kinds, i);
539 if (hostaddrs[i] == NULL
540 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
995b27b9 541 {
43895be5 542 tgt->list[i].key = NULL;
543 tgt->list[i].offset = ~(uintptr_t) 0;
544 continue;
545 }
546 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
547 {
548 cur_node.host_start = (uintptr_t) hostaddrs[i];
549 cur_node.host_end = cur_node.host_start;
550 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
551 if (n == NULL)
552 {
553 gomp_mutex_unlock (&devicep->lock);
554 gomp_fatal ("use_device_ptr pointer wasn't mapped");
555 }
556 cur_node.host_start -= n->host_start;
557 hostaddrs[i]
558 = (void *) (n->tgt->tgt_start + n->tgt_offset
559 + cur_node.host_start);
560 tgt->list[i].key = NULL;
561 tgt->list[i].offset = ~(uintptr_t) 0;
562 continue;
563 }
564 else if ((kind & typemask) == GOMP_MAP_STRUCT)
565 {
566 size_t first = i + 1;
567 size_t last = i + sizes[i];
568 cur_node.host_start = (uintptr_t) hostaddrs[i];
569 cur_node.host_end = (uintptr_t) hostaddrs[last]
570 + sizes[last];
571 tgt->list[i].key = NULL;
572 tgt->list[i].offset = ~(uintptr_t) 2;
573 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
574 if (n == NULL)
575 {
576 size_t align = (size_t) 1 << (kind >> rshift);
577 if (tgt_align < align)
578 tgt_align = align;
5af416bd 579 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
43895be5 580 tgt_size = (tgt_size + align - 1) & ~(align - 1);
5af416bd 581 tgt_size += cur_node.host_end - cur_node.host_start;
43895be5 582 not_found_cnt += last - i;
583 for (i = first; i <= last; i++)
5af416bd 584 {
585 tgt->list[i].key = NULL;
586 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
587 & typemask))
588 gomp_coalesce_buf_add (&cbuf,
589 tgt_size - cur_node.host_end
590 + (uintptr_t) hostaddrs[i],
591 sizes[i]);
592 }
43895be5 593 i--;
594 continue;
595 }
596 for (i = first; i <= last; i++)
597 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
5af416bd 598 sizes, kinds, NULL);
43895be5 599 i--;
995b27b9 600 continue;
601 }
9561765e 602 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
603 {
604 tgt->list[i].key = NULL;
605 tgt->list[i].offset = ~(uintptr_t) 1;
606 has_firstprivate = true;
607 continue;
608 }
995b27b9 609 cur_node.host_start = (uintptr_t) hostaddrs[i];
ca4c3545 610 if (!GOMP_MAP_POINTER_P (kind & typemask))
995b27b9 611 cur_node.host_end = cur_node.host_start + sizes[i];
612 else
613 cur_node.host_end = cur_node.host_start + sizeof (void *);
43895be5 614 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
615 {
616 tgt->list[i].key = NULL;
617
618 size_t align = (size_t) 1 << (kind >> rshift);
619 if (tgt_align < align)
620 tgt_align = align;
621 tgt_size = (tgt_size + align - 1) & ~(align - 1);
5af416bd 622 gomp_coalesce_buf_add (&cbuf, tgt_size,
623 cur_node.host_end - cur_node.host_start);
43895be5 624 tgt_size += cur_node.host_end - cur_node.host_start;
625 has_firstprivate = true;
626 continue;
627 }
628 splay_tree_key n;
629 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
995b27b9 630 {
9561765e 631 n = gomp_map_0len_lookup (mem_map, &cur_node);
43895be5 632 if (!n)
633 {
634 tgt->list[i].key = NULL;
635 tgt->list[i].offset = ~(uintptr_t) 1;
636 continue;
637 }
995b27b9 638 }
43895be5 639 else
640 n = splay_tree_lookup (mem_map, &cur_node);
c0998828 641 if (n && n->refcount != REFCOUNT_LINK)
43895be5 642 gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
5af416bd 643 kind & typemask, NULL);
995b27b9 644 else
645 {
43895be5 646 tgt->list[i].key = NULL;
ca4c3545 647
648 size_t align = (size_t) 1 << (kind >> rshift);
995b27b9 649 not_found_cnt++;
650 if (tgt_align < align)
651 tgt_align = align;
652 tgt_size = (tgt_size + align - 1) & ~(align - 1);
5af416bd 653 if (gomp_to_device_kind_p (kind & typemask))
654 gomp_coalesce_buf_add (&cbuf, tgt_size,
655 cur_node.host_end - cur_node.host_start);
995b27b9 656 tgt_size += cur_node.host_end - cur_node.host_start;
ca4c3545 657 if ((kind & typemask) == GOMP_MAP_TO_PSET)
995b27b9 658 {
659 size_t j;
660 for (j = i + 1; j < mapnum; j++)
43895be5 661 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
ca4c3545 662 & typemask))
995b27b9 663 break;
664 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
665 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
666 > cur_node.host_end))
667 break;
668 else
669 {
43895be5 670 tgt->list[j].key = NULL;
995b27b9 671 i++;
672 }
673 }
674 }
675 }
676
ca4c3545 677 if (devaddrs)
678 {
679 if (mapnum != 1)
0d8c703d 680 {
681 gomp_mutex_unlock (&devicep->lock);
682 gomp_fatal ("unexpected aggregation");
683 }
ca4c3545 684 tgt->to_free = devaddrs[0];
685 tgt->tgt_start = (uintptr_t) tgt->to_free;
686 tgt->tgt_end = tgt->tgt_start + sizes[0];
687 }
43895be5 688 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 689 {
 690 /* Allocate a block of tgt_size bytes aligned to tgt_align. */
691 /* FIXME: Perhaps change interface to allocate properly aligned
692 memory. */
693 tgt->to_free = devicep->alloc_func (devicep->target_id,
694 tgt_size + tgt_align - 1);
9b50ad1d 695 if (!tgt->to_free)
696 {
697 gomp_mutex_unlock (&devicep->lock);
698 gomp_fatal ("device memory allocation fail");
699 }
700
995b27b9 701 tgt->tgt_start = (uintptr_t) tgt->to_free;
702 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
703 tgt->tgt_end = tgt->tgt_start + tgt_size;
5af416bd 704
705 if (cbuf.use_cnt == 1)
706 cbuf.chunk_cnt--;
707 if (cbuf.chunk_cnt > 0)
708 {
709 cbuf.buf
710 = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
711 if (cbuf.buf)
712 {
713 cbuf.tgt = tgt;
714 cbufp = &cbuf;
715 }
716 }
995b27b9 717 }
718 else
719 {
720 tgt->to_free = NULL;
721 tgt->tgt_start = 0;
722 tgt->tgt_end = 0;
723 }
724
725 tgt_size = 0;
43895be5 726 if (pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 727 tgt_size = mapnum * sizeof (void *);
728
729 tgt->array = NULL;
43895be5 730 if (not_found_cnt || has_firstprivate)
995b27b9 731 {
43895be5 732 if (not_found_cnt)
733 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
995b27b9 734 splay_tree_node array = tgt->array;
43895be5 735 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
736 uintptr_t field_tgt_base = 0;
995b27b9 737
738 for (i = 0; i < mapnum; i++)
43895be5 739 if (tgt->list[i].key == NULL)
995b27b9 740 {
43895be5 741 int kind = get_kind (short_mapkind, kinds, i);
995b27b9 742 if (hostaddrs[i] == NULL)
743 continue;
43895be5 744 switch (kind & typemask)
745 {
746 size_t align, len, first, last;
747 splay_tree_key n;
748 case GOMP_MAP_FIRSTPRIVATE:
749 align = (size_t) 1 << (kind >> rshift);
750 tgt_size = (tgt_size + align - 1) & ~(align - 1);
751 tgt->list[i].offset = tgt_size;
752 len = sizes[i];
9b50ad1d 753 gomp_copy_host2dev (devicep,
754 (void *) (tgt->tgt_start + tgt_size),
5af416bd 755 (void *) hostaddrs[i], len, cbufp);
43895be5 756 tgt_size += len;
757 continue;
758 case GOMP_MAP_FIRSTPRIVATE_INT:
759 case GOMP_MAP_USE_DEVICE_PTR:
760 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
761 continue;
762 case GOMP_MAP_STRUCT:
763 first = i + 1;
764 last = i + sizes[i];
765 cur_node.host_start = (uintptr_t) hostaddrs[i];
766 cur_node.host_end = (uintptr_t) hostaddrs[last]
767 + sizes[last];
768 if (tgt->list[first].key != NULL)
769 continue;
770 n = splay_tree_lookup (mem_map, &cur_node);
771 if (n == NULL)
772 {
773 size_t align = (size_t) 1 << (kind >> rshift);
774 tgt_size -= (uintptr_t) hostaddrs[first]
775 - (uintptr_t) hostaddrs[i];
776 tgt_size = (tgt_size + align - 1) & ~(align - 1);
777 tgt_size += (uintptr_t) hostaddrs[first]
778 - (uintptr_t) hostaddrs[i];
779 field_tgt_base = (uintptr_t) hostaddrs[first];
780 field_tgt_offset = tgt_size;
781 field_tgt_clear = last;
782 tgt_size += cur_node.host_end
783 - (uintptr_t) hostaddrs[first];
784 continue;
785 }
786 for (i = first; i <= last; i++)
787 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
5af416bd 788 sizes, kinds, cbufp);
43895be5 789 i--;
790 continue;
9561765e 791 case GOMP_MAP_ALWAYS_POINTER:
792 cur_node.host_start = (uintptr_t) hostaddrs[i];
793 cur_node.host_end = cur_node.host_start + sizeof (void *);
794 n = splay_tree_lookup (mem_map, &cur_node);
795 if (n == NULL
796 || n->host_start > cur_node.host_start
797 || n->host_end < cur_node.host_end)
798 {
799 gomp_mutex_unlock (&devicep->lock);
800 gomp_fatal ("always pointer not mapped");
801 }
802 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
803 != GOMP_MAP_ALWAYS_POINTER)
804 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
805 if (cur_node.tgt_offset)
806 cur_node.tgt_offset -= sizes[i];
9b50ad1d 807 gomp_copy_host2dev (devicep,
808 (void *) (n->tgt->tgt_start
809 + n->tgt_offset
810 + cur_node.host_start
811 - n->host_start),
812 (void *) &cur_node.tgt_offset,
5af416bd 813 sizeof (void *), cbufp);
9561765e 814 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
815 + cur_node.host_start - n->host_start;
816 continue;
43895be5 817 default:
818 break;
819 }
995b27b9 820 splay_tree_key k = &array->key;
821 k->host_start = (uintptr_t) hostaddrs[i];
ca4c3545 822 if (!GOMP_MAP_POINTER_P (kind & typemask))
995b27b9 823 k->host_end = k->host_start + sizes[i];
824 else
825 k->host_end = k->host_start + sizeof (void *);
0d8c703d 826 splay_tree_key n = splay_tree_lookup (mem_map, k);
c0998828 827 if (n && n->refcount != REFCOUNT_LINK)
43895be5 828 gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
5af416bd 829 kind & typemask, cbufp);
995b27b9 830 else
831 {
c0998828 832 k->link_key = NULL;
833 if (n && n->refcount == REFCOUNT_LINK)
834 {
835 /* Replace target address of the pointer with target address
836 of mapped object in the splay tree. */
837 splay_tree_remove (mem_map, n);
838 k->link_key = n;
839 }
ca4c3545 840 size_t align = (size_t) 1 << (kind >> rshift);
43895be5 841 tgt->list[i].key = k;
995b27b9 842 k->tgt = tgt;
43895be5 843 if (field_tgt_clear != ~(size_t) 0)
844 {
845 k->tgt_offset = k->host_start - field_tgt_base
846 + field_tgt_offset;
847 if (i == field_tgt_clear)
848 field_tgt_clear = ~(size_t) 0;
849 }
850 else
851 {
852 tgt_size = (tgt_size + align - 1) & ~(align - 1);
853 k->tgt_offset = tgt_size;
854 tgt_size += k->host_end - k->host_start;
855 }
856 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
857 tgt->list[i].always_copy_from
858 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
859 tgt->list[i].offset = 0;
860 tgt->list[i].length = k->host_end - k->host_start;
995b27b9 861 k->refcount = 1;
737cc978 862 k->dynamic_refcount = 0;
995b27b9 863 tgt->refcount++;
864 array->left = NULL;
865 array->right = NULL;
0d8c703d 866 splay_tree_insert (mem_map, array);
ca4c3545 867 switch (kind & typemask)
995b27b9 868 {
ca4c3545 869 case GOMP_MAP_ALLOC:
870 case GOMP_MAP_FROM:
871 case GOMP_MAP_FORCE_ALLOC:
872 case GOMP_MAP_FORCE_FROM:
43895be5 873 case GOMP_MAP_ALWAYS_FROM:
995b27b9 874 break;
ca4c3545 875 case GOMP_MAP_TO:
876 case GOMP_MAP_TOFROM:
877 case GOMP_MAP_FORCE_TO:
878 case GOMP_MAP_FORCE_TOFROM:
43895be5 879 case GOMP_MAP_ALWAYS_TO:
880 case GOMP_MAP_ALWAYS_TOFROM:
9b50ad1d 881 gomp_copy_host2dev (devicep,
882 (void *) (tgt->tgt_start
883 + k->tgt_offset),
884 (void *) k->host_start,
5af416bd 885 k->host_end - k->host_start, cbufp);
995b27b9 886 break;
ca4c3545 887 case GOMP_MAP_POINTER:
84f53106 888 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
5af416bd 889 k->tgt_offset, sizes[i], cbufp);
995b27b9 890 break;
ca4c3545 891 case GOMP_MAP_TO_PSET:
9b50ad1d 892 gomp_copy_host2dev (devicep,
893 (void *) (tgt->tgt_start
894 + k->tgt_offset),
895 (void *) k->host_start,
5af416bd 896 k->host_end - k->host_start, cbufp);
ca4c3545 897
995b27b9 898 for (j = i + 1; j < mapnum; j++)
43895be5 899 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
900 j)
ca4c3545 901 & typemask))
995b27b9 902 break;
903 else if ((uintptr_t) hostaddrs[j] < k->host_start
904 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
905 > k->host_end))
906 break;
907 else
908 {
43895be5 909 tgt->list[j].key = k;
910 tgt->list[j].copy_from = false;
911 tgt->list[j].always_copy_from = false;
912 if (k->refcount != REFCOUNT_INFINITY)
913 k->refcount++;
84f53106 914 gomp_map_pointer (tgt,
915 (uintptr_t) *(void **) hostaddrs[j],
916 k->tgt_offset
917 + ((uintptr_t) hostaddrs[j]
918 - k->host_start),
5af416bd 919 sizes[j], cbufp);
995b27b9 920 i++;
921 }
ca4c3545 922 break;
923 case GOMP_MAP_FORCE_PRESENT:
924 {
925 /* We already looked up the memory region above and it
926 was missing. */
927 size_t size = k->host_end - k->host_start;
0d8c703d 928 gomp_mutex_unlock (&devicep->lock);
2634aed9 929#ifdef HAVE_INTTYPES_H
930 gomp_fatal ("present clause: !acc_is_present (%p, "
931 "%"PRIu64" (0x%"PRIx64"))",
932 (void *) k->host_start,
933 (uint64_t) size, (uint64_t) size);
934#else
ca4c3545 935 gomp_fatal ("present clause: !acc_is_present (%p, "
2634aed9 936 "%lu (0x%lx))", (void *) k->host_start,
937 (unsigned long) size, (unsigned long) size);
938#endif
ca4c3545 939 }
940 break;
941 case GOMP_MAP_FORCE_DEVICEPTR:
942 assert (k->host_end - k->host_start == sizeof (void *));
9b50ad1d 943 gomp_copy_host2dev (devicep,
944 (void *) (tgt->tgt_start
945 + k->tgt_offset),
946 (void *) k->host_start,
5af416bd 947 sizeof (void *), cbufp);
ca4c3545 948 break;
949 default:
0d8c703d 950 gomp_mutex_unlock (&devicep->lock);
ca4c3545 951 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
952 kind);
995b27b9 953 }
c0998828 954
955 if (k->link_key)
956 {
957 /* Set link pointer on target to the device address of the
958 mapped object. */
959 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
960 devicep->host2dev_func (devicep->target_id,
961 (void *) n->tgt_offset,
962 &tgt_addr, sizeof (void *));
963 }
995b27b9 964 array++;
965 }
966 }
967 }
ca4c3545 968
43895be5 969 if (pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 970 {
971 for (i = 0; i < mapnum; i++)
972 {
9561765e 973 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
9b50ad1d 974 gomp_copy_host2dev (devicep,
975 (void *) (tgt->tgt_start + i * sizeof (void *)),
5af416bd 976 (void *) &cur_node.tgt_offset, sizeof (void *),
977 cbufp);
995b27b9 978 }
979 }
980
5af416bd 981 if (cbufp)
982 {
983 long c = 0;
984 for (c = 0; c < cbuf.chunk_cnt; ++c)
985 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
986 (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
987 cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
988 free (cbuf.buf);
989 }
990
43895be5 991 /* If the variable from "omp target enter data" map-list was already mapped,
992 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
993 gomp_exit_data. */
994 if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
995 {
996 free (tgt);
997 tgt = NULL;
998 }
999
0d8c703d 1000 gomp_mutex_unlock (&devicep->lock);
995b27b9 1001 return tgt;
1002}
1003
1004static void
1005gomp_unmap_tgt (struct target_mem_desc *tgt)
1006{
1007 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1008 if (tgt->tgt_end)
9b50ad1d 1009 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
995b27b9 1010
1011 free (tgt->array);
1012 free (tgt);
1013}
1014
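/* Remove splay-tree key K from DEVICEP's memory map, re-inserting the
   original "declare target link" key if one was displaced, and drop one
   reference from the owning target_mem_desc.  Returns true if that
   descriptor was unmapped (freed) as a result.  */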
737cc978 1015attribute_hidden bool
1016gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1017{
1018 bool is_tgt_unmapped = false;
1019 splay_tree_remove (&devicep->mem_map, k);
1020 if (k->link_key)
1021 splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
1022 if (k->tgt->refcount > 1)
1023 k->tgt->refcount--;
1024 else
1025 {
1026 is_tgt_unmapped = true;
1027 gomp_unmap_tgt (k->tgt);
1028 }
1029 return is_tgt_unmapped;
1030}
1031
ca4c3545 1032/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
 1033 variables back from device to host; if it is false, it is assumed that this
2bf775c0 1034 has been done already. */
ca4c3545 1035
1036attribute_hidden void
1037gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
995b27b9 1038{
1039 struct gomp_device_descr *devicep = tgt->device_descr;
1040
1041 if (tgt->list_count == 0)
1042 {
1043 free (tgt);
1044 return;
1045 }
1046
0d8c703d 1047 gomp_mutex_lock (&devicep->lock);
f87b2900 1048 if (devicep->state == GOMP_DEVICE_FINALIZED)
1049 {
1050 gomp_mutex_unlock (&devicep->lock);
1051 free (tgt->array);
1052 free (tgt);
1053 return;
1054 }
ca4c3545 1055
995b27b9 1056 size_t i;
995b27b9 1057 for (i = 0; i < tgt->list_count; i++)
43895be5 1058 {
1059 splay_tree_key k = tgt->list[i].key;
1060 if (k == NULL)
1061 continue;
1062
1063 bool do_unmap = false;
1064 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1065 k->refcount--;
1066 else if (k->refcount == 1)
1067 {
2bf775c0 1068 k->refcount--;
1069 do_unmap = true;
43895be5 1070 }
1071
1072 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1073 || tgt->list[i].always_copy_from)
9b50ad1d 1074 gomp_copy_dev2host (devicep,
1075 (void *) (k->host_start + tgt->list[i].offset),
1076 (void *) (k->tgt->tgt_start + k->tgt_offset
1077 + tgt->list[i].offset),
1078 tgt->list[i].length);
43895be5 1079 if (do_unmap)
737cc978 1080 gomp_remove_var (devicep, k);
43895be5 1081 }
995b27b9 1082
1083 if (tgt->refcount > 1)
1084 tgt->refcount--;
1085 else
1086 gomp_unmap_tgt (tgt);
ca4c3545 1087
0d8c703d 1088 gomp_mutex_unlock (&devicep->lock);
995b27b9 1089}
1090
1091static void
0d8c703d 1092gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
43895be5 1093 size_t *sizes, void *kinds, bool short_mapkind)
995b27b9 1094{
1095 size_t i;
1096 struct splay_tree_key_s cur_node;
43895be5 1097 const int typemask = short_mapkind ? 0xff : 0x7;
995b27b9 1098
1099 if (!devicep)
1100 return;
1101
1102 if (mapnum == 0)
1103 return;
1104
0d8c703d 1105 gomp_mutex_lock (&devicep->lock);
f87b2900 1106 if (devicep->state == GOMP_DEVICE_FINALIZED)
1107 {
1108 gomp_mutex_unlock (&devicep->lock);
1109 return;
1110 }
1111
995b27b9 1112 for (i = 0; i < mapnum; i++)
1113 if (sizes[i])
1114 {
1115 cur_node.host_start = (uintptr_t) hostaddrs[i];
1116 cur_node.host_end = cur_node.host_start + sizes[i];
0d8c703d 1117 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
995b27b9 1118 if (n)
1119 {
43895be5 1120 int kind = get_kind (short_mapkind, kinds, i);
995b27b9 1121 if (n->host_start > cur_node.host_start
1122 || n->host_end < cur_node.host_end)
0d8c703d 1123 {
1124 gomp_mutex_unlock (&devicep->lock);
1125 gomp_fatal ("Trying to update [%p..%p) object when "
1126 "only [%p..%p) is mapped",
1127 (void *) cur_node.host_start,
1128 (void *) cur_node.host_end,
1129 (void *) n->host_start,
1130 (void *) n->host_end);
1131 }
9b50ad1d 1132
1133
1134 void *hostaddr = (void *) cur_node.host_start;
1135 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1136 + cur_node.host_start - n->host_start);
1137 size_t size = cur_node.host_end - cur_node.host_start;
1138
ca4c3545 1139 if (GOMP_MAP_COPY_TO_P (kind & typemask))
5af416bd 1140 gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
ca4c3545 1141 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
9b50ad1d 1142 gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
995b27b9 1143 }
995b27b9 1144 }
0d8c703d 1145 gomp_mutex_unlock (&devicep->lock);
1146}
1147
 1148/* Load the image pointed to by TARGET_DATA to the device specified by
 1149 DEVICEP, and insert into the splay tree the mappings between addresses
7de5731e 1150 from HOST_TABLE and from the loaded target image. We rely on the host
 1151 and device compilers emitting variables and functions in the same order. */
0d8c703d 1152
1153static void
d3d8e632 1154gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
7de5731e 1155 const void *host_table, const void *target_data,
1156 bool is_register_lock)
0d8c703d 1157{
1158 void **host_func_table = ((void ***) host_table)[0];
1159 void **host_funcs_end = ((void ***) host_table)[1];
1160 void **host_var_table = ((void ***) host_table)[2];
1161 void **host_vars_end = ((void ***) host_table)[3];
1162
1163 /* The func table contains only addresses, the var table contains addresses
1164 and corresponding sizes. */
1165 int num_funcs = host_funcs_end - host_func_table;
1166 int num_vars = (host_vars_end - host_var_table) / 2;
1167
1168 /* Load image to device and get target addresses for the image. */
1169 struct addr_pair *target_table = NULL;
d3d8e632 1170 int i, num_target_entries;
1171
1172 num_target_entries
1173 = devicep->load_image_func (devicep->target_id, version,
1174 target_data, &target_table);
0d8c703d 1175
1176 if (num_target_entries != num_funcs + num_vars)
1177 {
1178 gomp_mutex_unlock (&devicep->lock);
1179 if (is_register_lock)
1180 gomp_mutex_unlock (&register_lock);
d3d8e632 1181 gomp_fatal ("Cannot map target functions or variables"
1182 " (expected %u, have %u)", num_funcs + num_vars,
1183 num_target_entries);
0d8c703d 1184 }
1185
1186 /* Insert host-target address mapping into splay tree. */
1187 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1188 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
43895be5 1189 tgt->refcount = REFCOUNT_INFINITY;
0d8c703d 1190 tgt->tgt_start = 0;
1191 tgt->tgt_end = 0;
1192 tgt->to_free = NULL;
1193 tgt->prev = NULL;
1194 tgt->list_count = 0;
1195 tgt->device_descr = devicep;
1196 splay_tree_node array = tgt->array;
1197
1198 for (i = 0; i < num_funcs; i++)
1199 {
1200 splay_tree_key k = &array->key;
1201 k->host_start = (uintptr_t) host_func_table[i];
1202 k->host_end = k->host_start + 1;
1203 k->tgt = tgt;
1204 k->tgt_offset = target_table[i].start;
43895be5 1205 k->refcount = REFCOUNT_INFINITY;
c0998828 1206 k->link_key = NULL;
0d8c703d 1207 array->left = NULL;
1208 array->right = NULL;
1209 splay_tree_insert (&devicep->mem_map, array);
1210 array++;
1211 }
1212
c0998828 1213 /* Most significant bit of the size in host and target tables marks
1214 "omp declare target link" variables. */
1215 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1216 const uintptr_t size_mask = ~link_bit;
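 /* For example, on a 64-bit host link_bit is 0x8000000000000000, so a
 table entry of size 24 for a linked variable is stored as
 0x8000000000000018 and (size_mask & entry) recovers 24. */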
1217
0d8c703d 1218 for (i = 0; i < num_vars; i++)
1219 {
1220 struct addr_pair *target_var = &target_table[num_funcs + i];
c0998828 1221 uintptr_t target_size = target_var->end - target_var->start;
1222
1223 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
0d8c703d 1224 {
1225 gomp_mutex_unlock (&devicep->lock);
1226 if (is_register_lock)
1227 gomp_mutex_unlock (&register_lock);
c0998828 1228 gomp_fatal ("Cannot map target variables (size mismatch)");
0d8c703d 1229 }
1230
1231 splay_tree_key k = &array->key;
1232 k->host_start = (uintptr_t) host_var_table[i * 2];
c0998828 1233 k->host_end
1234 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
0d8c703d 1235 k->tgt = tgt;
1236 k->tgt_offset = target_var->start;
c0998828 1237 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
c0998828 1238 k->link_key = NULL;
0d8c703d 1239 array->left = NULL;
1240 array->right = NULL;
1241 splay_tree_insert (&devicep->mem_map, array);
1242 array++;
1243 }
1244
1245 free (target_table);
995b27b9 1246}
1247
7de5731e 1248/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
 1249 The device must be locked. */
1250
1251static void
1252gomp_unload_image_from_device (struct gomp_device_descr *devicep,
d3d8e632 1253 unsigned version,
7de5731e 1254 const void *host_table, const void *target_data)
1255{
1256 void **host_func_table = ((void ***) host_table)[0];
1257 void **host_funcs_end = ((void ***) host_table)[1];
1258 void **host_var_table = ((void ***) host_table)[2];
1259 void **host_vars_end = ((void ***) host_table)[3];
1260
1261 /* The func table contains only addresses, the var table contains addresses
1262 and corresponding sizes. */
1263 int num_funcs = host_funcs_end - host_func_table;
1264 int num_vars = (host_vars_end - host_var_table) / 2;
1265
7de5731e 1266 struct splay_tree_key_s k;
1267 splay_tree_key node = NULL;
1268
 1269 /* Find the mapping at the start of the node array. */
1270 if (num_funcs || num_vars)
1271 {
1272 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1273 : (uintptr_t) host_var_table[0]);
1274 k.host_end = k.host_start + 1;
1275 node = splay_tree_lookup (&devicep->mem_map, &k);
1276 }
d3d8e632 1277
9b50ad1d 1278 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1279 {
1280 gomp_mutex_unlock (&devicep->lock);
1281 gomp_fatal ("image unload fail");
1282 }
7de5731e 1283
1284 /* Remove mappings from splay tree. */
c0998828 1285 int i;
1286 for (i = 0; i < num_funcs; i++)
7de5731e 1287 {
c0998828 1288 k.host_start = (uintptr_t) host_func_table[i];
7de5731e 1289 k.host_end = k.host_start + 1;
1290 splay_tree_remove (&devicep->mem_map, &k);
1291 }
1292
c0998828 1293 /* Most significant bit of the size in host and target tables marks
1294 "omp declare target link" variables. */
1295 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1296 const uintptr_t size_mask = ~link_bit;
1297 bool is_tgt_unmapped = false;
1298
1299 for (i = 0; i < num_vars; i++)
7de5731e 1300 {
c0998828 1301 k.host_start = (uintptr_t) host_var_table[i * 2];
1302 k.host_end
1303 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1304
1305 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1306 splay_tree_remove (&devicep->mem_map, &k);
1307 else
1308 {
1309 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
737cc978 1310 is_tgt_unmapped = gomp_remove_var (devicep, n);
c0998828 1311 }
7de5731e 1312 }
1313
c0998828 1314 if (node && !is_tgt_unmapped)
7de5731e 1315 {
1316 free (node->tgt);
1317 free (node);
1318 }
1319}
1320
0d8c703d 1321/* This function should be called from every offload image while loading.
995b27b9 1322 It gets the descriptor of the host func and var tables HOST_TABLE, the
 1323 TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin. */
1324
1325void
d3d8e632 1326GOMP_offload_register_ver (unsigned version, const void *host_table,
1327 int target_type, const void *target_data)
995b27b9 1328{
0d8c703d 1329 int i;
d3d8e632 1330
1331 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1332 gomp_fatal ("Library too old for offload (version %u < %u)",
1333 GOMP_VERSION, GOMP_VERSION_LIB (version));
1334
0d8c703d 1335 gomp_mutex_lock (&register_lock);
1336
1337 /* Load image to all initialized devices. */
1338 for (i = 0; i < num_devices; i++)
1339 {
1340 struct gomp_device_descr *devicep = &devices[i];
1341 gomp_mutex_lock (&devicep->lock);
f87b2900 1342 if (devicep->type == target_type
1343 && devicep->state == GOMP_DEVICE_INITIALIZED)
d3d8e632 1344 gomp_load_image_to_device (devicep, version,
1345 host_table, target_data, true);
0d8c703d 1346 gomp_mutex_unlock (&devicep->lock);
1347 }
995b27b9 1348
0d8c703d 1349 /* Insert image to array of pending images. */
1350 offload_images
1351 = gomp_realloc_unlock (offload_images,
1352 (num_offload_images + 1)
1353 * sizeof (struct offload_image_descr));
d3d8e632 1354 offload_images[num_offload_images].version = version;
995b27b9 1355 offload_images[num_offload_images].type = target_type;
1356 offload_images[num_offload_images].host_table = host_table;
1357 offload_images[num_offload_images].target_data = target_data;
1358
1359 num_offload_images++;
0d8c703d 1360 gomp_mutex_unlock (&register_lock);
995b27b9 1361}
1362
d3d8e632 1363void
1364GOMP_offload_register (const void *host_table, int target_type,
1365 const void *target_data)
1366{
1367 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1368}
1369
0d8c703d 1370/* This function should be called from every offload image while unloading.
 1371 It gets the descriptor of the host func and var tables HOST_TABLE, the
 1372 TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin. */
995b27b9 1373
0d8c703d 1374void
d3d8e632 1375GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1376 int target_type, const void *target_data)
995b27b9 1377{
0d8c703d 1378 int i;
1379
0d8c703d 1380 gomp_mutex_lock (&register_lock);
1381
1382 /* Unload image from all initialized devices. */
1383 for (i = 0; i < num_devices; i++)
1384 {
0d8c703d 1385 struct gomp_device_descr *devicep = &devices[i];
1386 gomp_mutex_lock (&devicep->lock);
f87b2900 1387 if (devicep->type == target_type
1388 && devicep->state == GOMP_DEVICE_INITIALIZED)
d3d8e632 1389 gomp_unload_image_from_device (devicep, version,
1390 host_table, target_data);
0d8c703d 1391 gomp_mutex_unlock (&devicep->lock);
1392 }
1393
1394 /* Remove image from array of pending images. */
1395 for (i = 0; i < num_offload_images; i++)
1396 if (offload_images[i].target_data == target_data)
1397 {
1398 offload_images[i] = offload_images[--num_offload_images];
1399 break;
1400 }
1401
1402 gomp_mutex_unlock (&register_lock);
ca4c3545 1403}
995b27b9 1404
d3d8e632 1405void
1406GOMP_offload_unregister (const void *host_table, int target_type,
1407 const void *target_data)
1408{
1409 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1410}
1411
0d8c703d 1412/* This function initializes the target device, specified by DEVICEP. DEVICEP
1413 must be locked on entry, and remains locked on return. */
ca4c3545 1414
1415attribute_hidden void
0d8c703d 1416gomp_init_device (struct gomp_device_descr *devicep)
ca4c3545 1417{
995b27b9 1418 int i;
9b50ad1d 1419 if (!devicep->init_device_func (devicep->target_id))
1420 {
1421 gomp_mutex_unlock (&devicep->lock);
1422 gomp_fatal ("device initialization failed");
1423 }
0d8c703d 1424
 1425 /* Load to the device all images registered so far. */
1426 for (i = 0; i < num_offload_images; i++)
995b27b9 1427 {
0d8c703d 1428 struct offload_image_descr *image = &offload_images[i];
1429 if (image->type == devicep->type)
d3d8e632 1430 gomp_load_image_to_device (devicep, image->version,
1431 image->host_table, image->target_data,
1432 false);
995b27b9 1433 }
1434
f87b2900 1435 devicep->state = GOMP_DEVICE_INITIALIZED;
ca4c3545 1436}
1437
7de5731e 1438attribute_hidden void
1439gomp_unload_device (struct gomp_device_descr *devicep)
1440{
f87b2900 1441 if (devicep->state == GOMP_DEVICE_INITIALIZED)
7de5731e 1442 {
1443 unsigned i;
1444
 1445 /* Unload from the device all images registered so far. */
1446 for (i = 0; i < num_offload_images; i++)
1447 {
1448 struct offload_image_descr *image = &offload_images[i];
1449 if (image->type == devicep->type)
d3d8e632 1450 gomp_unload_image_from_device (devicep, image->version,
1451 image->host_table,
7de5731e 1452 image->target_data);
1453 }
1454 }
1455}
1456
ca4c3545 1457/* Free address mapping tables. MEM_MAP must be locked on entry, and remains
 1458 locked on return. */
1459
1460attribute_hidden void
0d8c703d 1461gomp_free_memmap (struct splay_tree_s *mem_map)
ca4c3545 1462{
0d8c703d 1463 while (mem_map->root)
ca4c3545 1464 {
0d8c703d 1465 struct target_mem_desc *tgt = mem_map->root->key.tgt;
ca4c3545 1466
0d8c703d 1467 splay_tree_remove (mem_map, &mem_map->root->key);
ca4c3545 1468 free (tgt->array);
1469 free (tgt);
1470 }
ca4c3545 1471}
1472
9561765e 1473/* Host fallback for GOMP_target{,_ext} routines. */
43895be5 1474
1475static void
1476gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1477{
1478 struct gomp_thread old_thr, *thr = gomp_thread ();
1479 old_thr = *thr;
1480 memset (thr, '\0', sizeof (*thr));
1481 if (gomp_places_list)
1482 {
1483 thr->place = old_thr.place;
1484 thr->ts.place_partition_len = gomp_places_list_len;
1485 }
1486 fn (hostaddrs);
1487 gomp_free_thread (thr);
1488 *thr = old_thr;
1489}
1490
56686608 1491/* Calculate alignment and size requirements of a private copy of data shared
 1492 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
9561765e 1493
56686608 1494static inline void
1495calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1496 unsigned short *kinds, size_t *tgt_align,
1497 size_t *tgt_size)
9561765e 1498{
56686608 1499 size_t i;
1500 for (i = 0; i < mapnum; i++)
1501 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1502 {
1503 size_t align = (size_t) 1 << (kinds[i] >> 8);
1504 if (*tgt_align < align)
1505 *tgt_align = align;
1506 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1507 *tgt_size += sizes[i];
1508 }
1509}
1510
1511/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1512
1513static inline void
1514copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1515 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1516 size_t tgt_size)
1517{
1518 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1519 if (al)
1520 tgt += tgt_align - al;
1521 tgt_size = 0;
1522 size_t i;
9561765e 1523 for (i = 0; i < mapnum; i++)
1524 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1525 {
1526 size_t align = (size_t) 1 << (kinds[i] >> 8);
9561765e 1527 tgt_size = (tgt_size + align - 1) & ~(align - 1);
56686608 1528 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1529 hostaddrs[i] = tgt + tgt_size;
1530 tgt_size = tgt_size + sizes[i];
9561765e 1531 }
56686608 1532}
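/* Illustrative sketch (editor's addition): the two helpers above are used
   as a pair, mirroring their use in GOMP_target_ext below -- one pass to
   size an aligned staging block, one pass to fill it and redirect
   hostaddrs[] into it.  */
#if 0
static void
example_stage_firstprivate (size_t mapnum, void **hostaddrs, size_t *sizes,
			    unsigned short *kinds)
{
  size_t tgt_align = 0, tgt_size = 0;
  calculate_firstprivate_requirements (mapnum, sizes, kinds,
				       &tgt_align, &tgt_size);
  if (tgt_align)
    {
      /* Over-allocate by tgt_align - 1 bytes so copy_firstprivate_data
	 can align the base pointer itself.  */
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			      tgt_align, tgt_size);
    }
}
#endif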
1533
9561765e 1534/* Helper function of GOMP_target{,_ext} routines. */
43895be5 1535
1536static void *
1537gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1538 void (*host_fn) (void *))
1539{
1540 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1541 return (void *) host_fn;
1542 else
1543 {
1544 gomp_mutex_lock (&devicep->lock);
f87b2900 1545 if (devicep->state == GOMP_DEVICE_FINALIZED)
1546 {
1547 gomp_mutex_unlock (&devicep->lock);
1548 return NULL;
1549 }
1550
43895be5 1551 struct splay_tree_key_s k;
1552 k.host_start = (uintptr_t) host_fn;
1553 k.host_end = k.host_start + 1;
1554 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1555 gomp_mutex_unlock (&devicep->lock);
1556 if (tgt_fn == NULL)
bc6edebb 1557 return NULL;
43895be5 1558
1559 return (void *) tgt_fn->tgt_offset;
1560 }
1561}
1562
bc7bff74 1563/* Called when encountering a target directive. If DEVICE
ca4c3545 1564 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
 1565 GOMP_DEVICE_HOST_FALLBACK (or any value
 1566 larger than the last available hw device), use the host fallback.
dc19c8fd 1567 FN is the address of the host code, UNUSED is part of the current ABI, but
 1568 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
bc7bff74 1569 with MAPNUM entries, with the addresses of the host objects,
 1570 the sizes of the host objects (resp., for pointer kinds, the pointer
 1571 bias, with the size assumed to be sizeof (void *)) and the kinds. */
1572
1573void
dc19c8fd 1574GOMP_target (int device, void (*fn) (void *), const void *unused,
bc7bff74 1575 size_t mapnum, void **hostaddrs, size_t *sizes,
1576 unsigned char *kinds)
1577{
995b27b9 1578 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1579
f87b2900 1580 void *fn_addr;
ca4c3545 1581 if (devicep == NULL
f87b2900 1582 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 1583 /* All shared memory devices should use the GOMP_target_ext function. */
1584 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
f87b2900 1585 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
43895be5 1586 return gomp_target_fallback (fn, hostaddrs);
1587
43895be5 1588 struct target_mem_desc *tgt_vars
1589 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1590 GOMP_MAP_VARS_TARGET);
56686608 1591 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1592 NULL);
43895be5 1593 gomp_unmap_vars (tgt_vars, true);
1594}
995b27b9 1595
9561765e 1596/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1597 and several arguments have been added:
1598 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1599 DEPEND is array of dependencies, see GOMP_task for details.
56686608 1600
 1601 ARGS is a pointer to an array consisting of a variable number of both
 1602 device-independent and device-specific arguments; each argument can take
 1603 one or two elements, where the first specifies for which device it is
 1604 intended, the type and optionally also the value. If the value is not
 1605 present in the first element, the whole second element is the actual
 1606 value. The last element of the array is a single NULL. Among the
 1607 device-independent arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
1608
9561765e 1609 NUM_TEAMS is positive if GOMP_teams will be called in the body with
 1610 that value, 1 if the teams construct is not present, 0 if the
 1611 teams construct does not have a num_teams clause and so the choice is
 1612 implementation defined, and -1 if it can't be determined on the host
 1613 what value GOMP_teams will have on the device.
 1614 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
 1615 body with that value, 0 if the teams construct does not have a thread_limit
 1616 clause or the teams construct is not present, and -1 if it can't be
 1617 determined on the host what value GOMP_teams will have on the device. */
1618
43895be5 1619void
9561765e 1620GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1621 void **hostaddrs, size_t *sizes, unsigned short *kinds,
56686608 1622 unsigned int flags, void **depend, void **args)
43895be5 1623{
1624 struct gomp_device_descr *devicep = resolve_device (device);
84217e9d 1625 size_t tgt_align = 0, tgt_size = 0;
1626 bool fpc_done = false;
ca4c3545 1627
a9833286 1628 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1629 {
1630 struct gomp_thread *thr = gomp_thread ();
1631 /* Create a team if we don't have any around, as nowait
1632 target tasks make sense to run asynchronously even when
1633 outside of any parallel. */
1634 if (__builtin_expect (thr->ts.team == NULL, 0))
1635 {
1636 struct gomp_team *team = gomp_new_team (1);
1637 struct gomp_task *task = thr->task;
1638 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1639 team->prev_ts = thr->ts;
1640 thr->ts.team = team;
1641 thr->ts.team_id = 0;
1642 thr->ts.work_share = &team->work_shares[0];
1643 thr->ts.last_work_share = NULL;
1644#ifdef HAVE_SYNC_BUILTINS
1645 thr->ts.single_count = 0;
1646#endif
1647 thr->ts.static_trip = 0;
1648 thr->task = &team->implicit_task[0];
1649 gomp_init_task (thr->task, NULL, icv);
1650 if (task)
1651 {
1652 thr->task = task;
1653 gomp_end_task ();
1654 free (task);
1655 thr->task = &team->implicit_task[0];
1656 }
1657 else
1658 pthread_setspecific (gomp_thread_destructor, thr);
1659 }
1660 if (thr->ts.team
1661 && !thr->task->final_task)
1662 {
1663 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
56686608 1664 sizes, kinds, flags, depend, args,
a9833286 1665 GOMP_TARGET_TASK_BEFORE_MAP);
1666 return;
1667 }
1668 }
1669
1670 /* If there are depend clauses, but nowait is not present
1671 (or we are in a final task), block the parent task until the
1672 dependencies are resolved and then just continue with the rest
1673 of the function as if it is a merged task. */
43895be5 1674 if (depend != NULL)
1675 {
1676 struct gomp_thread *thr = gomp_thread ();
1677 if (thr->task && thr->task->depend_hash)
84217e9d 1678 {
1679 /* If we might need to wait, copy firstprivate now. */
1680 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1681 &tgt_align, &tgt_size);
1682 if (tgt_align)
1683 {
1684 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1685 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1686 tgt_align, tgt_size);
1687 }
1688 fpc_done = true;
1689 gomp_task_maybe_wait_for_dependencies (depend);
1690 }
43895be5 1691 }
ca4c3545 1692
f87b2900 1693 void *fn_addr;
43895be5 1694 if (devicep == NULL
f87b2900 1695 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 1696 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1697 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
ca4c3545 1698 {
84217e9d 1699 if (!fpc_done)
1700 {
1701 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1702 &tgt_align, &tgt_size);
1703 if (tgt_align)
1704 {
1705 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1706 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1707 tgt_align, tgt_size);
1708 }
1709 }
1710 gomp_target_fallback (fn, hostaddrs);
43895be5 1711 return;
ca4c3545 1712 }
995b27b9 1713
56686608 1714 struct target_mem_desc *tgt_vars;
56686608 1715 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1716 {
84217e9d 1717 if (!fpc_done)
1718 {
1719 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1720 &tgt_align, &tgt_size);
1721 if (tgt_align)
1722 {
1723 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1724 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1725 tgt_align, tgt_size);
1726 }
1727 }
56686608 1728 tgt_vars = NULL;
1729 }
1730 else
1731 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1732 true, GOMP_MAP_VARS_TARGET);
1733 devicep->run_func (devicep->target_id, fn_addr,
1734 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1735 args);
1736 if (tgt_vars)
1737 gomp_unmap_vars (tgt_vars, true);
bc7bff74 1738}
1739
9561765e 1740/* Host fallback for GOMP_target_data{,_ext} routines. */
43895be5 1741
1742static void
1743gomp_target_data_fallback (void)
1744{
1745 struct gomp_task_icv *icv = gomp_icv (false);
1746 if (icv->target_data)
1747 {
1748 /* Even when doing a host fallback, if there are any active
 1749	 #pragma omp target data constructs, we need to remember the
 1750	 new #pragma omp target data, as otherwise GOMP_target_end_data
1751 would get out of sync. */
1752 struct target_mem_desc *tgt
1753 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1754 GOMP_MAP_VARS_DATA);
1755 tgt->prev = icv->target_data;
1756 icv->target_data = tgt;
1757 }
1758}
1759
bc7bff74 1760void
dc19c8fd 1761GOMP_target_data (int device, const void *unused, size_t mapnum,
bc7bff74 1762 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1763{
995b27b9 1764 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1765
1766 if (devicep == NULL
56686608 1767 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1768 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
43895be5 1769 return gomp_target_data_fallback ();
ca4c3545 1770
995b27b9 1771 struct target_mem_desc *tgt
ca4c3545 1772 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
43895be5 1773 GOMP_MAP_VARS_DATA);
1774 struct gomp_task_icv *icv = gomp_icv (true);
1775 tgt->prev = icv->target_data;
1776 icv->target_data = tgt;
1777}
1778
1779void
9561765e 1780GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1781 size_t *sizes, unsigned short *kinds)
43895be5 1782{
1783 struct gomp_device_descr *devicep = resolve_device (device);
1784
1785 if (devicep == NULL
56686608 1786 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1787 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 1788 return gomp_target_data_fallback ();
1789
1790 struct target_mem_desc *tgt
1791 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1792 GOMP_MAP_VARS_DATA);
995b27b9 1793 struct gomp_task_icv *icv = gomp_icv (true);
1794 tgt->prev = icv->target_data;
1795 icv->target_data = tgt;
bc7bff74 1796}
1797
1798void
1799GOMP_target_end_data (void)
1800{
995b27b9 1801 struct gomp_task_icv *icv = gomp_icv (false);
1802 if (icv->target_data)
1803 {
1804 struct target_mem_desc *tgt = icv->target_data;
1805 icv->target_data = tgt->prev;
ca4c3545 1806 gomp_unmap_vars (tgt, true);
995b27b9 1807 }
bc7bff74 1808}
1809
1810void
dc19c8fd 1811GOMP_target_update (int device, const void *unused, size_t mapnum,
bc7bff74 1812 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1813{
995b27b9 1814 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1815
1816 if (devicep == NULL
56686608 1817 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1818 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
995b27b9 1819 return;
1820
43895be5 1821 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1822}
1823
1824void
9561765e 1825GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1826 size_t *sizes, unsigned short *kinds,
1827 unsigned int flags, void **depend)
43895be5 1828{
1829 struct gomp_device_descr *devicep = resolve_device (device);
1830
1831 /* If there are depend clauses, but nowait is not present,
1832 block the parent task until the dependencies are resolved
1833 and then just continue with the rest of the function as if it
 1834     were a merged task.  Until we are able to schedule tasks during
1835 variable mapping or unmapping, ignore nowait if depend clauses
1836 are not present. */
1837 if (depend != NULL)
1838 {
1839 struct gomp_thread *thr = gomp_thread ();
1840 if (thr->task && thr->task->depend_hash)
1841 {
1842 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1843 && thr->ts.team
1844 && !thr->task->final_task)
1845 {
a9833286 1846 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1847 mapnum, hostaddrs, sizes, kinds,
1848 flags | GOMP_TARGET_FLAG_UPDATE,
56686608 1849 depend, NULL, GOMP_TARGET_TASK_DATA))
a9833286 1850 return;
1851 }
1852 else
1853 {
1854 struct gomp_team *team = thr->ts.team;
1855 /* If parallel or taskgroup has been cancelled, don't start new
1856 tasks. */
1857 if (team
1858 && (gomp_team_barrier_cancelled (&team->barrier)
1859 || (thr->task->taskgroup
1860 && thr->task->taskgroup->cancelled)))
1861 return;
1862
1863 gomp_task_maybe_wait_for_dependencies (depend);
43895be5 1864 }
43895be5 1865 }
1866 }
1867
1868 if (devicep == NULL
56686608 1869 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1870 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 1871 return;
1872
1873 struct gomp_thread *thr = gomp_thread ();
1874 struct gomp_team *team = thr->ts.team;
1875 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
1876 if (team
1877 && (gomp_team_barrier_cancelled (&team->barrier)
1878 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1879 return;
1880
1881 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1882}
1883
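/* Handle the mappings of an OpenMP 'target exit data' construct on
   DEVICEP: decrement the reference counts (or force them to zero for the
   delete kinds), copy back GOMP_MAP_{ALWAYS_,}FROM data, and unmap entries
   whose reference count drops to zero.  */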
1884static void
1885gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1886 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1887{
1888 const int typemask = 0xff;
1889 size_t i;
ca4c3545 1890 gomp_mutex_lock (&devicep->lock);
f87b2900 1891 if (devicep->state == GOMP_DEVICE_FINALIZED)
1892 {
1893 gomp_mutex_unlock (&devicep->lock);
1894 return;
1895 }
1896
43895be5 1897 for (i = 0; i < mapnum; i++)
1898 {
1899 struct splay_tree_key_s cur_node;
1900 unsigned char kind = kinds[i] & typemask;
1901 switch (kind)
1902 {
1903 case GOMP_MAP_FROM:
1904 case GOMP_MAP_ALWAYS_FROM:
1905 case GOMP_MAP_DELETE:
1906 case GOMP_MAP_RELEASE:
1907 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1908 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1909 cur_node.host_start = (uintptr_t) hostaddrs[i];
1910 cur_node.host_end = cur_node.host_start + sizes[i];
1911 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1912 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
9561765e 1913 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
43895be5 1914 : splay_tree_lookup (&devicep->mem_map, &cur_node);
1915 if (!k)
1916 continue;
1917
1918 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1919 k->refcount--;
1920 if ((kind == GOMP_MAP_DELETE
1921 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1922 && k->refcount != REFCOUNT_INFINITY)
1923 k->refcount = 0;
1924
1925 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1926 || kind == GOMP_MAP_ALWAYS_FROM)
9b50ad1d 1927 gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1928 (void *) (k->tgt->tgt_start + k->tgt_offset
1929 + cur_node.host_start
1930 - k->host_start),
1931 cur_node.host_end - cur_node.host_start);
43895be5 1932 if (k->refcount == 0)
1933 {
1934 splay_tree_remove (&devicep->mem_map, k);
c0998828 1935 if (k->link_key)
1936 splay_tree_insert (&devicep->mem_map,
1937 (splay_tree_node) k->link_key);
43895be5 1938 if (k->tgt->refcount > 1)
1939 k->tgt->refcount--;
1940 else
1941 gomp_unmap_tgt (k->tgt);
1942 }
1943
1944 break;
1945 default:
1946 gomp_mutex_unlock (&devicep->lock);
1947 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1948 kind);
1949 }
1950 }
1951
ca4c3545 1952 gomp_mutex_unlock (&devicep->lock);
43895be5 1953}
ca4c3545 1954
43895be5 1955void
1956GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1957 size_t *sizes, unsigned short *kinds,
1958 unsigned int flags, void **depend)
1959{
1960 struct gomp_device_descr *devicep = resolve_device (device);
1961
1962 /* If there are depend clauses, but nowait is not present,
1963 block the parent task until the dependencies are resolved
1964 and then just continue with the rest of the function as if it
 1965     were a merged task.  Until we are able to schedule tasks during
1966 variable mapping or unmapping, ignore nowait if depend clauses
1967 are not present. */
1968 if (depend != NULL)
1969 {
1970 struct gomp_thread *thr = gomp_thread ();
1971 if (thr->task && thr->task->depend_hash)
1972 {
1973 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1974 && thr->ts.team
1975 && !thr->task->final_task)
1976 {
a9833286 1977 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1978 mapnum, hostaddrs, sizes, kinds,
56686608 1979 flags, depend, NULL,
a9833286 1980 GOMP_TARGET_TASK_DATA))
1981 return;
1982 }
1983 else
1984 {
1985 struct gomp_team *team = thr->ts.team;
1986 /* If parallel or taskgroup has been cancelled, don't start new
1987 tasks. */
1988 if (team
1989 && (gomp_team_barrier_cancelled (&team->barrier)
1990 || (thr->task->taskgroup
1991 && thr->task->taskgroup->cancelled)))
1992 return;
1993
1994 gomp_task_maybe_wait_for_dependencies (depend);
43895be5 1995 }
43895be5 1996 }
1997 }
1998
1999 if (devicep == NULL
56686608 2000 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2001 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2002 return;
2003
2004 struct gomp_thread *thr = gomp_thread ();
2005 struct gomp_team *team = thr->ts.team;
2006 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2007 if (team
2008 && (gomp_team_barrier_cancelled (&team->barrier)
2009 || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
2010 return;
2011
2012 size_t i;
2013 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2014 for (i = 0; i < mapnum; i++)
2015 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2016 {
2017 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2018 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2019 i += sizes[i];
2020 }
2021 else
2022 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2023 true, GOMP_MAP_VARS_ENTER_DATA);
2024 else
2025 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2026}
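/* Illustrative note (inferred from the loop above, not original text): for
   a GOMP_MAP_STRUCT entry, SIZES[i] holds the number of field mappings
   that follow it, so a struct with two mapped fields occupies three
   consecutive elements of HOSTADDRS/SIZES/KINDS, all passed to
   gomp_map_vars in one call before the loop skips them with
   'i += sizes[i]'.  */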
2027
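/* Task function for target tasks created by gomp_create_target_task.
   Returns true if the target region has been launched asynchronously via
   async_run_func and is still running, so the task must be completed
   later; returns false when all the work has been finished here.  */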
a9833286 2028bool
43895be5 2029gomp_target_task_fn (void *data)
2030{
2031 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
a9833286 2032 struct gomp_device_descr *devicep = ttask->devicep;
2033
43895be5 2034 if (ttask->fn != NULL)
2035 {
f87b2900 2036 void *fn_addr;
a9833286 2037 if (devicep == NULL
f87b2900 2038 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 2039 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2040 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
a9833286 2041 {
2042 ttask->state = GOMP_TARGET_TASK_FALLBACK;
84217e9d 2043 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
a9833286 2044 return false;
2045 }
2046
2047 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2048 {
56686608 2049 if (ttask->tgt)
2050 gomp_unmap_vars (ttask->tgt, true);
a9833286 2051 return false;
2052 }
2053
56686608 2054 void *actual_arguments;
2055 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2056 {
2057 ttask->tgt = NULL;
56686608 2058 actual_arguments = ttask->hostaddrs;
2059 }
2060 else
2061 {
2062 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2063 NULL, ttask->sizes, ttask->kinds, true,
2064 GOMP_MAP_VARS_TARGET);
2065 actual_arguments = (void *) ttask->tgt->tgt_start;
2066 }
a9833286 2067 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2068
56686608 2069 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2070 ttask->args, (void *) ttask);
a9833286 2071 return true;
43895be5 2072 }
a9833286 2073 else if (devicep == NULL
56686608 2074 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2075 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
a9833286 2076 return false;
43895be5 2077
2078 size_t i;
2079 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
a9833286 2080 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
43895be5 2081 ttask->kinds, true);
2082 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2083 for (i = 0; i < ttask->mapnum; i++)
2084 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2085 {
a9833286 2086 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2087 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2088 GOMP_MAP_VARS_ENTER_DATA);
43895be5 2089 i += ttask->sizes[i];
2090 }
2091 else
a9833286 2092 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2093 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
43895be5 2094 else
a9833286 2095 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2096 ttask->kinds);
2097 return false;
bc7bff74 2098}
2099
2100void
2101GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2102{
2103 if (thread_limit)
2104 {
2105 struct gomp_task_icv *icv = gomp_icv (true);
2106 icv->thread_limit_var
2107 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2108 }
2109 (void) num_teams;
2110}
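/* Worked example (illustrative only): GOMP_teams (4, 16) stores 16 into
   thread_limit_var of the current task's ICVs, clamping values above
   INT_MAX to UINT_MAX, while the requested team count is simply ignored
   by this host implementation.  */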
995b27b9 2111
43895be5 2112void *
2113omp_target_alloc (size_t size, int device_num)
2114{
2115 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2116 return malloc (size);
2117
2118 if (device_num < 0)
2119 return NULL;
2120
2121 struct gomp_device_descr *devicep = resolve_device (device_num);
2122 if (devicep == NULL)
2123 return NULL;
2124
56686608 2125 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2126 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2127 return malloc (size);
2128
2129 gomp_mutex_lock (&devicep->lock);
2130 void *ret = devicep->alloc_func (devicep->target_id, size);
2131 gomp_mutex_unlock (&devicep->lock);
2132 return ret;
2133}
2134
2135void
2136omp_target_free (void *device_ptr, int device_num)
2137{
2138 if (device_ptr == NULL)
2139 return;
2140
2141 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2142 {
2143 free (device_ptr);
2144 return;
2145 }
2146
2147 if (device_num < 0)
2148 return;
2149
2150 struct gomp_device_descr *devicep = resolve_device (device_num);
2151 if (devicep == NULL)
2152 return;
2153
56686608 2154 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2155 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2156 {
2157 free (device_ptr);
2158 return;
2159 }
2160
2161 gomp_mutex_lock (&devicep->lock);
9b50ad1d 2162 gomp_free_device_memory (devicep, device_ptr);
43895be5 2163 gomp_mutex_unlock (&devicep->lock);
2164}
2165
2166int
2167omp_target_is_present (void *ptr, int device_num)
2168{
2169 if (ptr == NULL)
2170 return 1;
2171
2172 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2173 return 1;
2174
2175 if (device_num < 0)
2176 return 0;
2177
2178 struct gomp_device_descr *devicep = resolve_device (device_num);
2179 if (devicep == NULL)
2180 return 0;
2181
56686608 2182 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2183 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2184 return 1;
2185
2186 gomp_mutex_lock (&devicep->lock);
2187 struct splay_tree_s *mem_map = &devicep->mem_map;
2188 struct splay_tree_key_s cur_node;
2189
2190 cur_node.host_start = (uintptr_t) ptr;
2191 cur_node.host_end = cur_node.host_start;
9561765e 2192 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
43895be5 2193 int ret = n != NULL;
2194 gomp_mutex_unlock (&devicep->lock);
2195 return ret;
2196}
2197
2198int
2199omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2200 size_t src_offset, int dst_device_num, int src_device_num)
2201{
2202 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
9b50ad1d 2203 bool ret;
43895be5 2204
2205 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2206 {
2207 if (dst_device_num < 0)
2208 return EINVAL;
2209
2210 dst_devicep = resolve_device (dst_device_num);
2211 if (dst_devicep == NULL)
2212 return EINVAL;
2213
56686608 2214 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2215 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2216 dst_devicep = NULL;
2217 }
2218 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2219 {
2220 if (src_device_num < 0)
2221 return EINVAL;
2222
2223 src_devicep = resolve_device (src_device_num);
2224 if (src_devicep == NULL)
2225 return EINVAL;
2226
56686608 2227 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2228 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2229 src_devicep = NULL;
2230 }
2231 if (src_devicep == NULL && dst_devicep == NULL)
2232 {
2233 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2234 return 0;
2235 }
2236 if (src_devicep == NULL)
2237 {
2238 gomp_mutex_lock (&dst_devicep->lock);
9b50ad1d 2239 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2240 (char *) dst + dst_offset,
2241 (char *) src + src_offset, length);
43895be5 2242 gomp_mutex_unlock (&dst_devicep->lock);
9b50ad1d 2243 return (ret ? 0 : EINVAL);
43895be5 2244 }
2245 if (dst_devicep == NULL)
2246 {
2247 gomp_mutex_lock (&src_devicep->lock);
9b50ad1d 2248 ret = src_devicep->dev2host_func (src_devicep->target_id,
2249 (char *) dst + dst_offset,
2250 (char *) src + src_offset, length);
43895be5 2251 gomp_mutex_unlock (&src_devicep->lock);
9b50ad1d 2252 return (ret ? 0 : EINVAL);
43895be5 2253 }
2254 if (src_devicep == dst_devicep)
2255 {
2256 gomp_mutex_lock (&src_devicep->lock);
9b50ad1d 2257 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2258 (char *) dst + dst_offset,
2259 (char *) src + src_offset, length);
43895be5 2260 gomp_mutex_unlock (&src_devicep->lock);
9b50ad1d 2261 return (ret ? 0 : EINVAL);
43895be5 2262 }
2263 return EINVAL;
2264}
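/* A minimal usage sketch (illustrative only; N and the buffers are
   hypothetical, and omp_get_initial_device () is used as the host device
   number):

     int d = omp_get_default_device ();
     double host_buf[N];
     void *dev_buf = omp_target_alloc (sizeof host_buf, d);
     if (dev_buf
	 && !omp_target_memcpy (dev_buf, host_buf, sizeof host_buf, 0, 0,
				d, omp_get_initial_device ()))
       {
	 ... use dev_buf in target regions on device d ...
       }
     omp_target_free (dev_buf, d);

   Note that the destination device number precedes the source one and that
   0 (not true) indicates success.  */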
2265
2266static int
2267omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2268 int num_dims, const size_t *volume,
2269 const size_t *dst_offsets,
2270 const size_t *src_offsets,
2271 const size_t *dst_dimensions,
2272 const size_t *src_dimensions,
2273 struct gomp_device_descr *dst_devicep,
2274 struct gomp_device_descr *src_devicep)
2275{
2276 size_t dst_slice = element_size;
2277 size_t src_slice = element_size;
2278 size_t j, dst_off, src_off, length;
2279 int i, ret;
2280
2281 if (num_dims == 1)
2282 {
2283 if (__builtin_mul_overflow (element_size, volume[0], &length)
2284 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2285 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2286 return EINVAL;
2287 if (dst_devicep == NULL && src_devicep == NULL)
9b50ad1d 2288 {
2289 memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2290 ret = 1;
2291 }
43895be5 2292 else if (src_devicep == NULL)
9b50ad1d 2293 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2294 (char *) dst + dst_off,
2295 (char *) src + src_off, length);
43895be5 2296 else if (dst_devicep == NULL)
9b50ad1d 2297 ret = src_devicep->dev2host_func (src_devicep->target_id,
2298 (char *) dst + dst_off,
2299 (char *) src + src_off, length);
43895be5 2300 else if (src_devicep == dst_devicep)
9b50ad1d 2301 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2302 (char *) dst + dst_off,
2303 (char *) src + src_off, length);
43895be5 2304 else
9b50ad1d 2305 ret = 0;
2306 return ret ? 0 : EINVAL;
43895be5 2307 }
2308
 2309  /* FIXME: it would be nice to have some plugin function to handle
 2310     num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
 2311     counts can be handled by the generic recursion below, and for
 2312     host-to-host copies it should be used even for any num_dims >= 2.  */
2313
2314 for (i = 1; i < num_dims; i++)
2315 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2316 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2317 return EINVAL;
2318 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2319 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2320 return EINVAL;
2321 for (j = 0; j < volume[0]; j++)
2322 {
2323 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2324 (char *) src + src_off,
2325 element_size, num_dims - 1,
2326 volume + 1, dst_offsets + 1,
2327 src_offsets + 1, dst_dimensions + 1,
2328 src_dimensions + 1, dst_devicep,
2329 src_devicep);
2330 if (ret)
2331 return ret;
2332 dst_off += dst_slice;
2333 src_off += src_slice;
2334 }
2335 return 0;
2336}
2337
2338int
2339omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2340 int num_dims, const size_t *volume,
2341 const size_t *dst_offsets,
2342 const size_t *src_offsets,
2343 const size_t *dst_dimensions,
2344 const size_t *src_dimensions,
2345 int dst_device_num, int src_device_num)
2346{
2347 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2348
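  /* Per the OpenMP spec, a call with both DST and SRC being NULL pointers
     queries the number of dimensions supported; the generic recursion
     below handles any NUM_DIMS, hence INT_MAX.  */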
2349 if (!dst && !src)
2350 return INT_MAX;
2351
2352 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2353 {
2354 if (dst_device_num < 0)
2355 return EINVAL;
2356
2357 dst_devicep = resolve_device (dst_device_num);
2358 if (dst_devicep == NULL)
2359 return EINVAL;
2360
56686608 2361 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2362 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2363 dst_devicep = NULL;
2364 }
2365 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2366 {
2367 if (src_device_num < 0)
2368 return EINVAL;
2369
2370 src_devicep = resolve_device (src_device_num);
2371 if (src_devicep == NULL)
2372 return EINVAL;
2373
56686608 2374 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2375 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2376 src_devicep = NULL;
2377 }
2378
2379 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2380 return EINVAL;
2381
2382 if (src_devicep)
2383 gomp_mutex_lock (&src_devicep->lock);
2384 else if (dst_devicep)
2385 gomp_mutex_lock (&dst_devicep->lock);
2386 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2387 volume, dst_offsets, src_offsets,
2388 dst_dimensions, src_dimensions,
2389 dst_devicep, src_devicep);
2390 if (src_devicep)
2391 gomp_mutex_unlock (&src_devicep->lock);
2392 else if (dst_devicep)
2393 gomp_mutex_unlock (&dst_devicep->lock);
2394 return ret;
2395}
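/* Worked example (illustrative only): copying a 2x3 sub-block that starts
   at row 1, column 2 of identically laid out 4x8 arrays of doubles on the
   host could look like

     size_t volume[] = { 2, 3 };
     size_t offsets[] = { 1, 2 };
     size_t dims[] = { 4, 8 };
     omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			     offsets, offsets, dims, dims,
			     omp_get_initial_device (),
			     omp_get_initial_device ());

   The worker above then performs volume[0] == 2 memcpy calls of
   3 * sizeof (double) bytes each, advancing by the 8-element row slice.  */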
2396
2397int
2398omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2399 size_t device_offset, int device_num)
2400{
2401 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2402 return EINVAL;
2403
2404 if (device_num < 0)
2405 return EINVAL;
2406
2407 struct gomp_device_descr *devicep = resolve_device (device_num);
2408 if (devicep == NULL)
2409 return EINVAL;
2410
56686608 2411 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2412 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2413 return EINVAL;
2414
2415 gomp_mutex_lock (&devicep->lock);
2416
2417 struct splay_tree_s *mem_map = &devicep->mem_map;
2418 struct splay_tree_key_s cur_node;
2419 int ret = EINVAL;
2420
2421 cur_node.host_start = (uintptr_t) host_ptr;
2422 cur_node.host_end = cur_node.host_start + size;
2423 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2424 if (n)
2425 {
2426 if (n->tgt->tgt_start + n->tgt_offset
2427 == (uintptr_t) device_ptr + device_offset
2428 && n->host_start <= cur_node.host_start
2429 && n->host_end >= cur_node.host_end)
2430 ret = 0;
2431 }
2432 else
2433 {
2434 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2435 tgt->array = gomp_malloc (sizeof (*tgt->array));
2436 tgt->refcount = 1;
2437 tgt->tgt_start = 0;
2438 tgt->tgt_end = 0;
2439 tgt->to_free = NULL;
2440 tgt->prev = NULL;
2441 tgt->list_count = 0;
2442 tgt->device_descr = devicep;
2443 splay_tree_node array = tgt->array;
2444 splay_tree_key k = &array->key;
2445 k->host_start = cur_node.host_start;
2446 k->host_end = cur_node.host_end;
2447 k->tgt = tgt;
2448 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2449 k->refcount = REFCOUNT_INFINITY;
43895be5 2450 array->left = NULL;
2451 array->right = NULL;
2452 splay_tree_insert (&devicep->mem_map, array);
2453 ret = 0;
2454 }
2455 gomp_mutex_unlock (&devicep->lock);
2456 return ret;
2457}
2458
2459int
2460omp_target_disassociate_ptr (void *ptr, int device_num)
2461{
2462 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2463 return EINVAL;
2464
2465 if (device_num < 0)
2466 return EINVAL;
2467
2468 struct gomp_device_descr *devicep = resolve_device (device_num);
2469 if (devicep == NULL)
2470 return EINVAL;
2471
2472 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2473 return EINVAL;
2474
2475 gomp_mutex_lock (&devicep->lock);
2476
2477 struct splay_tree_s *mem_map = &devicep->mem_map;
2478 struct splay_tree_key_s cur_node;
2479 int ret = EINVAL;
2480
2481 cur_node.host_start = (uintptr_t) ptr;
2482 cur_node.host_end = cur_node.host_start;
2483 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2484 if (n
2485 && n->host_start == cur_node.host_start
2486 && n->refcount == REFCOUNT_INFINITY
2487 && n->tgt->tgt_start == 0
2488 && n->tgt->to_free == NULL
2489 && n->tgt->refcount == 1
2490 && n->tgt->list_count == 0)
2491 {
2492 splay_tree_remove (&devicep->mem_map, n);
2493 gomp_unmap_tgt (n->tgt);
2494 ret = 0;
2495 }
2496
2497 gomp_mutex_unlock (&devicep->lock);
2498 return ret;
2499}
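/* A hedged usage sketch (not from the original sources): device memory
   from omp_target_alloc can be associated with a host range so that later
   map clauses reuse it instead of allocating, and disassociated once done:

     void *dev = omp_target_alloc (size, d);
     if (dev && !omp_target_associate_ptr (host, dev, size, 0, d))
       {
	 ... target regions on device d now use DEV for HOST ...
	 omp_target_disassociate_ptr (host, d);
       }
     omp_target_free (dev, d);
*/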
2500
995b27b9 2501#ifdef PLUGIN_SUPPORT
2502
 2503/* This function tries to load a plugin for DEVICE.  The name of the plugin
 2504   is passed in PLUGIN_NAME.
 2505   The handles of the found functions are stored in the corresponding fields
 2506   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2507
2508static bool
2509gomp_load_plugin_for_device (struct gomp_device_descr *device,
2510 const char *plugin_name)
2511{
c6aa02c6 2512 const char *err = NULL, *last_missing = NULL;
ca4c3545 2513
995b27b9 2514 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2515 if (!plugin_handle)
d3d8e632 2516 goto dl_fail;
995b27b9 2517
 2518  /* Check if all required functions are available in the plugin and store
d3d8e632 2519     their handles.  None of the symbols can legitimately be NULL,
 2520     so we don't need to check dlerror all the time.  */
ca4c3545 2521#define DLSYM(f) \
d3d8e632 2522 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2523 goto dl_fail
 2524  /* Similar, but a missing function is not an error.  Evaluates to false
 2525     if the lookup failed, true otherwise.  */
2526#define DLSYM_OPT(f, n) \
2527 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2528 || (last_missing = #n, 0))
2529
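  /* For instance, DLSYM (alloc) looks up "GOMP_OFFLOAD_alloc" in the
     plugin and stores it in device->alloc_func, jumping to dl_fail when
     the symbol is absent, whereas DLSYM_OPT (openacc.exec, openacc_exec)
     merely records the missing name in last_missing and yields false.  */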
2530 DLSYM (version);
2531 if (device->version_func () != GOMP_VERSION)
2532 {
2533 err = "plugin version mismatch";
2534 goto fail;
2535 }
ca4c3545 2536
2537 DLSYM (get_name);
2538 DLSYM (get_caps);
995b27b9 2539 DLSYM (get_type);
2540 DLSYM (get_num_devices);
995b27b9 2541 DLSYM (init_device);
ca4c3545 2542 DLSYM (fini_device);
0d8c703d 2543 DLSYM (load_image);
2544 DLSYM (unload_image);
995b27b9 2545 DLSYM (alloc);
2546 DLSYM (free);
2547 DLSYM (dev2host);
2548 DLSYM (host2dev);
ca4c3545 2549 device->capabilities = device->get_caps_func ();
2550 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
43895be5 2551 {
2552 DLSYM (run);
a9833286 2553 DLSYM (async_run);
56686608 2554 DLSYM_OPT (can_run, can_run);
43895be5 2555 DLSYM (dev2dev);
2556 }
ca4c3545 2557 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2558 {
65caa53b 2559 if (!DLSYM_OPT (openacc.exec, openacc_exec)
d3d8e632 2560 || !DLSYM_OPT (openacc.register_async_cleanup,
2561 openacc_register_async_cleanup)
2562 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2563 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2564 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2565 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2566 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2567 || !DLSYM_OPT (openacc.async_wait_all_async,
2568 openacc_async_wait_all_async)
2569 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2570 || !DLSYM_OPT (openacc.create_thread_data,
2571 openacc_create_thread_data)
2572 || !DLSYM_OPT (openacc.destroy_thread_data,
2573 openacc_destroy_thread_data))
ca4c3545 2574 {
d3d8e632 2575 /* Require all the OpenACC handlers if we have
2576 GOMP_OFFLOAD_CAP_OPENACC_200. */
ca4c3545 2577 err = "plugin missing OpenACC handler function";
d3d8e632 2578 goto fail;
ca4c3545 2579 }
d3d8e632 2580
2581 unsigned cuda = 0;
2582 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
65caa53b 2583 openacc_cuda_get_current_device);
d3d8e632 2584 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
65caa53b 2585 openacc_cuda_get_current_context);
2586 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2587 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
d3d8e632 2588 if (cuda && cuda != 4)
ca4c3545 2589 {
d3d8e632 2590 /* Make sure all the CUDA functions are there if any of them are. */
ca4c3545 2591 err = "plugin missing OpenACC CUDA handler function";
d3d8e632 2592 goto fail;
ca4c3545 2593 }
2594 }
995b27b9 2595#undef DLSYM
ca4c3545 2596#undef DLSYM_OPT
995b27b9 2597
d3d8e632 2598 return 1;
2599
2600 dl_fail:
2601 err = dlerror ();
2602 fail:
2603 gomp_error ("while loading %s: %s", plugin_name, err);
2604 if (last_missing)
2605 gomp_error ("missing function was %s", last_missing);
2606 if (plugin_handle)
2607 dlclose (plugin_handle);
2608
2609 return 0;
995b27b9 2610}
2611
f87b2900 2612/* This function finalizes all initialized devices. */
2613
2614static void
2615gomp_target_fini (void)
2616{
2617 int i;
2618 for (i = 0; i < num_devices; i++)
2619 {
9b50ad1d 2620 bool ret = true;
f87b2900 2621 struct gomp_device_descr *devicep = &devices[i];
2622 gomp_mutex_lock (&devicep->lock);
2623 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2624 {
9b50ad1d 2625 ret = devicep->fini_device_func (devicep->target_id);
f87b2900 2626 devicep->state = GOMP_DEVICE_FINALIZED;
2627 }
2628 gomp_mutex_unlock (&devicep->lock);
9b50ad1d 2629 if (!ret)
2630 gomp_fatal ("device finalization failed");
f87b2900 2631 }
2632}
2633
995b27b9 2634/* This function initializes the runtime needed for offloading.
ca4c3545 2635 It parses the list of offload targets and tries to load the plugins for
2636 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2637 will be set, and the array DEVICES initialized, containing descriptors for
 2638   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
 2639   by the others.  */
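/* For example (an illustration; SONAME_SUFFIX (1) is assumed to expand to
   ".so.1" on ELF targets), an OFFLOAD_TARGETS entry of "nvptx" would make
   the loop below construct and dlopen "libgomp-plugin-nvptx.so.1".  */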
995b27b9 2640
2641static void
2642gomp_target_init (void)
2643{
 2644  const char *prefix = "libgomp-plugin-";
4fda895a 2645 const char *suffix = SONAME_SUFFIX (1);
995b27b9 2646 const char *cur, *next;
2647 char *plugin_name;
2648 int i, new_num_devices;
2649
2650 num_devices = 0;
2651 devices = NULL;
2652
2653 cur = OFFLOAD_TARGETS;
2654 if (*cur)
2655 do
2656 {
2657 struct gomp_device_descr current_device;
02b7bb30 2658 size_t prefix_len, suffix_len, cur_len;
995b27b9 2659
2660 next = strchr (cur, ',');
2661
02b7bb30 2662 prefix_len = strlen (prefix);
2663 cur_len = next ? next - cur : strlen (cur);
2664 suffix_len = strlen (suffix);
2665
2666 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
995b27b9 2667 if (!plugin_name)
2668 {
2669 num_devices = 0;
2670 break;
2671 }
2672
02b7bb30 2673 memcpy (plugin_name, prefix, prefix_len);
2674 memcpy (plugin_name + prefix_len, cur, cur_len);
2675 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
995b27b9 2676
2677 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2678 {
2679 new_num_devices = current_device.get_num_devices_func ();
2680 if (new_num_devices >= 1)
2681 {
ca4c3545 2682 /* Augment DEVICES and NUM_DEVICES. */
2683
995b27b9 2684 devices = realloc (devices, (num_devices + new_num_devices)
2685 * sizeof (struct gomp_device_descr));
2686 if (!devices)
2687 {
2688 num_devices = 0;
2689 free (plugin_name);
2690 break;
2691 }
2692
ca4c3545 2693 current_device.name = current_device.get_name_func ();
2694 /* current_device.capabilities has already been set. */
995b27b9 2695 current_device.type = current_device.get_type_func ();
0d8c703d 2696 current_device.mem_map.root = NULL;
f87b2900 2697 current_device.state = GOMP_DEVICE_UNINITIALIZED;
ca4c3545 2698 current_device.openacc.data_environ = NULL;
995b27b9 2699 for (i = 0; i < new_num_devices; i++)
2700 {
995b27b9 2701 current_device.target_id = i;
2702 devices[num_devices] = current_device;
ca4c3545 2703 gomp_mutex_init (&devices[num_devices].lock);
995b27b9 2704 num_devices++;
2705 }
2706 }
2707 }
2708
2709 free (plugin_name);
2710 cur = next + 1;
2711 }
2712 while (next);
2713
ca4c3545 2714 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2715 NUM_DEVICES_OPENMP. */
2716 struct gomp_device_descr *devices_s
2717 = malloc (num_devices * sizeof (struct gomp_device_descr));
2718 if (!devices_s)
2719 {
2720 num_devices = 0;
2721 free (devices);
2722 devices = NULL;
2723 }
2724 num_devices_openmp = 0;
2725 for (i = 0; i < num_devices; i++)
2726 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2727 devices_s[num_devices_openmp++] = devices[i];
2728 int num_devices_after_openmp = num_devices_openmp;
2729 for (i = 0; i < num_devices; i++)
2730 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2731 devices_s[num_devices_after_openmp++] = devices[i];
2732 free (devices);
2733 devices = devices_s;
2734
2735 for (i = 0; i < num_devices; i++)
2736 {
ca4c3545 2737 /* The 'devices' array can be moved (by the realloc call) until we have
2738 found all the plugins, so registering with the OpenACC runtime (which
2739 takes a copy of the pointer argument) must be delayed until now. */
2740 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2741 goacc_register (&devices[i]);
2742 }
f87b2900 2743
2744 if (atexit (gomp_target_fini) != 0)
2745 gomp_fatal ("atexit failed");
995b27b9 2746}
2747
2748#else /* PLUGIN_SUPPORT */
 2749/* If dlfcn.h is unavailable we always fall back to host execution.
 2750   GOMP_target* routines are just stubs for this case.  */
2751static void
2752gomp_target_init (void)
2753{
2754}
2755#endif /* PLUGIN_SUPPORT */