fbd26352 1/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
bc7bff74 2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
c35c9a62 4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
bc7bff74 6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
995b27b9 26/* This file contains the support for offloading. */
bc7bff74 27
995b27b9 28#include "config.h"
bc7bff74 29#include "libgomp.h"
ca4c3545 30#include "oacc-plugin.h"
31#include "oacc-int.h"
32#include "gomp-constants.h"
bc7bff74 33#include <limits.h>
34#include <stdbool.h>
35#include <stdlib.h>
2634aed9 36#ifdef HAVE_INTTYPES_H
37# include <inttypes.h> /* For PRIu64. */
38#endif
bc7bff74 39#include <string.h>
ca4c3545 40#include <assert.h>
43895be5 41#include <errno.h>
bc7bff74 42
995b27b9 43#ifdef PLUGIN_SUPPORT
44#include <dlfcn.h>
4fda895a 45#include "plugin-suffix.h"
995b27b9 46#endif
47
48static void gomp_target_init (void);
49
ca4c3545 50/* The whole initialization code for offloading plugins is only run once. */
995b27b9 51static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52
0d8c703d 53/* Mutex for offload image registration. */
54static gomp_mutex_t register_lock;
55
995b27b9 56/* This structure describes an offload image.
 57 It contains the type of the target device, a pointer to the host table
 58 descriptor, and a pointer to the target data. */
59struct offload_image_descr {
d3d8e632 60 unsigned version;
995b27b9 61 enum offload_target_type type;
4e985e0f 62 const void *host_table;
70046055 63 const void *target_data;
995b27b9 64};
65
66/* Array of descriptors of offload images. */
67static struct offload_image_descr *offload_images;
68
69/* Total number of offload images. */
70static int num_offload_images;
71
72/* Array of descriptors for all available devices. */
73static struct gomp_device_descr *devices;
74
75/* Total number of available devices. */
76static int num_devices;
77
ca4c3545 78/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
79static int num_devices_openmp;
80
0d8c703d 81/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82
83static void *
84gomp_realloc_unlock (void *old, size_t size)
85{
86 void *ret = realloc (old, size);
87 if (ret == NULL)
88 {
89 gomp_mutex_unlock (&register_lock);
90 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91 }
92 return ret;
93}
94
ca4c3545 95attribute_hidden void
96gomp_init_targets_once (void)
995b27b9 97{
ca4c3545 98 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
99}
995b27b9 100
bc7bff74 101attribute_hidden int
102gomp_get_num_devices (void)
103{
ca4c3545 104 gomp_init_targets_once ();
105 return num_devices_openmp;
995b27b9 106}
107
108static struct gomp_device_descr *
109resolve_device (int device_id)
110{
ca4c3545 111 if (device_id == GOMP_DEVICE_ICV)
995b27b9 112 {
113 struct gomp_task_icv *icv = gomp_icv (false);
114 device_id = icv->default_device_var;
115 }
116
117 if (device_id < 0 || device_id >= gomp_get_num_devices ())
118 return NULL;
119
43895be5 120 gomp_mutex_lock (&devices[device_id].lock);
f87b2900 121 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
43895be5 122 gomp_init_device (&devices[device_id]);
f87b2900 123 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124 {
125 gomp_mutex_unlock (&devices[device_id].lock);
126 return NULL;
127 }
43895be5 128 gomp_mutex_unlock (&devices[device_id].lock);
129
995b27b9 130 return &devices[device_id];
131}
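
/* For example, GOMP_target (GOMP_DEVICE_ICV, ...) goes through the
   default-device-var ICV resolved here, which the program controls via
   omp_set_default_device or the OMP_DEFAULT_DEVICE environment variable.  */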
132
133
43895be5 134static inline splay_tree_key
135gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
136{
137 if (key->host_start != key->host_end)
138 return splay_tree_lookup (mem_map, key);
139
140 key->host_end++;
141 splay_tree_key n = splay_tree_lookup (mem_map, key);
142 key->host_end--;
143 if (n)
144 return n;
145 key->host_start--;
146 n = splay_tree_lookup (mem_map, key);
147 key->host_start++;
148 if (n)
149 return n;
150 return splay_tree_lookup (mem_map, key);
151}
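
/* Illustrative use of the lookup above (a sketch, not part of libgomp;
   MEM_MAP and PTR are placeholders): a zero-length array section such as
   "map(tofrom: ptr[0:0])" produces a key with host_start == host_end, which
   a plain splay_tree_lookup would only match exactly.  The helper therefore
   also probes one byte past the end and one byte before the start, so the
   key still resolves to an enclosing or abutting mapping.  */
#if 0
  struct splay_tree_key_s key;
  key.host_start = (uintptr_t) ptr;	/* Start of the section.  */
  key.host_end = key.host_start;	/* Zero-length section.  */
  splay_tree_key n = gomp_map_lookup (mem_map, &key);
#endif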
152
9561765e 153static inline splay_tree_key
154gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155{
156 if (key->host_start != key->host_end)
157 return splay_tree_lookup (mem_map, key);
158
159 key->host_end++;
160 splay_tree_key n = splay_tree_lookup (mem_map, key);
161 key->host_end--;
162 return n;
163}
164
9b50ad1d 165static inline void
166gomp_device_copy (struct gomp_device_descr *devicep,
167 bool (*copy_func) (int, void *, const void *, size_t),
168 const char *dst, void *dstaddr,
169 const char *src, const void *srcaddr,
170 size_t size)
171{
172 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173 {
174 gomp_mutex_unlock (&devicep->lock);
175 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177 }
178}
179
5af416bd 180/* Infrastructure for coalescing host-to-device memory transfers that are
181 adjacent or nearly adjacent (in device addresses). */
182
65c9e9f9 183struct gomp_coalesce_chunk
184{
185 /* The starting and ending point of a coalesced chunk of memory. */
186 size_t start, end;
187};
188
5af416bd 189struct gomp_coalesce_buf
190{
191 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
192 it will be copied to the device. */
193 void *buf;
194 struct target_mem_desc *tgt;
65c9e9f9 195 /* Array of chunks; chunks[i].start is the starting offset and
196 chunks[i].end the ending offset, relative to the tgt->tgt_start device
5af416bd 197 address, of chunks to be copied to buf and later to the device. */
65c9e9f9 198 struct gomp_coalesce_chunk *chunks;
5af416bd 199 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
200 be performed. */
201 long chunk_cnt;
202 /* During construction of chunks array, how many memory regions are within
203 the last chunk. If there is just one memory region for a chunk, we copy
204 it directly to device rather than going through buf. */
205 long use_cnt;
206};
207
208/* Maximum size of a memory region considered for coalescing. Larger copies
 209 are performed directly. */
210#define MAX_COALESCE_BUF_SIZE (32 * 1024)
211
212/* Maximum size of a gap between regions for them to be copied within the
 213 same chunk. All the device offsets considered are within
214 newly allocated device memory, so it isn't fatal if we copy some padding
215 in between from host to device. The gaps come either from alignment
216 padding or from memory regions which are not supposed to be copied from
217 host to device (e.g. map(alloc:), map(from:) etc.). */
218#define MAX_COALESCE_BUF_GAP (4 * 1024)
219
220/* Add region with device tgt_start relative offset and length to CBUF. */
221
222static inline void
223gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
224{
225 if (len > MAX_COALESCE_BUF_SIZE || len == 0)
226 return;
227 if (cbuf->chunk_cnt)
228 {
229 if (cbuf->chunk_cnt < 0)
230 return;
65c9e9f9 231 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
5af416bd 232 {
233 cbuf->chunk_cnt = -1;
234 return;
235 }
65c9e9f9 236 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
5af416bd 237 {
65c9e9f9 238 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
5af416bd 239 cbuf->use_cnt++;
240 return;
241 }
242 /* If the last chunk is only used by one mapping, discard it,
243 as it will be one host to device copy anyway and
244 memcpying it around will only waste cycles. */
245 if (cbuf->use_cnt == 1)
246 cbuf->chunk_cnt--;
247 }
65c9e9f9 248 cbuf->chunks[cbuf->chunk_cnt].start = start;
249 cbuf->chunks[cbuf->chunk_cnt].end = start + len;
5af416bd 250 cbuf->chunk_cnt++;
251 cbuf->use_cnt = 1;
252}
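
/* Worked example (illustrative; the offsets are hypothetical): with
   MAX_COALESCE_BUF_GAP == 4096, adding regions at tgt_start-relative
   offsets [0, 100) and [2048, 2100) yields a single chunk [0, 2100),
   since the 1948-byte gap is below the threshold; both regions are then
   memcpy'd into cbuf->buf and transferred in one host-to-device copy.
   A third region at offset 8192 exceeds the gap and starts a new chunk.  */
#if 0
  gomp_coalesce_buf_add (&cbuf, 0, 100);	/* chunks[0] = [0, 100).  */
  gomp_coalesce_buf_add (&cbuf, 2048, 52);	/* chunks[0] = [0, 2100).  */
  gomp_coalesce_buf_add (&cbuf, 8192, 100);	/* chunks[1] = [8192, 8292).  */
#endif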
253
254/* Return true for mapping kinds which need to copy data from the
255 host to device for regions that weren't previously mapped. */
256
257static inline bool
258gomp_to_device_kind_p (int kind)
259{
260 switch (kind)
261 {
262 case GOMP_MAP_ALLOC:
263 case GOMP_MAP_FROM:
264 case GOMP_MAP_FORCE_ALLOC:
265 case GOMP_MAP_ALWAYS_FROM:
266 return false;
267 default:
268 return true;
269 }
270}
271
9b50ad1d 272static void
273gomp_copy_host2dev (struct gomp_device_descr *devicep,
5af416bd 274 void *d, const void *h, size_t sz,
275 struct gomp_coalesce_buf *cbuf)
9b50ad1d 276{
5af416bd 277 if (cbuf)
278 {
279 uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
65c9e9f9 280 if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
5af416bd 281 {
282 long first = 0;
283 long last = cbuf->chunk_cnt - 1;
284 while (first <= last)
285 {
286 long middle = (first + last) >> 1;
65c9e9f9 287 if (cbuf->chunks[middle].end <= doff)
5af416bd 288 first = middle + 1;
65c9e9f9 289 else if (cbuf->chunks[middle].start <= doff)
5af416bd 290 {
65c9e9f9 291 if (doff + sz > cbuf->chunks[middle].end)
5af416bd 292 gomp_fatal ("internal libgomp cbuf error");
65c9e9f9 293 memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
5af416bd 294 h, sz);
295 return;
296 }
297 else
298 last = middle - 1;
299 }
300 }
301 }
9b50ad1d 302 gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
303}
304
305static void
306gomp_copy_dev2host (struct gomp_device_descr *devicep,
307 void *h, const void *d, size_t sz)
308{
309 gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
310}
311
312static void
313gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
314{
315 if (!devicep->free_func (devicep->target_id, devptr))
316 {
317 gomp_mutex_unlock (&devicep->lock);
318 gomp_fatal ("error in freeing device memory block at %p", devptr);
319 }
320}
321
9561765e 322/* Handle the case where gomp_map_lookup, splay_tree_lookup or
323 gomp_map_0len_lookup found oldn for newn.
995b27b9 324 Helper function of gomp_map_vars. */
325
326static inline void
0d8c703d 327gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
43895be5 328 splay_tree_key newn, struct target_var_desc *tgt_var,
5af416bd 329 unsigned char kind, struct gomp_coalesce_buf *cbuf)
995b27b9 330{
43895be5 331 tgt_var->key = oldn;
332 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
333 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
334 tgt_var->offset = newn->host_start - oldn->host_start;
335 tgt_var->length = newn->host_end - newn->host_start;
336
ca4c3545 337 if ((kind & GOMP_MAP_FLAG_FORCE)
338 || oldn->host_start > newn->host_start
995b27b9 339 || oldn->host_end < newn->host_end)
0d8c703d 340 {
341 gomp_mutex_unlock (&devicep->lock);
342 gomp_fatal ("Trying to map into device [%p..%p) object when "
343 "[%p..%p) is already mapped",
344 (void *) newn->host_start, (void *) newn->host_end,
345 (void *) oldn->host_start, (void *) oldn->host_end);
346 }
43895be5 347
348 if (GOMP_MAP_ALWAYS_TO_P (kind))
9b50ad1d 349 gomp_copy_host2dev (devicep,
350 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
351 + newn->host_start - oldn->host_start),
352 (void *) newn->host_start,
5af416bd 353 newn->host_end - newn->host_start, cbuf);
9b50ad1d 354
43895be5 355 if (oldn->refcount != REFCOUNT_INFINITY)
356 oldn->refcount++;
995b27b9 357}
358
ca4c3545 359static int
43895be5 360get_kind (bool short_mapkind, void *kinds, int idx)
ca4c3545 361{
43895be5 362 return short_mapkind ? ((unsigned short *) kinds)[idx]
363 : ((unsigned char *) kinds)[idx];
ca4c3545 364}
365
84f53106 366static void
367gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
5af416bd 368 uintptr_t target_offset, uintptr_t bias,
369 struct gomp_coalesce_buf *cbuf)
84f53106 370{
371 struct gomp_device_descr *devicep = tgt->device_descr;
372 struct splay_tree_s *mem_map = &devicep->mem_map;
373 struct splay_tree_key_s cur_node;
374
375 cur_node.host_start = host_ptr;
376 if (cur_node.host_start == (uintptr_t) NULL)
377 {
378 cur_node.tgt_offset = (uintptr_t) NULL;
9b50ad1d 379 gomp_copy_host2dev (devicep,
380 (void *) (tgt->tgt_start + target_offset),
381 (void *) &cur_node.tgt_offset,
5af416bd 382 sizeof (void *), cbuf);
84f53106 383 return;
384 }
385 /* Add bias to the pointer value. */
386 cur_node.host_start += bias;
43895be5 387 cur_node.host_end = cur_node.host_start;
388 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
84f53106 389 if (n == NULL)
390 {
391 gomp_mutex_unlock (&devicep->lock);
392 gomp_fatal ("Pointer target of array section wasn't mapped");
393 }
394 cur_node.host_start -= n->host_start;
395 cur_node.tgt_offset
396 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
397 /* At this point tgt_offset is target address of the
398 array section. Now subtract bias to get what we want
399 to initialize the pointer with. */
400 cur_node.tgt_offset -= bias;
9b50ad1d 401 gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
5af416bd 402 (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
84f53106 403}
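
/* Example of the bias handling above (illustrative): for
   "map(tofrom: p[10:4])" with int *p, the compiler passes a bias of
   10 * sizeof (int) == 40.  Adding the bias to the host pointer value
   lands inside the mapped section p[10..14), so the lookup finds it, and
   the same bias is subtracted again before the device address is stored,
   so that p[10] dereferences correctly on the device.  */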
404
43895be5 405static void
406gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
407 size_t first, size_t i, void **hostaddrs,
5af416bd 408 size_t *sizes, void *kinds,
409 struct gomp_coalesce_buf *cbuf)
43895be5 410{
411 struct gomp_device_descr *devicep = tgt->device_descr;
412 struct splay_tree_s *mem_map = &devicep->mem_map;
413 struct splay_tree_key_s cur_node;
414 int kind;
415 const bool short_mapkind = true;
416 const int typemask = short_mapkind ? 0xff : 0x7;
417
418 cur_node.host_start = (uintptr_t) hostaddrs[i];
419 cur_node.host_end = cur_node.host_start + sizes[i];
420 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
421 kind = get_kind (short_mapkind, kinds, i);
422 if (n2
423 && n2->tgt == n->tgt
424 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
425 {
426 gomp_map_vars_existing (devicep, n2, &cur_node,
5af416bd 427 &tgt->list[i], kind & typemask, cbuf);
43895be5 428 return;
429 }
430 if (sizes[i] == 0)
431 {
432 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
433 {
434 cur_node.host_start--;
435 n2 = splay_tree_lookup (mem_map, &cur_node);
436 cur_node.host_start++;
437 if (n2
438 && n2->tgt == n->tgt
439 && n2->host_start - n->host_start
440 == n2->tgt_offset - n->tgt_offset)
441 {
442 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
5af416bd 443 kind & typemask, cbuf);
43895be5 444 return;
445 }
446 }
447 cur_node.host_end++;
448 n2 = splay_tree_lookup (mem_map, &cur_node);
449 cur_node.host_end--;
450 if (n2
451 && n2->tgt == n->tgt
452 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
453 {
454 gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
5af416bd 455 kind & typemask, cbuf);
43895be5 456 return;
457 }
458 }
459 gomp_mutex_unlock (&devicep->lock);
460 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
461 "other mapped elements from the same structure weren't mapped "
462 "together with it", (void *) cur_node.host_start,
463 (void *) cur_node.host_end);
464}
465
9561765e 466static inline uintptr_t
467gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
468{
469 if (tgt->list[i].key != NULL)
470 return tgt->list[i].key->tgt->tgt_start
471 + tgt->list[i].key->tgt_offset
472 + tgt->list[i].offset;
473 if (tgt->list[i].offset == ~(uintptr_t) 0)
474 return (uintptr_t) hostaddrs[i];
475 if (tgt->list[i].offset == ~(uintptr_t) 1)
476 return 0;
477 if (tgt->list[i].offset == ~(uintptr_t) 2)
478 return tgt->list[i + 1].key->tgt->tgt_start
479 + tgt->list[i + 1].key->tgt_offset
480 + tgt->list[i + 1].offset
481 + (uintptr_t) hostaddrs[i]
482 - (uintptr_t) hostaddrs[i + 1];
483 return tgt->tgt_start + tgt->list[i].offset;
484}
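
/* Summary of the tgt->list[i].offset encodings consumed above, as set up
   by gomp_map_vars below:
     ~(uintptr_t) 0  pass hostaddrs[i] through unchanged (NULL addresses,
		     GOMP_MAP_FIRSTPRIVATE_INT, and GOMP_MAP_USE_DEVICE_PTR,
		     for which hostaddrs[i] was already rewritten to the
		     device address);
     ~(uintptr_t) 1  pass a NULL pointer (e.g. an unmapped
		     GOMP_MAP_ZERO_LEN_ARRAY_SECTION);
     ~(uintptr_t) 2  GOMP_MAP_STRUCT, whose device address is derived from
		     the following list entry;
     anything else   an offset into the newly allocated device block.  */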
485
ca4c3545 486attribute_hidden struct target_mem_desc *
995b27b9 487gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
ca4c3545 488 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
43895be5 489 bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
995b27b9 490{
491 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
43895be5 492 bool has_firstprivate = false;
493 const int rshift = short_mapkind ? 8 : 3;
494 const int typemask = short_mapkind ? 0xff : 0x7;
0d8c703d 495 struct splay_tree_s *mem_map = &devicep->mem_map;
995b27b9 496 struct splay_tree_key_s cur_node;
497 struct target_mem_desc *tgt
498 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
499 tgt->list_count = mapnum;
43895be5 500 tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
995b27b9 501 tgt->device_descr = devicep;
5af416bd 502 struct gomp_coalesce_buf cbuf, *cbufp = NULL;
995b27b9 503
504 if (mapnum == 0)
3bdac239 505 {
506 tgt->tgt_start = 0;
507 tgt->tgt_end = 0;
508 return tgt;
509 }
995b27b9 510
511 tgt_align = sizeof (void *);
512 tgt_size = 0;
5af416bd 513 cbuf.chunks = NULL;
514 cbuf.chunk_cnt = -1;
515 cbuf.use_cnt = 0;
516 cbuf.buf = NULL;
517 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
518 {
65c9e9f9 519 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
520 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
5af416bd 521 cbuf.chunk_cnt = 0;
522 }
43895be5 523 if (pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 524 {
525 size_t align = 4 * sizeof (void *);
526 tgt_align = align;
527 tgt_size = mapnum * sizeof (void *);
5af416bd 528 cbuf.chunk_cnt = 1;
529 cbuf.use_cnt = 1 + (mapnum > 1);
65c9e9f9 530 cbuf.chunks[0].start = 0;
531 cbuf.chunks[0].end = tgt_size;
995b27b9 532 }
533
0d8c703d 534 gomp_mutex_lock (&devicep->lock);
f87b2900 535 if (devicep->state == GOMP_DEVICE_FINALIZED)
536 {
537 gomp_mutex_unlock (&devicep->lock);
538 free (tgt);
539 return NULL;
540 }
ca4c3545 541
995b27b9 542 for (i = 0; i < mapnum; i++)
543 {
43895be5 544 int kind = get_kind (short_mapkind, kinds, i);
545 if (hostaddrs[i] == NULL
546 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
995b27b9 547 {
43895be5 548 tgt->list[i].key = NULL;
549 tgt->list[i].offset = ~(uintptr_t) 0;
550 continue;
551 }
552 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
553 {
554 cur_node.host_start = (uintptr_t) hostaddrs[i];
555 cur_node.host_end = cur_node.host_start;
556 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
557 if (n == NULL)
558 {
559 gomp_mutex_unlock (&devicep->lock);
560 gomp_fatal ("use_device_ptr pointer wasn't mapped");
561 }
562 cur_node.host_start -= n->host_start;
563 hostaddrs[i]
564 = (void *) (n->tgt->tgt_start + n->tgt_offset
565 + cur_node.host_start);
566 tgt->list[i].key = NULL;
567 tgt->list[i].offset = ~(uintptr_t) 0;
568 continue;
569 }
570 else if ((kind & typemask) == GOMP_MAP_STRUCT)
571 {
572 size_t first = i + 1;
573 size_t last = i + sizes[i];
574 cur_node.host_start = (uintptr_t) hostaddrs[i];
575 cur_node.host_end = (uintptr_t) hostaddrs[last]
576 + sizes[last];
577 tgt->list[i].key = NULL;
578 tgt->list[i].offset = ~(uintptr_t) 2;
579 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
580 if (n == NULL)
581 {
582 size_t align = (size_t) 1 << (kind >> rshift);
583 if (tgt_align < align)
584 tgt_align = align;
5af416bd 585 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
43895be5 586 tgt_size = (tgt_size + align - 1) & ~(align - 1);
5af416bd 587 tgt_size += cur_node.host_end - cur_node.host_start;
43895be5 588 not_found_cnt += last - i;
589 for (i = first; i <= last; i++)
5af416bd 590 {
591 tgt->list[i].key = NULL;
592 if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
593 & typemask))
594 gomp_coalesce_buf_add (&cbuf,
595 tgt_size - cur_node.host_end
596 + (uintptr_t) hostaddrs[i],
597 sizes[i]);
598 }
43895be5 599 i--;
600 continue;
601 }
602 for (i = first; i <= last; i++)
603 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
5af416bd 604 sizes, kinds, NULL);
43895be5 605 i--;
995b27b9 606 continue;
607 }
9561765e 608 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
609 {
610 tgt->list[i].key = NULL;
611 tgt->list[i].offset = ~(uintptr_t) 1;
612 has_firstprivate = true;
613 continue;
614 }
995b27b9 615 cur_node.host_start = (uintptr_t) hostaddrs[i];
ca4c3545 616 if (!GOMP_MAP_POINTER_P (kind & typemask))
995b27b9 617 cur_node.host_end = cur_node.host_start + sizes[i];
618 else
619 cur_node.host_end = cur_node.host_start + sizeof (void *);
43895be5 620 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
621 {
622 tgt->list[i].key = NULL;
623
624 size_t align = (size_t) 1 << (kind >> rshift);
625 if (tgt_align < align)
626 tgt_align = align;
627 tgt_size = (tgt_size + align - 1) & ~(align - 1);
5af416bd 628 gomp_coalesce_buf_add (&cbuf, tgt_size,
629 cur_node.host_end - cur_node.host_start);
43895be5 630 tgt_size += cur_node.host_end - cur_node.host_start;
631 has_firstprivate = true;
632 continue;
633 }
634 splay_tree_key n;
635 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
995b27b9 636 {
9561765e 637 n = gomp_map_0len_lookup (mem_map, &cur_node);
43895be5 638 if (!n)
639 {
640 tgt->list[i].key = NULL;
641 tgt->list[i].offset = ~(uintptr_t) 1;
642 continue;
643 }
995b27b9 644 }
43895be5 645 else
646 n = splay_tree_lookup (mem_map, &cur_node);
c0998828 647 if (n && n->refcount != REFCOUNT_LINK)
43895be5 648 gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
5af416bd 649 kind & typemask, NULL);
995b27b9 650 else
651 {
43895be5 652 tgt->list[i].key = NULL;
ca4c3545 653
654 size_t align = (size_t) 1 << (kind >> rshift);
995b27b9 655 not_found_cnt++;
656 if (tgt_align < align)
657 tgt_align = align;
658 tgt_size = (tgt_size + align - 1) & ~(align - 1);
5af416bd 659 if (gomp_to_device_kind_p (kind & typemask))
660 gomp_coalesce_buf_add (&cbuf, tgt_size,
661 cur_node.host_end - cur_node.host_start);
995b27b9 662 tgt_size += cur_node.host_end - cur_node.host_start;
ca4c3545 663 if ((kind & typemask) == GOMP_MAP_TO_PSET)
995b27b9 664 {
665 size_t j;
666 for (j = i + 1; j < mapnum; j++)
43895be5 667 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
ca4c3545 668 & typemask))
995b27b9 669 break;
670 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
671 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
672 > cur_node.host_end))
673 break;
674 else
675 {
43895be5 676 tgt->list[j].key = NULL;
995b27b9 677 i++;
678 }
679 }
680 }
681 }
682
ca4c3545 683 if (devaddrs)
684 {
685 if (mapnum != 1)
0d8c703d 686 {
687 gomp_mutex_unlock (&devicep->lock);
688 gomp_fatal ("unexpected aggregation");
689 }
ca4c3545 690 tgt->to_free = devaddrs[0];
691 tgt->tgt_start = (uintptr_t) tgt->to_free;
692 tgt->tgt_end = tgt->tgt_start + sizes[0];
693 }
43895be5 694 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 695 {
696 /* Allocate tgt_align aligned tgt_size block of memory. */
697 /* FIXME: Perhaps change interface to allocate properly aligned
698 memory. */
699 tgt->to_free = devicep->alloc_func (devicep->target_id,
700 tgt_size + tgt_align - 1);
9b50ad1d 701 if (!tgt->to_free)
702 {
703 gomp_mutex_unlock (&devicep->lock);
704 gomp_fatal ("device memory allocation failed");
705 }
706
995b27b9 707 tgt->tgt_start = (uintptr_t) tgt->to_free;
708 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
709 tgt->tgt_end = tgt->tgt_start + tgt_size;
5af416bd 710
711 if (cbuf.use_cnt == 1)
712 cbuf.chunk_cnt--;
713 if (cbuf.chunk_cnt > 0)
714 {
715 cbuf.buf
65c9e9f9 716 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
5af416bd 717 if (cbuf.buf)
718 {
719 cbuf.tgt = tgt;
720 cbufp = &cbuf;
721 }
722 }
995b27b9 723 }
724 else
725 {
726 tgt->to_free = NULL;
727 tgt->tgt_start = 0;
728 tgt->tgt_end = 0;
729 }
730
731 tgt_size = 0;
43895be5 732 if (pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 733 tgt_size = mapnum * sizeof (void *);
734
735 tgt->array = NULL;
43895be5 736 if (not_found_cnt || has_firstprivate)
995b27b9 737 {
43895be5 738 if (not_found_cnt)
739 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
995b27b9 740 splay_tree_node array = tgt->array;
43895be5 741 size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
742 uintptr_t field_tgt_base = 0;
995b27b9 743
744 for (i = 0; i < mapnum; i++)
43895be5 745 if (tgt->list[i].key == NULL)
995b27b9 746 {
43895be5 747 int kind = get_kind (short_mapkind, kinds, i);
995b27b9 748 if (hostaddrs[i] == NULL)
749 continue;
43895be5 750 switch (kind & typemask)
751 {
752 size_t align, len, first, last;
753 splay_tree_key n;
754 case GOMP_MAP_FIRSTPRIVATE:
755 align = (size_t) 1 << (kind >> rshift);
756 tgt_size = (tgt_size + align - 1) & ~(align - 1);
757 tgt->list[i].offset = tgt_size;
758 len = sizes[i];
9b50ad1d 759 gomp_copy_host2dev (devicep,
760 (void *) (tgt->tgt_start + tgt_size),
5af416bd 761 (void *) hostaddrs[i], len, cbufp);
43895be5 762 tgt_size += len;
763 continue;
764 case GOMP_MAP_FIRSTPRIVATE_INT:
765 case GOMP_MAP_USE_DEVICE_PTR:
766 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
767 continue;
768 case GOMP_MAP_STRUCT:
769 first = i + 1;
770 last = i + sizes[i];
771 cur_node.host_start = (uintptr_t) hostaddrs[i];
772 cur_node.host_end = (uintptr_t) hostaddrs[last]
773 + sizes[last];
774 if (tgt->list[first].key != NULL)
775 continue;
776 n = splay_tree_lookup (mem_map, &cur_node);
777 if (n == NULL)
778 {
779 size_t align = (size_t) 1 << (kind >> rshift);
780 tgt_size -= (uintptr_t) hostaddrs[first]
781 - (uintptr_t) hostaddrs[i];
782 tgt_size = (tgt_size + align - 1) & ~(align - 1);
783 tgt_size += (uintptr_t) hostaddrs[first]
784 - (uintptr_t) hostaddrs[i];
785 field_tgt_base = (uintptr_t) hostaddrs[first];
786 field_tgt_offset = tgt_size;
787 field_tgt_clear = last;
788 tgt_size += cur_node.host_end
789 - (uintptr_t) hostaddrs[first];
790 continue;
791 }
792 for (i = first; i <= last; i++)
793 gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
5af416bd 794 sizes, kinds, cbufp);
43895be5 795 i--;
796 continue;
9561765e 797 case GOMP_MAP_ALWAYS_POINTER:
798 cur_node.host_start = (uintptr_t) hostaddrs[i];
799 cur_node.host_end = cur_node.host_start + sizeof (void *);
800 n = splay_tree_lookup (mem_map, &cur_node);
801 if (n == NULL
802 || n->host_start > cur_node.host_start
803 || n->host_end < cur_node.host_end)
804 {
805 gomp_mutex_unlock (&devicep->lock);
806 gomp_fatal ("always pointer not mapped");
807 }
808 if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
809 != GOMP_MAP_ALWAYS_POINTER)
810 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
811 if (cur_node.tgt_offset)
812 cur_node.tgt_offset -= sizes[i];
9b50ad1d 813 gomp_copy_host2dev (devicep,
814 (void *) (n->tgt->tgt_start
815 + n->tgt_offset
816 + cur_node.host_start
817 - n->host_start),
818 (void *) &cur_node.tgt_offset,
5af416bd 819 sizeof (void *), cbufp);
9561765e 820 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
821 + cur_node.host_start - n->host_start;
822 continue;
43895be5 823 default:
824 break;
825 }
995b27b9 826 splay_tree_key k = &array->key;
827 k->host_start = (uintptr_t) hostaddrs[i];
ca4c3545 828 if (!GOMP_MAP_POINTER_P (kind & typemask))
995b27b9 829 k->host_end = k->host_start + sizes[i];
830 else
831 k->host_end = k->host_start + sizeof (void *);
0d8c703d 832 splay_tree_key n = splay_tree_lookup (mem_map, k);
c0998828 833 if (n && n->refcount != REFCOUNT_LINK)
43895be5 834 gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
5af416bd 835 kind & typemask, cbufp);
995b27b9 836 else
837 {
c0998828 838 k->link_key = NULL;
839 if (n && n->refcount == REFCOUNT_LINK)
840 {
841 /* Replace target address of the pointer with target address
842 of mapped object in the splay tree. */
843 splay_tree_remove (mem_map, n);
844 k->link_key = n;
845 }
ca4c3545 846 size_t align = (size_t) 1 << (kind >> rshift);
43895be5 847 tgt->list[i].key = k;
995b27b9 848 k->tgt = tgt;
43895be5 849 if (field_tgt_clear != ~(size_t) 0)
850 {
851 k->tgt_offset = k->host_start - field_tgt_base
852 + field_tgt_offset;
853 if (i == field_tgt_clear)
854 field_tgt_clear = ~(size_t) 0;
855 }
856 else
857 {
858 tgt_size = (tgt_size + align - 1) & ~(align - 1);
859 k->tgt_offset = tgt_size;
860 tgt_size += k->host_end - k->host_start;
861 }
862 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
863 tgt->list[i].always_copy_from
864 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
865 tgt->list[i].offset = 0;
866 tgt->list[i].length = k->host_end - k->host_start;
995b27b9 867 k->refcount = 1;
737cc978 868 k->dynamic_refcount = 0;
995b27b9 869 tgt->refcount++;
870 array->left = NULL;
871 array->right = NULL;
0d8c703d 872 splay_tree_insert (mem_map, array);
ca4c3545 873 switch (kind & typemask)
995b27b9 874 {
ca4c3545 875 case GOMP_MAP_ALLOC:
876 case GOMP_MAP_FROM:
877 case GOMP_MAP_FORCE_ALLOC:
878 case GOMP_MAP_FORCE_FROM:
43895be5 879 case GOMP_MAP_ALWAYS_FROM:
995b27b9 880 break;
ca4c3545 881 case GOMP_MAP_TO:
882 case GOMP_MAP_TOFROM:
883 case GOMP_MAP_FORCE_TO:
884 case GOMP_MAP_FORCE_TOFROM:
43895be5 885 case GOMP_MAP_ALWAYS_TO:
886 case GOMP_MAP_ALWAYS_TOFROM:
9b50ad1d 887 gomp_copy_host2dev (devicep,
888 (void *) (tgt->tgt_start
889 + k->tgt_offset),
890 (void *) k->host_start,
5af416bd 891 k->host_end - k->host_start, cbufp);
995b27b9 892 break;
ca4c3545 893 case GOMP_MAP_POINTER:
84f53106 894 gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
5af416bd 895 k->tgt_offset, sizes[i], cbufp);
995b27b9 896 break;
ca4c3545 897 case GOMP_MAP_TO_PSET:
9b50ad1d 898 gomp_copy_host2dev (devicep,
899 (void *) (tgt->tgt_start
900 + k->tgt_offset),
901 (void *) k->host_start,
5af416bd 902 k->host_end - k->host_start, cbufp);
ca4c3545 903
995b27b9 904 for (j = i + 1; j < mapnum; j++)
43895be5 905 if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
906 j)
ca4c3545 907 & typemask))
995b27b9 908 break;
909 else if ((uintptr_t) hostaddrs[j] < k->host_start
910 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
911 > k->host_end))
912 break;
913 else
914 {
43895be5 915 tgt->list[j].key = k;
916 tgt->list[j].copy_from = false;
917 tgt->list[j].always_copy_from = false;
918 if (k->refcount != REFCOUNT_INFINITY)
919 k->refcount++;
84f53106 920 gomp_map_pointer (tgt,
921 (uintptr_t) *(void **) hostaddrs[j],
922 k->tgt_offset
923 + ((uintptr_t) hostaddrs[j]
924 - k->host_start),
5af416bd 925 sizes[j], cbufp);
995b27b9 926 i++;
927 }
ca4c3545 928 break;
929 case GOMP_MAP_FORCE_PRESENT:
930 {
931 /* We already looked up the memory region above and it
932 was missing. */
933 size_t size = k->host_end - k->host_start;
0d8c703d 934 gomp_mutex_unlock (&devicep->lock);
2634aed9 935#ifdef HAVE_INTTYPES_H
936 gomp_fatal ("present clause: !acc_is_present (%p, "
937 "%"PRIu64" (0x%"PRIx64"))",
938 (void *) k->host_start,
939 (uint64_t) size, (uint64_t) size);
940#else
ca4c3545 941 gomp_fatal ("present clause: !acc_is_present (%p, "
2634aed9 942 "%lu (0x%lx))", (void *) k->host_start,
943 (unsigned long) size, (unsigned long) size);
944#endif
ca4c3545 945 }
946 break;
947 case GOMP_MAP_FORCE_DEVICEPTR:
948 assert (k->host_end - k->host_start == sizeof (void *));
9b50ad1d 949 gomp_copy_host2dev (devicep,
950 (void *) (tgt->tgt_start
951 + k->tgt_offset),
952 (void *) k->host_start,
5af416bd 953 sizeof (void *), cbufp);
ca4c3545 954 break;
955 default:
0d8c703d 956 gomp_mutex_unlock (&devicep->lock);
ca4c3545 957 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
958 kind);
995b27b9 959 }
c0998828 960
961 if (k->link_key)
962 {
963 /* Set link pointer on target to the device address of the
964 mapped object. */
965 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
9d5a0b9d 966 /* We intentionally do not use coalescing here, as it's not
967 data allocated by the current call to this function. */
968 gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
969 &tgt_addr, sizeof (void *), NULL);
c0998828 970 }
995b27b9 971 array++;
972 }
973 }
974 }
ca4c3545 975
43895be5 976 if (pragma_kind == GOMP_MAP_VARS_TARGET)
995b27b9 977 {
978 for (i = 0; i < mapnum; i++)
979 {
9561765e 980 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
9b50ad1d 981 gomp_copy_host2dev (devicep,
982 (void *) (tgt->tgt_start + i * sizeof (void *)),
5af416bd 983 (void *) &cur_node.tgt_offset, sizeof (void *),
984 cbufp);
995b27b9 985 }
986 }
987
5af416bd 988 if (cbufp)
989 {
990 long c = 0;
991 for (c = 0; c < cbuf.chunk_cnt; ++c)
65c9e9f9 992 gomp_copy_host2dev (devicep,
993 (void *) (tgt->tgt_start + cbuf.chunks[c].start),
994 (char *) cbuf.buf + (cbuf.chunks[c].start
995 - cbuf.chunks[0].start),
996 cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
5af416bd 997 free (cbuf.buf);
65c9e9f9 998 cbuf.buf = NULL;
999 cbufp = NULL;
5af416bd 1000 }
1001
43895be5 1002 /* If the variable from "omp target enter data" map-list was already mapped,
1003 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1004 gomp_exit_data. */
1005 if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
1006 {
1007 free (tgt);
1008 tgt = NULL;
1009 }
1010
0d8c703d 1011 gomp_mutex_unlock (&devicep->lock);
995b27b9 1012 return tgt;
1013}
1014
1015static void
1016gomp_unmap_tgt (struct target_mem_desc *tgt)
1017{
1018 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1019 if (tgt->tgt_end)
9b50ad1d 1020 gomp_free_device_memory (tgt->device_descr, tgt->to_free);
995b27b9 1021
1022 free (tgt->array);
1023 free (tgt);
1024}
1025
737cc978 1026attribute_hidden bool
1027gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1028{
1029 bool is_tgt_unmapped = false;
1030 splay_tree_remove (&devicep->mem_map, k);
1031 if (k->link_key)
1032 splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
1033 if (k->tgt->refcount > 1)
1034 k->tgt->refcount--;
1035 else
1036 {
1037 is_tgt_unmapped = true;
1038 gomp_unmap_tgt (k->tgt);
1039 }
1040 return is_tgt_unmapped;
1041}
1042
ca4c3545 1043/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
 1044 variables back from device to host; if it is false, it is assumed that this
2bf775c0 1045 has been done already. */
ca4c3545 1046
1047attribute_hidden void
1048gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
995b27b9 1049{
1050 struct gomp_device_descr *devicep = tgt->device_descr;
1051
1052 if (tgt->list_count == 0)
1053 {
1054 free (tgt);
1055 return;
1056 }
1057
0d8c703d 1058 gomp_mutex_lock (&devicep->lock);
f87b2900 1059 if (devicep->state == GOMP_DEVICE_FINALIZED)
1060 {
1061 gomp_mutex_unlock (&devicep->lock);
1062 free (tgt->array);
1063 free (tgt);
1064 return;
1065 }
ca4c3545 1066
995b27b9 1067 size_t i;
995b27b9 1068 for (i = 0; i < tgt->list_count; i++)
43895be5 1069 {
1070 splay_tree_key k = tgt->list[i].key;
1071 if (k == NULL)
1072 continue;
1073
1074 bool do_unmap = false;
1075 if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1076 k->refcount--;
1077 else if (k->refcount == 1)
1078 {
2bf775c0 1079 k->refcount--;
1080 do_unmap = true;
43895be5 1081 }
1082
1083 if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1084 || tgt->list[i].always_copy_from)
9b50ad1d 1085 gomp_copy_dev2host (devicep,
1086 (void *) (k->host_start + tgt->list[i].offset),
1087 (void *) (k->tgt->tgt_start + k->tgt_offset
1088 + tgt->list[i].offset),
1089 tgt->list[i].length);
43895be5 1090 if (do_unmap)
737cc978 1091 gomp_remove_var (devicep, k);
43895be5 1092 }
995b27b9 1093
1094 if (tgt->refcount > 1)
1095 tgt->refcount--;
1096 else
1097 gomp_unmap_tgt (tgt);
ca4c3545 1098
0d8c703d 1099 gomp_mutex_unlock (&devicep->lock);
995b27b9 1100}
1101
1102static void
0d8c703d 1103gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
43895be5 1104 size_t *sizes, void *kinds, bool short_mapkind)
995b27b9 1105{
1106 size_t i;
1107 struct splay_tree_key_s cur_node;
43895be5 1108 const int typemask = short_mapkind ? 0xff : 0x7;
995b27b9 1109
1110 if (!devicep)
1111 return;
1112
1113 if (mapnum == 0)
1114 return;
1115
0d8c703d 1116 gomp_mutex_lock (&devicep->lock);
f87b2900 1117 if (devicep->state == GOMP_DEVICE_FINALIZED)
1118 {
1119 gomp_mutex_unlock (&devicep->lock);
1120 return;
1121 }
1122
995b27b9 1123 for (i = 0; i < mapnum; i++)
1124 if (sizes[i])
1125 {
1126 cur_node.host_start = (uintptr_t) hostaddrs[i];
1127 cur_node.host_end = cur_node.host_start + sizes[i];
0d8c703d 1128 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
995b27b9 1129 if (n)
1130 {
43895be5 1131 int kind = get_kind (short_mapkind, kinds, i);
995b27b9 1132 if (n->host_start > cur_node.host_start
1133 || n->host_end < cur_node.host_end)
0d8c703d 1134 {
1135 gomp_mutex_unlock (&devicep->lock);
1136 gomp_fatal ("Trying to update [%p..%p) object when "
1137 "only [%p..%p) is mapped",
1138 (void *) cur_node.host_start,
1139 (void *) cur_node.host_end,
1140 (void *) n->host_start,
1141 (void *) n->host_end);
1142 }
9b50ad1d 1143
1144
1145 void *hostaddr = (void *) cur_node.host_start;
1146 void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1147 + cur_node.host_start - n->host_start);
1148 size_t size = cur_node.host_end - cur_node.host_start;
1149
ca4c3545 1150 if (GOMP_MAP_COPY_TO_P (kind & typemask))
5af416bd 1151 gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
ca4c3545 1152 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
9b50ad1d 1153 gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
995b27b9 1154 }
995b27b9 1155 }
0d8c703d 1156 gomp_mutex_unlock (&devicep->lock);
1157}
1158
1159/* Load the image pointed to by TARGET_DATA to the device specified by
 1160 DEVICEP, and insert into the splay tree the mapping between addresses
7de5731e 1161 from HOST_TABLE and from the loaded target image. We rely on the host
 1162 and device compilers emitting variables and functions in the same order. */
0d8c703d 1163
1164static void
d3d8e632 1165gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
7de5731e 1166 const void *host_table, const void *target_data,
1167 bool is_register_lock)
0d8c703d 1168{
1169 void **host_func_table = ((void ***) host_table)[0];
1170 void **host_funcs_end = ((void ***) host_table)[1];
1171 void **host_var_table = ((void ***) host_table)[2];
1172 void **host_vars_end = ((void ***) host_table)[3];
1173
1174 /* The func table contains only addresses, the var table contains addresses
1175 and corresponding sizes. */
1176 int num_funcs = host_funcs_end - host_func_table;
1177 int num_vars = (host_vars_end - host_var_table) / 2;
1178
1179 /* Load image to device and get target addresses for the image. */
1180 struct addr_pair *target_table = NULL;
d3d8e632 1181 int i, num_target_entries;
1182
1183 num_target_entries
1184 = devicep->load_image_func (devicep->target_id, version,
1185 target_data, &target_table);
0d8c703d 1186
1187 if (num_target_entries != num_funcs + num_vars)
1188 {
1189 gomp_mutex_unlock (&devicep->lock);
1190 if (is_register_lock)
1191 gomp_mutex_unlock (&register_lock);
d3d8e632 1192 gomp_fatal ("Cannot map target functions or variables"
1193 " (expected %u, have %u)", num_funcs + num_vars,
1194 num_target_entries);
0d8c703d 1195 }
1196
1197 /* Insert host-target address mapping into splay tree. */
1198 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1199 tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
43895be5 1200 tgt->refcount = REFCOUNT_INFINITY;
0d8c703d 1201 tgt->tgt_start = 0;
1202 tgt->tgt_end = 0;
1203 tgt->to_free = NULL;
1204 tgt->prev = NULL;
1205 tgt->list_count = 0;
1206 tgt->device_descr = devicep;
1207 splay_tree_node array = tgt->array;
1208
1209 for (i = 0; i < num_funcs; i++)
1210 {
1211 splay_tree_key k = &array->key;
1212 k->host_start = (uintptr_t) host_func_table[i];
1213 k->host_end = k->host_start + 1;
1214 k->tgt = tgt;
1215 k->tgt_offset = target_table[i].start;
43895be5 1216 k->refcount = REFCOUNT_INFINITY;
c0998828 1217 k->link_key = NULL;
0d8c703d 1218 array->left = NULL;
1219 array->right = NULL;
1220 splay_tree_insert (&devicep->mem_map, array);
1221 array++;
1222 }
1223
c0998828 1224 /* Most significant bit of the size in host and target tables marks
1225 "omp declare target link" variables. */
1226 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1227 const uintptr_t size_mask = ~link_bit;
1228
0d8c703d 1229 for (i = 0; i < num_vars; i++)
1230 {
1231 struct addr_pair *target_var = &target_table[num_funcs + i];
c0998828 1232 uintptr_t target_size = target_var->end - target_var->start;
1233
1234 if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
0d8c703d 1235 {
1236 gomp_mutex_unlock (&devicep->lock);
1237 if (is_register_lock)
1238 gomp_mutex_unlock (&register_lock);
c0998828 1239 gomp_fatal ("Cannot map target variables (size mismatch)");
0d8c703d 1240 }
1241
1242 splay_tree_key k = &array->key;
1243 k->host_start = (uintptr_t) host_var_table[i * 2];
c0998828 1244 k->host_end
1245 = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
0d8c703d 1246 k->tgt = tgt;
1247 k->tgt_offset = target_var->start;
c0998828 1248 k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
c0998828 1249 k->link_key = NULL;
0d8c703d 1250 array->left = NULL;
1251 array->right = NULL;
1252 splay_tree_insert (&devicep->mem_map, array);
1253 array++;
1254 }
1255
1256 free (target_table);
995b27b9 1257}
1258
7de5731e 1259/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
 1260 The device must be locked. */
1261
1262static void
1263gomp_unload_image_from_device (struct gomp_device_descr *devicep,
d3d8e632 1264 unsigned version,
7de5731e 1265 const void *host_table, const void *target_data)
1266{
1267 void **host_func_table = ((void ***) host_table)[0];
1268 void **host_funcs_end = ((void ***) host_table)[1];
1269 void **host_var_table = ((void ***) host_table)[2];
1270 void **host_vars_end = ((void ***) host_table)[3];
1271
1272 /* The func table contains only addresses, the var table contains addresses
1273 and corresponding sizes. */
1274 int num_funcs = host_funcs_end - host_func_table;
1275 int num_vars = (host_vars_end - host_var_table) / 2;
1276
7de5731e 1277 struct splay_tree_key_s k;
1278 splay_tree_key node = NULL;
1279
1280 /* Find mapping at start of node array. */
1281 if (num_funcs || num_vars)
1282 {
1283 k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1284 : (uintptr_t) host_var_table[0]);
1285 k.host_end = k.host_start + 1;
1286 node = splay_tree_lookup (&devicep->mem_map, &k);
1287 }
d3d8e632 1288
9b50ad1d 1289 if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1290 {
1291 gomp_mutex_unlock (&devicep->lock);
1292 gomp_fatal ("image unload failed");
1293 }
7de5731e 1294
1295 /* Remove mappings from splay tree. */
c0998828 1296 int i;
1297 for (i = 0; i < num_funcs; i++)
7de5731e 1298 {
c0998828 1299 k.host_start = (uintptr_t) host_func_table[i];
7de5731e 1300 k.host_end = k.host_start + 1;
1301 splay_tree_remove (&devicep->mem_map, &k);
1302 }
1303
c0998828 1304 /* Most significant bit of the size in host and target tables marks
1305 "omp declare target link" variables. */
1306 const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1307 const uintptr_t size_mask = ~link_bit;
1308 bool is_tgt_unmapped = false;
1309
1310 for (i = 0; i < num_vars; i++)
7de5731e 1311 {
c0998828 1312 k.host_start = (uintptr_t) host_var_table[i * 2];
1313 k.host_end
1314 = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1315
1316 if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1317 splay_tree_remove (&devicep->mem_map, &k);
1318 else
1319 {
1320 splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
737cc978 1321 is_tgt_unmapped = gomp_remove_var (devicep, n);
c0998828 1322 }
7de5731e 1323 }
1324
c0998828 1325 if (node && !is_tgt_unmapped)
7de5731e 1326 {
1327 free (node->tgt);
1328 free (node);
1329 }
1330}
1331
0d8c703d 1332/* This function should be called from every offload image while loading.
995b27b9 1333 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1334 the target, and TARGET_DATA needed by the target plugin. */
1335
1336void
d3d8e632 1337GOMP_offload_register_ver (unsigned version, const void *host_table,
1338 int target_type, const void *target_data)
995b27b9 1339{
0d8c703d 1340 int i;
d3d8e632 1341
1342 if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1343 gomp_fatal ("Library too old for offload (version %u < %u)",
1344 GOMP_VERSION, GOMP_VERSION_LIB (version));
1345
0d8c703d 1346 gomp_mutex_lock (&register_lock);
1347
1348 /* Load image to all initialized devices. */
1349 for (i = 0; i < num_devices; i++)
1350 {
1351 struct gomp_device_descr *devicep = &devices[i];
1352 gomp_mutex_lock (&devicep->lock);
f87b2900 1353 if (devicep->type == target_type
1354 && devicep->state == GOMP_DEVICE_INITIALIZED)
d3d8e632 1355 gomp_load_image_to_device (devicep, version,
1356 host_table, target_data, true);
0d8c703d 1357 gomp_mutex_unlock (&devicep->lock);
1358 }
995b27b9 1359
0d8c703d 1360 /* Insert image to array of pending images. */
1361 offload_images
1362 = gomp_realloc_unlock (offload_images,
1363 (num_offload_images + 1)
1364 * sizeof (struct offload_image_descr));
d3d8e632 1365 offload_images[num_offload_images].version = version;
995b27b9 1366 offload_images[num_offload_images].type = target_type;
1367 offload_images[num_offload_images].host_table = host_table;
1368 offload_images[num_offload_images].target_data = target_data;
1369
1370 num_offload_images++;
0d8c703d 1371 gomp_mutex_unlock (&register_lock);
995b27b9 1372}
1373
d3d8e632 1374void
1375GOMP_offload_register (const void *host_table, int target_type,
1376 const void *target_data)
1377{
1378 GOMP_offload_register_ver (0, host_table, target_type, target_data);
1379}
1380
0d8c703d 1381/* This function should be called from every offload image while unloading.
1382 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1383 the target, and TARGET_DATA needed by the target plugin. */
995b27b9 1384
0d8c703d 1385void
d3d8e632 1386GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1387 int target_type, const void *target_data)
995b27b9 1388{
0d8c703d 1389 int i;
1390
0d8c703d 1391 gomp_mutex_lock (&register_lock);
1392
1393 /* Unload image from all initialized devices. */
1394 for (i = 0; i < num_devices; i++)
1395 {
0d8c703d 1396 struct gomp_device_descr *devicep = &devices[i];
1397 gomp_mutex_lock (&devicep->lock);
f87b2900 1398 if (devicep->type == target_type
1399 && devicep->state == GOMP_DEVICE_INITIALIZED)
d3d8e632 1400 gomp_unload_image_from_device (devicep, version,
1401 host_table, target_data);
0d8c703d 1402 gomp_mutex_unlock (&devicep->lock);
1403 }
1404
1405 /* Remove image from array of pending images. */
1406 for (i = 0; i < num_offload_images; i++)
1407 if (offload_images[i].target_data == target_data)
1408 {
1409 offload_images[i] = offload_images[--num_offload_images];
1410 break;
1411 }
1412
1413 gomp_mutex_unlock (&register_lock);
ca4c3545 1414}
995b27b9 1415
d3d8e632 1416void
1417GOMP_offload_unregister (const void *host_table, int target_type,
1418 const void *target_data)
1419{
1420 GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1421}
1422
0d8c703d 1423/* This function initializes the target device, specified by DEVICEP. DEVICEP
1424 must be locked on entry, and remains locked on return. */
ca4c3545 1425
1426attribute_hidden void
0d8c703d 1427gomp_init_device (struct gomp_device_descr *devicep)
ca4c3545 1428{
995b27b9 1429 int i;
9b50ad1d 1430 if (!devicep->init_device_func (devicep->target_id))
1431 {
1432 gomp_mutex_unlock (&devicep->lock);
1433 gomp_fatal ("device initialization failed");
1434 }
0d8c703d 1435
1436 /* Load to the device all images registered so far. */
1437 for (i = 0; i < num_offload_images; i++)
995b27b9 1438 {
0d8c703d 1439 struct offload_image_descr *image = &offload_images[i];
1440 if (image->type == devicep->type)
d3d8e632 1441 gomp_load_image_to_device (devicep, image->version,
1442 image->host_table, image->target_data,
1443 false);
995b27b9 1444 }
1445
f87b2900 1446 devicep->state = GOMP_DEVICE_INITIALIZED;
ca4c3545 1447}
1448
7de5731e 1449attribute_hidden void
1450gomp_unload_device (struct gomp_device_descr *devicep)
1451{
f87b2900 1452 if (devicep->state == GOMP_DEVICE_INITIALIZED)
7de5731e 1453 {
1454 unsigned i;
1455
1456 /* Unload from the device all images registered so far. */
1457 for (i = 0; i < num_offload_images; i++)
1458 {
1459 struct offload_image_descr *image = &offload_images[i];
1460 if (image->type == devicep->type)
d3d8e632 1461 gomp_unload_image_from_device (devicep, image->version,
1462 image->host_table,
7de5731e 1463 image->target_data);
1464 }
1465 }
1466}
1467
ca4c3545 1468/* Free address mapping tables. MM must be locked on entry, and remains locked
1469 on return. */
1470
1471attribute_hidden void
0d8c703d 1472gomp_free_memmap (struct splay_tree_s *mem_map)
ca4c3545 1473{
0d8c703d 1474 while (mem_map->root)
ca4c3545 1475 {
0d8c703d 1476 struct target_mem_desc *tgt = mem_map->root->key.tgt;
ca4c3545 1477
0d8c703d 1478 splay_tree_remove (mem_map, &mem_map->root->key);
ca4c3545 1479 free (tgt->array);
1480 free (tgt);
1481 }
ca4c3545 1482}
1483
9561765e 1484/* Host fallback for GOMP_target{,_ext} routines. */
43895be5 1485
1486static void
1487gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1488{
1489 struct gomp_thread old_thr, *thr = gomp_thread ();
1490 old_thr = *thr;
1491 memset (thr, '\0', sizeof (*thr));
1492 if (gomp_places_list)
1493 {
1494 thr->place = old_thr.place;
1495 thr->ts.place_partition_len = gomp_places_list_len;
1496 }
1497 fn (hostaddrs);
1498 gomp_free_thread (thr);
1499 *thr = old_thr;
1500}
1501
56686608 1502/* Calculate alignment and size requirements of a private copy of data shared
1503 as GOMP_MAP_FIRSTPRIVATE and store them in TGT_ALIGN and TGT_SIZE. */
9561765e 1504
56686608 1505static inline void
1506calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1507 unsigned short *kinds, size_t *tgt_align,
1508 size_t *tgt_size)
9561765e 1509{
56686608 1510 size_t i;
1511 for (i = 0; i < mapnum; i++)
1512 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1513 {
1514 size_t align = (size_t) 1 << (kinds[i] >> 8);
1515 if (*tgt_align < align)
1516 *tgt_align = align;
1517 *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1518 *tgt_size += sizes[i];
1519 }
1520}
1521
1522/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */
1523
1524static inline void
1525copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1526 size_t *sizes, unsigned short *kinds, size_t tgt_align,
1527 size_t tgt_size)
1528{
1529 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1530 if (al)
1531 tgt += tgt_align - al;
1532 tgt_size = 0;
1533 size_t i;
9561765e 1534 for (i = 0; i < mapnum; i++)
1535 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1536 {
1537 size_t align = (size_t) 1 << (kinds[i] >> 8);
9561765e 1538 tgt_size = (tgt_size + align - 1) & ~(align - 1);
56686608 1539 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1540 hostaddrs[i] = tgt + tgt_size;
1541 tgt_size = tgt_size + sizes[i];
9561765e 1542 }
56686608 1543}
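
/* Illustrative use of the two helpers above, mirroring the fallback paths
   in GOMP_target_ext below (mapnum, hostaddrs, sizes and kinds stand for
   the arguments of the enclosing function):  */
#if 0
  size_t tgt_align = 0, tgt_size = 0;
  calculate_firstprivate_requirements (mapnum, sizes, kinds,
				       &tgt_align, &tgt_size);
  if (tgt_align)
    {
      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			      tgt_align, tgt_size);
    }
#endif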
1544
9561765e 1545/* Helper function of GOMP_target{,_ext} routines. */
43895be5 1546
1547static void *
1548gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1549 void (*host_fn) (void *))
1550{
1551 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1552 return (void *) host_fn;
1553 else
1554 {
1555 gomp_mutex_lock (&devicep->lock);
f87b2900 1556 if (devicep->state == GOMP_DEVICE_FINALIZED)
1557 {
1558 gomp_mutex_unlock (&devicep->lock);
1559 return NULL;
1560 }
1561
43895be5 1562 struct splay_tree_key_s k;
1563 k.host_start = (uintptr_t) host_fn;
1564 k.host_end = k.host_start + 1;
1565 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1566 gomp_mutex_unlock (&devicep->lock);
1567 if (tgt_fn == NULL)
bc6edebb 1568 return NULL;
43895be5 1569
1570 return (void *) tgt_fn->tgt_offset;
1571 }
1572}
1573
bc7bff74 1574/* Called when encountering a target directive. If DEVICE
ca4c3545 1575 is GOMP_DEVICE_ICV, it means use the device-var ICV. If it is
 1576 GOMP_DEVICE_HOST_FALLBACK (or any value
 1577 larger than the last available hw device), use the host fallback.
dc19c8fd 1578 FN is the address of the host code, UNUSED is part of the current ABI,
 1579 but we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
bc7bff74 1580 with MAPNUM entries, holding the addresses of the host objects, the
 1581 sizes of the host objects (resp., for pointer kinds, the pointer bias
 1582 with an assumed size of sizeof (void *)) and the kinds. */
1583
1584void
dc19c8fd 1585GOMP_target (int device, void (*fn) (void *), const void *unused,
bc7bff74 1586 size_t mapnum, void **hostaddrs, size_t *sizes,
1587 unsigned char *kinds)
1588{
995b27b9 1589 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1590
f87b2900 1591 void *fn_addr;
ca4c3545 1592 if (devicep == NULL
f87b2900 1593 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 1594 /* All shared memory devices should use the GOMP_target_ext function. */
1595 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
f87b2900 1596 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
43895be5 1597 return gomp_target_fallback (fn, hostaddrs);
1598
43895be5 1599 struct target_mem_desc *tgt_vars
1600 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1601 GOMP_MAP_VARS_TARGET);
56686608 1602 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1603 NULL);
43895be5 1604 gomp_unmap_vars (tgt_vars, true);
1605}
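
/* Illustrative sketch (not emitted by libgomp itself): roughly what the
   compiler generates for "#pragma omp target map(tofrom: x)" with int x;
   "foo_target_fn" stands for the outlined body and is hypothetical, and
   the alignment bits GCC encodes in the high part of each kind are
   omitted for clarity.  */
#if 0
  void *hostaddrs[1] = { &x };
  size_t sizes[1] = { sizeof (int) };
  unsigned char kinds[1] = { GOMP_MAP_TOFROM };
  GOMP_target (GOMP_DEVICE_ICV, foo_target_fn, NULL,
	       1, hostaddrs, sizes, kinds);
#endif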
995b27b9 1606
9561765e 1607/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1608 and several arguments have been added:
1609 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1610 DEPEND is array of dependencies, see GOMP_task for details.
56686608 1611
1612 ARGS is a pointer to an array consisting of a variable number of both
 1613 device-independent and device-specific arguments, each of which can take
 1614 one or two elements: the first specifies for which device it is intended,
 1615 the type and optionally also the value. If the value is not present in
 1616 the first element, the whole second element is the actual value. The
 1617 last element of the array is a single NULL. Among the device-independent
 1618 arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
1619
9561765e 1620 NUM_TEAMS is positive if GOMP_teams will be called in the body with
 1621 that value, or 1 if the teams construct is not present, or 0 if the
 1622 teams construct does not have a num_teams clause and so the choice is
 1623 implementation defined, and -1 if it can't be determined on the host
 1624 what value GOMP_teams will have on the device.
 1625 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
 1626 body with that value, or 0 if the teams construct does not have a
 1627 thread_limit clause or the teams construct is not present, or -1 if it
 1628 can't be determined on the host what value GOMP_teams will have. */
1629
43895be5 1630void
9561765e 1631GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1632 void **hostaddrs, size_t *sizes, unsigned short *kinds,
56686608 1633 unsigned int flags, void **depend, void **args)
43895be5 1634{
1635 struct gomp_device_descr *devicep = resolve_device (device);
84217e9d 1636 size_t tgt_align = 0, tgt_size = 0;
1637 bool fpc_done = false;
ca4c3545 1638
a9833286 1639 if (flags & GOMP_TARGET_FLAG_NOWAIT)
1640 {
1641 struct gomp_thread *thr = gomp_thread ();
1642 /* Create a team if we don't have any around, as nowait
1643 target tasks make sense to run asynchronously even when
1644 outside of any parallel. */
1645 if (__builtin_expect (thr->ts.team == NULL, 0))
1646 {
1647 struct gomp_team *team = gomp_new_team (1);
1648 struct gomp_task *task = thr->task;
1649 struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1650 team->prev_ts = thr->ts;
1651 thr->ts.team = team;
1652 thr->ts.team_id = 0;
1653 thr->ts.work_share = &team->work_shares[0];
1654 thr->ts.last_work_share = NULL;
1655#ifdef HAVE_SYNC_BUILTINS
1656 thr->ts.single_count = 0;
1657#endif
1658 thr->ts.static_trip = 0;
1659 thr->task = &team->implicit_task[0];
1660 gomp_init_task (thr->task, NULL, icv);
1661 if (task)
1662 {
1663 thr->task = task;
1664 gomp_end_task ();
1665 free (task);
1666 thr->task = &team->implicit_task[0];
1667 }
1668 else
1669 pthread_setspecific (gomp_thread_destructor, thr);
1670 }
1671 if (thr->ts.team
1672 && !thr->task->final_task)
1673 {
1674 gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
56686608 1675 sizes, kinds, flags, depend, args,
a9833286 1676 GOMP_TARGET_TASK_BEFORE_MAP);
1677 return;
1678 }
1679 }
1680
1681 /* If there are depend clauses, but nowait is not present
1682 (or we are in a final task), block the parent task until the
1683 dependencies are resolved and then just continue with the rest
 1684 of the function as if it were a merged task. */
43895be5 1685 if (depend != NULL)
1686 {
1687 struct gomp_thread *thr = gomp_thread ();
1688 if (thr->task && thr->task->depend_hash)
84217e9d 1689 {
1690 /* If we might need to wait, copy firstprivate now. */
1691 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1692 &tgt_align, &tgt_size);
1693 if (tgt_align)
1694 {
1695 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1696 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1697 tgt_align, tgt_size);
1698 }
1699 fpc_done = true;
1700 gomp_task_maybe_wait_for_dependencies (depend);
1701 }
43895be5 1702 }
ca4c3545 1703
f87b2900 1704 void *fn_addr;
43895be5 1705 if (devicep == NULL
f87b2900 1706 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 1707 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1708 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
ca4c3545 1709 {
84217e9d 1710 if (!fpc_done)
1711 {
1712 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1713 &tgt_align, &tgt_size);
1714 if (tgt_align)
1715 {
1716 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1717 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1718 tgt_align, tgt_size);
1719 }
1720 }
1721 gomp_target_fallback (fn, hostaddrs);
43895be5 1722 return;
ca4c3545 1723 }
995b27b9 1724
56686608 1725 struct target_mem_desc *tgt_vars;
56686608 1726 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1727 {
84217e9d 1728 if (!fpc_done)
1729 {
1730 calculate_firstprivate_requirements (mapnum, sizes, kinds,
1731 &tgt_align, &tgt_size);
1732 if (tgt_align)
1733 {
1734 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1735 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1736 tgt_align, tgt_size);
1737 }
1738 }
56686608 1739 tgt_vars = NULL;
1740 }
1741 else
1742 tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1743 true, GOMP_MAP_VARS_TARGET);
1744 devicep->run_func (devicep->target_id, fn_addr,
1745 tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1746 args);
1747 if (tgt_vars)
1748 gomp_unmap_vars (tgt_vars, true);
bc7bff74 1749}
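
/* As a sketch of the calling convention (the actual lowering is done by the
   compiler and may differ in detail), a construct such as

     #pragma omp target map(tofrom: x)
       x++;

   is emitted roughly as:

     void fn (void *data) { int *x = ((int **) data)[0]; (*x)++; }
     ...
     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (x) };
     unsigned short kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target_ext (GOMP_DEVICE_ICV, fn, 1, hostaddrs, sizes, kinds,
                      0, NULL, NULL);

   On the device FN receives TGT_START, which begins with the array of device
   addresses of the mapped variables, mirroring HOSTADDRS in the fallback
   case.  */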
1750
9561765e 1751/* Host fallback for GOMP_target_data{,_ext} routines. */
43895be5 1752
1753static void
1754gomp_target_data_fallback (void)
1755{
1756 struct gomp_task_icv *icv = gomp_icv (false);
1757 if (icv->target_data)
1758 {
1759 /* Even when doing a host fallback, if there are any active
 1760 #pragma omp target data constructs, we need to remember the
 1761 new #pragma omp target data; otherwise GOMP_target_end_data
1762 would get out of sync. */
1763 struct target_mem_desc *tgt
1764 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1765 GOMP_MAP_VARS_DATA);
1766 tgt->prev = icv->target_data;
1767 icv->target_data = tgt;
1768 }
1769}
1770
bc7bff74 1771void
dc19c8fd 1772GOMP_target_data (int device, const void *unused, size_t mapnum,
bc7bff74 1773 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1774{
995b27b9 1775 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1776
1777 if (devicep == NULL
56686608 1778 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1779 || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
43895be5 1780 return gomp_target_data_fallback ();
ca4c3545 1781
995b27b9 1782 struct target_mem_desc *tgt
ca4c3545 1783 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
43895be5 1784 GOMP_MAP_VARS_DATA);
1785 struct gomp_task_icv *icv = gomp_icv (true);
1786 tgt->prev = icv->target_data;
1787 icv->target_data = tgt;
1788}
1789
1790void
9561765e 1791GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1792 size_t *sizes, unsigned short *kinds)
43895be5 1793{
1794 struct gomp_device_descr *devicep = resolve_device (device);
1795
1796 if (devicep == NULL
56686608 1797 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1798 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 1799 return gomp_target_data_fallback ();
1800
1801 struct target_mem_desc *tgt
1802 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1803 GOMP_MAP_VARS_DATA);
995b27b9 1804 struct gomp_task_icv *icv = gomp_icv (true);
1805 tgt->prev = icv->target_data;
1806 icv->target_data = tgt;
bc7bff74 1807}
1808
1809void
1810GOMP_target_end_data (void)
1811{
995b27b9 1812 struct gomp_task_icv *icv = gomp_icv (false);
1813 if (icv->target_data)
1814 {
1815 struct target_mem_desc *tgt = icv->target_data;
1816 icv->target_data = tgt->prev;
ca4c3545 1817 gomp_unmap_vars (tgt, true);
995b27b9 1818 }
bc7bff74 1819}
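
/* A matching pair of the above calls is what the compiler emits for a
   construct such as (sketch; the exact map kinds depend on the clauses):

     #pragma omp target data map(tofrom: a[0:n])
     { ... }

   i.e. roughly:

     void *hostaddrs[1] = { a };
     size_t sizes[1] = { n * sizeof (*a) };
     unsigned short kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target_data_ext (GOMP_DEVICE_ICV, 1, hostaddrs, sizes, kinds);
     ... /* body */
     GOMP_target_end_data ();

   Each GOMP_target_data{,_ext} pushes one descriptor onto ICV->target_data
   via TGT->prev and GOMP_target_end_data pops and unmaps it, so the calls
   nest like the constructs they implement.  */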
1820
1821void
dc19c8fd 1822GOMP_target_update (int device, const void *unused, size_t mapnum,
bc7bff74 1823 void **hostaddrs, size_t *sizes, unsigned char *kinds)
1824{
995b27b9 1825 struct gomp_device_descr *devicep = resolve_device (device);
ca4c3545 1826
1827 if (devicep == NULL
56686608 1828 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1829 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
995b27b9 1830 return;
1831
43895be5 1832 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1833}
1834
1835void
9561765e 1836GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1837 size_t *sizes, unsigned short *kinds,
1838 unsigned int flags, void **depend)
43895be5 1839{
1840 struct gomp_device_descr *devicep = resolve_device (device);
1841
1842 /* If there are depend clauses, but nowait is not present,
1843 block the parent task until the dependencies are resolved
 1844 and then just continue with the rest of the function as if it
 1845 were a merged task. Until we are able to schedule tasks during
1846 variable mapping or unmapping, ignore nowait if depend clauses
1847 are not present. */
1848 if (depend != NULL)
1849 {
1850 struct gomp_thread *thr = gomp_thread ();
1851 if (thr->task && thr->task->depend_hash)
1852 {
1853 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1854 && thr->ts.team
1855 && !thr->task->final_task)
1856 {
a9833286 1857 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1858 mapnum, hostaddrs, sizes, kinds,
1859 flags | GOMP_TARGET_FLAG_UPDATE,
56686608 1860 depend, NULL, GOMP_TARGET_TASK_DATA))
a9833286 1861 return;
1862 }
1863 else
1864 {
1865 struct gomp_team *team = thr->ts.team;
1866 /* If parallel or taskgroup has been cancelled, don't start new
1867 tasks. */
7e5a76c8 1868 if (__builtin_expect (gomp_cancel_var, 0) && team)
1869 {
1870 if (gomp_team_barrier_cancelled (&team->barrier))
1871 return;
1872 if (thr->task->taskgroup)
1873 {
1874 if (thr->task->taskgroup->cancelled)
1875 return;
1876 if (thr->task->taskgroup->workshare
1877 && thr->task->taskgroup->prev
1878 && thr->task->taskgroup->prev->cancelled)
1879 return;
1880 }
1881 }
a9833286 1882
1883 gomp_task_maybe_wait_for_dependencies (depend);
43895be5 1884 }
43895be5 1885 }
1886 }
1887
1888 if (devicep == NULL
56686608 1889 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1890 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 1891 return;
1892
1893 struct gomp_thread *thr = gomp_thread ();
1894 struct gomp_team *team = thr->ts.team;
1895 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
7e5a76c8 1896 if (__builtin_expect (gomp_cancel_var, 0) && team)
1897 {
1898 if (gomp_team_barrier_cancelled (&team->barrier))
1899 return;
1900 if (thr->task->taskgroup)
1901 {
1902 if (thr->task->taskgroup->cancelled)
1903 return;
1904 if (thr->task->taskgroup->workshare
1905 && thr->task->taskgroup->prev
1906 && thr->task->taskgroup->prev->cancelled)
1907 return;
1908 }
1909 }
43895be5 1910
1911 gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1912}
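
/* Sketch of a corresponding compiler emission (map kind illustrative):
   "#pragma omp target update to(x)" becomes roughly

     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (x) };
     unsigned short kinds[1] = { GOMP_MAP_TO };
     GOMP_target_update_ext (GOMP_DEVICE_ICV, 1, hostaddrs, sizes, kinds,
                             0, NULL);

   with GOMP_TARGET_FLAG_NOWAIT in FLAGS and a non-NULL DEPEND array when
   nowait/depend clauses are present.  */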
1913
1914static void
1915gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1916 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1917{
1918 const int typemask = 0xff;
1919 size_t i;
ca4c3545 1920 gomp_mutex_lock (&devicep->lock);
f87b2900 1921 if (devicep->state == GOMP_DEVICE_FINALIZED)
1922 {
1923 gomp_mutex_unlock (&devicep->lock);
1924 return;
1925 }
1926
43895be5 1927 for (i = 0; i < mapnum; i++)
1928 {
1929 struct splay_tree_key_s cur_node;
1930 unsigned char kind = kinds[i] & typemask;
1931 switch (kind)
1932 {
1933 case GOMP_MAP_FROM:
1934 case GOMP_MAP_ALWAYS_FROM:
1935 case GOMP_MAP_DELETE:
1936 case GOMP_MAP_RELEASE:
1937 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1938 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1939 cur_node.host_start = (uintptr_t) hostaddrs[i];
1940 cur_node.host_end = cur_node.host_start + sizes[i];
1941 splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1942 || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
9561765e 1943 ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
43895be5 1944 : splay_tree_lookup (&devicep->mem_map, &cur_node);
1945 if (!k)
1946 continue;
1947
1948 if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1949 k->refcount--;
1950 if ((kind == GOMP_MAP_DELETE
1951 || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1952 && k->refcount != REFCOUNT_INFINITY)
1953 k->refcount = 0;
1954
1955 if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1956 || kind == GOMP_MAP_ALWAYS_FROM)
9b50ad1d 1957 gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1958 (void *) (k->tgt->tgt_start + k->tgt_offset
1959 + cur_node.host_start
1960 - k->host_start),
1961 cur_node.host_end - cur_node.host_start);
43895be5 1962 if (k->refcount == 0)
1963 {
1964 splay_tree_remove (&devicep->mem_map, k);
c0998828 1965 if (k->link_key)
1966 splay_tree_insert (&devicep->mem_map,
1967 (splay_tree_node) k->link_key);
43895be5 1968 if (k->tgt->refcount > 1)
1969 k->tgt->refcount--;
1970 else
1971 gomp_unmap_tgt (k->tgt);
1972 }
1973
1974 break;
1975 default:
1976 gomp_mutex_unlock (&devicep->lock);
1977 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1978 kind);
1979 }
1980 }
1981
ca4c3545 1982 gomp_mutex_unlock (&devicep->lock);
43895be5 1983}
ca4c3545 1984
43895be5 1985void
1986GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1987 size_t *sizes, unsigned short *kinds,
1988 unsigned int flags, void **depend)
1989{
1990 struct gomp_device_descr *devicep = resolve_device (device);
1991
1992 /* If there are depend clauses, but nowait is not present,
1993 block the parent task until the dependencies are resolved
 1994 and then just continue with the rest of the function as if it
 1995 were a merged task. Until we are able to schedule tasks during
1996 variable mapping or unmapping, ignore nowait if depend clauses
1997 are not present. */
1998 if (depend != NULL)
1999 {
2000 struct gomp_thread *thr = gomp_thread ();
2001 if (thr->task && thr->task->depend_hash)
2002 {
2003 if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2004 && thr->ts.team
2005 && !thr->task->final_task)
2006 {
a9833286 2007 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2008 mapnum, hostaddrs, sizes, kinds,
56686608 2009 flags, depend, NULL,
a9833286 2010 GOMP_TARGET_TASK_DATA))
2011 return;
2012 }
2013 else
2014 {
2015 struct gomp_team *team = thr->ts.team;
2016 /* If parallel or taskgroup has been cancelled, don't start new
2017 tasks. */
7e5a76c8 2018 if (__builtin_expect (gomp_cancel_var, 0) && team)
2019 {
2020 if (gomp_team_barrier_cancelled (&team->barrier))
2021 return;
2022 if (thr->task->taskgroup)
2023 {
2024 if (thr->task->taskgroup->cancelled)
2025 return;
2026 if (thr->task->taskgroup->workshare
2027 && thr->task->taskgroup->prev
2028 && thr->task->taskgroup->prev->cancelled)
2029 return;
2030 }
2031 }
a9833286 2032
2033 gomp_task_maybe_wait_for_dependencies (depend);
43895be5 2034 }
43895be5 2035 }
2036 }
2037
2038 if (devicep == NULL
56686608 2039 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2040 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2041 return;
2042
2043 struct gomp_thread *thr = gomp_thread ();
2044 struct gomp_team *team = thr->ts.team;
2045 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
7e5a76c8 2046 if (__builtin_expect (gomp_cancel_var, 0) && team)
2047 {
2048 if (gomp_team_barrier_cancelled (&team->barrier))
2049 return;
2050 if (thr->task->taskgroup)
2051 {
2052 if (thr->task->taskgroup->cancelled)
2053 return;
2054 if (thr->task->taskgroup->workshare
2055 && thr->task->taskgroup->prev
2056 && thr->task->taskgroup->prev->cancelled)
2057 return;
2058 }
2059 }
43895be5 2060
2061 size_t i;
2062 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2063 for (i = 0; i < mapnum; i++)
2064 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2065 {
2066 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2067 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2068 i += sizes[i];
2069 }
2070 else
2071 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2072 true, GOMP_MAP_VARS_ENTER_DATA);
2073 else
2074 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2075}
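
/* Sketch (map kinds illustrative): "#pragma omp target enter data
   map(to: a[0:n])" is emitted roughly as

     GOMP_target_enter_exit_data (GOMP_DEVICE_ICV, 1, hostaddrs, sizes,
                                  kinds, 0, NULL);

   with a GOMP_MAP_TO kind, while the matching "exit data map(from: a[0:n])"
   passes GOMP_TARGET_FLAG_EXIT_DATA in FLAGS and a GOMP_MAP_FROM kind, so it
   takes the gomp_exit_data path above.  */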
2076
a9833286 2077bool
43895be5 2078gomp_target_task_fn (void *data)
2079{
2080 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
a9833286 2081 struct gomp_device_descr *devicep = ttask->devicep;
2082
43895be5 2083 if (ttask->fn != NULL)
2084 {
f87b2900 2085 void *fn_addr;
a9833286 2086 if (devicep == NULL
f87b2900 2087 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
56686608 2088 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2089 || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
a9833286 2090 {
2091 ttask->state = GOMP_TARGET_TASK_FALLBACK;
84217e9d 2092 gomp_target_fallback (ttask->fn, ttask->hostaddrs);
a9833286 2093 return false;
2094 }
2095
2096 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2097 {
56686608 2098 if (ttask->tgt)
2099 gomp_unmap_vars (ttask->tgt, true);
a9833286 2100 return false;
2101 }
2102
56686608 2103 void *actual_arguments;
2104 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2105 {
2106 ttask->tgt = NULL;
56686608 2107 actual_arguments = ttask->hostaddrs;
2108 }
2109 else
2110 {
2111 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2112 NULL, ttask->sizes, ttask->kinds, true,
2113 GOMP_MAP_VARS_TARGET);
2114 actual_arguments = (void *) ttask->tgt->tgt_start;
2115 }
a9833286 2116 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2117
56686608 2118 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2119 ttask->args, (void *) ttask);
a9833286 2120 return true;
43895be5 2121 }
a9833286 2122 else if (devicep == NULL
56686608 2123 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2124 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
a9833286 2125 return false;
43895be5 2126
2127 size_t i;
2128 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
a9833286 2129 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
43895be5 2130 ttask->kinds, true);
2131 else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2132 for (i = 0; i < ttask->mapnum; i++)
2133 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2134 {
a9833286 2135 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2136 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2137 GOMP_MAP_VARS_ENTER_DATA);
43895be5 2138 i += ttask->sizes[i];
2139 }
2140 else
a9833286 2141 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2142 &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
43895be5 2143 else
a9833286 2144 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2145 ttask->kinds);
2146 return false;
bc7bff74 2147}
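
/* Note the contract of the true return value above: the target task stays
   outstanding, and the plugin's async_run hook is expected to arrange for
   GOMP_PLUGIN_target_task_completion (TTASK) to be invoked once the device
   run finishes, which requeues the task so that the GOMP_TARGET_TASK_FINISHED
   path can unmap TTASK->tgt.  */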
2148
2149void
2150GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2151{
2152 if (thread_limit)
2153 {
2154 struct gomp_task_icv *icv = gomp_icv (true);
2155 icv->thread_limit_var
2156 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2157 }
2158 (void) num_teams;
2159}
995b27b9 2160
43895be5 2161void *
2162omp_target_alloc (size_t size, int device_num)
2163{
2164 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2165 return malloc (size);
2166
2167 if (device_num < 0)
2168 return NULL;
2169
2170 struct gomp_device_descr *devicep = resolve_device (device_num);
2171 if (devicep == NULL)
2172 return NULL;
2173
56686608 2174 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2175 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2176 return malloc (size);
2177
2178 gomp_mutex_lock (&devicep->lock);
2179 void *ret = devicep->alloc_func (devicep->target_id, size);
2180 gomp_mutex_unlock (&devicep->lock);
2181 return ret;
2182}
2183
2184void
2185omp_target_free (void *device_ptr, int device_num)
2186{
2187 if (device_ptr == NULL)
2188 return;
2189
2190 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2191 {
2192 free (device_ptr);
2193 return;
2194 }
2195
2196 if (device_num < 0)
2197 return;
2198
2199 struct gomp_device_descr *devicep = resolve_device (device_num);
2200 if (devicep == NULL)
2201 return;
2202
56686608 2203 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2204 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2205 {
2206 free (device_ptr);
2207 return;
2208 }
2209
2210 gomp_mutex_lock (&devicep->lock);
9b50ad1d 2211 gomp_free_device_memory (devicep, device_ptr);
43895be5 2212 gomp_mutex_unlock (&devicep->lock);
2213}
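
/* Usage sketch for the two routines above, as a minimal self-contained user
   program (it assumes at least one non-host device, numbered 0):

     #include <omp.h>
     #include <stdio.h>

     int
     main (void)
     {
       int dev = 0;
       void *p = omp_target_alloc (1024, dev);
       if (p == NULL)
         {
           fprintf (stderr, "omp_target_alloc failed\n");
           return 1;
         }
       omp_target_free (p, dev);
       return 0;
     }
*/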
2214
2215int
7e5a76c8 2216omp_target_is_present (const void *ptr, int device_num)
43895be5 2217{
2218 if (ptr == NULL)
2219 return 1;
2220
2221 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2222 return 1;
2223
2224 if (device_num < 0)
2225 return 0;
2226
2227 struct gomp_device_descr *devicep = resolve_device (device_num);
2228 if (devicep == NULL)
2229 return 0;
2230
56686608 2231 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2232 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2233 return 1;
2234
2235 gomp_mutex_lock (&devicep->lock);
2236 struct splay_tree_s *mem_map = &devicep->mem_map;
2237 struct splay_tree_key_s cur_node;
2238
2239 cur_node.host_start = (uintptr_t) ptr;
2240 cur_node.host_end = cur_node.host_start;
9561765e 2241 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
43895be5 2242 int ret = n != NULL;
2243 gomp_mutex_unlock (&devicep->lock);
2244 return ret;
2245}
2246
2247int
7e5a76c8 2248omp_target_memcpy (void *dst, const void *src, size_t length,
2249 size_t dst_offset, size_t src_offset, int dst_device_num,
2250 int src_device_num)
43895be5 2251{
2252 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
9b50ad1d 2253 bool ret;
43895be5 2254
2255 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2256 {
2257 if (dst_device_num < 0)
2258 return EINVAL;
2259
2260 dst_devicep = resolve_device (dst_device_num);
2261 if (dst_devicep == NULL)
2262 return EINVAL;
2263
56686608 2264 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2265 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2266 dst_devicep = NULL;
2267 }
2268 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2269 {
2270 if (src_device_num < 0)
2271 return EINVAL;
2272
2273 src_devicep = resolve_device (src_device_num);
2274 if (src_devicep == NULL)
2275 return EINVAL;
2276
56686608 2277 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2278 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2279 src_devicep = NULL;
2280 }
2281 if (src_devicep == NULL && dst_devicep == NULL)
2282 {
2283 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2284 return 0;
2285 }
2286 if (src_devicep == NULL)
2287 {
2288 gomp_mutex_lock (&dst_devicep->lock);
9b50ad1d 2289 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2290 (char *) dst + dst_offset,
2291 (char *) src + src_offset, length);
43895be5 2292 gomp_mutex_unlock (&dst_devicep->lock);
9b50ad1d 2293 return (ret ? 0 : EINVAL);
43895be5 2294 }
2295 if (dst_devicep == NULL)
2296 {
2297 gomp_mutex_lock (&src_devicep->lock);
9b50ad1d 2298 ret = src_devicep->dev2host_func (src_devicep->target_id,
2299 (char *) dst + dst_offset,
2300 (char *) src + src_offset, length);
43895be5 2301 gomp_mutex_unlock (&src_devicep->lock);
9b50ad1d 2302 return (ret ? 0 : EINVAL);
43895be5 2303 }
2304 if (src_devicep == dst_devicep)
2305 {
2306 gomp_mutex_lock (&src_devicep->lock);
9b50ad1d 2307 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2308 (char *) dst + dst_offset,
2309 (char *) src + src_offset, length);
43895be5 2310 gomp_mutex_unlock (&src_devicep->lock);
9b50ad1d 2311 return (ret ? 0 : EINVAL);
43895be5 2312 }
2313 return EINVAL;
2314}
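
/* Usage sketch (user code; assumes device 0 exists): round-trip a buffer
   through device memory, checking the errno-style return values:

     #include <omp.h>
     #include <string.h>

     int
     roundtrip (void)
     {
       int dev = 0, host = omp_get_initial_device ();
       char src[64], back[64];
       memset (src, 'x', sizeof src);
       void *d = omp_target_alloc (sizeof src, dev);
       if (d == NULL)
         return -1;
       /* Host -> device, then device -> host.  */
       int err = omp_target_memcpy (d, src, sizeof src, 0, 0, dev, host);
       if (err == 0)
         err = omp_target_memcpy (back, d, sizeof back, 0, 0, host, dev);
       omp_target_free (d, dev);
       return err;
     }
*/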
2315
2316static int
7e5a76c8 2317omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
43895be5 2318 int num_dims, const size_t *volume,
2319 const size_t *dst_offsets,
2320 const size_t *src_offsets,
2321 const size_t *dst_dimensions,
2322 const size_t *src_dimensions,
2323 struct gomp_device_descr *dst_devicep,
2324 struct gomp_device_descr *src_devicep)
2325{
2326 size_t dst_slice = element_size;
2327 size_t src_slice = element_size;
2328 size_t j, dst_off, src_off, length;
2329 int i, ret;
2330
2331 if (num_dims == 1)
2332 {
2333 if (__builtin_mul_overflow (element_size, volume[0], &length)
2334 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2335 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2336 return EINVAL;
2337 if (dst_devicep == NULL && src_devicep == NULL)
9b50ad1d 2338 {
7e5a76c8 2339 memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2340 length);
9b50ad1d 2341 ret = 1;
2342 }
43895be5 2343 else if (src_devicep == NULL)
9b50ad1d 2344 ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2345 (char *) dst + dst_off,
7e5a76c8 2346 (const char *) src + src_off,
2347 length);
43895be5 2348 else if (dst_devicep == NULL)
9b50ad1d 2349 ret = src_devicep->dev2host_func (src_devicep->target_id,
2350 (char *) dst + dst_off,
7e5a76c8 2351 (const char *) src + src_off,
2352 length);
43895be5 2353 else if (src_devicep == dst_devicep)
9b50ad1d 2354 ret = src_devicep->dev2dev_func (src_devicep->target_id,
2355 (char *) dst + dst_off,
7e5a76c8 2356 (const char *) src + src_off,
2357 length);
43895be5 2358 else
9b50ad1d 2359 ret = 0;
2360 return ret ? 0 : EINVAL;
43895be5 2361 }
2362
2363 /* FIXME: it would be nice to have some plugin function to handle
2364 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
 2365 be handled in the generic recursion below, and for host-to-host copies
 2366 that recursion should be used even for any num_dims >= 2. */
2367
2368 for (i = 1; i < num_dims; i++)
2369 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2370 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2371 return EINVAL;
2372 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2373 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2374 return EINVAL;
2375 for (j = 0; j < volume[0]; j++)
2376 {
2377 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
7e5a76c8 2378 (const char *) src + src_off,
43895be5 2379 element_size, num_dims - 1,
2380 volume + 1, dst_offsets + 1,
2381 src_offsets + 1, dst_dimensions + 1,
2382 src_dimensions + 1, dst_devicep,
2383 src_devicep);
2384 if (ret)
2385 return ret;
2386 dst_off += dst_slice;
2387 src_off += src_slice;
2388 }
2389 return 0;
2390}
2391
2392int
7e5a76c8 2393omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
43895be5 2394 int num_dims, const size_t *volume,
2395 const size_t *dst_offsets,
2396 const size_t *src_offsets,
2397 const size_t *dst_dimensions,
2398 const size_t *src_dimensions,
2399 int dst_device_num, int src_device_num)
2400{
2401 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2402
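  /* Per the OpenMP spec, when both DST and SRC are NULL the routine reports
     the maximum number of dimensions it can handle; the recursion in the
     worker above has no fixed limit, hence INT_MAX.  */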
2403 if (!dst && !src)
2404 return INT_MAX;
2405
2406 if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2407 {
2408 if (dst_device_num < 0)
2409 return EINVAL;
2410
2411 dst_devicep = resolve_device (dst_device_num);
2412 if (dst_devicep == NULL)
2413 return EINVAL;
2414
56686608 2415 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2416 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2417 dst_devicep = NULL;
2418 }
2419 if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2420 {
2421 if (src_device_num < 0)
2422 return EINVAL;
2423
2424 src_devicep = resolve_device (src_device_num);
2425 if (src_devicep == NULL)
2426 return EINVAL;
2427
56686608 2428 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2429 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2430 src_devicep = NULL;
2431 }
2432
2433 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2434 return EINVAL;
2435
2436 if (src_devicep)
2437 gomp_mutex_lock (&src_devicep->lock);
2438 else if (dst_devicep)
2439 gomp_mutex_lock (&dst_devicep->lock);
2440 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2441 volume, dst_offsets, src_offsets,
2442 dst_dimensions, src_dimensions,
2443 dst_devicep, src_devicep);
2444 if (src_devicep)
2445 gomp_mutex_unlock (&src_devicep->lock);
2446 else if (dst_devicep)
2447 gomp_mutex_unlock (&dst_devicep->lock);
2448 return ret;
2449}
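
/* Usage sketch (user code, host-to-host for simplicity): copy a 2x3 block
   starting at src[1][2] out of a 4x5 matrix:

     #include <omp.h>

     int
     copy_block (double (*dst)[3], const double (*src)[5])
     {
       size_t volume[2] = { 2, 3 };       // rows, columns to copy
       size_t dst_offsets[2] = { 0, 0 };
       size_t src_offsets[2] = { 1, 2 };
       size_t dst_dimensions[2] = { 2, 3 };
       size_t src_dimensions[2] = { 4, 5 };
       int host = omp_get_initial_device ();
       return omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
                                      dst_offsets, src_offsets,
                                      dst_dimensions, src_dimensions,
                                      host, host);
     }
*/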
2450
2451int
7e5a76c8 2452omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2453 size_t size, size_t device_offset, int device_num)
43895be5 2454{
2455 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2456 return EINVAL;
2457
2458 if (device_num < 0)
2459 return EINVAL;
2460
2461 struct gomp_device_descr *devicep = resolve_device (device_num);
2462 if (devicep == NULL)
2463 return EINVAL;
2464
56686608 2465 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2466 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
43895be5 2467 return EINVAL;
2468
2469 gomp_mutex_lock (&devicep->lock);
2470
2471 struct splay_tree_s *mem_map = &devicep->mem_map;
2472 struct splay_tree_key_s cur_node;
2473 int ret = EINVAL;
2474
2475 cur_node.host_start = (uintptr_t) host_ptr;
2476 cur_node.host_end = cur_node.host_start + size;
2477 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2478 if (n)
2479 {
2480 if (n->tgt->tgt_start + n->tgt_offset
2481 == (uintptr_t) device_ptr + device_offset
2482 && n->host_start <= cur_node.host_start
2483 && n->host_end >= cur_node.host_end)
2484 ret = 0;
2485 }
2486 else
2487 {
2488 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2489 tgt->array = gomp_malloc (sizeof (*tgt->array));
2490 tgt->refcount = 1;
2491 tgt->tgt_start = 0;
2492 tgt->tgt_end = 0;
2493 tgt->to_free = NULL;
2494 tgt->prev = NULL;
2495 tgt->list_count = 0;
2496 tgt->device_descr = devicep;
2497 splay_tree_node array = tgt->array;
2498 splay_tree_key k = &array->key;
2499 k->host_start = cur_node.host_start;
2500 k->host_end = cur_node.host_end;
2501 k->tgt = tgt;
2502 k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2503 k->refcount = REFCOUNT_INFINITY;
43895be5 2504 array->left = NULL;
2505 array->right = NULL;
2506 splay_tree_insert (&devicep->mem_map, array);
2507 ret = 0;
2508 }
2509 gomp_mutex_unlock (&devicep->lock);
2510 return ret;
2511}
2512
2513int
7e5a76c8 2514omp_target_disassociate_ptr (const void *ptr, int device_num)
43895be5 2515{
2516 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2517 return EINVAL;
2518
2519 if (device_num < 0)
2520 return EINVAL;
2521
2522 struct gomp_device_descr *devicep = resolve_device (device_num);
2523 if (devicep == NULL)
2524 return EINVAL;
2525
2526 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2527 return EINVAL;
2528
2529 gomp_mutex_lock (&devicep->lock);
2530
2531 struct splay_tree_s *mem_map = &devicep->mem_map;
2532 struct splay_tree_key_s cur_node;
2533 int ret = EINVAL;
2534
2535 cur_node.host_start = (uintptr_t) ptr;
2536 cur_node.host_end = cur_node.host_start;
2537 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2538 if (n
2539 && n->host_start == cur_node.host_start
2540 && n->refcount == REFCOUNT_INFINITY
2541 && n->tgt->tgt_start == 0
2542 && n->tgt->to_free == NULL
2543 && n->tgt->refcount == 1
2544 && n->tgt->list_count == 0)
2545 {
2546 splay_tree_remove (&devicep->mem_map, n);
2547 gomp_unmap_tgt (n->tgt);
2548 ret = 0;
2549 }
2550
2551 gomp_mutex_unlock (&devicep->lock);
2552 return ret;
2553}
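
/* Usage sketch for the pair above (user code; assumes device 0 exists):
   make an explicitly allocated device buffer act as the device copy of
   HOST_BUF, so that target regions treat HOST_BUF as already present:

     #include <omp.h>

     int
     bind_buffer (void *host_buf, size_t size)
     {
       int dev = 0;
       void *dev_buf = omp_target_alloc (size, dev);
       if (dev_buf == NULL)
         return -1;
       int err = omp_target_associate_ptr (host_buf, dev_buf, size, 0, dev);
       if (err == 0)
         {
           /* ... use host_buf in target regions on device 0 ...  */
           err = omp_target_disassociate_ptr (host_buf, dev);
         }
       omp_target_free (dev_buf, dev);
       return err;
     }
*/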
2554
7e5a76c8 2555int
2556omp_pause_resource (omp_pause_resource_t kind, int device_num)
2557{
2558 (void) kind;
2559 if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2560 return gomp_pause_host ();
2561 if (device_num < 0 || device_num >= gomp_get_num_devices ())
2562 return -1;
2563 /* Do nothing for target devices for now. */
2564 return 0;
2565}
2566
2567int
2568omp_pause_resource_all (omp_pause_resource_t kind)
2569{
2570 (void) kind;
2571 if (gomp_pause_host ())
2572 return -1;
2573 /* Do nothing for target devices for now. */
2574 return 0;
2575}
2576
2577ialias (omp_pause_resource)
2578ialias (omp_pause_resource_all)
2579
995b27b9 2580#ifdef PLUGIN_SUPPORT
2581
2582/* This function tries to load a plugin for DEVICE. The name of the plugin
 2583 is passed in PLUGIN_NAME.
2584 The handles of the found functions are stored in the corresponding fields
2585 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2586
2587static bool
2588gomp_load_plugin_for_device (struct gomp_device_descr *device,
2589 const char *plugin_name)
2590{
c6aa02c6 2591 const char *err = NULL, *last_missing = NULL;
ca4c3545 2592
995b27b9 2593 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2594 if (!plugin_handle)
d3d8e632 2595 goto dl_fail;
995b27b9 2596
2597 /* Check if all required functions are available in the plugin and store
d3d8e632 2598 their handles. None of the symbols can legitimately be NULL,
2599 so we don't need to check dlerror all the time. */
ca4c3545 2600#define DLSYM(f) \
d3d8e632 2601 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2602 goto dl_fail
 2603 /* Similar, but missing functions are not an error; the macro expression
 2604 evaluates to false if the lookup failed, true otherwise. */
2605#define DLSYM_OPT(f, n) \
2606 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2607 || (last_missing = #n, 0))
2608
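  /* E.g. DLSYM (alloc) looks up "GOMP_OFFLOAD_alloc" and stores it in
     device->alloc_func, jumping to dl_fail if the symbol is missing, while
     DLSYM_OPT (openacc.exec, openacc_exec) merely records the missing name
     in last_missing and evaluates to false.  */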
2609 DLSYM (version);
2610 if (device->version_func () != GOMP_VERSION)
2611 {
2612 err = "plugin version mismatch";
2613 goto fail;
2614 }
ca4c3545 2615
2616 DLSYM (get_name);
2617 DLSYM (get_caps);
995b27b9 2618 DLSYM (get_type);
2619 DLSYM (get_num_devices);
995b27b9 2620 DLSYM (init_device);
ca4c3545 2621 DLSYM (fini_device);
0d8c703d 2622 DLSYM (load_image);
2623 DLSYM (unload_image);
995b27b9 2624 DLSYM (alloc);
2625 DLSYM (free);
2626 DLSYM (dev2host);
2627 DLSYM (host2dev);
ca4c3545 2628 device->capabilities = device->get_caps_func ();
2629 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
43895be5 2630 {
2631 DLSYM (run);
a9833286 2632 DLSYM (async_run);
56686608 2633 DLSYM_OPT (can_run, can_run);
43895be5 2634 DLSYM (dev2dev);
2635 }
ca4c3545 2636 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2637 {
65caa53b 2638 if (!DLSYM_OPT (openacc.exec, openacc_exec)
d3d8e632 2639 || !DLSYM_OPT (openacc.register_async_cleanup,
2640 openacc_register_async_cleanup)
2641 || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2642 || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2643 || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2644 || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2645 || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2646 || !DLSYM_OPT (openacc.async_wait_all_async,
2647 openacc_async_wait_all_async)
2648 || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2649 || !DLSYM_OPT (openacc.create_thread_data,
2650 openacc_create_thread_data)
2651 || !DLSYM_OPT (openacc.destroy_thread_data,
2652 openacc_destroy_thread_data))
ca4c3545 2653 {
d3d8e632 2654 /* Require all the OpenACC handlers if we have
2655 GOMP_OFFLOAD_CAP_OPENACC_200. */
ca4c3545 2656 err = "plugin missing OpenACC handler function";
d3d8e632 2657 goto fail;
ca4c3545 2658 }
d3d8e632 2659
2660 unsigned cuda = 0;
2661 cuda += DLSYM_OPT (openacc.cuda.get_current_device,
65caa53b 2662 openacc_cuda_get_current_device);
d3d8e632 2663 cuda += DLSYM_OPT (openacc.cuda.get_current_context,
65caa53b 2664 openacc_cuda_get_current_context);
2665 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2666 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
d3d8e632 2667 if (cuda && cuda != 4)
ca4c3545 2668 {
d3d8e632 2669 /* Make sure all the CUDA functions are there if any of them are. */
ca4c3545 2670 err = "plugin missing OpenACC CUDA handler function";
d3d8e632 2671 goto fail;
ca4c3545 2672 }
2673 }
995b27b9 2674#undef DLSYM
ca4c3545 2675#undef DLSYM_OPT
995b27b9 2676
d3d8e632 2677 return 1;
2678
2679 dl_fail:
2680 err = dlerror ();
2681 fail:
2682 gomp_error ("while loading %s: %s", plugin_name, err);
2683 if (last_missing)
2684 gomp_error ("missing function was %s", last_missing);
2685 if (plugin_handle)
2686 dlclose (plugin_handle);
2687
2688 return 0;
995b27b9 2689}
2690
f87b2900 2691/* This function finalizes all initialized devices. */
2692
2693static void
2694gomp_target_fini (void)
2695{
2696 int i;
2697 for (i = 0; i < num_devices; i++)
2698 {
9b50ad1d 2699 bool ret = true;
f87b2900 2700 struct gomp_device_descr *devicep = &devices[i];
2701 gomp_mutex_lock (&devicep->lock);
2702 if (devicep->state == GOMP_DEVICE_INITIALIZED)
2703 {
9b50ad1d 2704 ret = devicep->fini_device_func (devicep->target_id);
f87b2900 2705 devicep->state = GOMP_DEVICE_FINALIZED;
2706 }
2707 gomp_mutex_unlock (&devicep->lock);
9b50ad1d 2708 if (!ret)
2709 gomp_fatal ("device finalization failed");
f87b2900 2710 }
2711}
2712
995b27b9 2713/* This function initializes the runtime needed for offloading.
ca4c3545 2714 It parses the list of offload targets and tries to load the plugins for
2715 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
 2716 will be set, and the array DEVICES initialized, containing descriptors for
 2717 the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
 2718 followed by the others. */
995b27b9 2719
2720static void
2721gomp_target_init (void)
2722{
2723 const char *prefix ="libgomp-plugin-";
4fda895a 2724 const char *suffix = SONAME_SUFFIX (1);
995b27b9 2725 const char *cur, *next;
2726 char *plugin_name;
2727 int i, new_num_devices;
2728
2729 num_devices = 0;
2730 devices = NULL;
2731
2732 cur = OFFLOAD_TARGETS;
2733 if (*cur)
2734 do
2735 {
2736 struct gomp_device_descr current_device;
02b7bb30 2737 size_t prefix_len, suffix_len, cur_len;
995b27b9 2738
2739 next = strchr (cur, ',');
2740
02b7bb30 2741 prefix_len = strlen (prefix);
2742 cur_len = next ? next - cur : strlen (cur);
2743 suffix_len = strlen (suffix);
2744
2745 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
995b27b9 2746 if (!plugin_name)
2747 {
2748 num_devices = 0;
2749 break;
2750 }
2751
02b7bb30 2752 memcpy (plugin_name, prefix, prefix_len);
2753 memcpy (plugin_name + prefix_len, cur, cur_len);
2754 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
995b27b9 2755
2756 if (gomp_load_plugin_for_device (&current_device, plugin_name))
2757 {
2758 new_num_devices = current_device.get_num_devices_func ();
2759 if (new_num_devices >= 1)
2760 {
ca4c3545 2761 /* Augment DEVICES and NUM_DEVICES. */
2762
995b27b9 2763 devices = realloc (devices, (num_devices + new_num_devices)
2764 * sizeof (struct gomp_device_descr));
2765 if (!devices)
2766 {
2767 num_devices = 0;
2768 free (plugin_name);
2769 break;
2770 }
2771
ca4c3545 2772 current_device.name = current_device.get_name_func ();
2773 /* current_device.capabilities has already been set. */
995b27b9 2774 current_device.type = current_device.get_type_func ();
0d8c703d 2775 current_device.mem_map.root = NULL;
f87b2900 2776 current_device.state = GOMP_DEVICE_UNINITIALIZED;
ca4c3545 2777 current_device.openacc.data_environ = NULL;
995b27b9 2778 for (i = 0; i < new_num_devices; i++)
2779 {
995b27b9 2780 current_device.target_id = i;
2781 devices[num_devices] = current_device;
ca4c3545 2782 gomp_mutex_init (&devices[num_devices].lock);
995b27b9 2783 num_devices++;
2784 }
2785 }
2786 }
2787
2788 free (plugin_name);
2789 cur = next + 1;
2790 }
2791 while (next);
2792
ca4c3545 2793 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2794 NUM_DEVICES_OPENMP. */
2795 struct gomp_device_descr *devices_s
2796 = malloc (num_devices * sizeof (struct gomp_device_descr));
2797 if (!devices_s)
2798 {
2799 num_devices = 0;
2800 free (devices);
2801 devices = NULL;
2802 }
2803 num_devices_openmp = 0;
2804 for (i = 0; i < num_devices; i++)
2805 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2806 devices_s[num_devices_openmp++] = devices[i];
2807 int num_devices_after_openmp = num_devices_openmp;
2808 for (i = 0; i < num_devices; i++)
2809 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2810 devices_s[num_devices_after_openmp++] = devices[i];
2811 free (devices);
2812 devices = devices_s;
2813
2814 for (i = 0; i < num_devices; i++)
2815 {
ca4c3545 2816 /* The 'devices' array can be moved (by the realloc call) until we have
2817 found all the plugins, so registering with the OpenACC runtime (which
2818 takes a copy of the pointer argument) must be delayed until now. */
2819 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2820 goacc_register (&devices[i]);
2821 }
f87b2900 2822
2823 if (atexit (gomp_target_fini) != 0)
2824 gomp_fatal ("atexit failed");
995b27b9 2825}
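
/* For example, on an ELF host configured for nvptx and hsa offloading,
   OFFLOAD_TARGETS is a comma-separated list such as "nvptx,hsa" and
   SONAME_SUFFIX (1) expands to ".so.1", so the loop above attempts to dlopen
   "libgomp-plugin-nvptx.so.1" and then "libgomp-plugin-hsa.so.1" (names
   indicative; they depend on the configure-time settings).  */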
2826
2827#else /* PLUGIN_SUPPORT */
2828/* If dlfcn.h is unavailable we always fall back to host execution.
2829 GOMP_target* routines are just stubs for this case. */
2830static void
2831gomp_target_init (void)
2832{
2833}
2834#endif /* PLUGIN_SUPPORT */