]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/target.c
re PR libgomp/64635 (darwin produces libgomp-plugin-host_nonshm.1.dylib but tries...
[thirdparty/gcc.git] / libgomp / target.c
CommitLineData
5624e564 1/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
acf0174b
JJ
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
3
f1f3453e
TS
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
acf0174b
JJ
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
1df3f842 26/* This file contains the support of offloading. */
acf0174b 27
1df3f842 28#include "config.h"
acf0174b 29#include "libgomp.h"
41dbbb37
TS
30#include "oacc-plugin.h"
31#include "oacc-int.h"
32#include "gomp-constants.h"
acf0174b
JJ
33#include <limits.h>
34#include <stdbool.h>
35#include <stdlib.h>
36#include <string.h>
41dbbb37 37#include <assert.h>
acf0174b 38
1df3f842
JJ
39#ifdef PLUGIN_SUPPORT
40#include <dlfcn.h>
b5f7a6ca 41#include "plugin-suffix.h"
1df3f842
JJ
42#endif
43
44static void gomp_target_init (void);
45
41dbbb37 46/* The whole initialization code for offloading plugins is only run one. */
1df3f842
JJ
47static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
48
1df3f842
JJ
49/* This structure describes an offload image.
50 It contains type of the target device, pointer to host table descriptor, and
51 pointer to target data. */
52struct offload_image_descr {
53 enum offload_target_type type;
54 void *host_table;
55 void *target_data;
56};
57
58/* Array of descriptors of offload images. */
59static struct offload_image_descr *offload_images;
60
61/* Total number of offload images. */
62static int num_offload_images;
63
64/* Array of descriptors for all available devices. */
65static struct gomp_device_descr *devices;
66
973e9808 67#ifdef PLUGIN_SUPPORT
1df3f842
JJ
68/* Total number of available devices. */
69static int num_devices;
973e9808 70#endif
1df3f842 71
41dbbb37
TS
72/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
73static int num_devices_openmp;
74
1df3f842
JJ
75/* The comparison function. */
76
41dbbb37 77attribute_hidden int
1df3f842
JJ
78splay_compare (splay_tree_key x, splay_tree_key y)
79{
80 if (x->host_start == x->host_end
81 && y->host_start == y->host_end)
82 return 0;
83 if (x->host_end <= y->host_start)
84 return -1;
85 if (x->host_start >= y->host_end)
86 return 1;
87 return 0;
88}
89
90#include "splay-tree.h"
91
41dbbb37
TS
92attribute_hidden void
93gomp_init_targets_once (void)
1df3f842 94{
41dbbb37
TS
95 (void) pthread_once (&gomp_is_initialized, gomp_target_init);
96}
1df3f842 97
acf0174b
JJ
98attribute_hidden int
99gomp_get_num_devices (void)
100{
41dbbb37
TS
101 gomp_init_targets_once ();
102 return num_devices_openmp;
1df3f842
JJ
103}
104
105static struct gomp_device_descr *
106resolve_device (int device_id)
107{
41dbbb37 108 if (device_id == GOMP_DEVICE_ICV)
1df3f842
JJ
109 {
110 struct gomp_task_icv *icv = gomp_icv (false);
111 device_id = icv->default_device_var;
112 }
113
114 if (device_id < 0 || device_id >= gomp_get_num_devices ())
115 return NULL;
116
117 return &devices[device_id];
118}
119
120
121/* Handle the case where splay_tree_lookup found oldn for newn.
122 Helper function of gomp_map_vars. */
123
124static inline void
125gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
126 unsigned char kind)
127{
41dbbb37
TS
128 if ((kind & GOMP_MAP_FLAG_FORCE)
129 || oldn->host_start > newn->host_start
1df3f842 130 || oldn->host_end < newn->host_end)
41dbbb37 131 gomp_fatal ("Trying to map into device [%p..%p) object when "
1df3f842
JJ
132 "[%p..%p) is already mapped",
133 (void *) newn->host_start, (void *) newn->host_end,
134 (void *) oldn->host_start, (void *) oldn->host_end);
135 oldn->refcount++;
136}
137
41dbbb37
TS
138static int
139get_kind (bool is_openacc, void *kinds, int idx)
140{
141 return is_openacc ? ((unsigned short *) kinds)[idx]
142 : ((unsigned char *) kinds)[idx];
143}
144
145attribute_hidden struct target_mem_desc *
1df3f842 146gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
41dbbb37
TS
147 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
148 bool is_openacc, bool is_target)
1df3f842
JJ
149{
150 size_t i, tgt_align, tgt_size, not_found_cnt = 0;
41dbbb37
TS
151 const int rshift = is_openacc ? 8 : 3;
152 const int typemask = is_openacc ? 0xff : 0x7;
153 struct gomp_memory_mapping *mm = &devicep->mem_map;
1df3f842
JJ
154 struct splay_tree_key_s cur_node;
155 struct target_mem_desc *tgt
156 = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
157 tgt->list_count = mapnum;
158 tgt->refcount = 1;
159 tgt->device_descr = devicep;
41dbbb37 160 tgt->mem_map = mm;
1df3f842
JJ
161
162 if (mapnum == 0)
163 return tgt;
164
165 tgt_align = sizeof (void *);
166 tgt_size = 0;
167 if (is_target)
168 {
169 size_t align = 4 * sizeof (void *);
170 tgt_align = align;
171 tgt_size = mapnum * sizeof (void *);
172 }
173
41dbbb37
TS
174 gomp_mutex_lock (&mm->lock);
175
1df3f842
JJ
176 for (i = 0; i < mapnum; i++)
177 {
41dbbb37 178 int kind = get_kind (is_openacc, kinds, i);
1df3f842
JJ
179 if (hostaddrs[i] == NULL)
180 {
181 tgt->list[i] = NULL;
182 continue;
183 }
184 cur_node.host_start = (uintptr_t) hostaddrs[i];
41dbbb37 185 if (!GOMP_MAP_POINTER_P (kind & typemask))
1df3f842
JJ
186 cur_node.host_end = cur_node.host_start + sizes[i];
187 else
188 cur_node.host_end = cur_node.host_start + sizeof (void *);
41dbbb37 189 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
1df3f842
JJ
190 if (n)
191 {
192 tgt->list[i] = n;
41dbbb37 193 gomp_map_vars_existing (n, &cur_node, kind & typemask);
1df3f842
JJ
194 }
195 else
196 {
1df3f842 197 tgt->list[i] = NULL;
41dbbb37
TS
198
199 size_t align = (size_t) 1 << (kind >> rshift);
1df3f842
JJ
200 not_found_cnt++;
201 if (tgt_align < align)
202 tgt_align = align;
203 tgt_size = (tgt_size + align - 1) & ~(align - 1);
204 tgt_size += cur_node.host_end - cur_node.host_start;
41dbbb37 205 if ((kind & typemask) == GOMP_MAP_TO_PSET)
1df3f842
JJ
206 {
207 size_t j;
208 for (j = i + 1; j < mapnum; j++)
41dbbb37
TS
209 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
210 & typemask))
1df3f842
JJ
211 break;
212 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
213 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
214 > cur_node.host_end))
215 break;
216 else
217 {
218 tgt->list[j] = NULL;
219 i++;
220 }
221 }
222 }
223 }
224
41dbbb37
TS
225 if (devaddrs)
226 {
227 if (mapnum != 1)
228 gomp_fatal ("unexpected aggregation");
229 tgt->to_free = devaddrs[0];
230 tgt->tgt_start = (uintptr_t) tgt->to_free;
231 tgt->tgt_end = tgt->tgt_start + sizes[0];
232 }
233 else if (not_found_cnt || is_target)
1df3f842
JJ
234 {
235 /* Allocate tgt_align aligned tgt_size block of memory. */
236 /* FIXME: Perhaps change interface to allocate properly aligned
237 memory. */
238 tgt->to_free = devicep->alloc_func (devicep->target_id,
239 tgt_size + tgt_align - 1);
240 tgt->tgt_start = (uintptr_t) tgt->to_free;
241 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
242 tgt->tgt_end = tgt->tgt_start + tgt_size;
243 }
244 else
245 {
246 tgt->to_free = NULL;
247 tgt->tgt_start = 0;
248 tgt->tgt_end = 0;
249 }
250
251 tgt_size = 0;
252 if (is_target)
253 tgt_size = mapnum * sizeof (void *);
254
255 tgt->array = NULL;
256 if (not_found_cnt)
257 {
258 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
259 splay_tree_node array = tgt->array;
260 size_t j;
261
262 for (i = 0; i < mapnum; i++)
263 if (tgt->list[i] == NULL)
264 {
41dbbb37 265 int kind = get_kind (is_openacc, kinds, i);
1df3f842
JJ
266 if (hostaddrs[i] == NULL)
267 continue;
268 splay_tree_key k = &array->key;
269 k->host_start = (uintptr_t) hostaddrs[i];
41dbbb37 270 if (!GOMP_MAP_POINTER_P (kind & typemask))
1df3f842
JJ
271 k->host_end = k->host_start + sizes[i];
272 else
273 k->host_end = k->host_start + sizeof (void *);
41dbbb37 274 splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
1df3f842
JJ
275 if (n)
276 {
277 tgt->list[i] = n;
41dbbb37 278 gomp_map_vars_existing (n, k, kind & typemask);
1df3f842
JJ
279 }
280 else
281 {
41dbbb37 282 size_t align = (size_t) 1 << (kind >> rshift);
1df3f842
JJ
283 tgt->list[i] = k;
284 tgt_size = (tgt_size + align - 1) & ~(align - 1);
285 k->tgt = tgt;
286 k->tgt_offset = tgt_size;
287 tgt_size += k->host_end - k->host_start;
41dbbb37 288 k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1df3f842 289 k->refcount = 1;
41dbbb37 290 k->async_refcount = 0;
1df3f842
JJ
291 tgt->refcount++;
292 array->left = NULL;
293 array->right = NULL;
41dbbb37
TS
294 splay_tree_insert (&mm->splay_tree, array);
295 switch (kind & typemask)
1df3f842 296 {
41dbbb37
TS
297 case GOMP_MAP_ALLOC:
298 case GOMP_MAP_FROM:
299 case GOMP_MAP_FORCE_ALLOC:
300 case GOMP_MAP_FORCE_FROM:
1df3f842 301 break;
41dbbb37
TS
302 case GOMP_MAP_TO:
303 case GOMP_MAP_TOFROM:
304 case GOMP_MAP_FORCE_TO:
305 case GOMP_MAP_FORCE_TOFROM:
1df3f842
JJ
306 /* FIXME: Perhaps add some smarts, like if copying
307 several adjacent fields from host to target, use some
308 host buffer to avoid sending each var individually. */
309 devicep->host2dev_func (devicep->target_id,
310 (void *) (tgt->tgt_start
311 + k->tgt_offset),
312 (void *) k->host_start,
313 k->host_end - k->host_start);
314 break;
41dbbb37 315 case GOMP_MAP_POINTER:
1df3f842
JJ
316 cur_node.host_start
317 = (uintptr_t) *(void **) k->host_start;
318 if (cur_node.host_start == (uintptr_t) NULL)
319 {
320 cur_node.tgt_offset = (uintptr_t) NULL;
41dbbb37 321 /* FIXME: see above FIXME comment. */
1df3f842
JJ
322 devicep->host2dev_func (devicep->target_id,
323 (void *) (tgt->tgt_start
324 + k->tgt_offset),
325 (void *) &cur_node.tgt_offset,
326 sizeof (void *));
327 break;
328 }
329 /* Add bias to the pointer value. */
330 cur_node.host_start += sizes[i];
331 cur_node.host_end = cur_node.host_start + 1;
41dbbb37 332 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
1df3f842
JJ
333 if (n == NULL)
334 {
335 /* Could be possibly zero size array section. */
336 cur_node.host_end--;
41dbbb37 337 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
1df3f842
JJ
338 if (n == NULL)
339 {
340 cur_node.host_start--;
41dbbb37 341 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
1df3f842
JJ
342 cur_node.host_start++;
343 }
344 }
345 if (n == NULL)
346 gomp_fatal ("Pointer target of array section "
347 "wasn't mapped");
348 cur_node.host_start -= n->host_start;
349 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
350 + cur_node.host_start;
351 /* At this point tgt_offset is target address of the
352 array section. Now subtract bias to get what we want
353 to initialize the pointer with. */
354 cur_node.tgt_offset -= sizes[i];
41dbbb37 355 /* FIXME: see above FIXME comment. */
1df3f842
JJ
356 devicep->host2dev_func (devicep->target_id,
357 (void *) (tgt->tgt_start
358 + k->tgt_offset),
359 (void *) &cur_node.tgt_offset,
360 sizeof (void *));
361 break;
41dbbb37
TS
362 case GOMP_MAP_TO_PSET:
363 /* FIXME: see above FIXME comment. */
1df3f842
JJ
364 devicep->host2dev_func (devicep->target_id,
365 (void *) (tgt->tgt_start
366 + k->tgt_offset),
367 (void *) k->host_start,
368 k->host_end - k->host_start);
41dbbb37 369
1df3f842 370 for (j = i + 1; j < mapnum; j++)
41dbbb37
TS
371 if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
372 & typemask))
1df3f842
JJ
373 break;
374 else if ((uintptr_t) hostaddrs[j] < k->host_start
375 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
376 > k->host_end))
377 break;
378 else
379 {
380 tgt->list[j] = k;
381 k->refcount++;
382 cur_node.host_start
383 = (uintptr_t) *(void **) hostaddrs[j];
384 if (cur_node.host_start == (uintptr_t) NULL)
385 {
386 cur_node.tgt_offset = (uintptr_t) NULL;
41dbbb37 387 /* FIXME: see above FIXME comment. */
1df3f842
JJ
388 devicep->host2dev_func (devicep->target_id,
389 (void *) (tgt->tgt_start + k->tgt_offset
390 + ((uintptr_t) hostaddrs[j]
391 - k->host_start)),
392 (void *) &cur_node.tgt_offset,
393 sizeof (void *));
394 i++;
395 continue;
396 }
397 /* Add bias to the pointer value. */
398 cur_node.host_start += sizes[j];
399 cur_node.host_end = cur_node.host_start + 1;
41dbbb37 400 n = splay_tree_lookup (&mm->splay_tree, &cur_node);
1df3f842
JJ
401 if (n == NULL)
402 {
403 /* Could be possibly zero size array section. */
404 cur_node.host_end--;
41dbbb37 405 n = splay_tree_lookup (&mm->splay_tree,
1df3f842
JJ
406 &cur_node);
407 if (n == NULL)
408 {
409 cur_node.host_start--;
41dbbb37
TS
410 n = splay_tree_lookup (&mm->splay_tree,
411 &cur_node);
1df3f842
JJ
412 cur_node.host_start++;
413 }
414 }
415 if (n == NULL)
416 gomp_fatal ("Pointer target of array section "
417 "wasn't mapped");
418 cur_node.host_start -= n->host_start;
419 cur_node.tgt_offset = n->tgt->tgt_start
420 + n->tgt_offset
421 + cur_node.host_start;
422 /* At this point tgt_offset is target address of the
423 array section. Now subtract bias to get what we
424 want to initialize the pointer with. */
425 cur_node.tgt_offset -= sizes[j];
41dbbb37 426 /* FIXME: see above FIXME comment. */
1df3f842
JJ
427 devicep->host2dev_func (devicep->target_id,
428 (void *) (tgt->tgt_start + k->tgt_offset
429 + ((uintptr_t) hostaddrs[j]
430 - k->host_start)),
431 (void *) &cur_node.tgt_offset,
432 sizeof (void *));
433 i++;
434 }
41dbbb37
TS
435 break;
436 case GOMP_MAP_FORCE_PRESENT:
437 {
438 /* We already looked up the memory region above and it
439 was missing. */
440 size_t size = k->host_end - k->host_start;
441 gomp_fatal ("present clause: !acc_is_present (%p, "
442 "%zd (0x%zx))", (void *) k->host_start,
443 size, size);
444 }
445 break;
446 case GOMP_MAP_FORCE_DEVICEPTR:
447 assert (k->host_end - k->host_start == sizeof (void *));
448
449 devicep->host2dev_func (devicep->target_id,
450 (void *) (tgt->tgt_start
451 + k->tgt_offset),
452 (void *) k->host_start,
453 sizeof (void *));
454 break;
455 default:
456 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
457 kind);
1df3f842
JJ
458 }
459 array++;
460 }
461 }
462 }
41dbbb37 463
1df3f842
JJ
464 if (is_target)
465 {
466 for (i = 0; i < mapnum; i++)
467 {
468 if (tgt->list[i] == NULL)
469 cur_node.tgt_offset = (uintptr_t) NULL;
470 else
471 cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
472 + tgt->list[i]->tgt_offset;
41dbbb37 473 /* FIXME: see above FIXME comment. */
1df3f842
JJ
474 devicep->host2dev_func (devicep->target_id,
475 (void *) (tgt->tgt_start
476 + i * sizeof (void *)),
477 (void *) &cur_node.tgt_offset,
478 sizeof (void *));
479 }
480 }
481
41dbbb37 482 gomp_mutex_unlock (&mm->lock);
1df3f842
JJ
483 return tgt;
484}
485
486static void
487gomp_unmap_tgt (struct target_mem_desc *tgt)
488{
489 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
490 if (tgt->tgt_end)
491 tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
492
493 free (tgt->array);
494 free (tgt);
495}
496
41dbbb37
TS
497/* Decrease the refcount for a set of mapped variables, and queue asychronous
498 copies from the device back to the host after any work that has been issued.
499 Because the regions are still "live", increment an asynchronous reference
500 count to indicate that they should not be unmapped from host-side data
501 structures until the asynchronous copy has completed. */
502
503attribute_hidden void
504gomp_copy_from_async (struct target_mem_desc *tgt)
505{
506 struct gomp_device_descr *devicep = tgt->device_descr;
507 struct gomp_memory_mapping *mm = tgt->mem_map;
508 size_t i;
509
510 gomp_mutex_lock (&mm->lock);
511
512 for (i = 0; i < tgt->list_count; i++)
513 if (tgt->list[i] == NULL)
514 ;
515 else if (tgt->list[i]->refcount > 1)
516 {
517 tgt->list[i]->refcount--;
518 tgt->list[i]->async_refcount++;
519 }
520 else
521 {
522 splay_tree_key k = tgt->list[i];
523 if (k->copy_from)
524 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
525 (void *) (k->tgt->tgt_start + k->tgt_offset),
526 k->host_end - k->host_start);
527 }
528
529 gomp_mutex_unlock (&mm->lock);
530}
531
532/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
533 variables back from device to host: if it is false, it is assumed that this
534 has been done already, i.e. by gomp_copy_from_async above. */
535
536attribute_hidden void
537gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1df3f842
JJ
538{
539 struct gomp_device_descr *devicep = tgt->device_descr;
41dbbb37 540 struct gomp_memory_mapping *mm = tgt->mem_map;
1df3f842
JJ
541
542 if (tgt->list_count == 0)
543 {
544 free (tgt);
545 return;
546 }
547
41dbbb37
TS
548 gomp_mutex_lock (&mm->lock);
549
1df3f842 550 size_t i;
1df3f842
JJ
551 for (i = 0; i < tgt->list_count; i++)
552 if (tgt->list[i] == NULL)
553 ;
554 else if (tgt->list[i]->refcount > 1)
555 tgt->list[i]->refcount--;
41dbbb37
TS
556 else if (tgt->list[i]->async_refcount > 0)
557 tgt->list[i]->async_refcount--;
1df3f842
JJ
558 else
559 {
560 splay_tree_key k = tgt->list[i];
41dbbb37 561 if (k->copy_from && do_copyfrom)
1df3f842
JJ
562 devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
563 (void *) (k->tgt->tgt_start + k->tgt_offset),
564 k->host_end - k->host_start);
41dbbb37 565 splay_tree_remove (&mm->splay_tree, k);
1df3f842
JJ
566 if (k->tgt->refcount > 1)
567 k->tgt->refcount--;
568 else
569 gomp_unmap_tgt (k->tgt);
570 }
571
572 if (tgt->refcount > 1)
573 tgt->refcount--;
574 else
575 gomp_unmap_tgt (tgt);
41dbbb37
TS
576
577 gomp_mutex_unlock (&mm->lock);
1df3f842
JJ
578}
579
580static void
41dbbb37
TS
581gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
582 size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
583 bool is_openacc)
1df3f842
JJ
584{
585 size_t i;
586 struct splay_tree_key_s cur_node;
41dbbb37 587 const int typemask = is_openacc ? 0xff : 0x7;
1df3f842
JJ
588
589 if (!devicep)
590 return;
591
592 if (mapnum == 0)
593 return;
594
41dbbb37 595 gomp_mutex_lock (&mm->lock);
1df3f842
JJ
596 for (i = 0; i < mapnum; i++)
597 if (sizes[i])
598 {
599 cur_node.host_start = (uintptr_t) hostaddrs[i];
600 cur_node.host_end = cur_node.host_start + sizes[i];
41dbbb37 601 splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
1df3f842
JJ
602 &cur_node);
603 if (n)
604 {
41dbbb37 605 int kind = get_kind (is_openacc, kinds, i);
1df3f842
JJ
606 if (n->host_start > cur_node.host_start
607 || n->host_end < cur_node.host_end)
608 gomp_fatal ("Trying to update [%p..%p) object when"
609 "only [%p..%p) is mapped",
610 (void *) cur_node.host_start,
611 (void *) cur_node.host_end,
612 (void *) n->host_start,
613 (void *) n->host_end);
41dbbb37 614 if (GOMP_MAP_COPY_TO_P (kind & typemask))
1df3f842
JJ
615 devicep->host2dev_func (devicep->target_id,
616 (void *) (n->tgt->tgt_start
617 + n->tgt_offset
618 + cur_node.host_start
619 - n->host_start),
620 (void *) cur_node.host_start,
621 cur_node.host_end - cur_node.host_start);
41dbbb37 622 if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1df3f842
JJ
623 devicep->dev2host_func (devicep->target_id,
624 (void *) cur_node.host_start,
625 (void *) (n->tgt->tgt_start
626 + n->tgt_offset
627 + cur_node.host_start
628 - n->host_start),
629 cur_node.host_end - cur_node.host_start);
630 }
631 else
632 gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
633 (void *) cur_node.host_start,
634 (void *) cur_node.host_end);
635 }
41dbbb37 636 gomp_mutex_unlock (&mm->lock);
1df3f842
JJ
637}
638
639/* This function should be called from every offload image.
640 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
641 the target, and TARGET_DATA needed by target plugin. */
642
643void
644GOMP_offload_register (void *host_table, enum offload_target_type target_type,
645 void *target_data)
646{
647 offload_images = gomp_realloc (offload_images,
648 (num_offload_images + 1)
649 * sizeof (struct offload_image_descr));
650
651 offload_images[num_offload_images].type = target_type;
652 offload_images[num_offload_images].host_table = host_table;
653 offload_images[num_offload_images].target_data = target_data;
654
655 num_offload_images++;
656}
657
41dbbb37
TS
658/* This function initializes the target device, specified by DEVICEP. DEVICEP
659 must be locked on entry, and remains locked on return. */
1df3f842 660
41dbbb37 661attribute_hidden void
1df3f842
JJ
662gomp_init_device (struct gomp_device_descr *devicep)
663{
664 devicep->init_device_func (devicep->target_id);
41dbbb37
TS
665 devicep->is_initialized = true;
666}
1df3f842 667
41dbbb37
TS
668/* Initialize address mapping tables. MM must be locked on entry, and remains
669 locked on return. */
670
671attribute_hidden void
672gomp_init_tables (struct gomp_device_descr *devicep,
673 struct gomp_memory_mapping *mm)
674{
1df3f842
JJ
675 /* Get address mapping table for device. */
676 struct mapping_table *table = NULL;
677 int num_entries = devicep->get_table_func (devicep->target_id, &table);
678
679 /* Insert host-target address mapping into dev_splay_tree. */
680 int i;
681 for (i = 0; i < num_entries; i++)
682 {
683 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
684 tgt->refcount = 1;
685 tgt->array = gomp_malloc (sizeof (*tgt->array));
686 tgt->tgt_start = table[i].tgt_start;
687 tgt->tgt_end = table[i].tgt_end;
688 tgt->to_free = NULL;
689 tgt->list_count = 0;
690 tgt->device_descr = devicep;
691 splay_tree_node node = tgt->array;
692 splay_tree_key k = &node->key;
693 k->host_start = table[i].host_start;
694 k->host_end = table[i].host_end;
695 k->tgt_offset = 0;
696 k->refcount = 1;
697 k->copy_from = false;
698 k->tgt = tgt;
699 node->left = NULL;
700 node->right = NULL;
41dbbb37 701 splay_tree_insert (&mm->splay_tree, node);
1df3f842
JJ
702 }
703
704 free (table);
41dbbb37
TS
705 mm->is_initialized = true;
706}
707
708/* Free address mapping tables. MM must be locked on entry, and remains locked
709 on return. */
710
711attribute_hidden void
712gomp_free_memmap (struct gomp_memory_mapping *mm)
713{
714 while (mm->splay_tree.root)
715 {
716 struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
717
718 splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
719 free (tgt->array);
720 free (tgt);
721 }
722
723 mm->is_initialized = false;
724}
725
726/* This function de-initializes the target device, specified by DEVICEP.
727 DEVICEP must be locked on entry, and remains locked on return. */
728
729attribute_hidden void
730gomp_fini_device (struct gomp_device_descr *devicep)
731{
732 if (devicep->is_initialized)
733 devicep->fini_device_func (devicep->target_id);
734
735 devicep->is_initialized = false;
acf0174b
JJ
736}
737
738/* Called when encountering a target directive. If DEVICE
41dbbb37
TS
739 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
740 GOMP_DEVICE_HOST_FALLBACK (or any value
741 larger than last available hw device), use host fallback.
128b26dc
TS
742 FN is address of host code, UNUSED is part of the current ABI, but
743 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
acf0174b
JJ
744 with MAPNUM entries, with addresses of the host objects,
745 sizes of the host objects (resp. for pointer kind pointer bias
746 and assumed sizeof (void *) size) and kinds. */
747
748void
128b26dc 749GOMP_target (int device, void (*fn) (void *), const void *unused,
acf0174b
JJ
750 size_t mapnum, void **hostaddrs, size_t *sizes,
751 unsigned char *kinds)
752{
1df3f842 753 struct gomp_device_descr *devicep = resolve_device (device);
41dbbb37
TS
754
755 if (devicep == NULL
756 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1df3f842
JJ
757 {
758 /* Host fallback. */
759 struct gomp_thread old_thr, *thr = gomp_thread ();
760 old_thr = *thr;
761 memset (thr, '\0', sizeof (*thr));
762 if (gomp_places_list)
763 {
764 thr->place = old_thr.place;
765 thr->ts.place_partition_len = gomp_places_list_len;
766 }
767 fn (hostaddrs);
768 gomp_free_thread (thr);
769 *thr = old_thr;
770 return;
771 }
772
41dbbb37 773 gomp_mutex_lock (&devicep->lock);
1df3f842
JJ
774 if (!devicep->is_initialized)
775 gomp_init_device (devicep);
41dbbb37
TS
776 gomp_mutex_unlock (&devicep->lock);
777
778 void *fn_addr;
779
780 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
781 fn_addr = (void *) fn;
782 else
783 {
784 struct gomp_memory_mapping *mm = &devicep->mem_map;
785 gomp_mutex_lock (&mm->lock);
786
787 if (!mm->is_initialized)
788 gomp_init_tables (devicep, mm);
789
790 struct splay_tree_key_s k;
791 k.host_start = (uintptr_t) fn;
792 k.host_end = k.host_start + 1;
793 splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
794 if (tgt_fn == NULL)
795 gomp_fatal ("Target function wasn't mapped");
1df3f842 796
41dbbb37
TS
797 gomp_mutex_unlock (&mm->lock);
798
799 fn_addr = (void *) tgt_fn->tgt->tgt_start;
800 }
1df3f842
JJ
801
802 struct target_mem_desc *tgt_vars
41dbbb37
TS
803 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
804 true);
acf0174b
JJ
805 struct gomp_thread old_thr, *thr = gomp_thread ();
806 old_thr = *thr;
807 memset (thr, '\0', sizeof (*thr));
808 if (gomp_places_list)
809 {
810 thr->place = old_thr.place;
811 thr->ts.place_partition_len = gomp_places_list_len;
812 }
41dbbb37 813 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
acf0174b
JJ
814 gomp_free_thread (thr);
815 *thr = old_thr;
41dbbb37 816 gomp_unmap_vars (tgt_vars, true);
acf0174b
JJ
817}
818
819void
128b26dc 820GOMP_target_data (int device, const void *unused, size_t mapnum,
acf0174b
JJ
821 void **hostaddrs, size_t *sizes, unsigned char *kinds)
822{
1df3f842 823 struct gomp_device_descr *devicep = resolve_device (device);
41dbbb37
TS
824
825 if (devicep == NULL
826 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1df3f842
JJ
827 {
828 /* Host fallback. */
829 struct gomp_task_icv *icv = gomp_icv (false);
830 if (icv->target_data)
831 {
832 /* Even when doing a host fallback, if there are any active
833 #pragma omp target data constructs, need to remember the
834 new #pragma omp target data, otherwise GOMP_target_end_data
835 would get out of sync. */
836 struct target_mem_desc *tgt
41dbbb37 837 = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
1df3f842
JJ
838 tgt->prev = icv->target_data;
839 icv->target_data = tgt;
840 }
841 return;
842 }
843
41dbbb37 844 gomp_mutex_lock (&devicep->lock);
1df3f842
JJ
845 if (!devicep->is_initialized)
846 gomp_init_device (devicep);
41dbbb37
TS
847 gomp_mutex_unlock (&devicep->lock);
848
849 struct gomp_memory_mapping *mm = &devicep->mem_map;
850 gomp_mutex_lock (&mm->lock);
851 if (!mm->is_initialized)
852 gomp_init_tables (devicep, mm);
853 gomp_mutex_unlock (&mm->lock);
1df3f842
JJ
854
855 struct target_mem_desc *tgt
41dbbb37
TS
856 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
857 false);
1df3f842
JJ
858 struct gomp_task_icv *icv = gomp_icv (true);
859 tgt->prev = icv->target_data;
860 icv->target_data = tgt;
acf0174b
JJ
861}
862
863void
864GOMP_target_end_data (void)
865{
1df3f842
JJ
866 struct gomp_task_icv *icv = gomp_icv (false);
867 if (icv->target_data)
868 {
869 struct target_mem_desc *tgt = icv->target_data;
870 icv->target_data = tgt->prev;
41dbbb37 871 gomp_unmap_vars (tgt, true);
1df3f842 872 }
acf0174b
JJ
873}
874
875void
128b26dc 876GOMP_target_update (int device, const void *unused, size_t mapnum,
acf0174b
JJ
877 void **hostaddrs, size_t *sizes, unsigned char *kinds)
878{
1df3f842 879 struct gomp_device_descr *devicep = resolve_device (device);
41dbbb37
TS
880
881 if (devicep == NULL
882 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1df3f842
JJ
883 return;
884
41dbbb37 885 gomp_mutex_lock (&devicep->lock);
1df3f842
JJ
886 if (!devicep->is_initialized)
887 gomp_init_device (devicep);
41dbbb37
TS
888 gomp_mutex_unlock (&devicep->lock);
889
890 struct gomp_memory_mapping *mm = &devicep->mem_map;
891 gomp_mutex_lock (&mm->lock);
892 if (!mm->is_initialized)
893 gomp_init_tables (devicep, mm);
894 gomp_mutex_unlock (&mm->lock);
1df3f842 895
41dbbb37 896 gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
acf0174b
JJ
897}
898
899void
900GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
901{
902 if (thread_limit)
903 {
904 struct gomp_task_icv *icv = gomp_icv (true);
905 icv->thread_limit_var
906 = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
907 }
908 (void) num_teams;
909}
1df3f842
JJ
910
911#ifdef PLUGIN_SUPPORT
912
913/* This function tries to load a plugin for DEVICE. Name of plugin is passed
914 in PLUGIN_NAME.
915 The handles of the found functions are stored in the corresponding fields
916 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
917
918static bool
919gomp_load_plugin_for_device (struct gomp_device_descr *device,
920 const char *plugin_name)
921{
41dbbb37
TS
922 char *err = NULL, *last_missing = NULL;
923 int optional_present, optional_total;
924
925 /* Clear any existing error. */
926 dlerror ();
927
1df3f842
JJ
928 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
929 if (!plugin_handle)
41dbbb37
TS
930 {
931 err = dlerror ();
932 goto out;
933 }
1df3f842
JJ
934
935 /* Check if all required functions are available in the plugin and store
936 their handlers. */
41dbbb37
TS
937#define DLSYM(f) \
938 do \
939 { \
940 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
941 err = dlerror (); \
942 if (err != NULL) \
943 goto out; \
944 } \
1df3f842 945 while (0)
41dbbb37
TS
946 /* Similar, but missing functions are not an error. */
947#define DLSYM_OPT(f, n) \
948 do \
949 { \
950 char *tmp_err; \
951 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
952 tmp_err = dlerror (); \
953 if (tmp_err == NULL) \
954 optional_present++; \
955 else \
956 last_missing = #n; \
957 optional_total++; \
958 } \
959 while (0)
960
961 DLSYM (get_name);
962 DLSYM (get_caps);
1df3f842
JJ
963 DLSYM (get_type);
964 DLSYM (get_num_devices);
965 DLSYM (register_image);
966 DLSYM (init_device);
41dbbb37 967 DLSYM (fini_device);
1df3f842
JJ
968 DLSYM (get_table);
969 DLSYM (alloc);
970 DLSYM (free);
971 DLSYM (dev2host);
972 DLSYM (host2dev);
41dbbb37
TS
973 device->capabilities = device->get_caps_func ();
974 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
975 DLSYM (run);
976 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
977 {
978 optional_present = optional_total = 0;
979 DLSYM_OPT (openacc.exec, openacc_parallel);
980 DLSYM_OPT (openacc.open_device, openacc_open_device);
981 DLSYM_OPT (openacc.close_device, openacc_close_device);
982 DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
983 DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
984 DLSYM_OPT (openacc.register_async_cleanup,
985 openacc_register_async_cleanup);
986 DLSYM_OPT (openacc.async_test, openacc_async_test);
987 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
988 DLSYM_OPT (openacc.async_wait, openacc_async_wait);
989 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
990 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
991 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
992 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
993 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
994 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
995 /* Require all the OpenACC handlers if we have
996 GOMP_OFFLOAD_CAP_OPENACC_200. */
997 if (optional_present != optional_total)
998 {
999 err = "plugin missing OpenACC handler function";
1000 goto out;
1001 }
1002 optional_present = optional_total = 0;
1003 DLSYM_OPT (openacc.cuda.get_current_device,
1004 openacc_get_current_cuda_device);
1005 DLSYM_OPT (openacc.cuda.get_current_context,
1006 openacc_get_current_cuda_context);
1007 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
1008 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
1009 /* Make sure all the CUDA functions are there if any of them are. */
1010 if (optional_present && optional_present != optional_total)
1011 {
1012 err = "plugin missing OpenACC CUDA handler function";
1013 goto out;
1014 }
1015 }
1df3f842 1016#undef DLSYM
41dbbb37 1017#undef DLSYM_OPT
1df3f842 1018
41dbbb37
TS
1019 out:
1020 if (err != NULL)
1021 {
1022 gomp_error ("while loading %s: %s", plugin_name, err);
1023 if (last_missing)
1024 gomp_error ("missing function was %s", last_missing);
1025 if (plugin_handle)
1026 dlclose (plugin_handle);
1027 }
1028 return err == NULL;
1df3f842
JJ
1029}
1030
41dbbb37
TS
1031/* This function adds a compatible offload image IMAGE to an accelerator device
1032 DEVICE. DEVICE must be locked on entry, and remains locked on return. */
1df3f842
JJ
1033
1034static void
41dbbb37
TS
1035gomp_register_image_for_device (struct gomp_device_descr *device,
1036 struct offload_image_descr *image)
1df3f842 1037{
41dbbb37
TS
1038 if (!device->offload_regions_registered
1039 && (device->type == image->type
1040 || device->type == OFFLOAD_TARGET_TYPE_HOST))
1df3f842 1041 {
41dbbb37
TS
1042 device->register_image_func (image->host_table, image->target_data);
1043 device->offload_regions_registered = true;
1df3f842
JJ
1044 }
1045}
1046
1047/* This function initializes the runtime needed for offloading.
41dbbb37
TS
1048 It parses the list of offload targets and tries to load the plugins for
1049 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
1050 will be set, and the array DEVICES initialized, containing descriptors for
1051 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
1052 by the others. */
1df3f842
JJ
1053
1054static void
1055gomp_target_init (void)
1056{
1057 const char *prefix ="libgomp-plugin-";
b5f7a6ca 1058 const char *suffix = SONAME_SUFFIX (1);
1df3f842
JJ
1059 const char *cur, *next;
1060 char *plugin_name;
1061 int i, new_num_devices;
1062
1063 num_devices = 0;
1064 devices = NULL;
1065
1066 cur = OFFLOAD_TARGETS;
1067 if (*cur)
1068 do
1069 {
1070 struct gomp_device_descr current_device;
1071
1072 next = strchr (cur, ',');
1073
1074 plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
1075 + strlen (prefix) + strlen (suffix));
1076 if (!plugin_name)
1077 {
1078 num_devices = 0;
1079 break;
1080 }
1081
1082 strcpy (plugin_name, prefix);
1083 strncat (plugin_name, cur, next ? next - cur : strlen (cur));
1084 strcat (plugin_name, suffix);
1085
1086 if (gomp_load_plugin_for_device (&current_device, plugin_name))
1087 {
1088 new_num_devices = current_device.get_num_devices_func ();
1089 if (new_num_devices >= 1)
1090 {
41dbbb37
TS
1091 /* Augment DEVICES and NUM_DEVICES. */
1092
1df3f842
JJ
1093 devices = realloc (devices, (num_devices + new_num_devices)
1094 * sizeof (struct gomp_device_descr));
1095 if (!devices)
1096 {
1097 num_devices = 0;
1098 free (plugin_name);
1099 break;
1100 }
1101
41dbbb37
TS
1102 current_device.name = current_device.get_name_func ();
1103 /* current_device.capabilities has already been set. */
1df3f842 1104 current_device.type = current_device.get_type_func ();
41dbbb37
TS
1105 current_device.mem_map.is_initialized = false;
1106 current_device.mem_map.splay_tree.root = NULL;
1df3f842 1107 current_device.is_initialized = false;
41dbbb37
TS
1108 current_device.offload_regions_registered = false;
1109 current_device.openacc.data_environ = NULL;
1110 current_device.openacc.target_data = NULL;
1df3f842
JJ
1111 for (i = 0; i < new_num_devices; i++)
1112 {
1df3f842
JJ
1113 current_device.target_id = i;
1114 devices[num_devices] = current_device;
41dbbb37
TS
1115 gomp_mutex_init (&devices[num_devices].mem_map.lock);
1116 gomp_mutex_init (&devices[num_devices].lock);
1df3f842
JJ
1117 num_devices++;
1118 }
1119 }
1120 }
1121
1122 free (plugin_name);
1123 cur = next + 1;
1124 }
1125 while (next);
1126
41dbbb37
TS
1127 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
1128 NUM_DEVICES_OPENMP. */
1129 struct gomp_device_descr *devices_s
1130 = malloc (num_devices * sizeof (struct gomp_device_descr));
1131 if (!devices_s)
1132 {
1133 num_devices = 0;
1134 free (devices);
1135 devices = NULL;
1136 }
1137 num_devices_openmp = 0;
1138 for (i = 0; i < num_devices; i++)
1139 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1140 devices_s[num_devices_openmp++] = devices[i];
1141 int num_devices_after_openmp = num_devices_openmp;
1142 for (i = 0; i < num_devices; i++)
1143 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
1144 devices_s[num_devices_after_openmp++] = devices[i];
1145 free (devices);
1146 devices = devices_s;
1147
1148 for (i = 0; i < num_devices; i++)
1149 {
1150 int j;
1151
1152 for (j = 0; j < num_offload_images; j++)
1153 gomp_register_image_for_device (&devices[i], &offload_images[j]);
1154
1155 /* The 'devices' array can be moved (by the realloc call) until we have
1156 found all the plugins, so registering with the OpenACC runtime (which
1157 takes a copy of the pointer argument) must be delayed until now. */
1158 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
1159 goacc_register (&devices[i]);
1160 }
1161
1df3f842
JJ
1162 free (offload_images);
1163 offload_images = NULL;
1164 num_offload_images = 0;
1165}
1166
1167#else /* PLUGIN_SUPPORT */
1168/* If dlfcn.h is unavailable we always fallback to host execution.
1169 GOMP_target* routines are just stubs for this case. */
1170static void
1171gomp_target_init (void)
1172{
1173}
1174#endif /* PLUGIN_SUPPORT */