1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
/* Hash-table support: entries are bare pointers to refcount words.  */
typedef uintptr_t *hash_entry_type;

/* Allocation hooks for hashtab.h; gomp_malloc aborts on OOM.  */
static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}
53 ialias_redirect (GOMP_task
)
55 static inline hashval_t
56 htab_hash (hash_entry_type element
)
58 return hash_pointer ((void *) element
);
62 htab_eq (hash_entry_type x
, hash_entry_type y
)
67 #define FIELD_TGT_EMPTY (~(size_t) 0)
69 static void gomp_target_init (void);
71 /* The whole initialization code for offloading plugins is only run one. */
72 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
74 /* Mutex for offload image registration. */
75 static gomp_mutex_t register_lock
;
77 /* This structure describes an offload image.
78 It contains type of the target device, pointer to host table descriptor, and
79 pointer to target data. */
80 struct offload_image_descr
{
82 enum offload_target_type type
;
83 const void *host_table
;
84 const void *target_data
;
87 /* Array of descriptors of offload images. */
88 static struct offload_image_descr
*offload_images
;
90 /* Total number of offload images. */
91 static int num_offload_images
;
93 /* Array of descriptors for all available devices. */
94 static struct gomp_device_descr
*devices
;
96 /* Total number of available devices. */
97 static int num_devices
;
99 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
100 static int num_devices_openmp
;
102 /* OpenMP requires mask. */
103 static int omp_requires_mask
;
105 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
108 gomp_realloc_unlock (void *old
, size_t size
)
110 void *ret
= realloc (old
, size
);
113 gomp_mutex_unlock (®ister_lock
);
114 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
119 attribute_hidden
void
120 gomp_init_targets_once (void)
122 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
126 gomp_get_num_devices (void)
128 gomp_init_targets_once ();
129 return num_devices_openmp
;
132 static struct gomp_device_descr
*
133 resolve_device (int device_id
, bool remapped
)
135 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
137 struct gomp_task_icv
*icv
= gomp_icv (false);
138 device_id
= icv
->default_device_var
;
144 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
145 : omp_initial_device
))
147 if (device_id
== omp_invalid_device
)
148 gomp_fatal ("omp_invalid_device encountered");
149 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
150 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
151 "but device not found");
155 else if (device_id
>= gomp_get_num_devices ())
157 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
158 && device_id
!= num_devices_openmp
)
159 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
160 "but device not found");
165 gomp_mutex_lock (&devices
[device_id
].lock
);
166 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
167 gomp_init_device (&devices
[device_id
]);
168 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
170 gomp_mutex_unlock (&devices
[device_id
].lock
);
172 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
173 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
174 "but device is finalized");
178 gomp_mutex_unlock (&devices
[device_id
].lock
);
180 return &devices
[device_id
];
184 static inline splay_tree_key
185 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
187 if (key
->host_start
!= key
->host_end
)
188 return splay_tree_lookup (mem_map
, key
);
191 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
196 n
= splay_tree_lookup (mem_map
, key
);
200 return splay_tree_lookup (mem_map
, key
);
203 static inline splay_tree_key
204 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
206 if (key
->host_start
!= key
->host_end
)
207 return splay_tree_lookup (mem_map
, key
);
210 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
216 gomp_device_copy (struct gomp_device_descr
*devicep
,
217 bool (*copy_func
) (int, void *, const void *, size_t),
218 const char *dst
, void *dstaddr
,
219 const char *src
, const void *srcaddr
,
222 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
224 gomp_mutex_unlock (&devicep
->lock
);
225 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
226 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
231 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
232 bool (*copy_func
) (int, void *, const void *, size_t,
233 struct goacc_asyncqueue
*),
234 const char *dst
, void *dstaddr
,
235 const char *src
, const void *srcaddr
,
236 const void *srcaddr_orig
,
237 size_t size
, struct goacc_asyncqueue
*aq
)
239 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
241 gomp_mutex_unlock (&devicep
->lock
);
242 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
243 gomp_fatal ("Copying of %s object [%p..%p)"
244 " via buffer %s object [%p..%p)"
245 " to %s object [%p..%p) failed",
246 src
, srcaddr_orig
, srcaddr_orig
+ size
,
247 src
, srcaddr
, srcaddr
+ size
,
248 dst
, dstaddr
, dstaddr
+ size
);
250 gomp_fatal ("Copying of %s object [%p..%p)"
251 " to %s object [%p..%p) failed",
252 src
, srcaddr
, srcaddr
+ size
,
253 dst
, dstaddr
, dstaddr
+ size
);
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be used.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
297 /* Add region with device tgt_start relative offset and length to CBUF.
299 This must not be used for asynchronous copies, because the host data might
300 not be computed yet (by an earlier asynchronous compute region, for
302 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
303 is it more performant to use libgomp CBUF buffering or individual device
304 asyncronous copying?) */
307 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
309 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
313 if (cbuf
->chunk_cnt
< 0)
315 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
317 cbuf
->chunk_cnt
= -1;
320 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
322 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
326 /* If the last chunk is only used by one mapping, discard it,
327 as it will be one host to device copy anyway and
328 memcpying it around will only waste cycles. */
329 if (cbuf
->use_cnt
== 1)
332 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
333 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
338 /* Return true for mapping kinds which need to copy data from the
339 host to device for regions that weren't previously mapped. */
342 gomp_to_device_kind_p (int kind
)
348 case GOMP_MAP_FORCE_ALLOC
:
349 case GOMP_MAP_FORCE_FROM
:
350 case GOMP_MAP_ALWAYS_FROM
:
357 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
358 non-NULL), when the source data is stack or may otherwise be deallocated
359 before the asynchronous copy takes place, EPHEMERAL must be passed as
362 attribute_hidden
void
363 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
364 struct goacc_asyncqueue
*aq
,
365 void *d
, const void *h
, size_t sz
,
366 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
368 if (__builtin_expect (aq
!= NULL
, 0))
370 /* See 'gomp_coalesce_buf_add'. */
373 void *h_buf
= (void *) h
;
376 /* We're queueing up an asynchronous copy from data that may
377 disappear before the transfer takes place (i.e. because it is a
378 stack local in a function that is no longer executing). Make a
379 copy of the data into a temporary buffer in those cases. */
380 h_buf
= gomp_malloc (sz
);
381 memcpy (h_buf
, h
, sz
);
383 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
384 "dev", d
, "host", h_buf
, h
, sz
, aq
);
386 /* Free temporary buffer once the transfer has completed. */
387 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
394 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
395 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
398 long last
= cbuf
->chunk_cnt
- 1;
399 while (first
<= last
)
401 long middle
= (first
+ last
) >> 1;
402 if (cbuf
->chunks
[middle
].end
<= doff
)
404 else if (cbuf
->chunks
[middle
].start
<= doff
)
406 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
408 gomp_mutex_unlock (&devicep
->lock
);
409 gomp_fatal ("internal libgomp cbuf error");
411 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
421 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
424 attribute_hidden
void
425 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
426 struct goacc_asyncqueue
*aq
,
427 void *h
, const void *d
, size_t sz
)
429 if (__builtin_expect (aq
!= NULL
, 0))
430 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
431 "host", h
, "dev", d
, NULL
, sz
, aq
);
433 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
437 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
439 if (!devicep
->free_func (devicep
->target_id
, devptr
))
441 gomp_mutex_unlock (&devicep
->lock
);
442 gomp_fatal ("error in freeing device memory block at %p", devptr
);
446 /* Increment reference count of a splay_tree_key region K by 1.
447 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
448 increment the value if refcount is not yet contained in the set (used for
449 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
450 once for each construct). */
453 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
455 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
458 uintptr_t *refcount_ptr
= &k
->refcount
;
460 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
461 refcount_ptr
= &k
->structelem_refcount
;
462 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
463 refcount_ptr
= k
->structelem_refcount_ptr
;
467 if (htab_find (*refcount_set
, refcount_ptr
))
469 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
470 *slot
= refcount_ptr
;
477 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
478 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
479 track already seen refcounts, and only adjust the value if refcount is not
480 yet contained in the set (like gomp_increment_refcount).
482 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
483 it is already zero and we know we decremented it earlier. This signals that
484 associated maps should be copied back to host.
486 *DO_REMOVE is set to true when we this is the first handling of this refcount
487 and we are setting it to zero. This signals a removal of this key from the
490 Copy and removal are separated due to cases like handling of structure
491 elements, e.g. each map of a structure element representing a possible copy
492 out of a structure field has to be handled individually, but we only signal
493 removal for one (the first encountered) sibing map. */
496 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
497 bool *do_copy
, bool *do_remove
)
499 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
501 *do_copy
= *do_remove
= false;
505 uintptr_t *refcount_ptr
= &k
->refcount
;
507 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
508 refcount_ptr
= &k
->structelem_refcount
;
509 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
510 refcount_ptr
= k
->structelem_refcount_ptr
;
512 bool new_encountered_refcount
;
513 bool set_to_zero
= false;
514 bool is_zero
= false;
516 uintptr_t orig_refcount
= *refcount_ptr
;
520 if (htab_find (*refcount_set
, refcount_ptr
))
522 new_encountered_refcount
= false;
526 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
527 *slot
= refcount_ptr
;
528 new_encountered_refcount
= true;
531 /* If no refcount_set being used, assume all keys are being decremented
532 for the first time. */
533 new_encountered_refcount
= true;
537 else if (*refcount_ptr
> 0)
541 if (*refcount_ptr
== 0)
543 if (orig_refcount
> 0)
549 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
550 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
553 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
554 gomp_map_0len_lookup found oldn for newn.
555 Helper function of gomp_map_vars. */
558 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
559 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
560 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
561 unsigned char kind
, bool always_to_flag
, bool implicit
,
562 struct gomp_coalesce_buf
*cbuf
,
563 htab_t
*refcount_set
)
565 assert (kind
!= GOMP_MAP_ATTACH
566 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
569 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
570 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
571 tgt_var
->is_attach
= false;
572 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
574 /* For implicit maps, old contained in new is valid. */
575 bool implicit_subset
= (implicit
576 && newn
->host_start
<= oldn
->host_start
577 && oldn
->host_end
<= newn
->host_end
);
579 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
581 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
583 if ((kind
& GOMP_MAP_FLAG_FORCE
)
584 /* For implicit maps, old contained in new is valid. */
586 /* Otherwise, new contained inside old is considered valid. */
587 || (oldn
->host_start
<= newn
->host_start
588 && newn
->host_end
<= oldn
->host_end
)))
590 gomp_mutex_unlock (&devicep
->lock
);
591 gomp_fatal ("Trying to map into device [%p..%p) object when "
592 "[%p..%p) is already mapped",
593 (void *) newn
->host_start
, (void *) newn
->host_end
,
594 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
597 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
599 /* Implicit + always should not happen. If this does occur, below
600 address/length adjustment is a TODO. */
601 assert (!implicit_subset
);
603 if (oldn
->aux
&& oldn
->aux
->attach_count
)
605 /* We have to be careful not to overwrite still attached pointers
606 during the copyback to host. */
607 uintptr_t addr
= newn
->host_start
;
608 while (addr
< newn
->host_end
)
610 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
611 if (oldn
->aux
->attach_count
[i
] == 0)
612 gomp_copy_host2dev (devicep
, aq
,
613 (void *) (oldn
->tgt
->tgt_start
615 + addr
- oldn
->host_start
),
617 sizeof (void *), false, cbuf
);
618 addr
+= sizeof (void *);
622 gomp_copy_host2dev (devicep
, aq
,
623 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
624 + newn
->host_start
- oldn
->host_start
),
625 (void *) newn
->host_start
,
626 newn
->host_end
- newn
->host_start
, false, cbuf
);
629 gomp_increment_refcount (oldn
, refcount_set
);
633 get_kind (bool short_mapkind
, void *kinds
, int idx
)
636 return ((unsigned char *) kinds
)[idx
];
638 int val
= ((unsigned short *) kinds
)[idx
];
639 if (GOMP_MAP_IMPLICIT_P (val
))
640 val
&= ~GOMP_MAP_IMPLICIT
;
/* Return whether element IDX of KINDS carries the implicit-map flag.
   Only 16-bit (short) kinds can encode it.  */

static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}
656 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
657 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
658 struct gomp_coalesce_buf
*cbuf
,
659 bool allow_zero_length_array_sections
)
661 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
662 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
663 struct splay_tree_key_s cur_node
;
665 cur_node
.host_start
= host_ptr
;
666 if (cur_node
.host_start
== (uintptr_t) NULL
)
668 cur_node
.tgt_offset
= (uintptr_t) NULL
;
669 gomp_copy_host2dev (devicep
, aq
,
670 (void *) (tgt
->tgt_start
+ target_offset
),
671 (void *) &cur_node
.tgt_offset
, sizeof (void *),
675 /* Add bias to the pointer value. */
676 cur_node
.host_start
+= bias
;
677 cur_node
.host_end
= cur_node
.host_start
;
678 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
681 if (allow_zero_length_array_sections
)
682 cur_node
.tgt_offset
= 0;
685 gomp_mutex_unlock (&devicep
->lock
);
686 gomp_fatal ("Pointer target of array section wasn't mapped");
691 cur_node
.host_start
-= n
->host_start
;
693 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
694 /* At this point tgt_offset is target address of the
695 array section. Now subtract bias to get what we want
696 to initialize the pointer with. */
697 cur_node
.tgt_offset
-= bias
;
699 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
700 (void *) &cur_node
.tgt_offset
, sizeof (void *),
705 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
706 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
707 size_t first
, size_t i
, void **hostaddrs
,
708 size_t *sizes
, void *kinds
,
709 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
711 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
712 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
713 struct splay_tree_key_s cur_node
;
716 const bool short_mapkind
= true;
717 const int typemask
= short_mapkind
? 0xff : 0x7;
719 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
720 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
721 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
722 kind
= get_kind (short_mapkind
, kinds
, i
);
723 implicit
= get_implicit (short_mapkind
, kinds
, i
);
726 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
728 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
729 kind
& typemask
, false, implicit
, cbuf
,
735 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
737 cur_node
.host_start
--;
738 n2
= splay_tree_lookup (mem_map
, &cur_node
);
739 cur_node
.host_start
++;
742 && n2
->host_start
- n
->host_start
743 == n2
->tgt_offset
- n
->tgt_offset
)
745 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
746 kind
& typemask
, false, implicit
, cbuf
,
752 n2
= splay_tree_lookup (mem_map
, &cur_node
);
756 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
758 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
759 kind
& typemask
, false, implicit
, cbuf
,
764 gomp_mutex_unlock (&devicep
->lock
);
765 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
766 "other mapped elements from the same structure weren't mapped "
767 "together with it", (void *) cur_node
.host_start
,
768 (void *) cur_node
.host_end
);
771 attribute_hidden
void
772 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
773 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
774 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
775 struct gomp_coalesce_buf
*cbufp
,
776 bool allow_zero_length_array_sections
)
778 struct splay_tree_key_s s
;
783 gomp_mutex_unlock (&devicep
->lock
);
784 gomp_fatal ("enclosing struct not mapped for attach");
787 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
788 /* We might have a pointer in a packed struct: however we cannot have more
789 than one such pointer in each pointer-sized portion of the struct, so
791 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
794 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
796 if (!n
->aux
->attach_count
)
798 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
800 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
801 n
->aux
->attach_count
[idx
]++;
804 gomp_mutex_unlock (&devicep
->lock
);
805 gomp_fatal ("attach count overflow");
808 if (n
->aux
->attach_count
[idx
] == 1)
810 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
812 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
816 if ((void *) target
== NULL
)
818 gomp_mutex_unlock (&devicep
->lock
);
819 gomp_fatal ("attempt to attach null pointer");
822 s
.host_start
= target
+ bias
;
823 s
.host_end
= s
.host_start
+ 1;
824 tn
= splay_tree_lookup (mem_map
, &s
);
828 if (allow_zero_length_array_sections
)
829 /* When allowing attachment to zero-length array sections, we
830 allow attaching to NULL pointers when the target region is not
835 gomp_mutex_unlock (&devicep
->lock
);
836 gomp_fatal ("pointer target not mapped for attach");
840 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
843 "%s: attaching host %p, target %p (struct base %p) to %p\n",
844 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
845 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
847 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
848 sizeof (void *), true, cbufp
);
851 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
852 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
855 attribute_hidden
void
856 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
857 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
858 uintptr_t detach_from
, bool finalize
,
859 struct gomp_coalesce_buf
*cbufp
)
865 gomp_mutex_unlock (&devicep
->lock
);
866 gomp_fatal ("enclosing struct not mapped for detach");
869 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
871 if (!n
->aux
|| !n
->aux
->attach_count
)
873 gomp_mutex_unlock (&devicep
->lock
);
874 gomp_fatal ("no attachment counters for struct");
878 n
->aux
->attach_count
[idx
] = 1;
880 if (n
->aux
->attach_count
[idx
] == 0)
882 gomp_mutex_unlock (&devicep
->lock
);
883 gomp_fatal ("attach count underflow");
886 n
->aux
->attach_count
[idx
]--;
888 if (n
->aux
->attach_count
[idx
] == 0)
890 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
892 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
895 "%s: detaching host %p, target %p (struct base %p) to %p\n",
896 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
897 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
900 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
901 sizeof (void *), true, cbufp
);
904 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
905 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
908 attribute_hidden
uintptr_t
909 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
911 if (tgt
->list
[i
].key
!= NULL
)
912 return tgt
->list
[i
].key
->tgt
->tgt_start
913 + tgt
->list
[i
].key
->tgt_offset
914 + tgt
->list
[i
].offset
;
916 switch (tgt
->list
[i
].offset
)
919 return (uintptr_t) hostaddrs
[i
];
925 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
926 + tgt
->list
[i
+ 1].key
->tgt_offset
927 + tgt
->list
[i
+ 1].offset
928 + (uintptr_t) hostaddrs
[i
]
929 - (uintptr_t) hostaddrs
[i
+ 1];
932 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
936 static inline __attribute__((always_inline
)) struct target_mem_desc
*
937 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
938 struct goacc_asyncqueue
*aq
, size_t mapnum
,
939 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
940 void *kinds
, bool short_mapkind
,
941 htab_t
*refcount_set
,
942 enum gomp_map_vars_kind pragma_kind
)
944 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
945 bool has_firstprivate
= false;
946 bool has_always_ptrset
= false;
947 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
948 const int rshift
= short_mapkind
? 8 : 3;
949 const int typemask
= short_mapkind
? 0xff : 0x7;
950 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
951 struct splay_tree_key_s cur_node
;
952 struct target_mem_desc
*tgt
953 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
954 tgt
->list_count
= mapnum
;
955 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
956 tgt
->device_descr
= devicep
;
958 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
967 tgt_align
= sizeof (void *);
973 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
975 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
976 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
979 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
981 size_t align
= 4 * sizeof (void *);
983 tgt_size
= mapnum
* sizeof (void *);
985 cbuf
.use_cnt
= 1 + (mapnum
> 1);
986 cbuf
.chunks
[0].start
= 0;
987 cbuf
.chunks
[0].end
= tgt_size
;
990 gomp_mutex_lock (&devicep
->lock
);
991 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
993 gomp_mutex_unlock (&devicep
->lock
);
998 for (i
= 0; i
< mapnum
; i
++)
1000 int kind
= get_kind (short_mapkind
, kinds
, i
);
1001 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1002 if (hostaddrs
[i
] == NULL
1003 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1005 tgt
->list
[i
].key
= NULL
;
1006 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1009 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1010 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1012 tgt
->list
[i
].key
= NULL
;
1015 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1016 on a separate construct prior to using use_device_{addr,ptr}.
1017 In OpenMP 5.0, map directives need to be ordered by the
1018 middle-end before the use_device_* clauses. If
1019 !not_found_cnt, all mappings requested (if any) are already
1020 mapped, so use_device_{addr,ptr} can be resolved right away.
1021 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1022 now but would succeed after performing the mappings in the
1023 following loop. We can't defer this always to the second
1024 loop, because it is not even invoked when !not_found_cnt
1025 after the first loop. */
1026 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1027 cur_node
.host_end
= cur_node
.host_start
;
1028 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1031 cur_node
.host_start
-= n
->host_start
;
1033 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1034 + cur_node
.host_start
);
1036 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1038 gomp_mutex_unlock (&devicep
->lock
);
1039 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1041 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1042 /* If not present, continue using the host address. */
1045 __builtin_unreachable ();
1046 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1049 tgt
->list
[i
].offset
= 0;
1052 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1054 size_t first
= i
+ 1;
1055 size_t last
= i
+ sizes
[i
];
1056 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1057 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1059 tgt
->list
[i
].key
= NULL
;
1060 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1061 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1064 size_t align
= (size_t) 1 << (kind
>> rshift
);
1065 if (tgt_align
< align
)
1067 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1068 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1069 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1070 not_found_cnt
+= last
- i
;
1071 for (i
= first
; i
<= last
; i
++)
1073 tgt
->list
[i
].key
= NULL
;
1075 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1077 gomp_coalesce_buf_add (&cbuf
,
1078 tgt_size
- cur_node
.host_end
1079 + (uintptr_t) hostaddrs
[i
],
1085 for (i
= first
; i
<= last
; i
++)
1086 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1087 sizes
, kinds
, NULL
, refcount_set
);
1091 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1093 tgt
->list
[i
].key
= NULL
;
1094 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1095 has_firstprivate
= true;
1098 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1099 || ((kind
& typemask
)
1100 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1102 tgt
->list
[i
].key
= NULL
;
1103 has_firstprivate
= true;
1106 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1107 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1108 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1110 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1111 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1113 tgt
->list
[i
].key
= NULL
;
1115 size_t align
= (size_t) 1 << (kind
>> rshift
);
1116 if (tgt_align
< align
)
1118 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1120 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1121 cur_node
.host_end
- cur_node
.host_start
);
1122 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1123 has_firstprivate
= true;
1127 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1129 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1132 tgt
->list
[i
].key
= NULL
;
1133 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1138 n
= splay_tree_lookup (mem_map
, &cur_node
);
1139 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1141 int always_to_cnt
= 0;
1142 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1144 bool has_nullptr
= false;
1146 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1147 if (n
->tgt
->list
[j
].key
== n
)
1149 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1152 if (n
->tgt
->list_count
== 0)
1154 /* 'declare target'; assume has_nullptr; it could also be
1155 statically assigned pointer, but that it should be to
1156 the equivalent variable on the host. */
1157 assert (n
->refcount
== REFCOUNT_INFINITY
);
1161 assert (j
< n
->tgt
->list_count
);
1162 /* Re-map the data if there is an 'always' modifier or if it a
1163 null pointer was there and non a nonnull has been found; that
1164 permits transparent re-mapping for Fortran array descriptors
1165 which were previously mapped unallocated. */
1166 for (j
= i
+ 1; j
< mapnum
; j
++)
1168 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1169 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1171 || !GOMP_MAP_POINTER_P (ptr_kind
)
1172 || *(void **) hostaddrs
[j
] == NULL
))
1174 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1175 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1176 > cur_node
.host_end
))
1180 has_always_ptrset
= true;
1185 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1186 kind
& typemask
, always_to_cnt
> 0, implicit
,
1187 NULL
, refcount_set
);
1192 tgt
->list
[i
].key
= NULL
;
1194 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1196 /* Not present, hence, skip entry - including its MAP_POINTER,
1198 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1200 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1201 == GOMP_MAP_POINTER
))
1204 tgt
->list
[i
].key
= NULL
;
1205 tgt
->list
[i
].offset
= 0;
1209 size_t align
= (size_t) 1 << (kind
>> rshift
);
1211 if (tgt_align
< align
)
1213 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1215 && gomp_to_device_kind_p (kind
& typemask
))
1216 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1217 cur_node
.host_end
- cur_node
.host_start
);
1218 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1219 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1223 for (j
= i
+ 1; j
< mapnum
; j
++)
1224 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1225 kinds
, j
)) & typemask
))
1226 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1228 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1229 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1230 > cur_node
.host_end
))
1234 tgt
->list
[j
].key
= NULL
;
1245 gomp_mutex_unlock (&devicep
->lock
);
1246 gomp_fatal ("unexpected aggregation");
1248 tgt
->to_free
= devaddrs
[0];
1249 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1250 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1252 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
1254 /* Allocate tgt_align aligned tgt_size block of memory. */
1255 /* FIXME: Perhaps change interface to allocate properly aligned
1257 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1258 tgt_size
+ tgt_align
- 1);
1261 gomp_mutex_unlock (&devicep
->lock
);
1262 gomp_fatal ("device memory allocation fail");
1265 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1266 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1267 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1269 if (cbuf
.use_cnt
== 1)
1271 if (cbuf
.chunk_cnt
> 0)
1274 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1284 tgt
->to_free
= NULL
;
1290 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1291 tgt_size
= mapnum
* sizeof (void *);
1294 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1297 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1298 splay_tree_node array
= tgt
->array
;
1299 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1300 uintptr_t field_tgt_base
= 0;
1301 splay_tree_key field_tgt_structelem_first
= NULL
;
1303 for (i
= 0; i
< mapnum
; i
++)
1304 if (has_always_ptrset
1306 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1307 == GOMP_MAP_TO_PSET
)
1309 splay_tree_key k
= tgt
->list
[i
].key
;
1310 bool has_nullptr
= false;
1312 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1313 if (k
->tgt
->list
[j
].key
== k
)
1315 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1318 if (k
->tgt
->list_count
== 0)
1321 assert (j
< k
->tgt
->list_count
);
1323 tgt
->list
[i
].has_null_ptr_assoc
= false;
1324 for (j
= i
+ 1; j
< mapnum
; j
++)
1326 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1327 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1329 || !GOMP_MAP_POINTER_P (ptr_kind
)
1330 || *(void **) hostaddrs
[j
] == NULL
))
1332 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1333 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1338 if (*(void **) hostaddrs
[j
] == NULL
)
1339 tgt
->list
[i
].has_null_ptr_assoc
= true;
1340 tgt
->list
[j
].key
= k
;
1341 tgt
->list
[j
].copy_from
= false;
1342 tgt
->list
[j
].always_copy_from
= false;
1343 tgt
->list
[j
].is_attach
= false;
1344 gomp_increment_refcount (k
, refcount_set
);
1345 gomp_map_pointer (k
->tgt
, aq
,
1346 (uintptr_t) *(void **) hostaddrs
[j
],
1347 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1349 sizes
[j
], cbufp
, false);
1354 else if (tgt
->list
[i
].key
== NULL
)
1356 int kind
= get_kind (short_mapkind
, kinds
, i
);
1357 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1358 if (hostaddrs
[i
] == NULL
)
1360 switch (kind
& typemask
)
1362 size_t align
, len
, first
, last
;
1364 case GOMP_MAP_FIRSTPRIVATE
:
1365 align
= (size_t) 1 << (kind
>> rshift
);
1366 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1367 tgt
->list
[i
].offset
= tgt_size
;
1369 gomp_copy_host2dev (devicep
, aq
,
1370 (void *) (tgt
->tgt_start
+ tgt_size
),
1371 (void *) hostaddrs
[i
], len
, false, cbufp
);
1372 /* Save device address in hostaddr to permit latter availablity
1373 when doing a deep-firstprivate with pointer attach. */
1374 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1377 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1378 firstprivate to hostaddrs[i+1], which is assumed to contain a
1382 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1384 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1385 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1386 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1387 sizeof (void *), false, cbufp
);
1391 case GOMP_MAP_FIRSTPRIVATE_INT
:
1392 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1394 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1395 /* The OpenACC 'host_data' construct only allows 'use_device'
1396 "mapping" clauses, so in the first loop, 'not_found_cnt'
1397 must always have been zero, so all OpenACC 'use_device'
1398 clauses have already been handled. (We can only easily test
1399 'use_device' with 'if_present' clause here.) */
1400 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1401 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1402 code conceptually simple, similar to the first loop. */
1403 case GOMP_MAP_USE_DEVICE_PTR
:
1404 if (tgt
->list
[i
].offset
== 0)
1406 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1407 cur_node
.host_end
= cur_node
.host_start
;
1408 n
= gomp_map_lookup (mem_map
, &cur_node
);
1411 cur_node
.host_start
-= n
->host_start
;
1413 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1414 + cur_node
.host_start
);
1416 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1418 gomp_mutex_unlock (&devicep
->lock
);
1419 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1421 else if ((kind
& typemask
)
1422 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1423 /* If not present, continue using the host address. */
1426 __builtin_unreachable ();
1427 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1430 case GOMP_MAP_STRUCT
:
1432 last
= i
+ sizes
[i
];
1433 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1434 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1436 if (tgt
->list
[first
].key
!= NULL
)
1438 n
= splay_tree_lookup (mem_map
, &cur_node
);
1441 size_t align
= (size_t) 1 << (kind
>> rshift
);
1442 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1443 - (uintptr_t) hostaddrs
[i
];
1444 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1445 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1446 - (uintptr_t) hostaddrs
[i
];
1447 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1448 field_tgt_offset
= tgt_size
;
1449 field_tgt_clear
= last
;
1450 field_tgt_structelem_first
= NULL
;
1451 tgt_size
+= cur_node
.host_end
1452 - (uintptr_t) hostaddrs
[first
];
1455 for (i
= first
; i
<= last
; i
++)
1456 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1457 sizes
, kinds
, cbufp
, refcount_set
);
1460 case GOMP_MAP_ALWAYS_POINTER
:
1461 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1462 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1463 n
= splay_tree_lookup (mem_map
, &cur_node
);
1465 || n
->host_start
> cur_node
.host_start
1466 || n
->host_end
< cur_node
.host_end
)
1468 gomp_mutex_unlock (&devicep
->lock
);
1469 gomp_fatal ("always pointer not mapped");
1471 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1472 != GOMP_MAP_ALWAYS_POINTER
)
1473 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1474 if (cur_node
.tgt_offset
)
1475 cur_node
.tgt_offset
-= sizes
[i
];
1476 gomp_copy_host2dev (devicep
, aq
,
1477 (void *) (n
->tgt
->tgt_start
1479 + cur_node
.host_start
1481 (void *) &cur_node
.tgt_offset
,
1482 sizeof (void *), true, cbufp
);
1483 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1484 + cur_node
.host_start
- n
->host_start
;
1486 case GOMP_MAP_IF_PRESENT
:
1487 /* Not present - otherwise handled above. Skip over its
1488 MAP_POINTER as well. */
1490 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1491 == GOMP_MAP_POINTER
))
1494 case GOMP_MAP_ATTACH
:
1495 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1497 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1498 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1499 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1502 tgt
->list
[i
].key
= n
;
1503 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1504 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1505 tgt
->list
[i
].copy_from
= false;
1506 tgt
->list
[i
].always_copy_from
= false;
1507 tgt
->list
[i
].is_attach
= true;
1508 /* OpenACC 'attach'/'detach' doesn't affect
1509 structured/dynamic reference counts ('n->refcount',
1510 'n->dynamic_refcount'). */
1513 = ((kind
& typemask
)
1514 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1515 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1516 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1519 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1521 gomp_mutex_unlock (&devicep
->lock
);
1522 gomp_fatal ("outer struct not mapped for attach");
1529 splay_tree_key k
= &array
->key
;
1530 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1531 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1532 k
->host_end
= k
->host_start
+ sizes
[i
];
1534 k
->host_end
= k
->host_start
+ sizeof (void *);
1535 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1536 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1537 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1538 kind
& typemask
, false, implicit
, cbufp
,
1543 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1545 /* Replace target address of the pointer with target address
1546 of mapped object in the splay tree. */
1547 splay_tree_remove (mem_map
, n
);
1549 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1550 k
->aux
->link_key
= n
;
1552 size_t align
= (size_t) 1 << (kind
>> rshift
);
1553 tgt
->list
[i
].key
= k
;
1556 k
->dynamic_refcount
= 0;
1557 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1559 k
->tgt_offset
= k
->host_start
- field_tgt_base
1563 k
->refcount
= REFCOUNT_STRUCTELEM
;
1564 if (field_tgt_structelem_first
== NULL
)
1566 /* Set to first structure element of sequence. */
1567 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1568 field_tgt_structelem_first
= k
;
1571 /* Point to refcount of leading element, but do not
1573 k
->structelem_refcount_ptr
1574 = &field_tgt_structelem_first
->structelem_refcount
;
1576 if (i
== field_tgt_clear
)
1578 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1579 field_tgt_structelem_first
= NULL
;
1582 if (i
== field_tgt_clear
)
1583 field_tgt_clear
= FIELD_TGT_EMPTY
;
1587 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1588 k
->tgt_offset
= tgt_size
;
1589 tgt_size
+= k
->host_end
- k
->host_start
;
1591 /* First increment, from 0 to 1. gomp_increment_refcount
1592 encapsulates the different increment cases, so use this
1593 instead of directly setting 1 during initialization. */
1594 gomp_increment_refcount (k
, refcount_set
);
1596 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1597 tgt
->list
[i
].always_copy_from
1598 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1599 tgt
->list
[i
].is_attach
= false;
1600 tgt
->list
[i
].offset
= 0;
1601 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1604 array
->right
= NULL
;
1605 splay_tree_insert (mem_map
, array
);
1606 switch (kind
& typemask
)
1608 case GOMP_MAP_ALLOC
:
1610 case GOMP_MAP_FORCE_ALLOC
:
1611 case GOMP_MAP_FORCE_FROM
:
1612 case GOMP_MAP_ALWAYS_FROM
:
1615 case GOMP_MAP_TOFROM
:
1616 case GOMP_MAP_FORCE_TO
:
1617 case GOMP_MAP_FORCE_TOFROM
:
1618 case GOMP_MAP_ALWAYS_TO
:
1619 case GOMP_MAP_ALWAYS_TOFROM
:
1620 gomp_copy_host2dev (devicep
, aq
,
1621 (void *) (tgt
->tgt_start
1623 (void *) k
->host_start
,
1624 k
->host_end
- k
->host_start
,
1627 case GOMP_MAP_POINTER
:
1628 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1630 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1631 k
->tgt_offset
, sizes
[i
], cbufp
,
1633 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1635 case GOMP_MAP_TO_PSET
:
1636 gomp_copy_host2dev (devicep
, aq
,
1637 (void *) (tgt
->tgt_start
1639 (void *) k
->host_start
,
1640 k
->host_end
- k
->host_start
,
1642 tgt
->list
[i
].has_null_ptr_assoc
= false;
1644 for (j
= i
+ 1; j
< mapnum
; j
++)
1646 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1648 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1649 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1651 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1652 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1657 tgt
->list
[j
].key
= k
;
1658 tgt
->list
[j
].copy_from
= false;
1659 tgt
->list
[j
].always_copy_from
= false;
1660 tgt
->list
[j
].is_attach
= false;
1661 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1662 /* For OpenMP, the use of refcount_sets causes
1663 errors if we set k->refcount = 1 above but also
1664 increment it again here, for decrementing will
1665 not properly match, since we decrement only once
1666 for each key's refcount. Therefore avoid this
1667 increment for OpenMP constructs. */
1669 gomp_increment_refcount (k
, refcount_set
);
1670 gomp_map_pointer (tgt
, aq
,
1671 (uintptr_t) *(void **) hostaddrs
[j
],
1673 + ((uintptr_t) hostaddrs
[j
]
1675 sizes
[j
], cbufp
, false);
1680 case GOMP_MAP_FORCE_PRESENT
:
1682 /* We already looked up the memory region above and it
1684 size_t size
= k
->host_end
- k
->host_start
;
1685 gomp_mutex_unlock (&devicep
->lock
);
1686 #ifdef HAVE_INTTYPES_H
1687 gomp_fatal ("present clause: !acc_is_present (%p, "
1688 "%"PRIu64
" (0x%"PRIx64
"))",
1689 (void *) k
->host_start
,
1690 (uint64_t) size
, (uint64_t) size
);
1692 gomp_fatal ("present clause: !acc_is_present (%p, "
1693 "%lu (0x%lx))", (void *) k
->host_start
,
1694 (unsigned long) size
, (unsigned long) size
);
1698 case GOMP_MAP_FORCE_DEVICEPTR
:
1699 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1700 gomp_copy_host2dev (devicep
, aq
,
1701 (void *) (tgt
->tgt_start
1703 (void *) k
->host_start
,
1704 sizeof (void *), false, cbufp
);
1707 gomp_mutex_unlock (&devicep
->lock
);
1708 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1712 if (k
->aux
&& k
->aux
->link_key
)
1714 /* Set link pointer on target to the device address of the
1716 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1717 /* We intentionally do not use coalescing here, as it's not
1718 data allocated by the current call to this function. */
1719 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1720 &tgt_addr
, sizeof (void *), true, NULL
);
1727 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1729 for (i
= 0; i
< mapnum
; i
++)
1731 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1732 gomp_copy_host2dev (devicep
, aq
,
1733 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1734 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1741 /* See 'gomp_coalesce_buf_add'. */
1745 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1746 gomp_copy_host2dev (devicep
, aq
,
1747 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1748 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1749 - cbuf
.chunks
[0].start
),
1750 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1757 /* If the variable from "omp target enter data" map-list was already mapped,
1758 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1760 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1766 gomp_mutex_unlock (&devicep
->lock
);
1770 static struct target_mem_desc
*
1771 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1772 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1773 bool short_mapkind
, htab_t
*refcount_set
,
1774 enum gomp_map_vars_kind pragma_kind
)
1776 /* This management of a local refcount_set is for convenience of callers
1777 who do not share a refcount_set over multiple map/unmap uses. */
1778 htab_t local_refcount_set
= NULL
;
1779 if (refcount_set
== NULL
)
1781 local_refcount_set
= htab_create (mapnum
);
1782 refcount_set
= &local_refcount_set
;
1785 struct target_mem_desc
*tgt
;
1786 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1787 sizes
, kinds
, short_mapkind
, refcount_set
,
1789 if (local_refcount_set
)
1790 htab_free (local_refcount_set
);
1795 attribute_hidden
struct target_mem_desc
*
1796 goacc_map_vars (struct gomp_device_descr
*devicep
,
1797 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1798 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1799 void *kinds
, bool short_mapkind
,
1800 enum gomp_map_vars_kind pragma_kind
)
1802 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1803 sizes
, kinds
, short_mapkind
, NULL
,
1804 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1808 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1810 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1812 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1819 gomp_unref_tgt (void *ptr
)
1821 bool is_tgt_unmapped
= false;
1823 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1825 if (tgt
->refcount
> 1)
1829 gomp_unmap_tgt (tgt
);
1830 is_tgt_unmapped
= true;
1833 return is_tgt_unmapped
;
/* void-returning wrapper around gomp_unref_tgt, matching the signature
   required for async-queue callbacks; the unmapped/not-unmapped result is
   deliberately discarded.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1843 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1845 splay_tree_remove (sp
, k
);
1848 if (k
->aux
->link_key
)
1849 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1850 if (k
->aux
->attach_count
)
1851 free (k
->aux
->attach_count
);
1857 static inline __attribute__((always_inline
)) bool
1858 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1859 struct goacc_asyncqueue
*aq
)
1861 bool is_tgt_unmapped
= false;
1863 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1865 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1866 /* Infer the splay_tree_key of the first structelem key using the
1867 pointer to the first structleme_refcount. */
1868 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1869 - offsetof (struct splay_tree_key_s
,
1870 structelem_refcount
));
1871 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1873 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1874 with the splay_tree_keys embedded inside. */
1875 splay_tree_node node
=
1876 (splay_tree_node
) ((char *) k
1877 - offsetof (struct splay_tree_node_s
, key
));
1880 /* Starting from the _FIRST key, and continue for all following
1882 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1883 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
1890 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1893 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1896 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1897 return is_tgt_unmapped
;
1900 attribute_hidden
bool
1901 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1903 return gomp_remove_var_internal (devicep
, k
, NULL
);
1906 /* Remove a variable asynchronously. This actually removes the variable
1907 mapping immediately, but retains the linked target_mem_desc until the
1908 asynchronous operation has completed (as it may still refer to target
1909 memory). The device lock must be held before entry, and remains locked on
1912 attribute_hidden
void
1913 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1914 struct goacc_asyncqueue
*aq
)
1916 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1919 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1920 variables back from device to host: if it is false, it is assumed that this
1921 has been done already. */
1923 static inline __attribute__((always_inline
)) void
1924 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1925 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1927 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1929 if (tgt
->list_count
== 0)
1935 gomp_mutex_lock (&devicep
->lock
);
1936 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1938 gomp_mutex_unlock (&devicep
->lock
);
1946 /* We must perform detachments before any copies back to the host. */
1947 for (i
= 0; i
< tgt
->list_count
; i
++)
1949 splay_tree_key k
= tgt
->list
[i
].key
;
1951 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1952 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1953 + tgt
->list
[i
].offset
,
1957 for (i
= 0; i
< tgt
->list_count
; i
++)
1959 splay_tree_key k
= tgt
->list
[i
].key
;
1963 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1964 counts ('n->refcount', 'n->dynamic_refcount'). */
1965 if (tgt
->list
[i
].is_attach
)
1968 bool do_copy
, do_remove
;
1969 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
1971 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1972 || tgt
->list
[i
].always_copy_from
)
1973 gomp_copy_dev2host (devicep
, aq
,
1974 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1975 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1976 + tgt
->list
[i
].offset
),
1977 tgt
->list
[i
].length
);
1980 struct target_mem_desc
*k_tgt
= k
->tgt
;
1981 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1982 /* It would be bad if TGT got unmapped while we're still iterating
1983 over its LIST_COUNT, and also expect to use it in the following
1985 assert (!is_tgt_unmapped
1991 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1994 gomp_unref_tgt ((void *) tgt
);
1996 gomp_mutex_unlock (&devicep
->lock
);
2000 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2001 htab_t
*refcount_set
)
2003 /* This management of a local refcount_set is for convenience of callers
2004 who do not share a refcount_set over multiple map/unmap uses. */
2005 htab_t local_refcount_set
= NULL
;
2006 if (refcount_set
== NULL
)
2008 local_refcount_set
= htab_create (tgt
->list_count
);
2009 refcount_set
= &local_refcount_set
;
2012 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2014 if (local_refcount_set
)
2015 htab_free (local_refcount_set
);
2018 attribute_hidden
void
2019 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2020 struct goacc_asyncqueue
*aq
)
2022 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
2026 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2027 size_t *sizes
, void *kinds
, bool short_mapkind
)
2030 struct splay_tree_key_s cur_node
;
2031 const int typemask
= short_mapkind
? 0xff : 0x7;
2039 gomp_mutex_lock (&devicep
->lock
);
2040 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2042 gomp_mutex_unlock (&devicep
->lock
);
2046 for (i
= 0; i
< mapnum
; i
++)
2049 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2050 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2051 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2054 int kind
= get_kind (short_mapkind
, kinds
, i
);
2055 if (n
->host_start
> cur_node
.host_start
2056 || n
->host_end
< cur_node
.host_end
)
2058 gomp_mutex_unlock (&devicep
->lock
);
2059 gomp_fatal ("Trying to update [%p..%p) object when "
2060 "only [%p..%p) is mapped",
2061 (void *) cur_node
.host_start
,
2062 (void *) cur_node
.host_end
,
2063 (void *) n
->host_start
,
2064 (void *) n
->host_end
);
2067 if (n
->aux
&& n
->aux
->attach_count
)
2069 uintptr_t addr
= cur_node
.host_start
;
2070 while (addr
< cur_node
.host_end
)
2072 /* We have to be careful not to overwrite still attached
2073 pointers during host<->device updates. */
2074 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2075 if (n
->aux
->attach_count
[i
] == 0)
2077 void *devaddr
= (void *) (n
->tgt
->tgt_start
2079 + addr
- n
->host_start
);
2080 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2081 gomp_copy_host2dev (devicep
, NULL
,
2082 devaddr
, (void *) addr
,
2083 sizeof (void *), false, NULL
);
2084 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2085 gomp_copy_dev2host (devicep
, NULL
,
2086 (void *) addr
, devaddr
,
2089 addr
+= sizeof (void *);
2094 void *hostaddr
= (void *) cur_node
.host_start
;
2095 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2096 + cur_node
.host_start
2098 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2100 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2101 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2103 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2104 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2108 gomp_mutex_unlock (&devicep
->lock
);
2111 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2112 And insert to splay tree the mapping between addresses from HOST_TABLE and
2113 from loaded target image. We rely in the host and device compiler
2114 emitting variable and functions in the same order. */
2117 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2118 const void *host_table
, const void *target_data
,
2119 bool is_register_lock
)
2121 void **host_func_table
= ((void ***) host_table
)[0];
2122 void **host_funcs_end
= ((void ***) host_table
)[1];
2123 void **host_var_table
= ((void ***) host_table
)[2];
2124 void **host_vars_end
= ((void ***) host_table
)[3];
2126 /* The func table contains only addresses, the var table contains addresses
2127 and corresponding sizes. */
2128 int num_funcs
= host_funcs_end
- host_func_table
;
2129 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2131 /* Others currently is only 'device_num' */
2134 /* Load image to device and get target addresses for the image. */
2135 struct addr_pair
*target_table
= NULL
;
2136 int i
, num_target_entries
;
2139 = devicep
->load_image_func (devicep
->target_id
, version
,
2140 target_data
, &target_table
);
2142 if (num_target_entries
!= num_funcs
+ num_vars
2143 /* Others (device_num) are included as trailing entries in pair list. */
2144 && num_target_entries
!= num_funcs
+ num_vars
+ num_others
)
2146 gomp_mutex_unlock (&devicep
->lock
);
2147 if (is_register_lock
)
2148 gomp_mutex_unlock (®ister_lock
);
2149 gomp_fatal ("Cannot map target functions or variables"
2150 " (expected %u, have %u)", num_funcs
+ num_vars
,
2151 num_target_entries
);
2154 /* Insert host-target address mapping into splay tree. */
2155 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2156 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
) * sizeof (*tgt
->array
));
2157 tgt
->refcount
= REFCOUNT_INFINITY
;
2160 tgt
->to_free
= NULL
;
2162 tgt
->list_count
= 0;
2163 tgt
->device_descr
= devicep
;
2164 splay_tree_node array
= tgt
->array
;
2166 for (i
= 0; i
< num_funcs
; i
++)
2168 splay_tree_key k
= &array
->key
;
2169 k
->host_start
= (uintptr_t) host_func_table
[i
];
2170 k
->host_end
= k
->host_start
+ 1;
2172 k
->tgt_offset
= target_table
[i
].start
;
2173 k
->refcount
= REFCOUNT_INFINITY
;
2174 k
->dynamic_refcount
= 0;
2177 array
->right
= NULL
;
2178 splay_tree_insert (&devicep
->mem_map
, array
);
2182 /* Most significant bit of the size in host and target tables marks
2183 "omp declare target link" variables. */
2184 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2185 const uintptr_t size_mask
= ~link_bit
;
2187 for (i
= 0; i
< num_vars
; i
++)
2189 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2190 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2191 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2193 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2195 gomp_mutex_unlock (&devicep
->lock
);
2196 if (is_register_lock
)
2197 gomp_mutex_unlock (®ister_lock
);
2198 gomp_fatal ("Cannot map target variables (size mismatch)");
2201 splay_tree_key k
= &array
->key
;
2202 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2204 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2206 k
->tgt_offset
= target_var
->start
;
2207 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2208 k
->dynamic_refcount
= 0;
2211 array
->right
= NULL
;
2212 splay_tree_insert (&devicep
->mem_map
, array
);
2216 /* Last entry is for the on-device 'device_num' variable. Tolerate case
2217 where plugin does not return this entry. */
2218 if (num_funcs
+ num_vars
< num_target_entries
)
2220 struct addr_pair
*device_num_var
= &target_table
[num_funcs
+ num_vars
];
2221 /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2222 was found in this image. */
2223 if (device_num_var
->start
!= 0)
2225 /* The index of the devicep within devices[] is regarded as its
2226 'device number', which is different from the per-device type
2227 devicep->target_id. */
2228 int device_num_val
= (int) (devicep
- &devices
[0]);
2229 if (device_num_var
->end
- device_num_var
->start
!= sizeof (int))
2231 gomp_mutex_unlock (&devicep
->lock
);
2232 if (is_register_lock
)
2233 gomp_mutex_unlock (®ister_lock
);
2234 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2238 /* Copy device_num value to place on device memory, hereby actually
2239 designating its device number into effect. */
2240 gomp_copy_host2dev (devicep
, NULL
, (void *) device_num_var
->start
,
2241 &device_num_val
, sizeof (int), false, NULL
);
2245 free (target_table
);
2248 /* Unload the mappings described by target_data from device DEVICE_P.
2249 The device must be locked. */
2252 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2254 const void *host_table
, const void *target_data
)
2256 void **host_func_table
= ((void ***) host_table
)[0];
2257 void **host_funcs_end
= ((void ***) host_table
)[1];
2258 void **host_var_table
= ((void ***) host_table
)[2];
2259 void **host_vars_end
= ((void ***) host_table
)[3];
2261 /* The func table contains only addresses, the var table contains addresses
2262 and corresponding sizes. */
2263 int num_funcs
= host_funcs_end
- host_func_table
;
2264 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2266 struct splay_tree_key_s k
;
2267 splay_tree_key node
= NULL
;
2269 /* Find mapping at start of node array */
2270 if (num_funcs
|| num_vars
)
2272 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2273 : (uintptr_t) host_var_table
[0]);
2274 k
.host_end
= k
.host_start
+ 1;
2275 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2278 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2280 gomp_mutex_unlock (&devicep
->lock
);
2281 gomp_fatal ("image unload fail");
2284 /* Remove mappings from splay tree. */
2286 for (i
= 0; i
< num_funcs
; i
++)
2288 k
.host_start
= (uintptr_t) host_func_table
[i
];
2289 k
.host_end
= k
.host_start
+ 1;
2290 splay_tree_remove (&devicep
->mem_map
, &k
);
2293 /* Most significant bit of the size in host and target tables marks
2294 "omp declare target link" variables. */
2295 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2296 const uintptr_t size_mask
= ~link_bit
;
2297 bool is_tgt_unmapped
= false;
2299 for (i
= 0; i
< num_vars
; i
++)
2301 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2303 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2305 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2306 splay_tree_remove (&devicep
->mem_map
, &k
);
2309 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2310 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2314 if (node
&& !is_tgt_unmapped
)
2322 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2324 char *end
= buf
+ size
, *p
= buf
;
2325 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2326 p
+= snprintf (p
, end
- p
, "unified_address");
2327 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2328 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2329 (p
== buf
? "" : ", "));
2330 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2331 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2332 (p
== buf
? "" : ", "));
2335 /* This function should be called from every offload image while loading.
2336 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2337 the target, and TARGET_DATA needed by target plugin. */
2340 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2341 int target_type
, const void *target_data
)
2346 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2347 gomp_fatal ("Library too old for offload (version %u < %u)",
2348 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2350 if (GOMP_VERSION_LIB (version
) > 1)
2352 omp_req
= (int) (size_t) ((void **) target_data
)[0];
2353 target_data
= &((void **) target_data
)[1];
2356 gomp_mutex_lock (®ister_lock
);
2358 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
2360 char buf1
[sizeof ("unified_address, unified_shared_memory, "
2361 "reverse_offload")];
2362 char buf2
[sizeof ("unified_address, unified_shared_memory, "
2363 "reverse_offload")];
2364 gomp_requires_to_name (buf2
, sizeof (buf2
),
2365 omp_req
!= GOMP_REQUIRES_TARGET_USED
2366 ? omp_req
: omp_requires_mask
);
2367 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
2368 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
2370 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
2371 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2372 "in multiple compilation units: '%s' vs. '%s'",
2376 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2377 "some compilation units", buf2
);
2379 omp_requires_mask
= omp_req
;
2381 /* Load image to all initialized devices. */
2382 for (i
= 0; i
< num_devices
; i
++)
2384 struct gomp_device_descr
*devicep
= &devices
[i
];
2385 gomp_mutex_lock (&devicep
->lock
);
2386 if (devicep
->type
== target_type
2387 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2388 gomp_load_image_to_device (devicep
, version
,
2389 host_table
, target_data
, true);
2390 gomp_mutex_unlock (&devicep
->lock
);
2393 /* Insert image to array of pending images. */
2395 = gomp_realloc_unlock (offload_images
,
2396 (num_offload_images
+ 1)
2397 * sizeof (struct offload_image_descr
));
2398 offload_images
[num_offload_images
].version
= version
;
2399 offload_images
[num_offload_images
].type
= target_type
;
2400 offload_images
[num_offload_images
].host_table
= host_table
;
2401 offload_images
[num_offload_images
].target_data
= target_data
;
2403 num_offload_images
++;
2404 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: register an offload image with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2414 /* This function should be called from every offload image while unloading.
2415 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2416 the target, and TARGET_DATA needed by target plugin. */
2419 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2420 int target_type
, const void *target_data
)
2424 gomp_mutex_lock (®ister_lock
);
2426 /* Unload image from all initialized devices. */
2427 for (i
= 0; i
< num_devices
; i
++)
2429 struct gomp_device_descr
*devicep
= &devices
[i
];
2430 gomp_mutex_lock (&devicep
->lock
);
2431 if (devicep
->type
== target_type
2432 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2433 gomp_unload_image_from_device (devicep
, version
,
2434 host_table
, target_data
);
2435 gomp_mutex_unlock (&devicep
->lock
);
2438 /* Remove image from array of pending images. */
2439 for (i
= 0; i
< num_offload_images
; i
++)
2440 if (offload_images
[i
].target_data
== target_data
)
2442 offload_images
[i
] = offload_images
[--num_offload_images
];
2446 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: unregister an offload image with version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2456 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2457 must be locked on entry, and remains locked on return. */
2459 attribute_hidden
void
2460 gomp_init_device (struct gomp_device_descr
*devicep
)
2463 if (!devicep
->init_device_func (devicep
->target_id
))
2465 gomp_mutex_unlock (&devicep
->lock
);
2466 gomp_fatal ("device initialization failed");
2469 /* Load to device all images registered by the moment. */
2470 for (i
= 0; i
< num_offload_images
; i
++)
2472 struct offload_image_descr
*image
= &offload_images
[i
];
2473 if (image
->type
== devicep
->type
)
2474 gomp_load_image_to_device (devicep
, image
->version
,
2475 image
->host_table
, image
->target_data
,
2479 /* Initialize OpenACC asynchronous queues. */
2480 goacc_init_asyncqueues (devicep
);
2482 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2485 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2486 must be locked on entry, and remains locked on return. */
2488 attribute_hidden
bool
2489 gomp_fini_device (struct gomp_device_descr
*devicep
)
2491 bool ret
= goacc_fini_asyncqueues (devicep
);
2492 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2493 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2497 attribute_hidden
void
2498 gomp_unload_device (struct gomp_device_descr
*devicep
)
2500 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2504 /* Unload from device all images registered at the moment. */
2505 for (i
= 0; i
< num_offload_images
; i
++)
2507 struct offload_image_descr
*image
= &offload_images
[i
];
2508 if (image
->type
== devicep
->type
)
2509 gomp_unload_image_from_device (devicep
, image
->version
,
2511 image
->target_data
);
2516 /* Host fallback for GOMP_target{,_ext} routines. */
2519 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2520 struct gomp_device_descr
*devicep
, void **args
)
2522 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2524 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2526 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2527 "be used for offloading");
2530 memset (thr
, '\0', sizeof (*thr
));
2531 if (gomp_places_list
)
2533 thr
->place
= old_thr
.place
;
2534 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2539 intptr_t id
= (intptr_t) *args
++, val
;
2540 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2541 val
= (intptr_t) *args
++;
2543 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2544 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2546 id
&= GOMP_TARGET_ARG_ID_MASK
;
2547 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2549 val
= val
> INT_MAX
? INT_MAX
: val
;
2551 gomp_icv (true)->thread_limit_var
= val
;
2556 gomp_free_thread (thr
);
2560 /* Calculate alignment and size requirements of a private copy of data shared
2561 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2564 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2565 unsigned short *kinds
, size_t *tgt_align
,
2569 for (i
= 0; i
< mapnum
; i
++)
2570 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2572 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2573 if (*tgt_align
< align
)
2575 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2576 *tgt_size
+= sizes
[i
];
2580 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2583 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2584 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2587 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2589 tgt
+= tgt_align
- al
;
2592 for (i
= 0; i
< mapnum
; i
++)
2593 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2595 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2596 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2597 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2598 hostaddrs
[i
] = tgt
+ tgt_size
;
2599 tgt_size
= tgt_size
+ sizes
[i
];
2600 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2602 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2608 /* Helper function of GOMP_target{,_ext} routines. */
2611 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2612 void (*host_fn
) (void *))
2614 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2615 return (void *) host_fn
;
2618 gomp_mutex_lock (&devicep
->lock
);
2619 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2621 gomp_mutex_unlock (&devicep
->lock
);
2625 struct splay_tree_key_s k
;
2626 k
.host_start
= (uintptr_t) host_fn
;
2627 k
.host_end
= k
.host_start
+ 1;
2628 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2629 gomp_mutex_unlock (&devicep
->lock
);
2633 return (void *) tgt_fn
->tgt_offset
;
2637 /* Called when encountering a target directive. If DEVICE
2638 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2639 GOMP_DEVICE_HOST_FALLBACK (or any value
2640 larger than last available hw device), use host fallback.
2641 FN is address of host code, UNUSED is part of the current ABI, but
2642 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2643 with MAPNUM entries, with addresses of the host objects,
2644 sizes of the host objects (resp. for pointer kind pointer bias
2645 and assumed sizeof (void *) size) and kinds. */
2648 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2649 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2650 unsigned char *kinds
)
2652 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2656 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2657 /* All shared memory devices should use the GOMP_target_ext function. */
2658 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2659 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2660 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
2662 htab_t refcount_set
= htab_create (mapnum
);
2663 struct target_mem_desc
*tgt_vars
2664 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2665 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2666 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2668 htab_clear (refcount_set
);
2669 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2670 htab_free (refcount_set
);
2673 static inline unsigned int
2674 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2676 /* If we cannot run asynchronously, simply ignore nowait. */
2677 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2678 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2683 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2684 and several arguments have been added:
2685 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2686 DEPEND is array of dependencies, see GOMP_task for details.
2688 ARGS is a pointer to an array consisting of a variable number of both
2689 device-independent and device-specific arguments, which can take one two
2690 elements where the first specifies for which device it is intended, the type
2691 and optionally also the value. If the value is not present in the first
2692 one, the whole second element the actual value. The last element of the
2693 array is a single NULL. Among the device independent can be for example
2694 NUM_TEAMS and THREAD_LIMIT.
2696 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2697 that value, or 1 if teams construct is not present, or 0, if
2698 teams construct does not have num_teams clause and so the choice is
2699 implementation defined, and -1 if it can't be determined on the host
2700 what value will GOMP_teams have on the device.
2701 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2702 body with that value, or 0, if teams construct does not have thread_limit
2703 clause or the teams construct is not present, or -1 if it can't be
2704 determined on the host what value will GOMP_teams have on the device. */
2707 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2708 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2709 unsigned int flags
, void **depend
, void **args
)
2711 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2712 size_t tgt_align
= 0, tgt_size
= 0;
2713 bool fpc_done
= false;
2715 flags
= clear_unsupported_flags (devicep
, flags
);
2717 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
2719 struct gomp_thread
*thr
= gomp_thread ();
2720 /* Create a team if we don't have any around, as nowait
2721 target tasks make sense to run asynchronously even when
2722 outside of any parallel. */
2723 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
2725 struct gomp_team
*team
= gomp_new_team (1);
2726 struct gomp_task
*task
= thr
->task
;
2727 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
2728 team
->prev_ts
= thr
->ts
;
2729 thr
->ts
.team
= team
;
2730 thr
->ts
.team_id
= 0;
2731 thr
->ts
.work_share
= &team
->work_shares
[0];
2732 thr
->ts
.last_work_share
= NULL
;
2733 #ifdef HAVE_SYNC_BUILTINS
2734 thr
->ts
.single_count
= 0;
2736 thr
->ts
.static_trip
= 0;
2737 thr
->task
= &team
->implicit_task
[0];
2738 gomp_init_task (thr
->task
, NULL
, icv
);
2744 thr
->task
= &team
->implicit_task
[0];
2747 pthread_setspecific (gomp_thread_destructor
, thr
);
2750 && !thr
->task
->final_task
)
2752 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
2753 sizes
, kinds
, flags
, depend
, args
,
2754 GOMP_TARGET_TASK_BEFORE_MAP
);
2759 /* If there are depend clauses, but nowait is not present
2760 (or we are in a final task), block the parent task until the
2761 dependencies are resolved and then just continue with the rest
2762 of the function as if it is a merged task. */
2765 struct gomp_thread
*thr
= gomp_thread ();
2766 if (thr
->task
&& thr
->task
->depend_hash
)
2768 /* If we might need to wait, copy firstprivate now. */
2769 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2770 &tgt_align
, &tgt_size
);
2773 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2774 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2775 tgt_align
, tgt_size
);
2778 gomp_task_maybe_wait_for_dependencies (depend
);
2784 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2785 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
2786 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
2790 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2791 &tgt_align
, &tgt_size
);
2794 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2795 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2796 tgt_align
, tgt_size
);
2799 gomp_target_fallback (fn
, hostaddrs
, devicep
, args
);
2803 struct target_mem_desc
*tgt_vars
;
2804 htab_t refcount_set
= NULL
;
2806 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2810 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
2811 &tgt_align
, &tgt_size
);
2814 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
2815 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
2816 tgt_align
, tgt_size
);
2823 refcount_set
= htab_create (mapnum
);
2824 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
2825 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
2827 devicep
->run_func (devicep
->target_id
, fn_addr
,
2828 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
2832 htab_clear (refcount_set
);
2833 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2836 htab_free (refcount_set
);
2839 /* Host fallback for GOMP_target_data{,_ext} routines. */
2842 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
2844 struct gomp_task_icv
*icv
= gomp_icv (false);
2846 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2848 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2849 "be used for offloading");
2851 if (icv
->target_data
)
2853 /* Even when doing a host fallback, if there are any active
2854 #pragma omp target data constructs, need to remember the
2855 new #pragma omp target data, otherwise GOMP_target_end_data
2856 would get out of sync. */
2857 struct target_mem_desc
*tgt
2858 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
2859 NULL
, GOMP_MAP_VARS_DATA
);
2860 tgt
->prev
= icv
->target_data
;
2861 icv
->target_data
= tgt
;
2866 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
2867 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2869 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2872 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2873 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
2874 return gomp_target_data_fallback (devicep
);
2876 struct target_mem_desc
*tgt
2877 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2878 NULL
, GOMP_MAP_VARS_DATA
);
2879 struct gomp_task_icv
*icv
= gomp_icv (true);
2880 tgt
->prev
= icv
->target_data
;
2881 icv
->target_data
= tgt
;
2885 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
2886 size_t *sizes
, unsigned short *kinds
)
2888 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2891 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2892 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2893 return gomp_target_data_fallback (devicep
);
2895 struct target_mem_desc
*tgt
2896 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
2897 NULL
, GOMP_MAP_VARS_DATA
);
2898 struct gomp_task_icv
*icv
= gomp_icv (true);
2899 tgt
->prev
= icv
->target_data
;
2900 icv
->target_data
= tgt
;
2904 GOMP_target_end_data (void)
2906 struct gomp_task_icv
*icv
= gomp_icv (false);
2907 if (icv
->target_data
)
2909 struct target_mem_desc
*tgt
= icv
->target_data
;
2910 icv
->target_data
= tgt
->prev
;
2911 gomp_unmap_vars (tgt
, true, NULL
);
2916 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
2917 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
2919 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2922 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2923 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2926 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
2930 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
2931 size_t *sizes
, unsigned short *kinds
,
2932 unsigned int flags
, void **depend
)
2934 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2936 /* If there are depend clauses, but nowait is not present,
2937 block the parent task until the dependencies are resolved
2938 and then just continue with the rest of the function as if it
2939 is a merged task. Until we are able to schedule task during
2940 variable mapping or unmapping, ignore nowait if depend clauses
2944 struct gomp_thread
*thr
= gomp_thread ();
2945 if (thr
->task
&& thr
->task
->depend_hash
)
2947 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
2949 && !thr
->task
->final_task
)
2951 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
2952 mapnum
, hostaddrs
, sizes
, kinds
,
2953 flags
| GOMP_TARGET_FLAG_UPDATE
,
2954 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
2959 struct gomp_team
*team
= thr
->ts
.team
;
2960 /* If parallel or taskgroup has been cancelled, don't start new
2962 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2964 if (gomp_team_barrier_cancelled (&team
->barrier
))
2966 if (thr
->task
->taskgroup
)
2968 if (thr
->task
->taskgroup
->cancelled
)
2970 if (thr
->task
->taskgroup
->workshare
2971 && thr
->task
->taskgroup
->prev
2972 && thr
->task
->taskgroup
->prev
->cancelled
)
2977 gomp_task_maybe_wait_for_dependencies (depend
);
2983 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2984 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
2987 struct gomp_thread
*thr
= gomp_thread ();
2988 struct gomp_team
*team
= thr
->ts
.team
;
2989 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
2990 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
2992 if (gomp_team_barrier_cancelled (&team
->barrier
))
2994 if (thr
->task
->taskgroup
)
2996 if (thr
->task
->taskgroup
->cancelled
)
2998 if (thr
->task
->taskgroup
->workshare
2999 && thr
->task
->taskgroup
->prev
3000 && thr
->task
->taskgroup
->prev
->cancelled
)
3005 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
3009 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
3010 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3011 htab_t
*refcount_set
)
3013 const int typemask
= 0xff;
3015 gomp_mutex_lock (&devicep
->lock
);
3016 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3018 gomp_mutex_unlock (&devicep
->lock
);
3022 for (i
= 0; i
< mapnum
; i
++)
3023 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
3025 struct splay_tree_key_s cur_node
;
3026 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3027 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
3028 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3031 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
3036 splay_tree_key remove_vars
[mapnum
];
3038 for (i
= 0; i
< mapnum
; i
++)
3040 struct splay_tree_key_s cur_node
;
3041 unsigned char kind
= kinds
[i
] & typemask
;
3045 case GOMP_MAP_ALWAYS_FROM
:
3046 case GOMP_MAP_DELETE
:
3047 case GOMP_MAP_RELEASE
:
3048 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3049 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3050 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3051 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
3052 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3053 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
3054 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
3055 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3059 bool delete_p
= (kind
== GOMP_MAP_DELETE
3060 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
3061 bool do_copy
, do_remove
;
3062 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
3065 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
3066 || kind
== GOMP_MAP_ALWAYS_FROM
)
3068 if (k
->aux
&& k
->aux
->attach_count
)
3070 /* We have to be careful not to overwrite still attached
3071 pointers during the copyback to host. */
3072 uintptr_t addr
= k
->host_start
;
3073 while (addr
< k
->host_end
)
3075 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
3076 if (k
->aux
->attach_count
[i
] == 0)
3077 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
3078 (void *) (k
->tgt
->tgt_start
3080 + addr
- k
->host_start
),
3082 addr
+= sizeof (void *);
3086 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
3087 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
3088 + cur_node
.host_start
3090 cur_node
.host_end
- cur_node
.host_start
);
3093 /* Structure elements lists are removed altogether at once, which
3094 may cause immediate deallocation of the target_mem_desc, causing
3095 errors if we still have following element siblings to copy back.
3096 While we're at it, it also seems more disciplined to simply
3097 queue all removals together for processing below.
3099 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3100 not have this problem, since they maintain an additional
3101 tgt->refcount = 1 reference to the target_mem_desc to start with.
3104 remove_vars
[nrmvars
++] = k
;
3107 case GOMP_MAP_DETACH
:
3110 gomp_mutex_unlock (&devicep
->lock
);
3111 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3116 for (int i
= 0; i
< nrmvars
; i
++)
3117 gomp_remove_var (devicep
, remove_vars
[i
]);
3119 gomp_mutex_unlock (&devicep
->lock
);
3123 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
3124 size_t *sizes
, unsigned short *kinds
,
3125 unsigned int flags
, void **depend
)
3127 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3129 /* If there are depend clauses, but nowait is not present,
3130 block the parent task until the dependencies are resolved
3131 and then just continue with the rest of the function as if it
3132 is a merged task. Until we are able to schedule task during
3133 variable mapping or unmapping, ignore nowait if depend clauses
3137 struct gomp_thread
*thr
= gomp_thread ();
3138 if (thr
->task
&& thr
->task
->depend_hash
)
3140 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3142 && !thr
->task
->final_task
)
3144 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3145 mapnum
, hostaddrs
, sizes
, kinds
,
3146 flags
, depend
, NULL
,
3147 GOMP_TARGET_TASK_DATA
))
3152 struct gomp_team
*team
= thr
->ts
.team
;
3153 /* If parallel or taskgroup has been cancelled, don't start new
3155 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3157 if (gomp_team_barrier_cancelled (&team
->barrier
))
3159 if (thr
->task
->taskgroup
)
3161 if (thr
->task
->taskgroup
->cancelled
)
3163 if (thr
->task
->taskgroup
->workshare
3164 && thr
->task
->taskgroup
->prev
3165 && thr
->task
->taskgroup
->prev
->cancelled
)
3170 gomp_task_maybe_wait_for_dependencies (depend
);
3176 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3177 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3180 struct gomp_thread
*thr
= gomp_thread ();
3181 struct gomp_team
*team
= thr
->ts
.team
;
3182 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3183 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3185 if (gomp_team_barrier_cancelled (&team
->barrier
))
3187 if (thr
->task
->taskgroup
)
3189 if (thr
->task
->taskgroup
->cancelled
)
3191 if (thr
->task
->taskgroup
->workshare
3192 && thr
->task
->taskgroup
->prev
3193 && thr
->task
->taskgroup
->prev
->cancelled
)
3198 htab_t refcount_set
= htab_create (mapnum
);
3200 /* The variables are mapped separately such that they can be released
3203 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3204 for (i
= 0; i
< mapnum
; i
++)
3205 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3207 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
3208 &kinds
[i
], true, &refcount_set
,
3209 GOMP_MAP_VARS_ENTER_DATA
);
3212 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
3214 for (j
= i
+ 1; j
< mapnum
; j
++)
3215 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
3216 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
3218 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
3219 &kinds
[i
], true, &refcount_set
,
3220 GOMP_MAP_VARS_ENTER_DATA
);
3223 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
3225 /* An attach operation must be processed together with the mapped
3226 base-pointer list item. */
3227 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
3228 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3232 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
3233 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3235 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
3236 htab_free (refcount_set
);
3240 gomp_target_task_fn (void *data
)
3242 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
3243 struct gomp_device_descr
*devicep
= ttask
->devicep
;
3245 if (ttask
->fn
!= NULL
)
3249 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3250 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
3251 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3253 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
3254 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
3259 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
3262 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
3266 void *actual_arguments
;
3267 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3270 actual_arguments
= ttask
->hostaddrs
;
3274 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
3275 NULL
, ttask
->sizes
, ttask
->kinds
, true,
3276 NULL
, GOMP_MAP_VARS_TARGET
);
3277 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
3279 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
3281 assert (devicep
->async_run_func
);
3282 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
3283 ttask
->args
, (void *) ttask
);
3286 else if (devicep
== NULL
3287 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3288 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3292 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
3293 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3294 ttask
->kinds
, true);
3297 htab_t refcount_set
= htab_create (ttask
->mapnum
);
3298 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
3299 for (i
= 0; i
< ttask
->mapnum
; i
++)
3300 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
3302 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
3303 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
3304 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
3305 i
+= ttask
->sizes
[i
];
3308 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
3309 &ttask
->kinds
[i
], true, &refcount_set
,
3310 GOMP_MAP_VARS_ENTER_DATA
);
3312 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
3313 ttask
->kinds
, &refcount_set
);
3314 htab_free (refcount_set
);
3320 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
3324 struct gomp_task_icv
*icv
= gomp_icv (true);
3325 icv
->thread_limit_var
3326 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
3332 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
3333 unsigned int thread_limit
, bool first
)
3335 struct gomp_thread
*thr
= gomp_thread ();
3340 struct gomp_task_icv
*icv
= gomp_icv (true);
3341 icv
->thread_limit_var
3342 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
3344 (void) num_teams_high
;
3345 if (num_teams_low
== 0)
3347 thr
->num_teams
= num_teams_low
- 1;
3350 else if (thr
->team_num
== thr
->num_teams
)
3358 omp_target_alloc (size_t size
, int device_num
)
3360 if (device_num
== omp_initial_device
3361 || device_num
== gomp_get_num_devices ())
3362 return malloc (size
);
3364 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3365 if (devicep
== NULL
)
3368 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3369 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3370 return malloc (size
);
3372 gomp_mutex_lock (&devicep
->lock
);
3373 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
3374 gomp_mutex_unlock (&devicep
->lock
);
3379 omp_target_free (void *device_ptr
, int device_num
)
3381 if (device_num
== omp_initial_device
3382 || device_num
== gomp_get_num_devices ())
3388 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3389 if (devicep
== NULL
|| device_ptr
== NULL
)
3392 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3393 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3399 gomp_mutex_lock (&devicep
->lock
);
3400 gomp_free_device_memory (devicep
, device_ptr
);
3401 gomp_mutex_unlock (&devicep
->lock
);
3405 omp_target_is_present (const void *ptr
, int device_num
)
3407 if (device_num
== omp_initial_device
3408 || device_num
== gomp_get_num_devices ())
3411 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3412 if (devicep
== NULL
)
3418 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3419 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3422 gomp_mutex_lock (&devicep
->lock
);
3423 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3424 struct splay_tree_key_s cur_node
;
3426 cur_node
.host_start
= (uintptr_t) ptr
;
3427 cur_node
.host_end
= cur_node
.host_start
;
3428 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
3429 int ret
= n
!= NULL
;
3430 gomp_mutex_unlock (&devicep
->lock
);
3435 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
3436 struct gomp_device_descr
**dst_devicep
,
3437 struct gomp_device_descr
**src_devicep
)
3439 if (dst_device_num
!= gomp_get_num_devices ()
3440 /* Above gomp_get_num_devices has to be called unconditionally. */
3441 && dst_device_num
!= omp_initial_device
)
3443 *dst_devicep
= resolve_device (dst_device_num
, false);
3444 if (*dst_devicep
== NULL
)
3447 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3448 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3449 *dst_devicep
= NULL
;
3452 if (src_device_num
!= num_devices_openmp
3453 && src_device_num
!= omp_initial_device
)
3455 *src_devicep
= resolve_device (src_device_num
, false);
3456 if (*src_devicep
== NULL
)
3459 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3460 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3461 *src_devicep
= NULL
;
3468 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
3469 size_t dst_offset
, size_t src_offset
,
3470 struct gomp_device_descr
*dst_devicep
,
3471 struct gomp_device_descr
*src_devicep
)
3474 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
3476 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
3479 if (src_devicep
== NULL
)
3481 gomp_mutex_lock (&dst_devicep
->lock
);
3482 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3483 (char *) dst
+ dst_offset
,
3484 (char *) src
+ src_offset
, length
);
3485 gomp_mutex_unlock (&dst_devicep
->lock
);
3486 return (ret
? 0 : EINVAL
);
3488 if (dst_devicep
== NULL
)
3490 gomp_mutex_lock (&src_devicep
->lock
);
3491 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3492 (char *) dst
+ dst_offset
,
3493 (char *) src
+ src_offset
, length
);
3494 gomp_mutex_unlock (&src_devicep
->lock
);
3495 return (ret
? 0 : EINVAL
);
3497 if (src_devicep
== dst_devicep
)
3499 gomp_mutex_lock (&src_devicep
->lock
);
3500 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3501 (char *) dst
+ dst_offset
,
3502 (char *) src
+ src_offset
, length
);
3503 gomp_mutex_unlock (&src_devicep
->lock
);
3504 return (ret
? 0 : EINVAL
);
/* omp_target_memcpy: copy LENGTH bytes between (possibly device)
   address spaces, applying the byte offsets on each side.  Returns 0 on
   success or an errno-style value from the check/copy helpers.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);
  return ret;
}
3533 struct gomp_device_descr
*dst_devicep
;
3534 struct gomp_device_descr
*src_devicep
;
3535 } omp_target_memcpy_data
;
3538 omp_target_memcpy_async_helper (void *args
)
3540 omp_target_memcpy_data
*a
= args
;
3541 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
3542 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
3543 gomp_fatal ("omp_target_memcpy failed");
3547 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
3548 size_t dst_offset
, size_t src_offset
,
3549 int dst_device_num
, int src_device_num
,
3550 int depobj_count
, omp_depend_t
*depobj_list
)
3552 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3553 unsigned int flags
= 0;
3554 void *depend
[depobj_count
+ 5];
3556 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
3557 &dst_devicep
, &src_devicep
);
3559 omp_target_memcpy_data s
= {
3563 .dst_offset
= dst_offset
,
3564 .src_offset
= src_offset
,
3565 .dst_devicep
= dst_devicep
,
3566 .src_devicep
= src_devicep
3572 if (depobj_count
> 0 && depobj_list
!= NULL
)
3574 flags
|= GOMP_TASK_FLAG_DEPEND
;
3576 depend
[1] = (void *) (uintptr_t) depobj_count
;
3577 depend
[2] = depend
[3] = depend
[4] = 0;
3578 for (i
= 0; i
< depobj_count
; ++i
)
3579 depend
[i
+ 5] = &depobj_list
[i
];
3582 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
3583 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
3589 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
3590 int num_dims
, const size_t *volume
,
3591 const size_t *dst_offsets
,
3592 const size_t *src_offsets
,
3593 const size_t *dst_dimensions
,
3594 const size_t *src_dimensions
,
3595 struct gomp_device_descr
*dst_devicep
,
3596 struct gomp_device_descr
*src_devicep
)
3598 size_t dst_slice
= element_size
;
3599 size_t src_slice
= element_size
;
3600 size_t j
, dst_off
, src_off
, length
;
3605 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
3606 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
3607 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
3609 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
3611 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
3615 else if (src_devicep
== NULL
)
3616 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
3617 (char *) dst
+ dst_off
,
3618 (const char *) src
+ src_off
,
3620 else if (dst_devicep
== NULL
)
3621 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
3622 (char *) dst
+ dst_off
,
3623 (const char *) src
+ src_off
,
3625 else if (src_devicep
== dst_devicep
)
3626 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
3627 (char *) dst
+ dst_off
,
3628 (const char *) src
+ src_off
,
3632 return ret
? 0 : EINVAL
;
3635 /* FIXME: it would be nice to have some plugin function to handle
3636 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
3637 be handled in the generic recursion below, and for host-host it
3638 should be used even for any num_dims >= 2. */
3640 for (i
= 1; i
< num_dims
; i
++)
3641 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
3642 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
3644 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
3645 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
3647 for (j
= 0; j
< volume
[0]; j
++)
3649 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
3650 (const char *) src
+ src_off
,
3651 element_size
, num_dims
- 1,
3652 volume
+ 1, dst_offsets
+ 1,
3653 src_offsets
+ 1, dst_dimensions
+ 1,
3654 src_dimensions
+ 1, dst_devicep
,
3658 dst_off
+= dst_slice
;
3659 src_off
+= src_slice
;
3665 omp_target_memcpy_rect_check (void *dst
, const void *src
, int dst_device_num
,
3667 struct gomp_device_descr
**dst_devicep
,
3668 struct gomp_device_descr
**src_devicep
)
3673 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
3674 dst_devicep
, src_devicep
);
3678 if (*src_devicep
!= NULL
&& *dst_devicep
!= NULL
&& *src_devicep
!= *dst_devicep
)
3685 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
3686 size_t element_size
, int num_dims
,
3687 const size_t *volume
, const size_t *dst_offsets
,
3688 const size_t *src_offsets
,
3689 const size_t *dst_dimensions
,
3690 const size_t *src_dimensions
,
3691 struct gomp_device_descr
*dst_devicep
,
3692 struct gomp_device_descr
*src_devicep
)
3695 gomp_mutex_lock (&src_devicep
->lock
);
3696 else if (dst_devicep
)
3697 gomp_mutex_lock (&dst_devicep
->lock
);
3698 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
3699 volume
, dst_offsets
, src_offsets
,
3700 dst_dimensions
, src_dimensions
,
3701 dst_devicep
, src_devicep
);
3703 gomp_mutex_unlock (&src_devicep
->lock
);
3704 else if (dst_devicep
)
3705 gomp_mutex_unlock (&dst_devicep
->lock
);
/* omp_target_memcpy_rect: copy a rectangular sub-volume between host
   and/or device memory.  Per the OpenMP spec, calling with NULL dst and
   src returns the maximum number of dimensions supported (here 3 via
   the INT_MAX sentinel from the check helper).  */

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check == INT_MAX ? 3 : check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
3740 size_t element_size
;
3741 const size_t *volume
;
3742 const size_t *dst_offsets
;
3743 const size_t *src_offsets
;
3744 const size_t *dst_dimensions
;
3745 const size_t *src_dimensions
;
3746 struct gomp_device_descr
*dst_devicep
;
3747 struct gomp_device_descr
*src_devicep
;
3749 } omp_target_memcpy_rect_data
;
3752 omp_target_memcpy_rect_async_helper (void *args
)
3754 omp_target_memcpy_rect_data
*a
= args
;
3755 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
3756 a
->num_dims
, a
->volume
, a
->dst_offsets
,
3757 a
->src_offsets
, a
->dst_dimensions
,
3758 a
->src_dimensions
, a
->dst_devicep
,
3761 gomp_fatal ("omp_target_memcpy_rect failed");
3765 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
3766 int num_dims
, const size_t *volume
,
3767 const size_t *dst_offsets
,
3768 const size_t *src_offsets
,
3769 const size_t *dst_dimensions
,
3770 const size_t *src_dimensions
,
3771 int dst_device_num
, int src_device_num
,
3772 int depobj_count
, omp_depend_t
*depobj_list
)
3774 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
3776 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
3777 src_device_num
, &dst_devicep
,
3779 void *depend
[depobj_count
+ 5];
3782 omp_target_memcpy_rect_data s
= {
3785 .element_size
= element_size
,
3786 .num_dims
= num_dims
,
3788 .dst_offsets
= dst_offsets
,
3789 .src_offsets
= src_offsets
,
3790 .dst_dimensions
= dst_dimensions
,
3791 .src_dimensions
= src_dimensions
,
3792 .dst_devicep
= dst_devicep
,
3793 .src_devicep
= src_devicep
3799 if (depobj_count
> 0 && depobj_list
!= NULL
)
3801 flags
|= GOMP_TASK_FLAG_DEPEND
;
3803 depend
[1] = (void *) (uintptr_t) depobj_count
;
3804 depend
[2] = depend
[3] = depend
[4] = 0;
3805 for (i
= 0; i
< depobj_count
; ++i
)
3806 depend
[i
+ 5] = &depobj_list
[i
];
3809 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
3810 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
3816 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
3817 size_t size
, size_t device_offset
, int device_num
)
3819 if (device_num
== omp_initial_device
3820 || device_num
== gomp_get_num_devices ())
3823 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3824 if (devicep
== NULL
)
3827 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3828 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3831 gomp_mutex_lock (&devicep
->lock
);
3833 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3834 struct splay_tree_key_s cur_node
;
3837 cur_node
.host_start
= (uintptr_t) host_ptr
;
3838 cur_node
.host_end
= cur_node
.host_start
+ size
;
3839 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3842 if (n
->tgt
->tgt_start
+ n
->tgt_offset
3843 == (uintptr_t) device_ptr
+ device_offset
3844 && n
->host_start
<= cur_node
.host_start
3845 && n
->host_end
>= cur_node
.host_end
)
3850 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
3851 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
3855 tgt
->to_free
= NULL
;
3857 tgt
->list_count
= 0;
3858 tgt
->device_descr
= devicep
;
3859 splay_tree_node array
= tgt
->array
;
3860 splay_tree_key k
= &array
->key
;
3861 k
->host_start
= cur_node
.host_start
;
3862 k
->host_end
= cur_node
.host_end
;
3864 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
3865 k
->refcount
= REFCOUNT_INFINITY
;
3866 k
->dynamic_refcount
= 0;
3869 array
->right
= NULL
;
3870 splay_tree_insert (&devicep
->mem_map
, array
);
3873 gomp_mutex_unlock (&devicep
->lock
);
3878 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
3880 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3881 if (devicep
== NULL
)
3884 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
3887 gomp_mutex_lock (&devicep
->lock
);
3889 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3890 struct splay_tree_key_s cur_node
;
3893 cur_node
.host_start
= (uintptr_t) ptr
;
3894 cur_node
.host_end
= cur_node
.host_start
;
3895 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
3897 && n
->host_start
== cur_node
.host_start
3898 && n
->refcount
== REFCOUNT_INFINITY
3899 && n
->tgt
->tgt_start
== 0
3900 && n
->tgt
->to_free
== NULL
3901 && n
->tgt
->refcount
== 1
3902 && n
->tgt
->list_count
== 0)
3904 splay_tree_remove (&devicep
->mem_map
, n
);
3905 gomp_unmap_tgt (n
->tgt
);
3909 gomp_mutex_unlock (&devicep
->lock
);
3914 omp_get_mapped_ptr (const void *ptr
, int device_num
)
3916 if (device_num
== omp_initial_device
3917 || device_num
== omp_get_initial_device ())
3918 return (void *) ptr
;
3920 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3921 if (devicep
== NULL
)
3924 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3925 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3926 return (void *) ptr
;
3928 gomp_mutex_lock (&devicep
->lock
);
3930 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
3931 struct splay_tree_key_s cur_node
;
3934 cur_node
.host_start
= (uintptr_t) ptr
;
3935 cur_node
.host_end
= cur_node
.host_start
;
3936 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
3940 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
3941 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
3944 gomp_mutex_unlock (&devicep
->lock
);
3950 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
3952 if (device_num
== omp_initial_device
3953 || device_num
== gomp_get_num_devices ())
3956 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3957 if (devicep
== NULL
)
3960 /* TODO: Unified shared memory must be handled when available. */
3962 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
3966 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
3969 if (device_num
== omp_initial_device
3970 || device_num
== gomp_get_num_devices ())
3971 return gomp_pause_host ();
3973 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
3974 if (devicep
== NULL
)
3977 /* Do nothing for target devices for now. */
3982 omp_pause_resource_all (omp_pause_resource_t kind
)
3985 if (gomp_pause_host ())
3987 /* Do nothing for target devices for now. */
3991 ialias (omp_pause_resource
)
3992 ialias (omp_pause_resource_all
)
3994 #ifdef PLUGIN_SUPPORT
3996 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
3998 The handles of the found functions are stored in the corresponding fields
3999 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4002 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
4003 const char *plugin_name
)
4005 const char *err
= NULL
, *last_missing
= NULL
;
4007 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
4009 #if OFFLOAD_DEFAULTED
4015 /* Check if all required functions are available in the plugin and store
4016 their handlers. None of the symbols can legitimately be NULL,
4017 so we don't need to check dlerror all the time. */
4019 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4021 /* Similar, but missing functions are not an error. Return false if
4022 failed, true otherwise. */
4023 #define DLSYM_OPT(f, n) \
4024 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4025 || (last_missing = #n, 0))
4028 if (device
->version_func () != GOMP_VERSION
)
4030 err
= "plugin version mismatch";
4037 DLSYM (get_num_devices
);
4038 DLSYM (init_device
);
4039 DLSYM (fini_device
);
4041 DLSYM (unload_image
);
4046 device
->capabilities
= device
->get_caps_func ();
4047 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4050 DLSYM_OPT (async_run
, async_run
);
4051 DLSYM_OPT (can_run
, can_run
);
4054 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4056 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
4057 || !DLSYM_OPT (openacc
.create_thread_data
,
4058 openacc_create_thread_data
)
4059 || !DLSYM_OPT (openacc
.destroy_thread_data
,
4060 openacc_destroy_thread_data
)
4061 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
4062 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
4063 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
4064 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
4065 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
4066 || !DLSYM_OPT (openacc
.async
.queue_callback
,
4067 openacc_async_queue_callback
)
4068 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
4069 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
4070 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
4071 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
4073 /* Require all the OpenACC handlers if we have
4074 GOMP_OFFLOAD_CAP_OPENACC_200. */
4075 err
= "plugin missing OpenACC handler function";
4080 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
4081 openacc_cuda_get_current_device
);
4082 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
4083 openacc_cuda_get_current_context
);
4084 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
4085 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
4086 if (cuda
&& cuda
!= 4)
4088 /* Make sure all the CUDA functions are there if any of them are. */
4089 err
= "plugin missing OpenACC CUDA handler function";
4101 gomp_error ("while loading %s: %s", plugin_name
, err
);
4103 gomp_error ("missing function was %s", last_missing
);
4105 dlclose (plugin_handle
);
4110 /* This function finalizes all initialized devices. */
4113 gomp_target_fini (void)
4116 for (i
= 0; i
< num_devices
; i
++)
4119 struct gomp_device_descr
*devicep
= &devices
[i
];
4120 gomp_mutex_lock (&devicep
->lock
);
4121 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
4122 ret
= gomp_fini_device (devicep
);
4123 gomp_mutex_unlock (&devicep
->lock
);
4125 gomp_fatal ("device finalization failed");
4129 /* This function initializes the runtime for offloading.
4130 It parses the list of offload plugins, and tries to load these.
4131 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4132 will be set, and the array DEVICES initialized, containing descriptors for
4133 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
4137 gomp_target_init (void)
4139 const char *prefix
="libgomp-plugin-";
4140 const char *suffix
= SONAME_SUFFIX (1);
4141 const char *cur
, *next
;
4143 int i
, new_num_devs
;
4144 int num_devs
= 0, num_devs_openmp
;
4145 struct gomp_device_descr
*devs
= NULL
;
4147 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
4150 cur
= OFFLOAD_PLUGINS
;
4154 struct gomp_device_descr current_device
;
4155 size_t prefix_len
, suffix_len
, cur_len
;
4157 next
= strchr (cur
, ',');
4159 prefix_len
= strlen (prefix
);
4160 cur_len
= next
? next
- cur
: strlen (cur
);
4161 suffix_len
= strlen (suffix
);
4163 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
4170 memcpy (plugin_name
, prefix
, prefix_len
);
4171 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
4172 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
4174 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
4176 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
4177 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
4178 if (gomp_debug_var
> 0 && new_num_devs
< 0)
4181 int type
= current_device
.get_type_func ();
4182 for (int img
= 0; img
< num_offload_images
; img
++)
4183 if (type
== offload_images
[img
].type
)
4187 char buf
[sizeof ("unified_address, unified_shared_memory, "
4188 "reverse_offload")];
4189 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
4190 char *name
= (char *) malloc (cur_len
+ 1);
4191 memcpy (name
, cur
, cur_len
);
4192 name
[cur_len
] = '\0';
4194 "%s devices present but 'omp requires %s' "
4195 "cannot be fulfilled", name
, buf
);
4199 else if (new_num_devs
>= 1)
4201 /* Augment DEVICES and NUM_DEVICES. */
4203 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
4204 * sizeof (struct gomp_device_descr
));
4212 current_device
.name
= current_device
.get_name_func ();
4213 /* current_device.capabilities has already been set. */
4214 current_device
.type
= current_device
.get_type_func ();
4215 current_device
.mem_map
.root
= NULL
;
4216 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
4217 for (i
= 0; i
< new_num_devs
; i
++)
4219 current_device
.target_id
= i
;
4220 devs
[num_devs
] = current_device
;
4221 gomp_mutex_init (&devs
[num_devs
].lock
);
4232 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
4233 NUM_DEVICES_OPENMP. */
4234 struct gomp_device_descr
*devs_s
4235 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
4242 num_devs_openmp
= 0;
4243 for (i
= 0; i
< num_devs
; i
++)
4244 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4245 devs_s
[num_devs_openmp
++] = devs
[i
];
4246 int num_devs_after_openmp
= num_devs_openmp
;
4247 for (i
= 0; i
< num_devs
; i
++)
4248 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4249 devs_s
[num_devs_after_openmp
++] = devs
[i
];
4253 for (i
= 0; i
< num_devs
; i
++)
4255 /* The 'devices' array can be moved (by the realloc call) until we have
4256 found all the plugins, so registering with the OpenACC runtime (which
4257 takes a copy of the pointer argument) must be delayed until now. */
4258 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4259 goacc_register (&devs
[i
]);
4262 num_devices
= num_devs
;
4263 num_devices_openmp
= num_devs_openmp
;
4265 if (atexit (gomp_target_fini
) != 0)
4266 gomp_fatal ("atexit failed");
4269 #else /* PLUGIN_SUPPORT */
4270 /* If dlfcn.h is unavailable we always fallback to host execution.
4271 GOMP_target* routines are just stubs for this case. */
4273 gomp_target_init (void)
4276 #endif /* PLUGIN_SUPPORT */