1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
50 #define splay_tree_static
52 #include "splay-tree.h"
55 typedef uintptr_t *hash_entry_type
;
/* Hash-table allocation callback: obtain NBYTES bytes through
   gomp_malloc (which does not return on allocation failure).  */
static inline void *
htab_alloc (size_t nbytes)
{
  return gomp_malloc (nbytes);
}
/* Hash-table deallocation callback: release a block previously
   obtained from htab_alloc.  A NULL argument is a no-op, as with
   free.  */
static inline void
htab_free (void *p)
{
  free (p);
}
60 ialias_redirect (GOMP_task
)
62 static inline hashval_t
63 htab_hash (hash_entry_type element
)
65 return hash_pointer ((void *) element
);
69 htab_eq (hash_entry_type x
, hash_entry_type y
)
74 #define FIELD_TGT_EMPTY (~(size_t) 0)
76 static void gomp_target_init (void);
78 /* The whole initialization code for offloading plugins is only run once. */
79 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
81 /* Mutex for offload image registration. */
82 static gomp_mutex_t register_lock
;
84 /* This structure describes an offload image.
85 It contains type of the target device, pointer to host table descriptor, and
86 pointer to target data. */
87 struct offload_image_descr
{
89 enum offload_target_type type
;
90 const void *host_table
;
91 const void *target_data
;
94 /* Array of descriptors of offload images. */
95 static struct offload_image_descr
*offload_images
;
97 /* Total number of offload images. */
98 static int num_offload_images
;
100 /* Array of descriptors for all available devices. */
101 static struct gomp_device_descr
*devices
;
103 /* Total number of available devices. */
104 static int num_devices
;
106 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
107 static int num_devices_openmp
;
109 /* OpenMP requires mask. */
110 static int omp_requires_mask
;
112 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
115 gomp_realloc_unlock (void *old
, size_t size
)
117 void *ret
= realloc (old
, size
);
120 gomp_mutex_unlock (®ister_lock
);
121 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
126 attribute_hidden
void
127 gomp_init_targets_once (void)
129 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
133 gomp_get_num_devices (void)
135 gomp_init_targets_once ();
136 return num_devices_openmp
;
139 static struct gomp_device_descr
*
140 resolve_device (int device_id
, bool remapped
)
142 /* Get number of devices and thus ensure that 'gomp_init_targets_once' was
143 called, which must be done before using default_device_var. */
144 int num_devices
= gomp_get_num_devices ();
146 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
148 struct gomp_task_icv
*icv
= gomp_icv (false);
149 device_id
= icv
->default_device_var
;
155 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
156 : omp_initial_device
))
158 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
160 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
161 "but only the host device is available");
162 else if (device_id
== omp_invalid_device
)
163 gomp_fatal ("omp_invalid_device encountered");
164 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
170 else if (device_id
>= num_devices
)
172 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
173 && device_id
!= num_devices
)
174 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
175 "but device not found");
180 gomp_mutex_lock (&devices
[device_id
].lock
);
181 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
182 gomp_init_device (&devices
[device_id
]);
183 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
185 gomp_mutex_unlock (&devices
[device_id
].lock
);
187 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
188 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
189 "but device is finalized");
193 gomp_mutex_unlock (&devices
[device_id
].lock
);
195 return &devices
[device_id
];
199 static inline splay_tree_key
200 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
202 if (key
->host_start
!= key
->host_end
)
203 return splay_tree_lookup (mem_map
, key
);
206 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
211 n
= splay_tree_lookup (mem_map
, key
);
215 return splay_tree_lookup (mem_map
, key
);
218 static inline reverse_splay_tree_key
219 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
221 return reverse_splay_tree_lookup (mem_map_rev
, key
);
224 static inline splay_tree_key
225 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
227 if (key
->host_start
!= key
->host_end
)
228 return splay_tree_lookup (mem_map
, key
);
231 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
237 gomp_device_copy (struct gomp_device_descr
*devicep
,
238 bool (*copy_func
) (int, void *, const void *, size_t),
239 const char *dst
, void *dstaddr
,
240 const char *src
, const void *srcaddr
,
243 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
245 gomp_mutex_unlock (&devicep
->lock
);
246 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
247 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
252 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
253 bool (*copy_func
) (int, void *, const void *, size_t,
254 struct goacc_asyncqueue
*),
255 const char *dst
, void *dstaddr
,
256 const char *src
, const void *srcaddr
,
257 const void *srcaddr_orig
,
258 size_t size
, struct goacc_asyncqueue
*aq
)
260 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
262 gomp_mutex_unlock (&devicep
->lock
);
263 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
264 gomp_fatal ("Copying of %s object [%p..%p)"
265 " via buffer %s object [%p..%p)"
266 " to %s object [%p..%p) failed",
267 src
, srcaddr_orig
, srcaddr_orig
+ size
,
268 src
, srcaddr
, srcaddr
+ size
,
269 dst
, dstaddr
, dstaddr
+ size
);
271 gomp_fatal ("Copying of %s object [%p..%p)"
272 " to %s object [%p..%p) failed",
273 src
, srcaddr
, srcaddr
+ size
,
274 dst
, dstaddr
, dstaddr
+ size
);
278 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
279 host to device memory transfers. */
281 struct gomp_coalesce_chunk
283 /* The starting and ending point of a coalesced chunk of memory. */
287 struct gomp_coalesce_buf
289 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
290 it will be copied to the device. */
292 struct target_mem_desc
*tgt
;
293 /* Array with offsets, chunks[i].start is the starting offset and
294 chunks[i].end ending offset relative to tgt->tgt_start device address
295 of chunks which are to be copied to buf and later copied to device. */
296 struct gomp_coalesce_chunk
*chunks
;
297 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
300 /* During construction of chunks array, how many memory regions are within
301 the last chunk. If there is just one memory region for a chunk, we copy
302 it directly to device rather than going through buf. */
306 /* Maximum size of memory region considered for coalescing. Larger copies
307 are performed directly. */
308 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
310 /* Maximum size of a gap in between regions to consider them being copied
311 within the same chunk. All the device offsets considered are within
312 newly allocated device memory, so it isn't fatal if we copy some padding
313 in between from host to device. The gaps come either from alignment
314 padding or from memory regions which are not supposed to be copied from
315 host to device (e.g. map(alloc:), map(from:) etc.). */
316 #define MAX_COALESCE_BUF_GAP (4 * 1024)
318 /* Add region with device tgt_start relative offset and length to CBUF.
320 This must not be used for asynchronous copies, because the host data might
321 not be computed yet (by an earlier asynchronous compute region, for
322 example). The exception is for EPHEMERAL data, that we know is available
323 already "by construction". */
326 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
328 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
332 if (cbuf
->chunk_cnt
< 0)
334 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
336 cbuf
->chunk_cnt
= -1;
339 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
341 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
345 /* If the last chunk is only used by one mapping, discard it,
346 as it will be one host to device copy anyway and
347 memcpying it around will only waste cycles. */
348 if (cbuf
->use_cnt
== 1)
351 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
352 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
357 /* Return true for mapping kinds which need to copy data from the
358 host to device for regions that weren't previously mapped. */
361 gomp_to_device_kind_p (int kind
)
367 case GOMP_MAP_FORCE_ALLOC
:
368 case GOMP_MAP_FORCE_FROM
:
369 case GOMP_MAP_ALWAYS_FROM
:
370 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
371 case GOMP_MAP_FORCE_PRESENT
:
378 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
379 non-NULL), when the source data is stack or may otherwise be deallocated
380 before the asynchronous copy takes place, EPHEMERAL must be passed as
383 attribute_hidden
void
384 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
385 struct goacc_asyncqueue
*aq
,
386 void *d
, const void *h
, size_t sz
,
387 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
391 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
392 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
395 long last
= cbuf
->chunk_cnt
- 1;
396 while (first
<= last
)
398 long middle
= (first
+ last
) >> 1;
399 if (cbuf
->chunks
[middle
].end
<= doff
)
401 else if (cbuf
->chunks
[middle
].start
<= doff
)
403 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
405 gomp_mutex_unlock (&devicep
->lock
);
406 gomp_fatal ("internal libgomp cbuf error");
409 /* In an asynchronous context, verify that CBUF isn't used
410 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
411 if (__builtin_expect (aq
!= NULL
, 0))
414 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
424 if (__builtin_expect (aq
!= NULL
, 0))
426 void *h_buf
= (void *) h
;
429 /* We're queueing up an asynchronous copy from data that may
430 disappear before the transfer takes place (i.e. because it is a
431 stack local in a function that is no longer executing). As we've
432 not been able to use CBUF, make a copy of the data into a
434 h_buf
= gomp_malloc (sz
);
435 memcpy (h_buf
, h
, sz
);
437 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
438 "dev", d
, "host", h_buf
, h
, sz
, aq
);
440 /* Free once the transfer has completed. */
441 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
444 gomp_device_copy (devicep
, devicep
->host2dev_func
,
445 "dev", d
, "host", h
, sz
);
448 attribute_hidden
void
449 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
450 struct goacc_asyncqueue
*aq
,
451 void *h
, const void *d
, size_t sz
)
453 if (__builtin_expect (aq
!= NULL
, 0))
454 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
455 "host", h
, "dev", d
, NULL
, sz
, aq
);
457 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
461 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
463 if (!devicep
->free_func (devicep
->target_id
, devptr
))
465 gomp_mutex_unlock (&devicep
->lock
);
466 gomp_fatal ("error in freeing device memory block at %p", devptr
);
470 /* Increment reference count of a splay_tree_key region K by 1.
471 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
472 increment the value if refcount is not yet contained in the set (used for
473 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
474 once for each construct). */
477 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
479 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
482 uintptr_t *refcount_ptr
= &k
->refcount
;
484 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
485 refcount_ptr
= &k
->structelem_refcount
;
486 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
487 refcount_ptr
= k
->structelem_refcount_ptr
;
491 if (htab_find (*refcount_set
, refcount_ptr
))
493 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
494 *slot
= refcount_ptr
;
501 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
502 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
503 track already seen refcounts, and only adjust the value if refcount is not
504 yet contained in the set (like gomp_increment_refcount).
506 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
507 it is already zero and we know we decremented it earlier. This signals that
508 associated maps should be copied back to host.
510 *DO_REMOVE is set to true when this is the first handling of this refcount
511 and we are setting it to zero. This signals a removal of this key from the
514 Copy and removal are separated due to cases like handling of structure
515 elements, e.g. each map of a structure element representing a possible copy
516 out of a structure field has to be handled individually, but we only signal
517 removal for one (the first encountered) sibling map. */
520 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
521 bool *do_copy
, bool *do_remove
)
523 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
525 *do_copy
= *do_remove
= false;
529 uintptr_t *refcount_ptr
= &k
->refcount
;
531 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
532 refcount_ptr
= &k
->structelem_refcount
;
533 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
534 refcount_ptr
= k
->structelem_refcount_ptr
;
536 bool new_encountered_refcount
;
537 bool set_to_zero
= false;
538 bool is_zero
= false;
540 uintptr_t orig_refcount
= *refcount_ptr
;
544 if (htab_find (*refcount_set
, refcount_ptr
))
546 new_encountered_refcount
= false;
550 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
551 *slot
= refcount_ptr
;
552 new_encountered_refcount
= true;
555 /* If no refcount_set being used, assume all keys are being decremented
556 for the first time. */
557 new_encountered_refcount
= true;
561 else if (*refcount_ptr
> 0)
565 if (*refcount_ptr
== 0)
567 if (orig_refcount
> 0)
573 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
574 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
577 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
578 gomp_map_0len_lookup found oldn for newn.
579 Helper function of gomp_map_vars. */
582 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
583 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
584 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
585 unsigned char kind
, bool always_to_flag
, bool implicit
,
586 struct gomp_coalesce_buf
*cbuf
,
587 htab_t
*refcount_set
)
589 assert (kind
!= GOMP_MAP_ATTACH
590 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
593 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
594 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
595 tgt_var
->is_attach
= false;
596 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
598 /* For implicit maps, old contained in new is valid. */
599 bool implicit_subset
= (implicit
600 && newn
->host_start
<= oldn
->host_start
601 && oldn
->host_end
<= newn
->host_end
);
603 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
605 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
607 if (GOMP_MAP_FORCE_P (kind
)
608 /* For implicit maps, old contained in new is valid. */
610 /* Otherwise, new contained inside old is considered valid. */
611 || (oldn
->host_start
<= newn
->host_start
612 && newn
->host_end
<= oldn
->host_end
)))
614 gomp_mutex_unlock (&devicep
->lock
);
615 gomp_fatal ("Trying to map into device [%p..%p) object when "
616 "[%p..%p) is already mapped",
617 (void *) newn
->host_start
, (void *) newn
->host_end
,
618 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
621 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
623 /* Implicit + always should not happen. If this does occur, below
624 address/length adjustment is a TODO. */
625 assert (!implicit_subset
);
627 if (oldn
->aux
&& oldn
->aux
->attach_count
)
629 /* We have to be careful not to overwrite still attached pointers
630 during the copyback to host. */
631 uintptr_t addr
= newn
->host_start
;
632 while (addr
< newn
->host_end
)
634 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
635 if (oldn
->aux
->attach_count
[i
] == 0)
636 gomp_copy_host2dev (devicep
, aq
,
637 (void *) (oldn
->tgt
->tgt_start
639 + addr
- oldn
->host_start
),
641 sizeof (void *), false, cbuf
);
642 addr
+= sizeof (void *);
646 gomp_copy_host2dev (devicep
, aq
,
647 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
648 + newn
->host_start
- oldn
->host_start
),
649 (void *) newn
->host_start
,
650 newn
->host_end
- newn
->host_start
, false, cbuf
);
653 gomp_increment_refcount (oldn
, refcount_set
);
657 get_kind (bool short_mapkind
, void *kinds
, int idx
)
660 return ((unsigned char *) kinds
)[idx
];
662 int val
= ((unsigned short *) kinds
)[idx
];
663 if (GOMP_MAP_IMPLICIT_P (val
))
664 val
&= ~GOMP_MAP_IMPLICIT
;
670 get_implicit (bool short_mapkind
, void *kinds
, int idx
)
675 int val
= ((unsigned short *) kinds
)[idx
];
676 return GOMP_MAP_IMPLICIT_P (val
);
680 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
681 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
682 struct gomp_coalesce_buf
*cbuf
,
683 bool allow_zero_length_array_sections
)
685 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
686 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
687 struct splay_tree_key_s cur_node
;
689 cur_node
.host_start
= host_ptr
;
690 if (cur_node
.host_start
== (uintptr_t) NULL
)
692 cur_node
.tgt_offset
= (uintptr_t) NULL
;
693 gomp_copy_host2dev (devicep
, aq
,
694 (void *) (tgt
->tgt_start
+ target_offset
),
695 (void *) &cur_node
.tgt_offset
, sizeof (void *),
699 /* Add bias to the pointer value. */
700 cur_node
.host_start
+= bias
;
701 cur_node
.host_end
= cur_node
.host_start
;
702 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
705 if (allow_zero_length_array_sections
)
706 cur_node
.tgt_offset
= cur_node
.host_start
;
709 gomp_mutex_unlock (&devicep
->lock
);
710 gomp_fatal ("Pointer target of array section wasn't mapped");
715 cur_node
.host_start
-= n
->host_start
;
717 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
718 /* At this point tgt_offset is target address of the
719 array section. Now subtract bias to get what we want
720 to initialize the pointer with. */
721 cur_node
.tgt_offset
-= bias
;
723 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
724 (void *) &cur_node
.tgt_offset
, sizeof (void *),
729 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
730 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
731 size_t first
, size_t i
, void **hostaddrs
,
732 size_t *sizes
, void *kinds
,
733 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
735 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
736 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
737 struct splay_tree_key_s cur_node
;
740 const bool short_mapkind
= true;
741 const int typemask
= short_mapkind
? 0xff : 0x7;
743 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
744 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
745 splay_tree_key n2
= gomp_map_0len_lookup (mem_map
, &cur_node
);
746 kind
= get_kind (short_mapkind
, kinds
, i
);
747 implicit
= get_implicit (short_mapkind
, kinds
, i
);
750 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
752 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
753 kind
& typemask
, false, implicit
, cbuf
,
759 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
761 cur_node
.host_start
--;
762 n2
= splay_tree_lookup (mem_map
, &cur_node
);
763 cur_node
.host_start
++;
766 && n2
->host_start
- n
->host_start
767 == n2
->tgt_offset
- n
->tgt_offset
)
769 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
770 kind
& typemask
, false, implicit
, cbuf
,
776 n2
= splay_tree_lookup (mem_map
, &cur_node
);
780 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
782 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
783 kind
& typemask
, false, implicit
, cbuf
,
788 gomp_mutex_unlock (&devicep
->lock
);
789 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
790 "other mapped elements from the same structure weren't mapped "
791 "together with it", (void *) cur_node
.host_start
,
792 (void *) cur_node
.host_end
);
795 attribute_hidden
void
796 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
797 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
798 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
799 struct gomp_coalesce_buf
*cbufp
,
800 bool allow_zero_length_array_sections
)
802 struct splay_tree_key_s s
;
807 gomp_mutex_unlock (&devicep
->lock
);
808 gomp_fatal ("enclosing struct not mapped for attach");
811 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
812 /* We might have a pointer in a packed struct: however we cannot have more
813 than one such pointer in each pointer-sized portion of the struct, so
815 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
818 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
820 if (!n
->aux
->attach_count
)
822 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
824 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
825 n
->aux
->attach_count
[idx
]++;
828 gomp_mutex_unlock (&devicep
->lock
);
829 gomp_fatal ("attach count overflow");
832 if (n
->aux
->attach_count
[idx
] == 1)
834 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
836 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
840 if ((void *) target
== NULL
)
842 /* As a special case, allow attaching NULL host pointers. This
843 allows e.g. unassociated Fortran pointers to be mapped
848 "%s: attaching NULL host pointer, target %p "
849 "(struct base %p)\n", __FUNCTION__
, (void *) devptr
,
850 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
));
852 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
853 sizeof (void *), true, cbufp
);
858 s
.host_start
= target
+ bias
;
859 s
.host_end
= s
.host_start
+ 1;
860 tn
= splay_tree_lookup (mem_map
, &s
);
864 if (allow_zero_length_array_sections
)
865 /* When allowing attachment to zero-length array sections, we
866 copy the host pointer when the target region is not mapped. */
870 gomp_mutex_unlock (&devicep
->lock
);
871 gomp_fatal ("pointer target not mapped for attach");
875 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
878 "%s: attaching host %p, target %p (struct base %p) to %p\n",
879 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
880 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
882 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
883 sizeof (void *), true, cbufp
);
886 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
887 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
890 attribute_hidden
void
891 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
892 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
893 uintptr_t detach_from
, bool finalize
,
894 struct gomp_coalesce_buf
*cbufp
)
900 gomp_mutex_unlock (&devicep
->lock
);
901 gomp_fatal ("enclosing struct not mapped for detach");
904 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
906 if (!n
->aux
|| !n
->aux
->attach_count
)
908 gomp_mutex_unlock (&devicep
->lock
);
909 gomp_fatal ("no attachment counters for struct");
913 n
->aux
->attach_count
[idx
] = 1;
915 if (n
->aux
->attach_count
[idx
] == 0)
917 gomp_mutex_unlock (&devicep
->lock
);
918 gomp_fatal ("attach count underflow");
921 n
->aux
->attach_count
[idx
]--;
923 if (n
->aux
->attach_count
[idx
] == 0)
925 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
927 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
930 "%s: detaching host %p, target %p (struct base %p) to %p\n",
931 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
932 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
935 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
936 sizeof (void *), true, cbufp
);
939 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
940 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
943 attribute_hidden
uintptr_t
944 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
946 if (tgt
->list
[i
].key
!= NULL
)
947 return tgt
->list
[i
].key
->tgt
->tgt_start
948 + tgt
->list
[i
].key
->tgt_offset
949 + tgt
->list
[i
].offset
;
951 switch (tgt
->list
[i
].offset
)
954 return (uintptr_t) hostaddrs
[i
];
960 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
961 + tgt
->list
[i
+ 1].key
->tgt_offset
962 + tgt
->list
[i
+ 1].offset
963 + (uintptr_t) hostaddrs
[i
]
964 - (uintptr_t) hostaddrs
[i
+ 1];
967 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
971 static inline __attribute__((always_inline
)) struct target_mem_desc
*
972 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
973 struct goacc_asyncqueue
*aq
, size_t mapnum
,
974 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
975 void *kinds
, bool short_mapkind
,
976 htab_t
*refcount_set
,
977 enum gomp_map_vars_kind pragma_kind
)
979 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
980 bool has_firstprivate
= false;
981 bool has_always_ptrset
= false;
982 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
983 const int rshift
= short_mapkind
? 8 : 3;
984 const int typemask
= short_mapkind
? 0xff : 0x7;
985 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
986 struct splay_tree_key_s cur_node
;
987 struct target_mem_desc
*tgt
988 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
989 tgt
->list_count
= mapnum
;
990 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
991 tgt
->device_descr
= devicep
;
993 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
1002 tgt_align
= sizeof (void *);
1005 cbuf
.chunk_cnt
= -1;
1008 if (mapnum
> 1 || (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1010 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
1011 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
1014 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1016 size_t align
= 4 * sizeof (void *);
1018 tgt_size
= mapnum
* sizeof (void *);
1020 cbuf
.use_cnt
= 1 + (mapnum
> 1);
1021 cbuf
.chunks
[0].start
= 0;
1022 cbuf
.chunks
[0].end
= tgt_size
;
1025 gomp_mutex_lock (&devicep
->lock
);
1026 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1028 gomp_mutex_unlock (&devicep
->lock
);
1033 for (i
= 0; i
< mapnum
; i
++)
1035 int kind
= get_kind (short_mapkind
, kinds
, i
);
1036 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1037 if (hostaddrs
[i
] == NULL
1038 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1040 tgt
->list
[i
].key
= NULL
;
1041 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1044 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1045 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1047 tgt
->list
[i
].key
= NULL
;
1050 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1051 on a separate construct prior to using use_device_{addr,ptr}.
1052 In OpenMP 5.0, map directives need to be ordered by the
1053 middle-end before the use_device_* clauses. If
1054 !not_found_cnt, all mappings requested (if any) are already
1055 mapped, so use_device_{addr,ptr} can be resolved right away.
1056 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1057 now but would succeed after performing the mappings in the
1058 following loop. We can't defer this always to the second
1059 loop, because it is not even invoked when !not_found_cnt
1060 after the first loop. */
1061 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1062 cur_node
.host_end
= cur_node
.host_start
;
1063 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1066 cur_node
.host_start
-= n
->host_start
;
1068 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1069 + cur_node
.host_start
);
1071 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1073 gomp_mutex_unlock (&devicep
->lock
);
1074 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1076 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1077 /* If not present, continue using the host address. */
1080 __builtin_unreachable ();
1081 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1084 tgt
->list
[i
].offset
= 0;
1087 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1089 size_t first
= i
+ 1;
1090 size_t last
= i
+ sizes
[i
];
1091 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1092 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1094 tgt
->list
[i
].key
= NULL
;
1095 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1096 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1099 size_t align
= (size_t) 1 << (kind
>> rshift
);
1100 if (tgt_align
< align
)
1102 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1103 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1104 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1105 not_found_cnt
+= last
- i
;
1106 for (i
= first
; i
<= last
; i
++)
1108 tgt
->list
[i
].key
= NULL
;
1110 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1113 gomp_coalesce_buf_add (&cbuf
,
1114 tgt_size
- cur_node
.host_end
1115 + (uintptr_t) hostaddrs
[i
],
1121 for (i
= first
; i
<= last
; i
++)
1122 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1123 sizes
, kinds
, NULL
, refcount_set
);
1127 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1129 tgt
->list
[i
].key
= NULL
;
1130 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1131 has_firstprivate
= true;
1134 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1135 || ((kind
& typemask
)
1136 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1138 tgt
->list
[i
].key
= NULL
;
1139 has_firstprivate
= true;
1142 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1143 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1144 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1146 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1147 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1149 tgt
->list
[i
].key
= NULL
;
1151 size_t align
= (size_t) 1 << (kind
>> rshift
);
1152 if (tgt_align
< align
)
1154 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1156 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1157 cur_node
.host_end
- cur_node
.host_start
);
1158 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1159 has_firstprivate
= true;
1163 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1165 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1168 tgt
->list
[i
].key
= NULL
;
1169 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1174 n
= splay_tree_lookup (mem_map
, &cur_node
);
1175 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1177 int always_to_cnt
= 0;
1178 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1180 bool has_nullptr
= false;
1182 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1183 if (n
->tgt
->list
[j
].key
== n
)
1185 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1188 if (n
->tgt
->list_count
== 0)
1190 /* 'declare target'; assume has_nullptr; it could also be
1191 statically assigned pointer, but that it should be to
1192 the equivalent variable on the host. */
1193 assert (n
->refcount
== REFCOUNT_INFINITY
);
1197 assert (j
< n
->tgt
->list_count
);
1198 /* Re-map the data if there is an 'always' modifier or if it a
1199 null pointer was there and non a nonnull has been found; that
1200 permits transparent re-mapping for Fortran array descriptors
1201 which were previously mapped unallocated. */
1202 for (j
= i
+ 1; j
< mapnum
; j
++)
1204 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1205 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1207 || !GOMP_MAP_POINTER_P (ptr_kind
)
1208 || *(void **) hostaddrs
[j
] == NULL
))
1210 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1211 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1212 > cur_node
.host_end
))
1216 has_always_ptrset
= true;
1221 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1222 kind
& typemask
, always_to_cnt
> 0, implicit
,
1223 NULL
, refcount_set
);
1228 tgt
->list
[i
].key
= NULL
;
1230 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1232 /* Not present, hence, skip entry - including its MAP_POINTER,
1234 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1236 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1237 == GOMP_MAP_POINTER
))
1240 tgt
->list
[i
].key
= NULL
;
1241 tgt
->list
[i
].offset
= 0;
1245 size_t align
= (size_t) 1 << (kind
>> rshift
);
1247 if (tgt_align
< align
)
1249 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1251 && gomp_to_device_kind_p (kind
& typemask
))
1252 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1253 cur_node
.host_end
- cur_node
.host_start
);
1254 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1255 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1259 for (j
= i
+ 1; j
< mapnum
; j
++)
1260 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1261 kinds
, j
)) & typemask
))
1262 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1264 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1265 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1266 > cur_node
.host_end
))
1270 tgt
->list
[j
].key
= NULL
;
1281 gomp_mutex_unlock (&devicep
->lock
);
1282 gomp_fatal ("unexpected aggregation");
1284 tgt
->to_free
= devaddrs
[0];
1285 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1286 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1288 else if (not_found_cnt
|| (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1290 /* Allocate tgt_align aligned tgt_size block of memory. */
1291 /* FIXME: Perhaps change interface to allocate properly aligned
1293 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1294 tgt_size
+ tgt_align
- 1);
1297 gomp_mutex_unlock (&devicep
->lock
);
1298 gomp_fatal ("device memory allocation fail");
1301 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1302 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1303 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1305 if (cbuf
.use_cnt
== 1)
1307 if (cbuf
.chunk_cnt
> 0)
1310 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1320 tgt
->to_free
= NULL
;
1326 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1327 tgt_size
= mapnum
* sizeof (void *);
1330 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1333 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1334 splay_tree_node array
= tgt
->array
;
1335 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1336 uintptr_t field_tgt_base
= 0;
1337 splay_tree_key field_tgt_structelem_first
= NULL
;
1339 for (i
= 0; i
< mapnum
; i
++)
1340 if (has_always_ptrset
1342 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1343 == GOMP_MAP_TO_PSET
)
1345 splay_tree_key k
= tgt
->list
[i
].key
;
1346 bool has_nullptr
= false;
1348 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1349 if (k
->tgt
->list
[j
].key
== k
)
1351 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1354 if (k
->tgt
->list_count
== 0)
1357 assert (j
< k
->tgt
->list_count
);
1359 tgt
->list
[i
].has_null_ptr_assoc
= false;
1360 for (j
= i
+ 1; j
< mapnum
; j
++)
1362 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1363 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1365 || !GOMP_MAP_POINTER_P (ptr_kind
)
1366 || *(void **) hostaddrs
[j
] == NULL
))
1368 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1369 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1374 if (*(void **) hostaddrs
[j
] == NULL
)
1375 tgt
->list
[i
].has_null_ptr_assoc
= true;
1376 tgt
->list
[j
].key
= k
;
1377 tgt
->list
[j
].copy_from
= false;
1378 tgt
->list
[j
].always_copy_from
= false;
1379 tgt
->list
[j
].is_attach
= false;
1380 gomp_increment_refcount (k
, refcount_set
);
1381 gomp_map_pointer (k
->tgt
, aq
,
1382 (uintptr_t) *(void **) hostaddrs
[j
],
1383 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1385 sizes
[j
], cbufp
, false);
1390 else if (tgt
->list
[i
].key
== NULL
)
1392 int kind
= get_kind (short_mapkind
, kinds
, i
);
1393 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1394 if (hostaddrs
[i
] == NULL
)
1396 switch (kind
& typemask
)
1398 size_t align
, len
, first
, last
;
1400 case GOMP_MAP_FIRSTPRIVATE
:
1401 align
= (size_t) 1 << (kind
>> rshift
);
1402 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1403 tgt
->list
[i
].offset
= tgt_size
;
1405 gomp_copy_host2dev (devicep
, aq
,
1406 (void *) (tgt
->tgt_start
+ tgt_size
),
1407 (void *) hostaddrs
[i
], len
, false, cbufp
);
1408 /* Save device address in hostaddr to permit latter availablity
1409 when doing a deep-firstprivate with pointer attach. */
1410 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1413 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1414 firstprivate to hostaddrs[i+1], which is assumed to contain a
1418 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1420 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1421 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1423 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1424 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1425 this probably needs revision for 'aq' usage. */
1427 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1428 sizeof (void *), false, cbufp
);
1432 case GOMP_MAP_FIRSTPRIVATE_INT
:
1433 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1435 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1436 /* The OpenACC 'host_data' construct only allows 'use_device'
1437 "mapping" clauses, so in the first loop, 'not_found_cnt'
1438 must always have been zero, so all OpenACC 'use_device'
1439 clauses have already been handled. (We can only easily test
1440 'use_device' with 'if_present' clause here.) */
1441 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1442 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1443 code conceptually simple, similar to the first loop. */
1444 case GOMP_MAP_USE_DEVICE_PTR
:
1445 if (tgt
->list
[i
].offset
== 0)
1447 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1448 cur_node
.host_end
= cur_node
.host_start
;
1449 n
= gomp_map_lookup (mem_map
, &cur_node
);
1452 cur_node
.host_start
-= n
->host_start
;
1454 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1455 + cur_node
.host_start
);
1457 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1459 gomp_mutex_unlock (&devicep
->lock
);
1460 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1462 else if ((kind
& typemask
)
1463 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1464 /* If not present, continue using the host address. */
1467 __builtin_unreachable ();
1468 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1471 case GOMP_MAP_STRUCT
:
1473 last
= i
+ sizes
[i
];
1474 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1475 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1477 if (tgt
->list
[first
].key
!= NULL
)
1479 if (sizes
[last
] == 0)
1480 cur_node
.host_end
++;
1481 n
= splay_tree_lookup (mem_map
, &cur_node
);
1482 if (sizes
[last
] == 0)
1483 cur_node
.host_end
--;
1484 if (n
== NULL
&& cur_node
.host_start
== cur_node
.host_end
)
1486 gomp_mutex_unlock (&devicep
->lock
);
1487 gomp_fatal ("Struct pointer member not mapped (%p)",
1488 (void*) hostaddrs
[first
]);
1492 size_t align
= (size_t) 1 << (kind
>> rshift
);
1493 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1494 - (uintptr_t) hostaddrs
[i
];
1495 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1496 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1497 - (uintptr_t) hostaddrs
[i
];
1498 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1499 field_tgt_offset
= tgt_size
;
1500 field_tgt_clear
= last
;
1501 field_tgt_structelem_first
= NULL
;
1502 tgt_size
+= cur_node
.host_end
1503 - (uintptr_t) hostaddrs
[first
];
1506 for (i
= first
; i
<= last
; i
++)
1507 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1508 sizes
, kinds
, cbufp
, refcount_set
);
1511 case GOMP_MAP_ALWAYS_POINTER
:
1512 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1513 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1514 n
= splay_tree_lookup (mem_map
, &cur_node
);
1516 || n
->host_start
> cur_node
.host_start
1517 || n
->host_end
< cur_node
.host_end
)
1519 gomp_mutex_unlock (&devicep
->lock
);
1520 gomp_fatal ("always pointer not mapped");
1523 && ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1524 != GOMP_MAP_ALWAYS_POINTER
))
1525 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1526 if (cur_node
.tgt_offset
)
1527 cur_node
.tgt_offset
-= sizes
[i
];
1528 gomp_copy_host2dev (devicep
, aq
,
1529 (void *) (n
->tgt
->tgt_start
1531 + cur_node
.host_start
1533 (void *) &cur_node
.tgt_offset
,
1534 sizeof (void *), true, cbufp
);
1535 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1536 + cur_node
.host_start
- n
->host_start
;
1538 case GOMP_MAP_IF_PRESENT
:
1539 /* Not present - otherwise handled above. Skip over its
1540 MAP_POINTER as well. */
1542 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1543 == GOMP_MAP_POINTER
))
1546 case GOMP_MAP_ATTACH
:
1547 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1549 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1550 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1551 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1554 tgt
->list
[i
].key
= n
;
1555 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1556 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1557 tgt
->list
[i
].copy_from
= false;
1558 tgt
->list
[i
].always_copy_from
= false;
1559 tgt
->list
[i
].is_attach
= true;
1560 /* OpenACC 'attach'/'detach' doesn't affect
1561 structured/dynamic reference counts ('n->refcount',
1562 'n->dynamic_refcount'). */
1565 = ((kind
& typemask
)
1566 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1567 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1568 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1571 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1573 gomp_mutex_unlock (&devicep
->lock
);
1574 gomp_fatal ("outer struct not mapped for attach");
1581 splay_tree_key k
= &array
->key
;
1582 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1583 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1584 k
->host_end
= k
->host_start
+ sizes
[i
];
1586 k
->host_end
= k
->host_start
+ sizeof (void *);
1587 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1588 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1589 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1590 kind
& typemask
, false, implicit
, cbufp
,
1595 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1597 /* Replace target address of the pointer with target address
1598 of mapped object in the splay tree. */
1599 splay_tree_remove (mem_map
, n
);
1601 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1602 k
->aux
->link_key
= n
;
1604 size_t align
= (size_t) 1 << (kind
>> rshift
);
1605 tgt
->list
[i
].key
= k
;
1608 k
->dynamic_refcount
= 0;
1609 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1611 k
->tgt_offset
= k
->host_start
- field_tgt_base
1615 k
->refcount
= REFCOUNT_STRUCTELEM
;
1616 if (field_tgt_structelem_first
== NULL
)
1618 /* Set to first structure element of sequence. */
1619 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1620 field_tgt_structelem_first
= k
;
1623 /* Point to refcount of leading element, but do not
1625 k
->structelem_refcount_ptr
1626 = &field_tgt_structelem_first
->structelem_refcount
;
1628 if (i
== field_tgt_clear
)
1630 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1631 field_tgt_structelem_first
= NULL
;
1634 if (i
== field_tgt_clear
)
1635 field_tgt_clear
= FIELD_TGT_EMPTY
;
1639 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1640 k
->tgt_offset
= tgt_size
;
1641 tgt_size
+= k
->host_end
- k
->host_start
;
1643 /* First increment, from 0 to 1. gomp_increment_refcount
1644 encapsulates the different increment cases, so use this
1645 instead of directly setting 1 during initialization. */
1646 gomp_increment_refcount (k
, refcount_set
);
1648 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1649 tgt
->list
[i
].always_copy_from
1650 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1651 tgt
->list
[i
].is_attach
= false;
1652 tgt
->list
[i
].offset
= 0;
1653 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1656 array
->right
= NULL
;
1657 splay_tree_insert (mem_map
, array
);
1658 switch (kind
& typemask
)
1660 case GOMP_MAP_ALLOC
:
1662 case GOMP_MAP_FORCE_ALLOC
:
1663 case GOMP_MAP_FORCE_FROM
:
1664 case GOMP_MAP_ALWAYS_FROM
:
1667 case GOMP_MAP_TOFROM
:
1668 case GOMP_MAP_FORCE_TO
:
1669 case GOMP_MAP_FORCE_TOFROM
:
1670 case GOMP_MAP_ALWAYS_TO
:
1671 case GOMP_MAP_ALWAYS_TOFROM
:
1672 gomp_copy_host2dev (devicep
, aq
,
1673 (void *) (tgt
->tgt_start
1675 (void *) k
->host_start
,
1676 k
->host_end
- k
->host_start
,
1679 case GOMP_MAP_POINTER
:
1680 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1682 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1683 k
->tgt_offset
, sizes
[i
], cbufp
,
1685 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1687 case GOMP_MAP_TO_PSET
:
1688 gomp_copy_host2dev (devicep
, aq
,
1689 (void *) (tgt
->tgt_start
1691 (void *) k
->host_start
,
1692 k
->host_end
- k
->host_start
,
1694 tgt
->list
[i
].has_null_ptr_assoc
= false;
1696 for (j
= i
+ 1; j
< mapnum
; j
++)
1698 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1700 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1701 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1703 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1704 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1709 tgt
->list
[j
].key
= k
;
1710 tgt
->list
[j
].copy_from
= false;
1711 tgt
->list
[j
].always_copy_from
= false;
1712 tgt
->list
[j
].is_attach
= false;
1713 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1714 /* For OpenMP, the use of refcount_sets causes
1715 errors if we set k->refcount = 1 above but also
1716 increment it again here, for decrementing will
1717 not properly match, since we decrement only once
1718 for each key's refcount. Therefore avoid this
1719 increment for OpenMP constructs. */
1721 gomp_increment_refcount (k
, refcount_set
);
1722 gomp_map_pointer (tgt
, aq
,
1723 (uintptr_t) *(void **) hostaddrs
[j
],
1725 + ((uintptr_t) hostaddrs
[j
]
1727 sizes
[j
], cbufp
, false);
1732 case GOMP_MAP_FORCE_PRESENT
:
1733 case GOMP_MAP_ALWAYS_PRESENT_TO
:
1734 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
1735 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
1737 /* We already looked up the memory region above and it
1739 size_t size
= k
->host_end
- k
->host_start
;
1740 gomp_mutex_unlock (&devicep
->lock
);
1741 #ifdef HAVE_INTTYPES_H
1742 gomp_fatal ("present clause: not present on the device "
1743 "(addr: %p, size: %"PRIu64
" (0x%"PRIx64
"), "
1744 "dev: %d)", (void *) k
->host_start
,
1745 (uint64_t) size
, (uint64_t) size
,
1746 devicep
->target_id
);
1748 gomp_fatal ("present clause: not present on the device "
1749 "(addr: %p, size: %lu (0x%lx), dev: %d)",
1750 (void *) k
->host_start
,
1751 (unsigned long) size
, (unsigned long) size
,
1752 devicep
->target_id
);
1756 case GOMP_MAP_FORCE_DEVICEPTR
:
1757 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1758 gomp_copy_host2dev (devicep
, aq
,
1759 (void *) (tgt
->tgt_start
1761 (void *) k
->host_start
,
1762 sizeof (void *), false, cbufp
);
1765 gomp_mutex_unlock (&devicep
->lock
);
1766 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1770 if (k
->aux
&& k
->aux
->link_key
)
1772 /* Set link pointer on target to the device address of the
1774 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1775 /* We intentionally do not use coalescing here, as it's not
1776 data allocated by the current call to this function. */
1777 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1778 &tgt_addr
, sizeof (void *), true, NULL
);
1785 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1787 for (i
= 0; i
< mapnum
; i
++)
1789 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1790 gomp_copy_host2dev (devicep
, aq
,
1791 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1792 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1800 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1801 gomp_copy_host2dev (devicep
, aq
,
1802 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1803 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1804 - cbuf
.chunks
[0].start
),
1805 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1808 /* Free once the transfer has completed. */
1809 devicep
->openacc
.async
.queue_callback_func (aq
, free
, cbuf
.buf
);
1816 /* If the variable from "omp target enter data" map-list was already mapped,
1817 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1819 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1825 gomp_mutex_unlock (&devicep
->lock
);
1829 static struct target_mem_desc
*
1830 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1831 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1832 bool short_mapkind
, htab_t
*refcount_set
,
1833 enum gomp_map_vars_kind pragma_kind
)
1835 /* This management of a local refcount_set is for convenience of callers
1836 who do not share a refcount_set over multiple map/unmap uses. */
1837 htab_t local_refcount_set
= NULL
;
1838 if (refcount_set
== NULL
)
1840 local_refcount_set
= htab_create (mapnum
);
1841 refcount_set
= &local_refcount_set
;
1844 struct target_mem_desc
*tgt
;
1845 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1846 sizes
, kinds
, short_mapkind
, refcount_set
,
1848 if (local_refcount_set
)
1849 htab_free (local_refcount_set
);
1854 attribute_hidden
struct target_mem_desc
*
1855 goacc_map_vars (struct gomp_device_descr
*devicep
,
1856 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1857 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1858 void *kinds
, bool short_mapkind
,
1859 enum gomp_map_vars_kind pragma_kind
)
1861 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1862 sizes
, kinds
, short_mapkind
, NULL
,
1863 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1867 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1869 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1871 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1878 gomp_unref_tgt (void *ptr
)
1880 bool is_tgt_unmapped
= false;
1882 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1884 if (tgt
->refcount
> 1)
1888 gomp_unmap_tgt (tgt
);
1889 is_tgt_unmapped
= true;
1892 return is_tgt_unmapped
;
/* void-returning wrapper around gomp_unref_tgt, suitable for use as an
   asynchronous-queue callback; the "was it unmapped" result is dropped.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1902 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1904 splay_tree_remove (sp
, k
);
1907 if (k
->aux
->link_key
)
1908 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1909 if (k
->aux
->attach_count
)
1910 free (k
->aux
->attach_count
);
/* NOTE(review): this region is a mangled extraction — tokens are split
   across lines and several original source lines are missing.  Code text
   is left byte-identical; only annotations are added.
   Purpose (from what is visible): remove the mapping(s) for key K from
   DEVICEP's mem_map.  Keys that are part of a GOMP_MAP_STRUCT group
   (REFCOUNT_STRUCTELEM) are removed as a group starting from the _FIRST
   element; returns whether the owning target_mem_desc was unmapped.  */
1916 static inline __attribute__((always_inline
)) bool
1917 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1918 struct goacc_asyncqueue
*aq
)
1920 bool is_tgt_unmapped
= false;
/* Structelem case: rewind K to the group's _FIRST key before removal.  */
1922 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1924 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1925 /* Infer the splay_tree_key of the first structelem key using the
1926 pointer to the first structleme_refcount. */
1927 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1928 - offsetof (struct splay_tree_key_s
,
1929 structelem_refcount
));
1930 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1932 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1933 with the splay_tree_keys embedded inside. */
1934 splay_tree_node node
=
1935 (splay_tree_node
) ((char *) k
1936 - offsetof (struct splay_tree_node_s
, key
))
;
1939 /* Starting from the _FIRST key, and continue for all following
1941 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1942 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
1949 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1952 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1955 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1956 return is_tgt_unmapped
;
1959 attribute_hidden
bool
1960 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1962 return gomp_remove_var_internal (devicep
, k
, NULL
);
1965 /* Remove a variable asynchronously.  This actually removes the variable
1966 mapping immediately, but retains the linked target_mem_desc until the
1967 asynchronous operation has completed (as it may still refer to target
1968 memory).  The device lock must be held before entry, and remains locked on
1969 exit.  */
1971 attribute_hidden
void
1972 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1973 struct goacc_asyncqueue
*aq
)
1975 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1978 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1979 variables back from device to host: if it is false, it is assumed that this
1980 has been done already. */
/* NOTE(review): mangled extraction — tokens split across lines, some
   original lines missing.  Code text left byte-identical; annotations
   only.  Purpose (from what is visible): unwind the mappings recorded in
   TGT->list: detach OpenACC pointers first, then decrement refcounts,
   copy data back to the host where requested, and finally drop one
   reference on TGT itself (via AQ callback when asynchronous).  */
1982 static inline __attribute__((always_inline
)) void
1983 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1984 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1986 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1988 if (tgt
->list_count
== 0)
1994 gomp_mutex_lock (&devicep
->lock
);
1995 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1997 gomp_mutex_unlock (&devicep
->lock
);
2005 /* We must perform detachments before any copies back to the host. */
2006 for (i
= 0; i
< tgt
->list_count
; i
++)
2008 splay_tree_key k
= tgt
->list
[i
].key
;
2010 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
2011 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
2012 + tgt
->list
[i
].offset
,
/* Second pass: refcounting, copy-back, and removal of dead keys.  */
2016 for (i
= 0; i
< tgt
->list_count
; i
++)
2018 splay_tree_key k
= tgt
->list
[i
].key
;
2022 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2023 counts ('n->refcount', 'n->dynamic_refcount'). */
2024 if (tgt
->list
[i
].is_attach
)
2027 bool do_copy
, do_remove
;
2028 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
2030 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
2031 || tgt
->list
[i
].always_copy_from
)
2032 gomp_copy_dev2host (devicep
, aq
,
2033 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
2034 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2035 + tgt
->list
[i
].offset
),
2036 tgt
->list
[i
].length
);
2039 struct target_mem_desc
*k_tgt
= k
->tgt
;
2040 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2041 /* It would be bad if TGT got unmapped while we're still iterating
2042 over its LIST_COUNT, and also expect to use it in the following
2044 assert (!is_tgt_unmapped
2050 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2053 gomp_unref_tgt ((void *) tgt
);
2055 gomp_mutex_unlock (&devicep
->lock
);
2059 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2060 htab_t
*refcount_set
)
2062 /* This management of a local refcount_set is for convenience of callers
2063 who do not share a refcount_set over multiple map/unmap uses. */
2064 htab_t local_refcount_set
= NULL
;
2065 if (refcount_set
== NULL
)
2067 local_refcount_set
= htab_create (tgt
->list_count
);
2068 refcount_set
= &local_refcount_set
;
2071 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2073 if (local_refcount_set
)
2074 htab_free (local_refcount_set
);
2077 attribute_hidden
void
2078 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2079 struct goacc_asyncqueue
*aq
)
2081 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
/* NOTE(review): mangled extraction — tokens split across lines, some
   original lines missing (including the return-type line).  Code text
   left byte-identical; annotations only.  Purpose (from what is
   visible): implement 'target update'-style data motion for MAPNUM
   entries — for each mapped region copy to and/or from the device per
   the map kind, stepping pointer-by-pointer around still-attached
   pointers; fatal error when a 'present' entry is not mapped.  */
2085 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2086 size_t *sizes
, void *kinds
, bool short_mapkind
)
2089 struct splay_tree_key_s cur_node
;
/* Short map kinds carry a full byte of kind bits; long ones only 3.  */
2090 const int typemask
= short_mapkind
? 0xff : 0x7;
2098 gomp_mutex_lock (&devicep
->lock
);
2099 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2101 gomp_mutex_unlock (&devicep
->lock
);
2105 for (i
= 0; i
< mapnum
; i
++)
2108 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2109 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2110 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2113 int kind
= get_kind (short_mapkind
, kinds
, i
);
2114 if (n
->host_start
> cur_node
.host_start
2115 || n
->host_end
< cur_node
.host_end
)
2117 gomp_mutex_unlock (&devicep
->lock
);
2118 gomp_fatal ("Trying to update [%p..%p) object when "
2119 "only [%p..%p) is mapped",
2120 (void *) cur_node
.host_start
,
2121 (void *) cur_node
.host_end
,
2122 (void *) n
->host_start
,
2123 (void *) n
->host_end
);
/* Region has attached pointers: update word-by-word, skipping any
   pointer slot whose attach count is nonzero.  */
2126 if (n
->aux
&& n
->aux
->attach_count
)
2128 uintptr_t addr
= cur_node
.host_start
;
2129 while (addr
< cur_node
.host_end
)
2131 /* We have to be careful not to overwrite still attached
2132 pointers during host<->device updates. */
2133 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2134 if (n
->aux
->attach_count
[i
] == 0)
2136 void *devaddr
= (void *) (n
->tgt
->tgt_start
2138 + addr
- n
->host_start
);
2139 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2140 gomp_copy_host2dev (devicep
, NULL
,
2141 devaddr
, (void *) addr
,
2142 sizeof (void *), false, NULL
);
2143 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2144 gomp_copy_dev2host (devicep
, NULL
,
2145 (void *) addr
, devaddr
,
2148 addr
+= sizeof (void *);
/* No attachments: copy the whole region in one go.  */
2153 void *hostaddr
= (void *) cur_node
.host_start
;
2154 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2155 + cur_node
.host_start
2157 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2159 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2160 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2162 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2163 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
/* Lookup failed: a 'present' map kind is a fatal diagnostic.  */
2168 int kind
= get_kind (short_mapkind
, kinds
, i
);
2170 if (GOMP_MAP_PRESENT_P (kind
))
2172 /* We already looked up the memory region above and it
2174 gomp_mutex_unlock (&devicep
->lock
);
2175 #ifdef HAVE_INTTYPES_H
2176 gomp_fatal ("present clause: not present on the device "
2177 "(addr: %p, size: %"PRIu64
" (0x%"PRIx64
"), "
2178 "dev: %d)", (void *) hostaddrs
[i
],
2179 (uint64_t) sizes
[i
], (uint64_t) sizes
[i
],
2180 devicep
->target_id
);
2182 gomp_fatal ("present clause: not present on the device "
2183 "(addr: %p, size: %lu (0x%lx), dev: %d)",
2184 (void *) hostaddrs
[i
], (unsigned long) sizes
[i
],
2185 (unsigned long) sizes
[i
], devicep
->target_id
);
2190 gomp_mutex_unlock (&devicep
->lock
);
2193 static struct gomp_offload_icv_list
*
2194 gomp_get_offload_icv_item (int dev_num
)
2196 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2197 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2203 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2204 depending on the device num and the variable hierarchy
2205 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2206 device and thus no item with that device number is contained in
2207 gomp_offload_icv_list, then a new item is created and added to the list. */
/* NOTE(review): mangled extraction — tokens split across lines, some
   original lines missing (including the final return of &new->icvs,
   presumably).  Code text left byte-identical; annotations only.
   Purpose (per the comment above): resolve the offload ICVs for DEV_NUM,
   creating and prepending a list entry when none exists; each ICV is
   picked by precedence _DEV_<n> over _DEV over _ALL over defaults.  */
2209 static struct gomp_offload_icvs
*
2210 get_gomp_offload_icvs (int dev_num
)
2212 struct gomp_icv_list
*dev
2213 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2214 struct gomp_icv_list
*all
2215 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2216 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2217 struct gomp_offload_icv_list
*offload_icvs
2218 = gomp_get_offload_icv_item (dev_num
);
/* Fast path: ICVs for this device were already materialized.  */
2220 if (offload_icvs
!= NULL
)
2221 return &offload_icvs
->icvs
;
2223 struct gomp_offload_icv_list
*new
2224 = (struct gomp_offload_icv_list
*) gomp_malloc (sizeof (struct gomp_offload_icv_list
));
2226 new->device_num
= dev_num
;
2227 new->icvs
.device_num
= dev_num
;
2228 new->next
= gomp_offload_icv_list
;
/* nteams: _DEV_<n>, then _DEV, then _ALL, then the built-in default.  */
2230 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2231 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2232 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2233 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2234 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2235 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2237 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
/* teams_thread_limit: same precedence chain.  */
2240 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2241 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2242 else if (dev
!= NULL
2243 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2244 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2245 else if (all
!= NULL
2246 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2247 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2249 new->icvs
.teams_thread_limit
2250 = gomp_default_icv_values
.teams_thread_limit_var
;
/* default_device: same precedence chain.  */
2253 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2254 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2255 else if (dev
!= NULL
2256 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2257 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2258 else if (all
!= NULL
2259 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2260 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2262 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
2264 gomp_offload_icv_list
= new;
2268 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2269 And insert to splay tree the mapping between addresses from HOST_TABLE and
2270 from loaded target image. We rely in the host and device compiler
2271 emitting variable and functions in the same order. */
2274 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2275 const void *host_table
, const void *target_data
,
2276 bool is_register_lock
)
2278 void **host_func_table
= ((void ***) host_table
)[0];
2279 void **host_funcs_end
= ((void ***) host_table
)[1];
2280 void **host_var_table
= ((void ***) host_table
)[2];
2281 void **host_vars_end
= ((void ***) host_table
)[3];
2282 void **host_ind_func_table
= NULL
;
2283 void **host_ind_funcs_end
= NULL
;
2285 if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version
))
2287 host_ind_func_table
= ((void ***) host_table
)[4];
2288 host_ind_funcs_end
= ((void ***) host_table
)[5];
2291 /* The func and ind_func tables contain only addresses, the var table
2292 contains addresses and corresponding sizes. */
2293 int num_funcs
= host_funcs_end
- host_func_table
;
2294 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2295 int num_ind_funcs
= (host_ind_funcs_end
- host_ind_func_table
);
2297 /* Load image to device and get target addresses for the image. */
2298 struct addr_pair
*target_table
= NULL
;
2299 uint64_t *rev_target_fn_table
= NULL
;
2300 int i
, num_target_entries
;
2302 /* With reverse offload, insert also target-host addresses. */
2303 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2306 = devicep
->load_image_func (devicep
->target_id
, version
,
2307 target_data
, &target_table
,
2308 rev_lookup
? &rev_target_fn_table
: NULL
,
2310 ? (uint64_t *) host_ind_func_table
: NULL
);
2312 if (num_target_entries
!= num_funcs
+ num_vars
2313 /* "+1" due to the additional ICV struct. */
2314 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2316 gomp_mutex_unlock (&devicep
->lock
);
2317 if (is_register_lock
)
2318 gomp_mutex_unlock (®ister_lock
);
2319 gomp_fatal ("Cannot map target functions or variables"
2320 " (expected %u, have %u)", num_funcs
+ num_vars
,
2321 num_target_entries
);
2324 /* Insert host-target address mapping into splay tree. */
2325 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2326 /* "+1" due to the additional ICV struct. */
2327 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2328 * sizeof (*tgt
->array
));
2329 if (rev_target_fn_table
)
2330 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2332 tgt
->rev_array
= NULL
;
2333 tgt
->refcount
= REFCOUNT_INFINITY
;
2336 tgt
->to_free
= NULL
;
2338 tgt
->list_count
= 0;
2339 tgt
->device_descr
= devicep
;
2340 splay_tree_node array
= tgt
->array
;
2341 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2343 for (i
= 0; i
< num_funcs
; i
++)
2345 splay_tree_key k
= &array
->key
;
2346 k
->host_start
= (uintptr_t) host_func_table
[i
];
2347 k
->host_end
= k
->host_start
+ 1;
2349 k
->tgt_offset
= target_table
[i
].start
;
2350 k
->refcount
= REFCOUNT_INFINITY
;
2351 k
->dynamic_refcount
= 0;
2354 array
->right
= NULL
;
2355 splay_tree_insert (&devicep
->mem_map
, array
);
2356 if (rev_target_fn_table
)
2358 reverse_splay_tree_key k2
= &rev_array
->key
;
2359 k2
->dev
= rev_target_fn_table
[i
];
2361 rev_array
->left
= NULL
;
2362 rev_array
->right
= NULL
;
2364 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2370 /* Most significant bit of the size in host and target tables marks
2371 "omp declare target link" variables. */
2372 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2373 const uintptr_t size_mask
= ~link_bit
;
2375 for (i
= 0; i
< num_vars
; i
++)
2377 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2378 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2379 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2381 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2383 gomp_mutex_unlock (&devicep
->lock
);
2384 if (is_register_lock
)
2385 gomp_mutex_unlock (®ister_lock
);
2386 gomp_fatal ("Cannot map target variables (size mismatch)");
2389 splay_tree_key k
= &array
->key
;
2390 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2392 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2394 k
->tgt_offset
= target_var
->start
;
2395 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2396 k
->dynamic_refcount
= 0;
2399 array
->right
= NULL
;
2400 splay_tree_insert (&devicep
->mem_map
, array
);
2404 /* Last entry is for a ICVs variable.
2405 Tolerate case where plugin does not return those entries. */
2406 if (num_funcs
+ num_vars
< num_target_entries
)
2408 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2410 /* Start address will be non-zero for the ICVs variable if
2411 the variable was found in this image. */
2412 if (var
->start
!= 0)
2414 /* The index of the devicep within devices[] is regarded as its
2415 'device number', which is different from the per-device type
2416 devicep->target_id. */
2417 int dev_num
= (int) (devicep
- &devices
[0]);
2418 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2419 size_t var_size
= var
->end
- var
->start
;
2420 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2422 gomp_mutex_unlock (&devicep
->lock
);
2423 if (is_register_lock
)
2424 gomp_mutex_unlock (®ister_lock
);
2425 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2428 /* Copy the ICVs variable to place on device memory, hereby
2429 actually designating its device number into effect. */
2430 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2431 var_size
, false, NULL
);
2432 splay_tree_key k
= &array
->key
;
2433 k
->host_start
= (uintptr_t) icvs
;
2435 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2437 k
->tgt_offset
= var
->start
;
2438 k
->refcount
= REFCOUNT_INFINITY
;
2439 k
->dynamic_refcount
= 0;
2442 array
->right
= NULL
;
2443 splay_tree_insert (&devicep
->mem_map
, array
);
2448 free (target_table
);
2451 /* Unload the mappings described by target_data from device DEVICE_P.
2452 The device must be locked. */
2455 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2457 const void *host_table
, const void *target_data
)
2459 void **host_func_table
= ((void ***) host_table
)[0];
2460 void **host_funcs_end
= ((void ***) host_table
)[1];
2461 void **host_var_table
= ((void ***) host_table
)[2];
2462 void **host_vars_end
= ((void ***) host_table
)[3];
2464 /* The func table contains only addresses, the var table contains addresses
2465 and corresponding sizes. */
2466 int num_funcs
= host_funcs_end
- host_func_table
;
2467 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2469 struct splay_tree_key_s k
;
2470 splay_tree_key node
= NULL
;
2472 /* Find mapping at start of node array */
2473 if (num_funcs
|| num_vars
)
2475 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2476 : (uintptr_t) host_var_table
[0]);
2477 k
.host_end
= k
.host_start
+ 1;
2478 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2481 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2483 gomp_mutex_unlock (&devicep
->lock
);
2484 gomp_fatal ("image unload fail");
2486 if (devicep
->mem_map_rev
.root
)
2488 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2490 assert (node
&& node
->tgt
&& node
->tgt
->rev_array
);
2491 assert (devicep
->mem_map_rev
.root
->key
.k
->tgt
== node
->tgt
);
2492 free (node
->tgt
->rev_array
);
2493 devicep
->mem_map_rev
.root
= NULL
;
2496 /* Remove mappings from splay tree. */
2498 for (i
= 0; i
< num_funcs
; i
++)
2500 k
.host_start
= (uintptr_t) host_func_table
[i
];
2501 k
.host_end
= k
.host_start
+ 1;
2502 splay_tree_remove (&devicep
->mem_map
, &k
);
2505 /* Most significant bit of the size in host and target tables marks
2506 "omp declare target link" variables. */
2507 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2508 const uintptr_t size_mask
= ~link_bit
;
2509 bool is_tgt_unmapped
= false;
2511 for (i
= 0; i
< num_vars
; i
++)
2513 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2515 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2517 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2518 splay_tree_remove (&devicep
->mem_map
, &k
);
2521 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2522 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2526 if (node
&& !is_tgt_unmapped
)
2534 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2536 char *end
= buf
+ size
, *p
= buf
;
2537 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2538 p
+= snprintf (p
, end
- p
, "unified_address");
2539 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2540 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2541 (p
== buf
? "" : ", "));
2542 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2543 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2544 (p
== buf
? "" : ", "));
2547 /* This function should be called from every offload image while loading.
2548 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2549 the target, and DATA. */
2552 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2553 int target_type
, const void *data
)
2557 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2558 gomp_fatal ("Library too old for offload (version %u < %u)",
2559 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2562 const void *target_data
;
2563 if (GOMP_VERSION_LIB (version
) > 1)
2565 omp_req
= (int) (size_t) ((void **) data
)[0];
2566 target_data
= &((void **) data
)[1];
2574 gomp_mutex_lock (®ister_lock
);
2576 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
2578 char buf1
[sizeof ("unified_address, unified_shared_memory, "
2579 "reverse_offload")];
2580 char buf2
[sizeof ("unified_address, unified_shared_memory, "
2581 "reverse_offload")];
2582 gomp_requires_to_name (buf2
, sizeof (buf2
),
2583 omp_req
!= GOMP_REQUIRES_TARGET_USED
2584 ? omp_req
: omp_requires_mask
);
2585 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
2586 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
2588 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
2589 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
2590 "in multiple compilation units: '%s' vs. '%s'",
2594 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
2595 "some compilation units", buf2
);
2597 omp_requires_mask
= omp_req
;
2599 /* Load image to all initialized devices. */
2600 for (i
= 0; i
< num_devices
; i
++)
2602 struct gomp_device_descr
*devicep
= &devices
[i
];
2603 gomp_mutex_lock (&devicep
->lock
);
2604 if (devicep
->type
== target_type
2605 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2606 gomp_load_image_to_device (devicep
, version
,
2607 host_table
, target_data
, true);
2608 gomp_mutex_unlock (&devicep
->lock
);
2611 /* Insert image to array of pending images. */
2613 = gomp_realloc_unlock (offload_images
,
2614 (num_offload_images
+ 1)
2615 * sizeof (struct offload_image_descr
));
2616 offload_images
[num_offload_images
].version
= version
;
2617 offload_images
[num_offload_images
].type
= target_type
;
2618 offload_images
[num_offload_images
].host_table
= host_table
;
2619 offload_images
[num_offload_images
].target_data
= target_data
;
2621 num_offload_images
++;
2622 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: pre-versioned registration, forwarded with
   version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
2634 /* This function should be called from every offload image while unloading.
2635 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2636 the target, and DATA. */
2639 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
2640 int target_type
, const void *data
)
2644 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2645 gomp_fatal ("Library too old for offload (version %u < %u)",
2646 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
2648 const void *target_data
;
2649 if (GOMP_VERSION_LIB (version
) > 1)
2650 target_data
= &((void **) data
)[1];
2654 gomp_mutex_lock (®ister_lock
);
2656 /* Unload image from all initialized devices. */
2657 for (i
= 0; i
< num_devices
; i
++)
2659 struct gomp_device_descr
*devicep
= &devices
[i
];
2660 gomp_mutex_lock (&devicep
->lock
);
2661 if (devicep
->type
== target_type
2662 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2663 gomp_unload_image_from_device (devicep
, version
,
2664 host_table
, target_data
);
2665 gomp_mutex_unlock (&devicep
->lock
);
2668 /* Remove image from array of pending images. */
2669 for (i
= 0; i
< num_offload_images
; i
++)
2670 if (offload_images
[i
].target_data
== target_data
)
2672 offload_images
[i
] = offload_images
[--num_offload_images
];
2676 gomp_mutex_unlock (®ister_lock
);
/* Legacy entry point: pre-versioned unregistration, forwarded with
   version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2688 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2689 must be locked on entry, and remains locked on return. */
2691 attribute_hidden
void
2692 gomp_init_device (struct gomp_device_descr
*devicep
)
2695 if (!devicep
->init_device_func (devicep
->target_id
))
2697 gomp_mutex_unlock (&devicep
->lock
);
2698 gomp_fatal ("device initialization failed");
2701 /* Load to device all images registered by the moment. */
2702 for (i
= 0; i
< num_offload_images
; i
++)
2704 struct offload_image_descr
*image
= &offload_images
[i
];
2705 if (image
->type
== devicep
->type
)
2706 gomp_load_image_to_device (devicep
, image
->version
,
2707 image
->host_table
, image
->target_data
,
2711 /* Initialize OpenACC asynchronous queues. */
2712 goacc_init_asyncqueues (devicep
);
2714 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2717 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2718 must be locked on entry, and remains locked on return. */
2720 attribute_hidden
bool
2721 gomp_fini_device (struct gomp_device_descr
*devicep
)
2723 bool ret
= goacc_fini_asyncqueues (devicep
);
2724 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2725 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2729 attribute_hidden
void
2730 gomp_unload_device (struct gomp_device_descr
*devicep
)
2732 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2736 /* Unload from device all images registered at the moment. */
2737 for (i
= 0; i
< num_offload_images
; i
++)
2739 struct offload_image_descr
*image
= &offload_images
[i
];
2740 if (image
->type
== devicep
->type
)
2741 gomp_unload_image_from_device (devicep
, image
->version
,
2743 image
->target_data
);
2748 /* Host fallback for GOMP_target{,_ext} routines. */
2751 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
2752 struct gomp_device_descr
*devicep
, void **args
)
2754 struct gomp_thread old_thr
, *thr
= gomp_thread ();
2756 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
2758 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2759 "be used for offloading");
2762 memset (thr
, '\0', sizeof (*thr
));
2763 if (gomp_places_list
)
2765 thr
->place
= old_thr
.place
;
2766 thr
->ts
.place_partition_len
= gomp_places_list_len
;
2771 intptr_t id
= (intptr_t) *args
++, val
;
2772 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2773 val
= (intptr_t) *args
++;
2775 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2776 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2778 id
&= GOMP_TARGET_ARG_ID_MASK
;
2779 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
2781 val
= val
> INT_MAX
? INT_MAX
: val
;
2783 gomp_icv (true)->thread_limit_var
= val
;
2788 gomp_free_thread (thr
);
2792 /* Calculate alignment and size requirements of a private copy of data shared
2793 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2796 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2797 unsigned short *kinds
, size_t *tgt_align
,
2801 for (i
= 0; i
< mapnum
; i
++)
2802 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2804 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2805 if (*tgt_align
< align
)
2807 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2808 *tgt_size
+= sizes
[i
];
2812 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
2815 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
2816 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
2819 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
2821 tgt
+= tgt_align
- al
;
2824 for (i
= 0; i
< mapnum
; i
++)
2825 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
2827 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2828 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2829 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
2830 hostaddrs
[i
] = tgt
+ tgt_size
;
2831 tgt_size
= tgt_size
+ sizes
[i
];
2832 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
2834 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
2840 /* Helper function of GOMP_target{,_ext} routines. */
2843 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2844 void (*host_fn
) (void *))
2846 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2847 return (void *) host_fn
;
2850 gomp_mutex_lock (&devicep
->lock
);
2851 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2853 gomp_mutex_unlock (&devicep
->lock
);
2857 struct splay_tree_key_s k
;
2858 k
.host_start
= (uintptr_t) host_fn
;
2859 k
.host_end
= k
.host_start
+ 1;
2860 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2861 gomp_mutex_unlock (&devicep
->lock
);
2865 return (void *) tgt_fn
->tgt_offset
;
2869 /* Called when encountering a target directive. If DEVICE
2870 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
2871 GOMP_DEVICE_HOST_FALLBACK (or any value
2872 larger than last available hw device), use host fallback.
2873 FN is address of host code, UNUSED is part of the current ABI, but
2874 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
2875 with MAPNUM entries, with addresses of the host objects,
2876 sizes of the host objects (resp. for pointer kind pointer bias
2877 and assumed sizeof (void *) size) and kinds. */
2880 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
2881 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
2882 unsigned char *kinds
)
2884 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2888 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
2889 /* All shared memory devices should use the GOMP_target_ext function. */
2890 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
2891 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
2892 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
2894 htab_t refcount_set
= htab_create (mapnum
);
2895 struct target_mem_desc
*tgt_vars
2896 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
2897 &refcount_set
, GOMP_MAP_VARS_TARGET
);
2898 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
2900 htab_clear (refcount_set
);
2901 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
2902 htab_free (refcount_set
);
2905 static inline unsigned int
2906 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2908 /* If we cannot run asynchronously, simply ignore nowait. */
2909 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2910 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2916 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
2918 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2922 void *host_ptr
= &item
->icvs
;
2923 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
2924 if (dev_ptr
!= NULL
)
2925 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
2926 sizeof (struct gomp_offload_icvs
));
2929 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2930 and several arguments have been added:
2931 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2932 DEPEND is array of dependencies, see GOMP_task for details.
2934 ARGS is a pointer to an array consisting of a variable number of both
2935 device-independent and device-specific arguments, which can take one two
2936 elements where the first specifies for which device it is intended, the type
2937 and optionally also the value. If the value is not present in the first
2938 one, the whole second element the actual value. The last element of the
2939 array is a single NULL. Among the device independent can be for example
2940 NUM_TEAMS and THREAD_LIMIT.
2942 NUM_TEAMS is positive if GOMP_teams will be called in the body with
2943 that value, or 1 if teams construct is not present, or 0, if
2944 teams construct does not have num_teams clause and so the choice is
2945 implementation defined, and -1 if it can't be determined on the host
2946 what value will GOMP_teams have on the device.
2947 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2948 body with that value, or 0, if teams construct does not have thread_limit
2949 clause or the teams construct is not present, or -1 if it can't be
2950 determined on the host what value will GOMP_teams have on the device. */
2953 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
2954 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
2955 unsigned int flags
, void **depend
, void **args
)
2957 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
2958 size_t tgt_align
= 0, tgt_size
= 0;
2959 bool fpc_done
= false;
2961 /* Obtain the original TEAMS and THREADS values from ARGS. */
2962 intptr_t orig_teams
= 1, orig_threads
= 0;
2963 size_t num_args
= 0, len
= 1, teams_len
= 1, threads_len
= 1;
2964 void **tmpargs
= args
;
2967 intptr_t id
= (intptr_t) *tmpargs
++, val
;
2968 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
2970 val
= (intptr_t) *tmpargs
++;
2975 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
2979 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
2981 val
= val
> INT_MAX
? INT_MAX
: val
;
2982 if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
)
2987 else if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
)
2994 intptr_t new_teams
= orig_teams
, new_threads
= orig_threads
;
2995 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
2996 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
2997 value could not be determined. No change.
2998 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
2999 Set device-specific value.
3000 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3002 if (orig_teams
== -2)
3004 else if (orig_teams
== 0)
3006 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3008 new_teams
= item
->icvs
.nteams
;
3010 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3011 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3012 e.g. a THREAD_LIMIT clause. */
3013 if (orig_teams
> -2 && orig_threads
== 0)
3015 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3017 new_threads
= item
->icvs
.teams_thread_limit
;
3020 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3022 void **new_args
= args
;
3023 if (orig_teams
!= new_teams
|| orig_threads
!= new_threads
)
3025 size_t tms_len
= (orig_teams
== new_teams
3027 : (new_teams
> -(1 << 15) && new_teams
< (1 << 15)
3029 size_t ths_len
= (orig_threads
== new_threads
3031 : (new_threads
> -(1 << 15) && new_threads
< (1 << 15)
3033 /* One additional item after the last arg must be NULL. */
3034 size_t new_args_cnt
= num_args
- teams_len
- threads_len
+ tms_len
3036 new_args
= (void **) gomp_alloca (new_args_cnt
* sizeof (void*));
3039 void **tmp_new_args
= new_args
;
3040 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3041 too if they have not been changed and skipped otherwise. */
3044 intptr_t id
= (intptr_t) *tmpargs
;
3045 if (((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
3046 && orig_teams
!= new_teams
)
3047 || ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
3048 && orig_threads
!= new_threads
))
3051 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3056 *tmp_new_args
++ = *tmpargs
++;
3057 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3058 *tmp_new_args
++ = *tmpargs
++;
3062 /* Add the new TEAMS arg to the new args list if it has been changed. */
3063 if (orig_teams
!= new_teams
)
3065 intptr_t new_val
= new_teams
;
3068 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3069 | GOMP_TARGET_ARG_NUM_TEAMS
;
3070 *tmp_new_args
++ = (void *) new_val
;
3074 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3075 | GOMP_TARGET_ARG_NUM_TEAMS
);
3076 *tmp_new_args
++ = (void *) new_val
;
3080 /* Add the new THREADS arg to the new args list if it has been changed. */
3081 if (orig_threads
!= new_threads
)
3083 intptr_t new_val
= new_threads
;
3086 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3087 | GOMP_TARGET_ARG_THREAD_LIMIT
;
3088 *tmp_new_args
++ = (void *) new_val
;
3092 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3093 | GOMP_TARGET_ARG_THREAD_LIMIT
);
3094 *tmp_new_args
++ = (void *) new_val
;
3098 *tmp_new_args
= NULL
;
3101 flags
= clear_unsupported_flags (devicep
, flags
);
3103 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
3105 struct gomp_thread
*thr
= gomp_thread ();
3106 /* Create a team if we don't have any around, as nowait
3107 target tasks make sense to run asynchronously even when
3108 outside of any parallel. */
3109 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
3111 struct gomp_team
*team
= gomp_new_team (1);
3112 struct gomp_task
*task
= thr
->task
;
3113 struct gomp_task
**implicit_task
= &task
;
3114 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
3115 team
->prev_ts
= thr
->ts
;
3116 thr
->ts
.team
= team
;
3117 thr
->ts
.team_id
= 0;
3118 thr
->ts
.work_share
= &team
->work_shares
[0];
3119 thr
->ts
.last_work_share
= NULL
;
3120 #ifdef HAVE_SYNC_BUILTINS
3121 thr
->ts
.single_count
= 0;
3123 thr
->ts
.static_trip
= 0;
3124 thr
->task
= &team
->implicit_task
[0];
3125 gomp_init_task (thr
->task
, NULL
, icv
);
3126 while (*implicit_task
3127 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
3128 implicit_task
= &(*implicit_task
)->parent
;
3131 thr
->task
= *implicit_task
;
3133 free (*implicit_task
);
3134 thr
->task
= &team
->implicit_task
[0];
3137 pthread_setspecific (gomp_thread_destructor
, thr
);
3138 if (implicit_task
!= &task
)
3140 *implicit_task
= thr
->task
;
3145 && !thr
->task
->final_task
)
3147 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
3148 sizes
, kinds
, flags
, depend
, new_args
,
3149 GOMP_TARGET_TASK_BEFORE_MAP
);
3154 /* If there are depend clauses, but nowait is not present
3155 (or we are in a final task), block the parent task until the
3156 dependencies are resolved and then just continue with the rest
3157 of the function as if it is a merged task. */
3160 struct gomp_thread
*thr
= gomp_thread ();
3161 if (thr
->task
&& thr
->task
->depend_hash
)
3163 /* If we might need to wait, copy firstprivate now. */
3164 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3165 &tgt_align
, &tgt_size
);
3168 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3169 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3170 tgt_align
, tgt_size
);
3173 gomp_task_maybe_wait_for_dependencies (depend
);
3179 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3180 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
3181 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3185 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3186 &tgt_align
, &tgt_size
);
3189 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3190 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3191 tgt_align
, tgt_size
);
3194 gomp_target_fallback (fn
, hostaddrs
, devicep
, new_args
);
3198 struct target_mem_desc
*tgt_vars
;
3199 htab_t refcount_set
= NULL
;
3201 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3205 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3206 &tgt_align
, &tgt_size
);
3209 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3210 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3211 tgt_align
, tgt_size
);
3218 refcount_set
= htab_create (mapnum
);
3219 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
3220 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
3222 devicep
->run_func (devicep
->target_id
, fn_addr
,
3223 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
3227 htab_clear (refcount_set
);
3228 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3231 htab_free (refcount_set
);
3233 /* Copy back ICVs from device to host.
3234 HOST_PTR is expected to exist since it was added in
3235 gomp_load_image_to_device if not already available. */
3236 gomp_copy_back_icvs (devicep
, device
);
3241 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3242 keeping track of all variable handling - assuming that reverse offload occurs
3243 ony very rarely. Downside is that the reverse search is slow. */
3245 struct gomp_splay_tree_rev_lookup_data
{
3246 uintptr_t tgt_start
;
3252 gomp_splay_tree_rev_lookup (splay_tree_key key
, void *d
)
3254 struct gomp_splay_tree_rev_lookup_data
*data
;
3255 data
= (struct gomp_splay_tree_rev_lookup_data
*)d
;
3256 uintptr_t tgt_start
= key
->tgt
->tgt_start
+ key
->tgt_offset
;
3258 if (tgt_start
> data
->tgt_start
|| key
->tgt
->list_count
== 0)
3262 for (j
= 0; j
< key
->tgt
->list_count
; j
++)
3263 if (key
->tgt
->list
[j
].key
== key
)
3265 assert (j
< key
->tgt
->list_count
);
3266 uintptr_t tgt_end
= tgt_start
+ key
->tgt
->list
[j
].length
;
3268 if ((tgt_start
== data
->tgt_start
&& tgt_end
== data
->tgt_end
)
3269 || (tgt_end
> data
->tgt_start
&& tgt_start
< data
->tgt_end
))
3277 static inline splay_tree_key
3278 gomp_map_rev_lookup (splay_tree mem_map
, uint64_t tgt_start
, uint64_t tgt_end
,
3281 struct gomp_splay_tree_rev_lookup_data data
;
3283 data
.tgt_start
= tgt_start
;
3284 data
.tgt_end
= tgt_end
;
3286 if (tgt_start
!= tgt_end
)
3288 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3293 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3294 if (data
.key
!= NULL
|| zero_len
)
3299 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3306 bool present
, aligned
;
3310 /* Search just mapped reverse-offload data; returns index if found,
3314 gomp_map_cdata_lookup_int (struct cpy_data
*d
, uint64_t *devaddrs
,
3315 unsigned short *kinds
, uint64_t *sizes
, size_t n
,
3316 uint64_t tgt_start
, uint64_t tgt_end
)
3318 const bool short_mapkind
= true;
3319 const int typemask
= short_mapkind
? 0xff : 0x7;
3321 for (i
= 0; i
< n
; i
++)
3323 bool is_struct
= ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3324 == GOMP_MAP_STRUCT
);
3327 dev_end
= d
[i
].devaddr
+ sizes
[i
];
3330 if (i
+ sizes
[i
] < n
)
3331 dev_end
= d
[i
+ sizes
[i
]].devaddr
+ sizes
[i
+ sizes
[i
]];
3333 dev_end
= devaddrs
[i
+ sizes
[i
]] + sizes
[i
+ sizes
[i
]];
3335 if ((d
[i
].devaddr
== tgt_start
&& dev_end
== tgt_end
)
3336 || (dev_end
> tgt_start
&& d
[i
].devaddr
< tgt_end
))
/* Wrapper around gomp_map_cdata_lookup_int handling zero-length ranges:
   retry with the range widened one byte forward and (unless ZERO_LEN)
   one byte backward, mirroring gomp_map_rev_lookup.  */

static inline size_t
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  size_t i;
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);
  tgt_end++;
  i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				 tgt_start, tgt_end);
  if (i < n || zero_len)
    return i;
  tgt_end--;

  tgt_start--;
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start, tgt_end);
}
3366 /* Handle reverse offload. This is called by the device plugins for a
3367 reverse offload; it is not called if the outer target runs on the host.
3368 The mapping is simplified device-affecting constructs (except for target
3369 with device(ancestor:1)) must not be encountered; in particular not
3370 target (enter/exit) data. */
3373 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3374 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3375 struct goacc_asyncqueue
*aq
)
3377 /* Return early if there is no offload code. */
3378 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3380 /* Currently, this fails because of calculate_firstprivate_requirements
3381 below; it could be fixed but additional code needs to be updated to
3382 handle 32bit hosts - thus, it is not worthwhile. */
3383 if (sizeof (void *) != sizeof (uint64_t))
3384 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3386 struct cpy_data
*cdata
= NULL
;
3389 unsigned short *kinds
;
3390 const bool short_mapkind
= true;
3391 const int typemask
= short_mapkind
? 0xff : 0x7;
3392 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3394 reverse_splay_tree_key n
;
3395 struct reverse_splay_tree_key_s k
;
3398 gomp_mutex_lock (&devicep
->lock
);
3399 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3400 gomp_mutex_unlock (&devicep
->lock
);
3403 gomp_fatal ("Cannot find reverse-offload function");
3404 void (*host_fn
)() = (void (*)()) n
->k
->host_start
;
3406 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) || mapnum
== 0)
3408 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3409 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3410 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3414 devaddrs
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3415 sizes
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3416 kinds
= (unsigned short *) gomp_malloc (mapnum
* sizeof (unsigned short));
3417 gomp_copy_dev2host (devicep
, aq
, devaddrs
,
3418 (const void *) (uintptr_t) devaddrs_ptr
,
3419 mapnum
* sizeof (uint64_t));
3420 gomp_copy_dev2host (devicep
, aq
, sizes
,
3421 (const void *) (uintptr_t) sizes_ptr
,
3422 mapnum
* sizeof (uint64_t));
3423 gomp_copy_dev2host (devicep
, aq
, kinds
,
3424 (const void *) (uintptr_t) kinds_ptr
,
3425 mapnum
* sizeof (unsigned short));
3426 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3427 exit (EXIT_FAILURE
);
3430 size_t tgt_align
= 0, tgt_size
= 0;
3432 /* If actually executed on 32bit systems, the casts lead to wrong code;
3433 but 32bit with offloading is not supported; see top of this function. */
3434 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3435 (void *) (uintptr_t) kinds
,
3436 &tgt_align
, &tgt_size
);
3440 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3441 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3443 tgt
+= tgt_align
- al
;
3445 for (uint64_t i
= 0; i
< mapnum
; i
++)
3446 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3447 && devaddrs
[i
] != 0)
3449 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3450 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3451 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3452 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3456 gomp_copy_dev2host (devicep
, aq
, tgt
+ tgt_size
,
3457 (void *) (uintptr_t) devaddrs
[i
],
3459 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3460 exit (EXIT_FAILURE
);
3462 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3463 tgt_size
= tgt_size
+ sizes
[i
];
3464 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3466 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3467 == GOMP_MAP_ATTACH
))
3469 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3470 = (uint64_t) devaddrs
[i
];
3476 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3478 size_t j
, struct_cpy
= 0;
3480 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3481 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3482 gomp_mutex_lock (&devicep
->lock
);
3483 for (uint64_t i
= 0; i
< mapnum
; i
++)
3485 if (devaddrs
[i
] == 0)
3488 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3491 case GOMP_MAP_FIRSTPRIVATE
:
3492 case GOMP_MAP_FIRSTPRIVATE_INT
:
3495 case GOMP_MAP_DELETE
:
3496 case GOMP_MAP_RELEASE
:
3497 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3498 /* Assume it is present; look it up - but ignore unless the
3499 present clause is there. */
3500 case GOMP_MAP_ALLOC
:
3502 case GOMP_MAP_FORCE_ALLOC
:
3503 case GOMP_MAP_FORCE_FROM
:
3504 case GOMP_MAP_ALWAYS_FROM
:
3506 case GOMP_MAP_TOFROM
:
3507 case GOMP_MAP_FORCE_TO
:
3508 case GOMP_MAP_FORCE_TOFROM
:
3509 case GOMP_MAP_ALWAYS_TO
:
3510 case GOMP_MAP_ALWAYS_TOFROM
:
3511 case GOMP_MAP_FORCE_PRESENT
:
3512 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
3513 case GOMP_MAP_ALWAYS_PRESENT_TO
:
3514 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
3515 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3516 cdata
[i
].devaddr
= devaddrs
[i
];
3517 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3518 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
3519 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3521 devaddrs
[i
] + sizes
[i
], zero_len
);
3525 cdata
[i
].present
= true;
3526 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
3530 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3532 devaddrs
[i
] + sizes
[i
], zero_len
);
3533 cdata
[i
].present
= n2
!= NULL
;
3535 if (!cdata
[i
].present
&& GOMP_MAP_PRESENT_P (kind
))
3537 gomp_mutex_unlock (&devicep
->lock
);
3538 #ifdef HAVE_INTTYPES_H
3539 gomp_fatal ("present clause: no corresponding data on "
3540 "parent device at %p with size %"PRIu64
,
3541 (void *) (uintptr_t) devaddrs
[i
],
3542 (uint64_t) sizes
[i
]);
3544 gomp_fatal ("present clause: no corresponding data on "
3545 "parent device at %p with size %lu",
3546 (void *) (uintptr_t) devaddrs
[i
],
3547 (unsigned long) sizes
[i
]);
3551 else if (!cdata
[i
].present
3552 && kind
!= GOMP_MAP_DELETE
3553 && kind
!= GOMP_MAP_RELEASE
3554 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
3556 cdata
[i
].aligned
= true;
3557 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3559 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3562 else if (n2
!= NULL
)
3563 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3564 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3565 if (((!cdata
[i
].present
|| struct_cpy
)
3566 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
3567 || kind
== GOMP_MAP_FORCE_TO
3568 || kind
== GOMP_MAP_FORCE_TOFROM
3569 || GOMP_MAP_ALWAYS_TO_P (kind
))
3571 gomp_copy_dev2host (devicep
, aq
,
3572 (void *) (uintptr_t) devaddrs
[i
],
3573 (void *) (uintptr_t) cdata
[i
].devaddr
,
3575 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3577 gomp_mutex_unlock (&devicep
->lock
);
3578 exit (EXIT_FAILURE
);
3584 case GOMP_MAP_ATTACH
:
3585 case GOMP_MAP_POINTER
:
3586 case GOMP_MAP_ALWAYS_POINTER
:
3587 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3588 devaddrs
[i
] + sizes
[i
],
3589 devaddrs
[i
] + sizes
[i
]
3590 + sizeof (void*), false);
3591 cdata
[i
].present
= n2
!= NULL
;
3592 cdata
[i
].devaddr
= devaddrs
[i
];
3594 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3595 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3598 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3599 devaddrs
[i
] + sizes
[i
],
3600 devaddrs
[i
] + sizes
[i
]
3601 + sizeof (void*), false);
3604 cdata
[i
].present
= true;
3605 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3606 - cdata
[j
].devaddr
);
3609 if (!cdata
[i
].present
)
3610 devaddrs
[i
] = (uintptr_t) gomp_malloc (sizeof (void*));
3611 /* Assume that when present, the pointer is already correct. */
3613 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
3616 case GOMP_MAP_TO_PSET
:
3617 /* Assume that when present, the pointers are fine and no 'to:'
3619 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3620 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
3622 cdata
[i
].present
= n2
!= NULL
;
3623 cdata
[i
].devaddr
= devaddrs
[i
];
3625 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3626 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3629 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3631 devaddrs
[i
] + sizes
[i
], false);
3634 cdata
[i
].present
= true;
3635 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3636 - cdata
[j
].devaddr
);
3639 if (!cdata
[i
].present
)
3641 cdata
[i
].aligned
= true;
3642 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3644 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3646 gomp_copy_dev2host (devicep
, aq
,
3647 (void *) (uintptr_t) devaddrs
[i
],
3648 (void *) (uintptr_t) cdata
[i
].devaddr
,
3650 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3652 gomp_mutex_unlock (&devicep
->lock
);
3653 exit (EXIT_FAILURE
);
3656 for (j
= i
+ 1; j
< mapnum
; j
++)
3658 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
3659 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
3660 && !GOMP_MAP_POINTER_P (kind
))
3662 if (devaddrs
[j
] < devaddrs
[i
])
3664 if (cdata
[i
].present
)
3666 if (devaddrs
[j
] == 0)
3668 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
3673 /* Dereference devaddrs[j] to get the device addr. */
3674 assert (devaddrs
[j
] - sizes
[j
] == cdata
[i
].devaddr
);
3675 devaddrs
[j
] = *(uint64_t *) (uintptr_t) (devaddrs
[i
]
3677 cdata
[j
].present
= true;
3678 cdata
[j
].devaddr
= devaddrs
[j
];
3679 if (devaddrs
[j
] == 0)
3681 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
3683 devaddrs
[j
] + sizeof (void*),
3686 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
3687 - cdata
[k
].devaddr
);
3690 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3692 devaddrs
[j
] + sizeof (void*),
3696 gomp_mutex_unlock (&devicep
->lock
);
3697 gomp_fatal ("Pointer target wasn't mapped");
3699 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
3700 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3702 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
3703 = (void *) (uintptr_t) devaddrs
[j
];
3707 case GOMP_MAP_STRUCT
:
3708 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
3709 devaddrs
[i
+ sizes
[i
]]
3710 + sizes
[i
+ sizes
[i
]], false);
3711 cdata
[i
].present
= n2
!= NULL
;
3712 cdata
[i
].devaddr
= devaddrs
[i
];
3713 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
3716 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
3718 + sizes
[i
+ sizes
[i
]]);
3719 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3720 cdata
[i
].aligned
= true;
3721 devaddrs
[i
] = (uintptr_t) gomp_aligned_alloc (align
, sz
);
3722 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
3725 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3726 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3729 gomp_mutex_unlock (&devicep
->lock
);
3730 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
3733 gomp_mutex_unlock (&devicep
->lock
);
3738 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3740 uint64_t struct_cpy
= 0;
3741 bool clean_struct
= false;
3742 for (uint64_t i
= 0; i
< mapnum
; i
++)
3744 if (cdata
[i
].devaddr
== 0)
3746 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3747 bool copy
= !cdata
[i
].present
|| struct_cpy
;
3750 case GOMP_MAP_FORCE_FROM
:
3751 case GOMP_MAP_FORCE_TOFROM
:
3752 case GOMP_MAP_ALWAYS_FROM
:
3753 case GOMP_MAP_ALWAYS_TOFROM
:
3754 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
3755 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
3759 case GOMP_MAP_TOFROM
:
3762 gomp_copy_host2dev (devicep
, aq
,
3763 (void *) (uintptr_t) cdata
[i
].devaddr
,
3764 (void *) (uintptr_t) devaddrs
[i
],
3765 sizes
[i
], false, NULL
);
3766 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3767 exit (EXIT_FAILURE
);
3777 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
3779 clean_struct
= true;
3780 struct_cpy
= sizes
[i
];
3782 else if (!cdata
[i
].present
&& cdata
[i
].aligned
)
3783 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3784 else if (!cdata
[i
].present
)
3785 free ((void *) (uintptr_t) devaddrs
[i
]);
3788 for (uint64_t i
= 0; i
< mapnum
; i
++)
3789 if (!cdata
[i
].present
3790 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3791 == GOMP_MAP_STRUCT
))
3793 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
3794 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3803 /* Host fallback for GOMP_target_data{,_ext} routines. */
3806 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
3808 struct gomp_task_icv
*icv
= gomp_icv (false);
3810 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3812 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3813 "be used for offloading");
3815 if (icv
->target_data
)
3817 /* Even when doing a host fallback, if there are any active
3818 #pragma omp target data constructs, need to remember the
3819 new #pragma omp target data, otherwise GOMP_target_end_data
3820 would get out of sync. */
3821 struct target_mem_desc
*tgt
3822 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3823 NULL
, GOMP_MAP_VARS_DATA
);
3824 tgt
->prev
= icv
->target_data
;
3825 icv
->target_data
= tgt
;
3830 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3831 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3833 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3836 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3837 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3838 return gomp_target_data_fallback (devicep
);
3840 struct target_mem_desc
*tgt
3841 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3842 NULL
, GOMP_MAP_VARS_DATA
);
3843 struct gomp_task_icv
*icv
= gomp_icv (true);
3844 tgt
->prev
= icv
->target_data
;
3845 icv
->target_data
= tgt
;
3849 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
3850 size_t *sizes
, unsigned short *kinds
)
3852 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3855 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3856 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3857 return gomp_target_data_fallback (devicep
);
3859 struct target_mem_desc
*tgt
3860 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
3861 NULL
, GOMP_MAP_VARS_DATA
);
3862 struct gomp_task_icv
*icv
= gomp_icv (true);
3863 tgt
->prev
= icv
->target_data
;
3864 icv
->target_data
= tgt
;
3868 GOMP_target_end_data (void)
3870 struct gomp_task_icv
*icv
= gomp_icv (false);
3871 if (icv
->target_data
)
3873 struct target_mem_desc
*tgt
= icv
->target_data
;
3874 icv
->target_data
= tgt
->prev
;
3875 gomp_unmap_vars (tgt
, true, NULL
);
3880 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
3881 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3883 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3886 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3887 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3890 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
3894 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
3895 size_t *sizes
, unsigned short *kinds
,
3896 unsigned int flags
, void **depend
)
3898 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3900 /* If there are depend clauses, but nowait is not present,
3901 block the parent task until the dependencies are resolved
3902 and then just continue with the rest of the function as if it
3903 is a merged task. Until we are able to schedule task during
3904 variable mapping or unmapping, ignore nowait if depend clauses
3908 struct gomp_thread
*thr
= gomp_thread ();
3909 if (thr
->task
&& thr
->task
->depend_hash
)
3911 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3913 && !thr
->task
->final_task
)
3915 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3916 mapnum
, hostaddrs
, sizes
, kinds
,
3917 flags
| GOMP_TARGET_FLAG_UPDATE
,
3918 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
3923 struct gomp_team
*team
= thr
->ts
.team
;
3924 /* If parallel or taskgroup has been cancelled, don't start new
3926 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3928 if (gomp_team_barrier_cancelled (&team
->barrier
))
3930 if (thr
->task
->taskgroup
)
3932 if (thr
->task
->taskgroup
->cancelled
)
3934 if (thr
->task
->taskgroup
->workshare
3935 && thr
->task
->taskgroup
->prev
3936 && thr
->task
->taskgroup
->prev
->cancelled
)
3941 gomp_task_maybe_wait_for_dependencies (depend
);
3947 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3948 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3951 struct gomp_thread
*thr
= gomp_thread ();
3952 struct gomp_team
*team
= thr
->ts
.team
;
3953 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3954 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3956 if (gomp_team_barrier_cancelled (&team
->barrier
))
3958 if (thr
->task
->taskgroup
)
3960 if (thr
->task
->taskgroup
->cancelled
)
3962 if (thr
->task
->taskgroup
->workshare
3963 && thr
->task
->taskgroup
->prev
3964 && thr
->task
->taskgroup
->prev
->cancelled
)
3969 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
3973 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
3974 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3975 htab_t
*refcount_set
)
3977 const int typemask
= 0xff;
3979 gomp_mutex_lock (&devicep
->lock
);
3980 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3982 gomp_mutex_unlock (&devicep
->lock
);
3986 for (i
= 0; i
< mapnum
; i
++)
3987 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
3989 struct splay_tree_key_s cur_node
;
3990 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3991 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
3992 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3995 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
4000 splay_tree_key remove_vars
[mapnum
];
4002 for (i
= 0; i
< mapnum
; i
++)
4004 struct splay_tree_key_s cur_node
;
4005 unsigned char kind
= kinds
[i
] & typemask
;
4009 case GOMP_MAP_ALWAYS_FROM
:
4010 case GOMP_MAP_DELETE
:
4011 case GOMP_MAP_RELEASE
:
4012 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
4013 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
4014 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
4015 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
4016 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4017 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
4018 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
4019 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
4023 bool delete_p
= (kind
== GOMP_MAP_DELETE
4024 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
4025 bool do_copy
, do_remove
;
4026 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
4029 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
4030 || kind
== GOMP_MAP_ALWAYS_FROM
)
4032 if (k
->aux
&& k
->aux
->attach_count
)
4034 /* We have to be careful not to overwrite still attached
4035 pointers during the copyback to host. */
4036 uintptr_t addr
= k
->host_start
;
4037 while (addr
< k
->host_end
)
4039 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
4040 if (k
->aux
->attach_count
[i
] == 0)
4041 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
4042 (void *) (k
->tgt
->tgt_start
4044 + addr
- k
->host_start
),
4046 addr
+= sizeof (void *);
4050 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
4051 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
4052 + cur_node
.host_start
4054 cur_node
.host_end
- cur_node
.host_start
);
4057 /* Structure elements lists are removed altogether at once, which
4058 may cause immediate deallocation of the target_mem_desc, causing
4059 errors if we still have following element siblings to copy back.
4060 While we're at it, it also seems more disciplined to simply
4061 queue all removals together for processing below.
4063 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
4064 not have this problem, since they maintain an additional
4065 tgt->refcount = 1 reference to the target_mem_desc to start with.
4068 remove_vars
[nrmvars
++] = k
;
4071 case GOMP_MAP_DETACH
:
4074 gomp_mutex_unlock (&devicep
->lock
);
4075 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
4080 for (int i
= 0; i
< nrmvars
; i
++)
4081 gomp_remove_var (devicep
, remove_vars
[i
]);
4083 gomp_mutex_unlock (&devicep
->lock
);
4087 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
4088 size_t *sizes
, unsigned short *kinds
,
4089 unsigned int flags
, void **depend
)
4091 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4093 /* If there are depend clauses, but nowait is not present,
4094 block the parent task until the dependencies are resolved
4095 and then just continue with the rest of the function as if it
4096 is a merged task. Until we are able to schedule task during
4097 variable mapping or unmapping, ignore nowait if depend clauses
4101 struct gomp_thread
*thr
= gomp_thread ();
4102 if (thr
->task
&& thr
->task
->depend_hash
)
4104 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4106 && !thr
->task
->final_task
)
4108 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4109 mapnum
, hostaddrs
, sizes
, kinds
,
4110 flags
, depend
, NULL
,
4111 GOMP_TARGET_TASK_DATA
))
4116 struct gomp_team
*team
= thr
->ts
.team
;
4117 /* If parallel or taskgroup has been cancelled, don't start new
4119 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4121 if (gomp_team_barrier_cancelled (&team
->barrier
))
4123 if (thr
->task
->taskgroup
)
4125 if (thr
->task
->taskgroup
->cancelled
)
4127 if (thr
->task
->taskgroup
->workshare
4128 && thr
->task
->taskgroup
->prev
4129 && thr
->task
->taskgroup
->prev
->cancelled
)
4134 gomp_task_maybe_wait_for_dependencies (depend
);
4140 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4141 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4144 struct gomp_thread
*thr
= gomp_thread ();
4145 struct gomp_team
*team
= thr
->ts
.team
;
4146 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4147 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4149 if (gomp_team_barrier_cancelled (&team
->barrier
))
4151 if (thr
->task
->taskgroup
)
4153 if (thr
->task
->taskgroup
->cancelled
)
4155 if (thr
->task
->taskgroup
->workshare
4156 && thr
->task
->taskgroup
->prev
4157 && thr
->task
->taskgroup
->prev
->cancelled
)
4162 htab_t refcount_set
= htab_create (mapnum
);
4164 /* The variables are mapped separately such that they can be released
4167 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4168 for (i
= 0; i
< mapnum
; i
++)
4169 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4171 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
4172 &kinds
[i
], true, &refcount_set
,
4173 GOMP_MAP_VARS_ENTER_DATA
);
4176 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
4178 for (j
= i
+ 1; j
< mapnum
; j
++)
4179 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
4180 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
4182 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
4183 &kinds
[i
], true, &refcount_set
,
4184 GOMP_MAP_VARS_ENTER_DATA
);
4187 else if (i
+ 1 < mapnum
4188 && ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
4189 || ((kinds
[i
+ 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
4190 && (kinds
[i
] & 0xff) != GOMP_MAP_ALWAYS_POINTER
)))
4192 /* An attach operation must be processed together with the mapped
4193 base-pointer list item. */
4194 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4195 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4199 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4200 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4202 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
4203 htab_free (refcount_set
);
4207 gomp_target_task_fn (void *data
)
4209 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
4210 struct gomp_device_descr
*devicep
= ttask
->devicep
;
4212 if (ttask
->fn
!= NULL
)
4216 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4217 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
4218 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
4220 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
4221 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
4226 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
4229 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
4233 void *actual_arguments
;
4234 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4237 actual_arguments
= ttask
->hostaddrs
;
4241 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
4242 NULL
, ttask
->sizes
, ttask
->kinds
, true,
4243 NULL
, GOMP_MAP_VARS_TARGET
);
4244 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
4246 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
4248 assert (devicep
->async_run_func
);
4249 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
4250 ttask
->args
, (void *) ttask
);
4253 else if (devicep
== NULL
4254 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4255 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4259 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
4260 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4261 ttask
->kinds
, true);
4264 htab_t refcount_set
= htab_create (ttask
->mapnum
);
4265 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4266 for (i
= 0; i
< ttask
->mapnum
; i
++)
4267 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4269 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
4270 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
4271 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4272 i
+= ttask
->sizes
[i
];
4275 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
4276 &ttask
->kinds
[i
], true, &refcount_set
,
4277 GOMP_MAP_VARS_ENTER_DATA
);
4279 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4280 ttask
->kinds
, &refcount_set
);
4281 htab_free (refcount_set
);
4287 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
4291 struct gomp_task_icv
*icv
= gomp_icv (true);
4292 icv
->thread_limit_var
4293 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4299 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
4300 unsigned int thread_limit
, bool first
)
4302 struct gomp_thread
*thr
= gomp_thread ();
4307 struct gomp_task_icv
*icv
= gomp_icv (true);
4308 icv
->thread_limit_var
4309 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4311 (void) num_teams_high
;
4312 if (num_teams_low
== 0)
4314 thr
->num_teams
= num_teams_low
- 1;
4317 else if (thr
->team_num
== thr
->num_teams
)
4325 omp_target_alloc (size_t size
, int device_num
)
4327 if (device_num
== omp_initial_device
4328 || device_num
== gomp_get_num_devices ())
4329 return malloc (size
);
4331 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4332 if (devicep
== NULL
)
4335 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4336 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4337 return malloc (size
);
4339 gomp_mutex_lock (&devicep
->lock
);
4340 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
4341 gomp_mutex_unlock (&devicep
->lock
);
4346 omp_target_free (void *device_ptr
, int device_num
)
4348 if (device_num
== omp_initial_device
4349 || device_num
== gomp_get_num_devices ())
4355 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4356 if (devicep
== NULL
|| device_ptr
== NULL
)
4359 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4360 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4366 gomp_mutex_lock (&devicep
->lock
);
4367 gomp_free_device_memory (devicep
, device_ptr
);
4368 gomp_mutex_unlock (&devicep
->lock
);
4372 omp_target_is_present (const void *ptr
, int device_num
)
4374 if (device_num
== omp_initial_device
4375 || device_num
== gomp_get_num_devices ())
4378 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4379 if (devicep
== NULL
)
4385 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4386 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4389 gomp_mutex_lock (&devicep
->lock
);
4390 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4391 struct splay_tree_key_s cur_node
;
4393 cur_node
.host_start
= (uintptr_t) ptr
;
4394 cur_node
.host_end
= cur_node
.host_start
;
4395 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4396 int ret
= n
!= NULL
;
4397 gomp_mutex_unlock (&devicep
->lock
);
4402 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
4403 struct gomp_device_descr
**dst_devicep
,
4404 struct gomp_device_descr
**src_devicep
)
4406 if (dst_device_num
!= gomp_get_num_devices ()
4407 /* Above gomp_get_num_devices has to be called unconditionally. */
4408 && dst_device_num
!= omp_initial_device
)
4410 *dst_devicep
= resolve_device (dst_device_num
, false);
4411 if (*dst_devicep
== NULL
)
4414 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4415 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4416 *dst_devicep
= NULL
;
4419 if (src_device_num
!= num_devices_openmp
4420 && src_device_num
!= omp_initial_device
)
4422 *src_devicep
= resolve_device (src_device_num
, false);
4423 if (*src_devicep
== NULL
)
4426 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4427 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4428 *src_devicep
= NULL
;
4435 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
4436 size_t dst_offset
, size_t src_offset
,
4437 struct gomp_device_descr
*dst_devicep
,
4438 struct gomp_device_descr
*src_devicep
)
4441 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
4443 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
4446 if (src_devicep
== NULL
)
4448 gomp_mutex_lock (&dst_devicep
->lock
);
4449 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4450 (char *) dst
+ dst_offset
,
4451 (char *) src
+ src_offset
, length
);
4452 gomp_mutex_unlock (&dst_devicep
->lock
);
4453 return (ret
? 0 : EINVAL
);
4455 if (dst_devicep
== NULL
)
4457 gomp_mutex_lock (&src_devicep
->lock
);
4458 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4459 (char *) dst
+ dst_offset
,
4460 (char *) src
+ src_offset
, length
);
4461 gomp_mutex_unlock (&src_devicep
->lock
);
4462 return (ret
? 0 : EINVAL
);
4464 if (src_devicep
== dst_devicep
)
4466 gomp_mutex_lock (&src_devicep
->lock
);
4467 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4468 (char *) dst
+ dst_offset
,
4469 (char *) src
+ src_offset
, length
);
4470 gomp_mutex_unlock (&src_devicep
->lock
);
4471 return (ret
? 0 : EINVAL
);
4477 omp_target_memcpy (void *dst
, const void *src
, size_t length
, size_t dst_offset
,
4478 size_t src_offset
, int dst_device_num
, int src_device_num
)
4480 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4481 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4482 &dst_devicep
, &src_devicep
);
4487 ret
= omp_target_memcpy_copy (dst
, src
, length
, dst_offset
, src_offset
,
4488 dst_devicep
, src_devicep
);
4500 struct gomp_device_descr
*dst_devicep
;
4501 struct gomp_device_descr
*src_devicep
;
4502 } omp_target_memcpy_data
;
4505 omp_target_memcpy_async_helper (void *args
)
4507 omp_target_memcpy_data
*a
= args
;
4508 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
4509 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
4510 gomp_fatal ("omp_target_memcpy failed");
4514 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
4515 size_t dst_offset
, size_t src_offset
,
4516 int dst_device_num
, int src_device_num
,
4517 int depobj_count
, omp_depend_t
*depobj_list
)
4519 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4520 unsigned int flags
= 0;
4521 void *depend
[depobj_count
+ 5];
4523 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4524 &dst_devicep
, &src_devicep
);
4526 omp_target_memcpy_data s
= {
4530 .dst_offset
= dst_offset
,
4531 .src_offset
= src_offset
,
4532 .dst_devicep
= dst_devicep
,
4533 .src_devicep
= src_devicep
4539 if (depobj_count
> 0 && depobj_list
!= NULL
)
4541 flags
|= GOMP_TASK_FLAG_DEPEND
;
4543 depend
[1] = (void *) (uintptr_t) depobj_count
;
4544 depend
[2] = depend
[3] = depend
[4] = 0;
4545 for (i
= 0; i
< depobj_count
; ++i
)
4546 depend
[i
+ 5] = &depobj_list
[i
];
4549 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
4550 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4556 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
4557 int num_dims
, const size_t *volume
,
4558 const size_t *dst_offsets
,
4559 const size_t *src_offsets
,
4560 const size_t *dst_dimensions
,
4561 const size_t *src_dimensions
,
4562 struct gomp_device_descr
*dst_devicep
,
4563 struct gomp_device_descr
*src_devicep
,
4564 size_t *tmp_size
, void **tmp
)
4566 size_t dst_slice
= element_size
;
4567 size_t src_slice
= element_size
;
4568 size_t j
, dst_off
, src_off
, length
;
4573 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
4574 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
4575 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
4577 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
4579 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
4583 else if (src_devicep
== NULL
)
4584 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4585 (char *) dst
+ dst_off
,
4586 (const char *) src
+ src_off
,
4588 else if (dst_devicep
== NULL
)
4589 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4590 (char *) dst
+ dst_off
,
4591 (const char *) src
+ src_off
,
4593 else if (src_devicep
== dst_devicep
)
4594 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4595 (char *) dst
+ dst_off
,
4596 (const char *) src
+ src_off
,
4603 *tmp
= malloc (length
);
4607 else if (*tmp_size
< length
)
4611 *tmp
= malloc (length
);
4615 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
, *tmp
,
4616 (const char *) src
+ src_off
,
4619 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4620 (char *) dst
+ dst_off
, *tmp
,
4623 return ret
? 0 : EINVAL
;
4626 /* host->device, device->host and intra device. */
4629 && src_devicep
== dst_devicep
4630 && src_devicep
->memcpy2d_func
)
4631 || (!src_devicep
!= !dst_devicep
4632 && ((src_devicep
&& src_devicep
->memcpy2d_func
)
4633 || (dst_devicep
&& dst_devicep
->memcpy2d_func
)))))
4635 size_t vol_sz1
, dst_sz1
, src_sz1
, dst_off_sz1
, src_off_sz1
;
4636 int dst_id
= dst_devicep
? dst_devicep
->target_id
: -1;
4637 int src_id
= src_devicep
? src_devicep
->target_id
: -1;
4638 struct gomp_device_descr
*devp
= dst_devicep
? dst_devicep
: src_devicep
;
4640 if (__builtin_mul_overflow (volume
[1], element_size
, &vol_sz1
)
4641 || __builtin_mul_overflow (dst_dimensions
[1], element_size
, &dst_sz1
)
4642 || __builtin_mul_overflow (src_dimensions
[1], element_size
, &src_sz1
)
4643 || __builtin_mul_overflow (dst_offsets
[1], element_size
, &dst_off_sz1
)
4644 || __builtin_mul_overflow (src_offsets
[1], element_size
,
4647 ret
= devp
->memcpy2d_func (dst_id
, src_id
, vol_sz1
, volume
[0],
4648 dst
, dst_off_sz1
, dst_offsets
[0], dst_sz1
,
4649 src
, src_off_sz1
, src_offsets
[0], src_sz1
);
4651 return ret
? 0 : EINVAL
;
4653 else if (num_dims
== 3
4655 && src_devicep
== dst_devicep
4656 && src_devicep
->memcpy3d_func
)
4657 || (!src_devicep
!= !dst_devicep
4658 && ((src_devicep
&& src_devicep
->memcpy3d_func
)
4659 || (dst_devicep
&& dst_devicep
->memcpy3d_func
)))))
4661 size_t vol_sz2
, dst_sz2
, src_sz2
, dst_off_sz2
, src_off_sz2
;
4662 int dst_id
= dst_devicep
? dst_devicep
->target_id
: -1;
4663 int src_id
= src_devicep
? src_devicep
->target_id
: -1;
4664 struct gomp_device_descr
*devp
= dst_devicep
? dst_devicep
: src_devicep
;
4666 if (__builtin_mul_overflow (volume
[2], element_size
, &vol_sz2
)
4667 || __builtin_mul_overflow (dst_dimensions
[2], element_size
, &dst_sz2
)
4668 || __builtin_mul_overflow (src_dimensions
[2], element_size
, &src_sz2
)
4669 || __builtin_mul_overflow (dst_offsets
[2], element_size
, &dst_off_sz2
)
4670 || __builtin_mul_overflow (src_offsets
[2], element_size
,
4673 ret
= devp
->memcpy3d_func (dst_id
, src_id
, vol_sz2
, volume
[1], volume
[0],
4674 dst
, dst_off_sz2
, dst_offsets
[1],
4675 dst_offsets
[0], dst_sz2
, dst_dimensions
[1],
4676 src
, src_off_sz2
, src_offsets
[1],
4677 src_offsets
[0], src_sz2
, src_dimensions
[1]);
4679 return ret
? 0 : EINVAL
;
4682 for (i
= 1; i
< num_dims
; i
++)
4683 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
4684 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
4686 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
4687 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
4689 for (j
= 0; j
< volume
[0]; j
++)
4691 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
4692 (const char *) src
+ src_off
,
4693 element_size
, num_dims
- 1,
4694 volume
+ 1, dst_offsets
+ 1,
4695 src_offsets
+ 1, dst_dimensions
+ 1,
4696 src_dimensions
+ 1, dst_devicep
,
4697 src_devicep
, tmp_size
, tmp
);
4700 dst_off
+= dst_slice
;
4701 src_off
+= src_slice
;
/* Validate the arguments of omp_target_memcpy_rect{,_async}.  A NULL DST
   and SRC is the spec-mandated capability query: return INT_MAX (the
   maximum number of dimensions supported).  Otherwise resolve the device
   numbers into descriptors; returns 0 when the copy may proceed.  */

static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  return 0;
}
4724 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
4725 size_t element_size
, int num_dims
,
4726 const size_t *volume
, const size_t *dst_offsets
,
4727 const size_t *src_offsets
,
4728 const size_t *dst_dimensions
,
4729 const size_t *src_dimensions
,
4730 struct gomp_device_descr
*dst_devicep
,
4731 struct gomp_device_descr
*src_devicep
)
4733 size_t tmp_size
= 0;
4738 lock_src
= src_devicep
!= NULL
;
4739 lock_dst
= dst_devicep
!= NULL
&& src_devicep
!= dst_devicep
;
4741 gomp_mutex_lock (&src_devicep
->lock
);
4743 gomp_mutex_lock (&dst_devicep
->lock
);
4744 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
4745 volume
, dst_offsets
, src_offsets
,
4746 dst_dimensions
, src_dimensions
,
4747 dst_devicep
, src_devicep
,
4750 gomp_mutex_unlock (&src_devicep
->lock
);
4752 gomp_mutex_unlock (&dst_devicep
->lock
);
/* OpenMP omp_target_memcpy_rect: copy a rectangular sub-volume between
   the given devices.  Returns 0 on success, INT_MAX for the NULL/NULL
   capability query, or an error value.  */

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  int check = omp_target_memcpy_rect_check (dst, src, dst_device_num,
					    src_device_num, &dst_devicep,
					    &src_devicep);
  if (check)
    return check;

  int ret = omp_target_memcpy_rect_copy (dst, src, element_size, num_dims,
					 volume, dst_offsets, src_offsets,
					 dst_dimensions, src_dimensions,
					 dst_devicep, src_devicep);

  return ret;
}
/* Argument bundle for omp_target_memcpy_rect_async_helper; mirrors the
   parameters of omp_target_memcpy_rect_copy so the copy can run in a task.  */
typedef struct
{
  void *dst;
  const void *src;
  size_t element_size;
  const size_t *volume;
  const size_t *dst_offsets;
  const size_t *src_offsets;
  const size_t *dst_dimensions;
  const size_t *src_dimensions;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
  int num_dims;
} omp_target_memcpy_rect_data;
4801 omp_target_memcpy_rect_async_helper (void *args
)
4803 omp_target_memcpy_rect_data
*a
= args
;
4804 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
4805 a
->num_dims
, a
->volume
, a
->dst_offsets
,
4806 a
->src_offsets
, a
->dst_dimensions
,
4807 a
->src_dimensions
, a
->dst_devicep
,
4810 gomp_fatal ("omp_target_memcpy_rect failed");
4814 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
4815 int num_dims
, const size_t *volume
,
4816 const size_t *dst_offsets
,
4817 const size_t *src_offsets
,
4818 const size_t *dst_dimensions
,
4819 const size_t *src_dimensions
,
4820 int dst_device_num
, int src_device_num
,
4821 int depobj_count
, omp_depend_t
*depobj_list
)
4823 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4825 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4826 src_device_num
, &dst_devicep
,
4828 void *depend
[depobj_count
+ 5];
4831 omp_target_memcpy_rect_data s
= {
4834 .element_size
= element_size
,
4835 .num_dims
= num_dims
,
4837 .dst_offsets
= dst_offsets
,
4838 .src_offsets
= src_offsets
,
4839 .dst_dimensions
= dst_dimensions
,
4840 .src_dimensions
= src_dimensions
,
4841 .dst_devicep
= dst_devicep
,
4842 .src_devicep
= src_devicep
4848 if (depobj_count
> 0 && depobj_list
!= NULL
)
4850 flags
|= GOMP_TASK_FLAG_DEPEND
;
4852 depend
[1] = (void *) (uintptr_t) depobj_count
;
4853 depend
[2] = depend
[3] = depend
[4] = 0;
4854 for (i
= 0; i
< depobj_count
; ++i
)
4855 depend
[i
+ 5] = &depobj_list
[i
];
4858 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
4859 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4865 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
4866 size_t size
, size_t device_offset
, int device_num
)
4868 if (device_num
== omp_initial_device
4869 || device_num
== gomp_get_num_devices ())
4872 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4873 if (devicep
== NULL
)
4876 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4877 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4880 gomp_mutex_lock (&devicep
->lock
);
4882 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4883 struct splay_tree_key_s cur_node
;
4886 cur_node
.host_start
= (uintptr_t) host_ptr
;
4887 cur_node
.host_end
= cur_node
.host_start
+ size
;
4888 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4891 if (n
->tgt
->tgt_start
+ n
->tgt_offset
4892 == (uintptr_t) device_ptr
+ device_offset
4893 && n
->host_start
<= cur_node
.host_start
4894 && n
->host_end
>= cur_node
.host_end
)
4899 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
4900 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
4904 tgt
->to_free
= NULL
;
4906 tgt
->list_count
= 0;
4907 tgt
->device_descr
= devicep
;
4908 splay_tree_node array
= tgt
->array
;
4909 splay_tree_key k
= &array
->key
;
4910 k
->host_start
= cur_node
.host_start
;
4911 k
->host_end
= cur_node
.host_end
;
4913 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
4914 k
->refcount
= REFCOUNT_INFINITY
;
4915 k
->dynamic_refcount
= 0;
4918 array
->right
= NULL
;
4919 splay_tree_insert (&devicep
->mem_map
, array
);
4922 gomp_mutex_unlock (&devicep
->lock
);
4927 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
4929 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4930 if (devicep
== NULL
)
4933 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4936 gomp_mutex_lock (&devicep
->lock
);
4938 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4939 struct splay_tree_key_s cur_node
;
4942 cur_node
.host_start
= (uintptr_t) ptr
;
4943 cur_node
.host_end
= cur_node
.host_start
;
4944 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4946 && n
->host_start
== cur_node
.host_start
4947 && n
->refcount
== REFCOUNT_INFINITY
4948 && n
->tgt
->tgt_start
== 0
4949 && n
->tgt
->to_free
== NULL
4950 && n
->tgt
->refcount
== 1
4951 && n
->tgt
->list_count
== 0)
4953 splay_tree_remove (&devicep
->mem_map
, n
);
4954 gomp_unmap_tgt (n
->tgt
);
4958 gomp_mutex_unlock (&devicep
->lock
);
4963 omp_get_mapped_ptr (const void *ptr
, int device_num
)
4965 if (device_num
== omp_initial_device
4966 || device_num
== omp_get_initial_device ())
4967 return (void *) ptr
;
4969 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4970 if (devicep
== NULL
)
4973 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4974 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4975 return (void *) ptr
;
4977 gomp_mutex_lock (&devicep
->lock
);
4979 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4980 struct splay_tree_key_s cur_node
;
4983 cur_node
.host_start
= (uintptr_t) ptr
;
4984 cur_node
.host_end
= cur_node
.host_start
;
4985 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4989 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
4990 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
4993 gomp_mutex_unlock (&devicep
->lock
);
4999 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
5001 if (device_num
== omp_initial_device
5002 || device_num
== gomp_get_num_devices ())
5005 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5006 if (devicep
== NULL
)
5009 /* TODO: Unified shared memory must be handled when available. */
5011 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
5015 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
5018 if (device_num
== omp_initial_device
5019 || device_num
== gomp_get_num_devices ())
5020 return gomp_pause_host ();
5022 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5023 if (devicep
== NULL
)
5026 /* Do nothing for target devices for now. */
5031 omp_pause_resource_all (omp_pause_resource_t kind
)
5034 if (gomp_pause_host ())
5036 /* Do nothing for target devices for now. */
5040 ialias (omp_pause_resource
)
5041 ialias (omp_pause_resource_all
)
5043 #ifdef PLUGIN_SUPPORT
5045 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
5047 The handles of the found functions are stored in the corresponding fields
5048 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
5051 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
5052 const char *plugin_name
)
5054 const char *err
= NULL
, *last_missing
= NULL
;
5056 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
5058 #if OFFLOAD_DEFAULTED
5064 /* Check if all required functions are available in the plugin and store
5065 their handlers. None of the symbols can legitimately be NULL,
5066 so we don't need to check dlerror all the time. */
5068 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
5070 /* Similar, but missing functions are not an error. Return false if
5071 failed, true otherwise. */
5072 #define DLSYM_OPT(f, n) \
5073 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
5074 || (last_missing = #n, 0))
5077 if (device
->version_func () != GOMP_VERSION
)
5079 err
= "plugin version mismatch";
5086 DLSYM (get_num_devices
);
5087 DLSYM (init_device
);
5088 DLSYM (fini_device
);
5090 DLSYM (unload_image
);
5095 DLSYM_OPT (memcpy2d
, memcpy2d
);
5096 DLSYM_OPT (memcpy3d
, memcpy3d
);
5097 device
->capabilities
= device
->get_caps_func ();
5098 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5101 DLSYM_OPT (async_run
, async_run
);
5102 DLSYM_OPT (can_run
, can_run
);
5105 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5107 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
5108 || !DLSYM_OPT (openacc
.create_thread_data
,
5109 openacc_create_thread_data
)
5110 || !DLSYM_OPT (openacc
.destroy_thread_data
,
5111 openacc_destroy_thread_data
)
5112 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
5113 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
5114 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
5115 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
5116 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
5117 || !DLSYM_OPT (openacc
.async
.queue_callback
,
5118 openacc_async_queue_callback
)
5119 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
5120 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
5121 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
5122 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
5124 /* Require all the OpenACC handlers if we have
5125 GOMP_OFFLOAD_CAP_OPENACC_200. */
5126 err
= "plugin missing OpenACC handler function";
5131 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
5132 openacc_cuda_get_current_device
);
5133 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
5134 openacc_cuda_get_current_context
);
5135 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
5136 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
5137 if (cuda
&& cuda
!= 4)
5139 /* Make sure all the CUDA functions are there if any of them are. */
5140 err
= "plugin missing OpenACC CUDA handler function";
5152 gomp_error ("while loading %s: %s", plugin_name
, err
);
5154 gomp_error ("missing function was %s", last_missing
);
5156 dlclose (plugin_handle
);
5161 /* This function finalizes all initialized devices. */
5164 gomp_target_fini (void)
5167 for (i
= 0; i
< num_devices
; i
++)
5170 struct gomp_device_descr
*devicep
= &devices
[i
];
5171 gomp_mutex_lock (&devicep
->lock
);
5172 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
5173 ret
= gomp_fini_device (devicep
);
5174 gomp_mutex_unlock (&devicep
->lock
);
5176 gomp_fatal ("device finalization failed");
5180 /* This function initializes the runtime for offloading.
5181 It parses the list of offload plugins, and tries to load these.
5182 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
5183 will be set, and the array DEVICES initialized, containing descriptors for
5184 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
5188 gomp_target_init (void)
5190 const char *prefix
="libgomp-plugin-";
5191 const char *suffix
= SONAME_SUFFIX (1);
5192 const char *cur
, *next
;
5194 int i
, new_num_devs
;
5195 int num_devs
= 0, num_devs_openmp
;
5196 struct gomp_device_descr
*devs
= NULL
;
5198 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
5201 cur
= OFFLOAD_PLUGINS
;
5205 struct gomp_device_descr current_device
;
5206 size_t prefix_len
, suffix_len
, cur_len
;
5208 next
= strchr (cur
, ',');
5210 prefix_len
= strlen (prefix
);
5211 cur_len
= next
? next
- cur
: strlen (cur
);
5212 suffix_len
= strlen (suffix
);
5214 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
5221 memcpy (plugin_name
, prefix
, prefix_len
);
5222 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
5223 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
5225 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
5227 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
5228 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
5229 if (gomp_debug_var
> 0 && new_num_devs
< 0)
5232 int type
= current_device
.get_type_func ();
5233 for (int img
= 0; img
< num_offload_images
; img
++)
5234 if (type
== offload_images
[img
].type
)
5238 char buf
[sizeof ("unified_address, unified_shared_memory, "
5239 "reverse_offload")];
5240 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
5241 char *name
= (char *) malloc (cur_len
+ 1);
5242 memcpy (name
, cur
, cur_len
);
5243 name
[cur_len
] = '\0';
5245 "%s devices present but 'omp requires %s' "
5246 "cannot be fulfilled\n", name
, buf
);
5250 else if (new_num_devs
>= 1)
5252 /* Augment DEVICES and NUM_DEVICES. */
5254 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
5255 * sizeof (struct gomp_device_descr
));
5263 current_device
.name
= current_device
.get_name_func ();
5264 /* current_device.capabilities has already been set. */
5265 current_device
.type
= current_device
.get_type_func ();
5266 current_device
.mem_map
.root
= NULL
;
5267 current_device
.mem_map_rev
.root
= NULL
;
5268 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
5269 for (i
= 0; i
< new_num_devs
; i
++)
5271 current_device
.target_id
= i
;
5272 devs
[num_devs
] = current_device
;
5273 gomp_mutex_init (&devs
[num_devs
].lock
);
5284 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5285 NUM_DEVICES_OPENMP. */
5286 struct gomp_device_descr
*devs_s
5287 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
5294 num_devs_openmp
= 0;
5295 for (i
= 0; i
< num_devs
; i
++)
5296 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5297 devs_s
[num_devs_openmp
++] = devs
[i
];
5298 int num_devs_after_openmp
= num_devs_openmp
;
5299 for (i
= 0; i
< num_devs
; i
++)
5300 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5301 devs_s
[num_devs_after_openmp
++] = devs
[i
];
5305 for (i
= 0; i
< num_devs
; i
++)
5307 /* The 'devices' array can be moved (by the realloc call) until we have
5308 found all the plugins, so registering with the OpenACC runtime (which
5309 takes a copy of the pointer argument) must be delayed until now. */
5310 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5311 goacc_register (&devs
[i
]);
5313 if (gomp_global_icv
.default_device_var
== INT_MIN
)
5315 /* This implies OMP_TARGET_OFFLOAD=mandatory. */
5316 struct gomp_icv_list
*none
;
5317 none
= gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_NO_SUFFIX
);
5318 gomp_global_icv
.default_device_var
= (num_devs_openmp
5319 ? 0 : omp_invalid_device
);
5320 none
->icvs
.default_device_var
= gomp_global_icv
.default_device_var
;
5323 num_devices
= num_devs
;
5324 num_devices_openmp
= num_devs_openmp
;
5326 if (atexit (gomp_target_fini
) != 0)
5327 gomp_fatal ("atexit failed");
5330 #else /* PLUGIN_SUPPORT */
5331 /* If dlfcn.h is unavailable we always fallback to host execution.
5332 GOMP_target* routines are just stubs for this case. */
5334 gomp_target_init (void)
5337 #endif /* PLUGIN_SUPPORT */