1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type
;
/* hashtab.h allocation callback: obtain a SIZE-byte chunk of memory.
   Delegates to gomp_malloc, which terminates the process on allocation
   failure, so the returned pointer is never NULL.  */
static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
/* hashtab.h deallocation callback: release a chunk previously obtained
   from htab_alloc.  NULL is accepted, as free itself permits it.  */
static inline void
htab_free (void *ptr)
{
  free (ptr);
}
59 ialias_redirect (GOMP_task
)
61 static inline hashval_t
62 htab_hash (hash_entry_type element
)
64 return hash_pointer ((void *) element
);
68 htab_eq (hash_entry_type x
, hash_entry_type y
)
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock
;
83 /* This structure describes an offload image.
84 It contains type of the target device, pointer to host table descriptor, and
85 pointer to target data. */
86 struct offload_image_descr
{
88 enum offload_target_type type
;
89 const void *host_table
;
90 const void *target_data
;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr
*offload_images
;
96 /* Total number of offload images. */
97 static int num_offload_images
;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr
*devices
;
102 /* Total number of available devices. */
103 static int num_devices
;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp
;
108 /* OpenMP requires mask. */
109 static int omp_requires_mask
;
112 static void *gomp_page_locked_host_alloc_dev (struct gomp_device_descr
*,
114 static bool gomp_page_locked_host_free_dev (struct gomp_device_descr
*,
116 struct goacc_asyncqueue
*);
117 static void *gomp_page_locked_host_aligned_alloc_dev (struct gomp_device_descr
*,
119 static bool gomp_page_locked_host_aligned_free_dev (struct gomp_device_descr
*,
121 struct goacc_asyncqueue
*);
123 /* Use (that is, allocate or register) page-locked host memory for memory
124 objects participating in host <-> device memory transfers.
126 When this is enabled, there is no fallback to non-page-locked host
130 bool always_pinned_mode
= false;
132 /* This function is called by the compiler when -foffload-memory=pinned
136 GOMP_enable_pinned_mode ()
138 always_pinned_mode
= true;
141 /* Verify that page-locked host memory is used for memory objects participating
142 in host <-> device memory transfers. */
144 static const bool verify_always_pinned_mode
= false;
147 gomp_verify_always_pinned_mode (struct gomp_device_descr
*device
,
148 const void *ptr
, size_t size
)
150 gomp_debug (0, "%s: device=%p (%s), ptr=%p, size=%llu\n",
152 device
, device
->name
, ptr
, (unsigned long long) size
);
155 /* Skip zero-size requests; for those we've got no actual region of
156 page-locked host memory. */
158 else if (device
->page_locked_host_register_func
)
160 int page_locked_host_p
161 = device
->page_locked_host_p_func (device
->target_id
, ptr
, size
);
162 if (page_locked_host_p
< 0)
164 gomp_error ("Failed to test page-locked host memory"
165 " via %s libgomp plugin",
169 if (!page_locked_host_p
)
171 gomp_error ("Failed page-locked host memory test");
179 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
182 gomp_realloc_unlock (void *old
, size_t size
)
184 void *ret
= realloc (old
, size
);
187 gomp_mutex_unlock (®ister_lock
);
188 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
193 attribute_hidden
void
194 gomp_init_targets_once (void)
196 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
200 gomp_get_num_devices (void)
202 gomp_init_targets_once ();
203 return num_devices_openmp
;
206 static struct gomp_device_descr
*
207 resolve_device (int device_id
, bool remapped
)
209 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
211 struct gomp_task_icv
*icv
= gomp_icv (false);
212 device_id
= icv
->default_device_var
;
218 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
219 : omp_initial_device
))
221 if (device_id
== omp_invalid_device
)
222 gomp_fatal ("omp_invalid_device encountered");
223 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
224 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
225 "but device not found");
229 else if (device_id
>= gomp_get_num_devices ())
231 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
232 && device_id
!= num_devices_openmp
)
233 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
234 "but device not found");
239 gomp_mutex_lock (&devices
[device_id
].lock
);
240 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
241 gomp_init_device (&devices
[device_id
]);
242 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
244 gomp_mutex_unlock (&devices
[device_id
].lock
);
246 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
247 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
248 "but device is finalized");
252 gomp_mutex_unlock (&devices
[device_id
].lock
);
254 return &devices
[device_id
];
258 static inline splay_tree_key
259 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
261 if (key
->host_start
!= key
->host_end
)
262 return splay_tree_lookup (mem_map
, key
);
265 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
270 n
= splay_tree_lookup (mem_map
, key
);
274 return splay_tree_lookup (mem_map
, key
);
277 static inline reverse_splay_tree_key
278 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
280 return reverse_splay_tree_lookup (mem_map_rev
, key
);
283 static inline splay_tree_key
284 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
286 if (key
->host_start
!= key
->host_end
)
287 return splay_tree_lookup (mem_map
, key
);
290 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
296 gomp_device_copy (struct gomp_device_descr
*devicep
,
297 bool (*copy_func
) (int, void *, const void *, size_t),
298 const char *dst
, void *dstaddr
,
299 const char *src
, const void *srcaddr
,
302 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
304 gomp_mutex_unlock (&devicep
->lock
);
305 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
306 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
311 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
312 bool (*copy_func
) (int, void *, const void *, size_t,
313 struct goacc_asyncqueue
*),
314 const char *dst
, void *dstaddr
,
315 const char *src
, const void *srcaddr
,
316 const void *srcaddr_orig
,
317 size_t size
, struct goacc_asyncqueue
*aq
)
319 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
321 gomp_mutex_unlock (&devicep
->lock
);
322 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
323 gomp_fatal ("Copying of %s object [%p..%p)"
324 " via buffer %s object [%p..%p)"
325 " to %s object [%p..%p) failed",
326 src
, srcaddr_orig
, srcaddr_orig
+ size
,
327 src
, srcaddr
, srcaddr
+ size
,
328 dst
, dstaddr
, dstaddr
+ size
);
330 gomp_fatal ("Copying of %s object [%p..%p)"
331 " to %s object [%p..%p) failed",
332 src
, srcaddr
, srcaddr
+ size
,
333 dst
, dstaddr
, dstaddr
+ size
);
337 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
338 host to device memory transfers. */
340 struct gomp_coalesce_chunk
342 /* The starting and ending point of a coalesced chunk of memory. */
346 struct gomp_coalesce_buf
348 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
349 it will be copied to the device. */
351 struct target_mem_desc
*tgt
;
352 /* Array with offsets, chunks[i].start is the starting offset and
353 chunks[i].end ending offset relative to tgt->tgt_start device address
354 of chunks which are to be copied to buf and later copied to device. */
355 struct gomp_coalesce_chunk
*chunks
;
356 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
359 /* During construction of chunks array, how many memory regions are within
360 the last chunk. If there is just one memory region for a chunk, we copy
361 it directly to device rather than going through buf. */
365 /* Maximum size of memory region considered for coalescing. Larger copies
366 are performed directly. */
367 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
369 /* Maximum size of a gap in between regions to consider them being copied
370 within the same chunk. All the device offsets considered are within
371 newly allocated device memory, so it isn't fatal if we copy some padding
372 in between from host to device. The gaps come either from alignment
373 padding or from memory regions which are not supposed to be copied from
374 host to device (e.g. map(alloc:), map(from:) etc.). */
375 #define MAX_COALESCE_BUF_GAP (4 * 1024)
377 /* Add region with device tgt_start relative offset and length to CBUF.
379 This must not be used for asynchronous copies, because the host data might
380 not be computed yet (by an earlier asynchronous compute region, for
381 example). The exception is for EPHEMERAL data, that we know is available
382 already "by construction". */
385 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
387 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
391 if (cbuf
->chunk_cnt
< 0)
393 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
395 cbuf
->chunk_cnt
= -1;
398 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
400 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
404 /* If the last chunk is only used by one mapping, discard it,
405 as it will be one host to device copy anyway and
406 memcpying it around will only waste cycles. */
407 if (cbuf
->use_cnt
== 1)
410 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
411 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
416 /* Return true for mapping kinds which need to copy data from the
417 host to device for regions that weren't previously mapped. */
420 gomp_to_device_kind_p (int kind
)
426 case GOMP_MAP_FORCE_ALLOC
:
427 case GOMP_MAP_FORCE_FROM
:
428 case GOMP_MAP_ALWAYS_FROM
:
429 case GOMP_MAP_PRESENT_FROM
:
430 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
437 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
438 non-NULL), when the source data is stack or may otherwise be deallocated
439 before the asynchronous copy takes place, EPHEMERAL must be passed as
442 attribute_hidden
void
443 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
444 struct goacc_asyncqueue
*aq
,
445 void *d
, const void *h
, size_t sz
,
446 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
450 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
451 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
454 long last
= cbuf
->chunk_cnt
- 1;
455 while (first
<= last
)
457 long middle
= (first
+ last
) >> 1;
458 if (cbuf
->chunks
[middle
].end
<= doff
)
460 else if (cbuf
->chunks
[middle
].start
<= doff
)
462 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
464 gomp_mutex_unlock (&devicep
->lock
);
465 gomp_fatal ("internal libgomp cbuf error");
468 /* In an asynchronous context, verify that CBUF isn't used
469 with non-EPHEMERAL data; see 'gomp_coalesce_buf_add'. */
470 if (__builtin_expect (aq
!= NULL
, 0))
473 /* We're just filling the CBUF; 'always_pinned_mode' isn't
476 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
486 if (__builtin_expect (aq
!= NULL
, 0))
488 void *h_buf
= (void *) h
;
491 /* We're queueing up an asynchronous copy from data that may
492 disappear before the transfer takes place (i.e. because it is a
493 stack local in a function that is no longer executing). As we've
494 not been able to use CBUF, make a copy of the data into a
496 if (always_pinned_mode
)
498 h_buf
= gomp_page_locked_host_alloc_dev (devicep
, sz
, false);
501 gomp_mutex_unlock (&devicep
->lock
);
506 h_buf
= gomp_malloc (sz
);
507 memcpy (h_buf
, h
, sz
);
510 /* No 'gomp_verify_always_pinned_mode' for 'ephemeral'; have just
513 && verify_always_pinned_mode
514 && always_pinned_mode
)
515 if (!gomp_verify_always_pinned_mode (devicep
, h_buf
, sz
))
517 gomp_mutex_unlock (&devicep
->lock
);
521 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
522 "dev", d
, "host", h_buf
, h
, sz
, aq
);
526 if (always_pinned_mode
)
528 if (!gomp_page_locked_host_free_dev (devicep
, h_buf
, aq
))
530 gomp_mutex_unlock (&devicep
->lock
);
535 /* Free once the transfer has completed. */
536 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
542 && always_pinned_mode
)
544 /* TODO: Page-locking on the spot probably doesn't make a lot of
545 sense (performance-wise). Should we instead use a "page-locked
546 host memory bounce buffer" (per host thread, or per device,
548 void *ptr
= (void *) h
;
549 int page_locked_host_p
550 = gomp_page_locked_host_register_dev (devicep
,
551 ptr
, sz
, GOMP_MAP_TO
);
552 if (page_locked_host_p
< 0)
554 gomp_mutex_unlock (&devicep
->lock
);
557 /* Ephemeral data isn't already page-locked host memory. */
558 assert (page_locked_host_p
);
560 else if (verify_always_pinned_mode
561 && always_pinned_mode
)
562 if (!gomp_verify_always_pinned_mode (devicep
, h
, sz
))
564 gomp_mutex_unlock (&devicep
->lock
);
568 gomp_device_copy (devicep
, devicep
->host2dev_func
,
569 "dev", d
, "host", h
, sz
);
572 && always_pinned_mode
)
574 void *ptr
= (void *) h
;
575 if (!gomp_page_locked_host_unregister_dev (devicep
, ptr
, sz
, aq
))
577 gomp_mutex_unlock (&devicep
->lock
);
584 attribute_hidden
void
585 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
586 struct goacc_asyncqueue
*aq
,
587 void *h
, const void *d
, size_t sz
)
589 if (verify_always_pinned_mode
590 && always_pinned_mode
)
591 if (!gomp_verify_always_pinned_mode (devicep
, h
, sz
))
593 gomp_mutex_unlock (&devicep
->lock
);
597 if (__builtin_expect (aq
!= NULL
, 0))
598 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
599 "host", h
, "dev", d
, NULL
, sz
, aq
);
601 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
605 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
607 if (!devicep
->free_func (devicep
->target_id
, devptr
))
609 gomp_mutex_unlock (&devicep
->lock
);
610 gomp_fatal ("error in freeing device memory block at %p", devptr
);
614 /* Increment reference count of a splay_tree_key region K by 1.
615 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
616 increment the value if refcount is not yet contained in the set (used for
617 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
618 once for each construct). */
621 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
623 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
626 uintptr_t *refcount_ptr
= &k
->refcount
;
628 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
629 refcount_ptr
= &k
->structelem_refcount
;
630 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
631 refcount_ptr
= k
->structelem_refcount_ptr
;
635 if (htab_find (*refcount_set
, refcount_ptr
))
637 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
638 *slot
= refcount_ptr
;
645 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
646 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
647 track already seen refcounts, and only adjust the value if refcount is not
648 yet contained in the set (like gomp_increment_refcount).
650 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
651 it is already zero and we know we decremented it earlier. This signals that
652 associated maps should be copied back to host.
654 *DO_REMOVE is set to true when this is the first handling of this refcount
655 and we are setting it to zero. This signals a removal of this key from the
658 Copy and removal are separated due to cases like handling of structure
659 elements, e.g. each map of a structure element representing a possible copy
660 out of a structure field has to be handled individually, but we only signal
661 removal for one (the first encountered) sibling map. */
664 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
665 bool *do_copy
, bool *do_remove
)
667 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
669 *do_copy
= *do_remove
= false;
673 uintptr_t *refcount_ptr
= &k
->refcount
;
675 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
676 refcount_ptr
= &k
->structelem_refcount
;
677 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
678 refcount_ptr
= k
->structelem_refcount_ptr
;
680 bool new_encountered_refcount
;
681 bool set_to_zero
= false;
682 bool is_zero
= false;
684 uintptr_t orig_refcount
= *refcount_ptr
;
688 if (htab_find (*refcount_set
, refcount_ptr
))
690 new_encountered_refcount
= false;
694 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
695 *slot
= refcount_ptr
;
696 new_encountered_refcount
= true;
699 /* If no refcount_set being used, assume all keys are being decremented
700 for the first time. */
701 new_encountered_refcount
= true;
705 else if (*refcount_ptr
> 0)
709 if (*refcount_ptr
== 0)
711 if (orig_refcount
> 0)
717 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
718 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
721 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
722 gomp_map_0len_lookup found oldn for newn.
723 Helper function of gomp_map_vars. */
726 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
727 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
728 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
729 unsigned char kind
, bool always_to_flag
, bool implicit
,
730 struct gomp_coalesce_buf
*cbuf
,
731 htab_t
*refcount_set
)
733 assert (kind
!= GOMP_MAP_ATTACH
734 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
737 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
738 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
739 tgt_var
->is_attach
= false;
740 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
742 /* For implicit maps, old contained in new is valid. */
743 bool implicit_subset
= (implicit
744 && newn
->host_start
<= oldn
->host_start
745 && oldn
->host_end
<= newn
->host_end
);
747 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
749 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
751 if (GOMP_MAP_FORCE_P (kind
)
752 /* For implicit maps, old contained in new is valid. */
754 /* Otherwise, new contained inside old is considered valid. */
755 || (oldn
->host_start
<= newn
->host_start
756 && newn
->host_end
<= oldn
->host_end
)))
758 gomp_mutex_unlock (&devicep
->lock
);
759 gomp_fatal ("Trying to map into device [%p..%p) object when "
760 "[%p..%p) is already mapped",
761 (void *) newn
->host_start
, (void *) newn
->host_end
,
762 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
765 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
767 /* Implicit + always should not happen. If this does occur, below
768 address/length adjustment is a TODO. */
769 assert (!implicit_subset
);
771 if (oldn
->aux
&& oldn
->aux
->attach_count
)
773 /* We have to be careful not to overwrite still attached pointers
774 during the copyback to host. */
775 uintptr_t addr
= newn
->host_start
;
776 while (addr
< newn
->host_end
)
778 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
779 if (oldn
->aux
->attach_count
[i
] == 0)
780 gomp_copy_host2dev (devicep
, aq
,
781 (void *) (oldn
->tgt
->tgt_start
783 + addr
- oldn
->host_start
),
785 sizeof (void *), false, cbuf
);
786 addr
+= sizeof (void *);
790 gomp_copy_host2dev (devicep
, aq
,
791 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
792 + newn
->host_start
- oldn
->host_start
),
793 (void *) newn
->host_start
,
794 newn
->host_end
- newn
->host_start
, false, cbuf
);
797 gomp_increment_refcount (oldn
, refcount_set
);
801 get_kind (bool short_mapkind
, void *kinds
, int idx
)
804 return ((unsigned char *) kinds
)[idx
];
806 int val
= ((unsigned short *) kinds
)[idx
];
807 if (GOMP_MAP_IMPLICIT_P (val
))
808 val
&= ~GOMP_MAP_IMPLICIT
;
814 get_implicit (bool short_mapkind
, void *kinds
, int idx
)
819 int val
= ((unsigned short *) kinds
)[idx
];
820 return GOMP_MAP_IMPLICIT_P (val
);
824 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
825 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
826 struct gomp_coalesce_buf
*cbuf
,
827 bool allow_zero_length_array_sections
)
829 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
830 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
831 struct splay_tree_key_s cur_node
;
833 cur_node
.host_start
= host_ptr
;
834 if (cur_node
.host_start
== (uintptr_t) NULL
)
836 cur_node
.tgt_offset
= (uintptr_t) NULL
;
837 gomp_copy_host2dev (devicep
, aq
,
838 (void *) (tgt
->tgt_start
+ target_offset
),
839 (void *) &cur_node
.tgt_offset
, sizeof (void *),
843 /* Add bias to the pointer value. */
844 cur_node
.host_start
+= bias
;
845 cur_node
.host_end
= cur_node
.host_start
;
846 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
849 if (allow_zero_length_array_sections
)
850 cur_node
.tgt_offset
= 0;
851 else if (devicep
->is_usm_ptr_func
852 && devicep
->is_usm_ptr_func ((void*)cur_node
.host_start
))
853 cur_node
.tgt_offset
= cur_node
.host_start
;
856 gomp_mutex_unlock (&devicep
->lock
);
857 gomp_fatal ("Pointer target of array section wasn't mapped");
862 cur_node
.host_start
-= n
->host_start
;
864 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
865 /* At this point tgt_offset is target address of the
866 array section. Now subtract bias to get what we want
867 to initialize the pointer with. */
868 cur_node
.tgt_offset
-= bias
;
870 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
871 (void *) &cur_node
.tgt_offset
, sizeof (void *),
876 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
877 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
878 size_t first
, size_t i
, void **hostaddrs
,
879 size_t *sizes
, void *kinds
,
880 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
882 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
883 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
884 struct splay_tree_key_s cur_node
;
887 const bool short_mapkind
= true;
888 const int typemask
= short_mapkind
? 0xff : 0x7;
890 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
891 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
892 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
893 kind
= get_kind (short_mapkind
, kinds
, i
);
894 implicit
= get_implicit (short_mapkind
, kinds
, i
);
897 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
899 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
900 kind
& typemask
, false, implicit
, cbuf
,
906 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
908 cur_node
.host_start
--;
909 n2
= splay_tree_lookup (mem_map
, &cur_node
);
910 cur_node
.host_start
++;
913 && n2
->host_start
- n
->host_start
914 == n2
->tgt_offset
- n
->tgt_offset
)
916 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
917 kind
& typemask
, false, implicit
, cbuf
,
923 n2
= splay_tree_lookup (mem_map
, &cur_node
);
927 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
929 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
930 kind
& typemask
, false, implicit
, cbuf
,
935 gomp_mutex_unlock (&devicep
->lock
);
936 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
937 "other mapped elements from the same structure weren't mapped "
938 "together with it", (void *) cur_node
.host_start
,
939 (void *) cur_node
.host_end
);
942 attribute_hidden
void
943 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
944 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
945 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
946 struct gomp_coalesce_buf
*cbufp
,
947 bool allow_zero_length_array_sections
)
949 struct splay_tree_key_s s
;
954 gomp_mutex_unlock (&devicep
->lock
);
955 gomp_fatal ("enclosing struct not mapped for attach");
958 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
959 /* We might have a pointer in a packed struct: however we cannot have more
960 than one such pointer in each pointer-sized portion of the struct, so
962 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
965 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
967 if (!n
->aux
->attach_count
)
969 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
971 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
972 n
->aux
->attach_count
[idx
]++;
975 gomp_mutex_unlock (&devicep
->lock
);
976 gomp_fatal ("attach count overflow");
979 if (n
->aux
->attach_count
[idx
] == 1)
981 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
983 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
987 if ((void *) target
== NULL
)
989 gomp_mutex_unlock (&devicep
->lock
);
990 gomp_fatal ("attempt to attach null pointer");
993 if (devicep
->is_usm_ptr_func
994 && devicep
->is_usm_ptr_func ((void*)(target
+ bias
)))
995 /* Nothing to do here. */
998 s
.host_start
= target
+ bias
;
999 s
.host_end
= s
.host_start
+ 1;
1000 tn
= splay_tree_lookup (mem_map
, &s
);
1004 if (allow_zero_length_array_sections
)
1005 /* When allowing attachment to zero-length array sections, we
1006 allow attaching to NULL pointers when the target region is not
1011 gomp_mutex_unlock (&devicep
->lock
);
1012 gomp_fatal ("pointer target not mapped for attach");
1016 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
1019 "%s: attaching host %p, target %p (struct base %p) to %p\n",
1020 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
1021 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
1023 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
1024 sizeof (void *), true, cbufp
);
1027 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
1028 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
1031 attribute_hidden
void
1032 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
1033 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
1034 uintptr_t detach_from
, bool finalize
,
1035 struct gomp_coalesce_buf
*cbufp
)
1041 gomp_mutex_unlock (&devicep
->lock
);
1042 gomp_fatal ("enclosing struct not mapped for detach");
1045 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
1047 if (!n
->aux
|| !n
->aux
->attach_count
)
1049 gomp_mutex_unlock (&devicep
->lock
);
1050 gomp_fatal ("no attachment counters for struct");
1054 n
->aux
->attach_count
[idx
] = 1;
1056 if (n
->aux
->attach_count
[idx
] == 0)
1058 gomp_mutex_unlock (&devicep
->lock
);
1059 gomp_fatal ("attach count underflow");
1062 n
->aux
->attach_count
[idx
]--;
1064 if (n
->aux
->attach_count
[idx
] == 0)
1066 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
1068 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
1071 "%s: detaching host %p, target %p (struct base %p) to %p\n",
1072 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
1073 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
1076 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
1077 sizeof (void *), true, cbufp
);
1080 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
1081 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
1084 attribute_hidden
uintptr_t
1085 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
1087 if (tgt
->list
[i
].key
!= NULL
)
1088 return tgt
->list
[i
].key
->tgt
->tgt_start
1089 + tgt
->list
[i
].key
->tgt_offset
1090 + tgt
->list
[i
].offset
;
1092 switch (tgt
->list
[i
].offset
)
1094 case OFFSET_INLINED
:
1096 return (uintptr_t) hostaddrs
[i
];
1098 case OFFSET_POINTER
:
1102 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
1103 + tgt
->list
[i
+ 1].key
->tgt_offset
1104 + tgt
->list
[i
+ 1].offset
1105 + (uintptr_t) hostaddrs
[i
]
1106 - (uintptr_t) hostaddrs
[i
+ 1];
1109 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
1113 static inline __attribute__((always_inline
)) struct target_mem_desc
*
1114 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
1115 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1116 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1117 void *kinds
, struct goacc_ncarray_info
*nca_info
,
1118 bool short_mapkind
, htab_t
*refcount_set
,
1119 enum gomp_map_vars_kind pragma_kind
)
1121 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
1122 size_t nca_data_row_num
= (nca_info
? nca_info
->num_data_rows
: 0);
1123 bool has_firstprivate
= false;
1124 bool has_always_ptrset
= false;
1125 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
1126 const int rshift
= short_mapkind
? 8 : 3;
1127 const int typemask
= short_mapkind
? 0xff : 0x7;
1128 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
1129 struct splay_tree_key_s cur_node
;
1130 struct target_mem_desc
*tgt
1131 = gomp_malloc (sizeof (*tgt
)
1132 + sizeof (tgt
->list
[0]) * (mapnum
+ nca_data_row_num
));
1133 tgt
->list_count
= mapnum
+ nca_data_row_num
;
1134 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
1135 tgt
->device_descr
= devicep
;
1137 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
1146 tgt_align
= sizeof (void *);
1149 cbuf
.chunk_cnt
= -1;
1152 if (mapnum
> 1 || (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1154 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
1155 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
1158 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1160 size_t align
= 4 * sizeof (void *);
1162 tgt_size
= mapnum
* sizeof (void *);
1164 cbuf
.use_cnt
= 1 + (mapnum
> 1);
1165 cbuf
.chunks
[0].start
= 0;
1166 cbuf
.chunks
[0].end
= tgt_size
;
1169 gomp_mutex_lock (&devicep
->lock
);
1170 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1172 gomp_mutex_unlock (&devicep
->lock
);
1177 for (i
= 0; i
< mapnum
; i
++)
1179 int kind
= get_kind (short_mapkind
, kinds
, i
);
1180 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1181 tgt
->list
[i
].offset
= 0;
1182 if (hostaddrs
[i
] == NULL
1183 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1185 tgt
->list
[i
].key
= NULL
;
1186 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1189 else if (devicep
->is_usm_ptr_func
1190 && devicep
->is_usm_ptr_func (hostaddrs
[i
]))
1192 /* The memory is visible from both host and target
1193 so nothing needs to be moved. */
1194 tgt
->list
[i
].key
= NULL
;
1195 tgt
->list
[i
].offset
= OFFSET_USM
;
1198 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1199 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1201 tgt
->list
[i
].key
= NULL
;
1204 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1205 on a separate construct prior to using use_device_{addr,ptr}.
1206 In OpenMP 5.0, map directives need to be ordered by the
1207 middle-end before the use_device_* clauses. If
1208 !not_found_cnt, all mappings requested (if any) are already
1209 mapped, so use_device_{addr,ptr} can be resolved right away.
1210 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1211 now but would succeed after performing the mappings in the
1212 following loop. We can't defer this always to the second
1213 loop, because it is not even invoked when !not_found_cnt
1214 after the first loop. */
1215 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1216 cur_node
.host_end
= cur_node
.host_start
;
1217 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1220 cur_node
.host_start
-= n
->host_start
;
1222 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1223 + cur_node
.host_start
);
1225 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1227 gomp_mutex_unlock (&devicep
->lock
);
1228 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1230 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1231 /* If not present, continue using the host address. */
1234 __builtin_unreachable ();
1235 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1238 tgt
->list
[i
].offset
= 0;
1241 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1243 size_t first
= i
+ 1;
1244 size_t last
= i
+ sizes
[i
];
1245 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1246 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1248 tgt
->list
[i
].key
= NULL
;
1249 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1250 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1253 size_t align
= (size_t) 1 << (kind
>> rshift
);
1254 if (tgt_align
< align
)
1256 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1257 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1258 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1259 not_found_cnt
+= last
- i
;
1260 for (i
= first
; i
<= last
; i
++)
1262 tgt
->list
[i
].key
= NULL
;
1264 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1266 gomp_coalesce_buf_add (&cbuf
,
1267 tgt_size
- cur_node
.host_end
1268 + (uintptr_t) hostaddrs
[i
],
1274 for (i
= first
; i
<= last
; i
++)
1275 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1276 sizes
, kinds
, NULL
, refcount_set
);
1280 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1282 tgt
->list
[i
].key
= NULL
;
1283 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1284 has_firstprivate
= true;
1287 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1288 || ((kind
& typemask
)
1289 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1291 tgt
->list
[i
].key
= NULL
;
1292 has_firstprivate
= true;
1295 else if (GOMP_MAP_NONCONTIG_ARRAY_P (kind
& typemask
))
1297 /* Ignore non-contiguous arrays for now, we process them together
1299 tgt
->list
[i
].key
= NULL
;
1300 tgt
->list
[i
].offset
= 0;
1303 /* The map for the non-contiguous array itself is never copied from
1304 during unmapping, its the data rows that count. Set copy-from
1305 flags to false here. */
1306 tgt
->list
[i
].copy_from
= false;
1307 tgt
->list
[i
].always_copy_from
= false;
1308 tgt
->list
[i
].is_attach
= false;
1310 size_t align
= (size_t) 1 << (kind
>> rshift
);
1311 if (tgt_align
< align
)
1317 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1318 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1319 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1321 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1322 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1324 tgt
->list
[i
].key
= NULL
;
1326 size_t align
= (size_t) 1 << (kind
>> rshift
);
1327 if (tgt_align
< align
)
1329 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1331 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1332 cur_node
.host_end
- cur_node
.host_start
);
1333 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1334 has_firstprivate
= true;
1338 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1340 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1343 tgt
->list
[i
].key
= NULL
;
1344 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1349 n
= splay_tree_lookup (mem_map
, &cur_node
);
1350 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1352 int always_to_cnt
= 0;
1353 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1355 bool has_nullptr
= false;
1357 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1358 if (n
->tgt
->list
[j
].key
== n
)
1360 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1363 if (n
->tgt
->list_count
== 0)
1365 /* 'declare target'; assume has_nullptr; it could also be
1366 statically assigned pointer, but that it should be to
1367 the equivalent variable on the host. */
1368 assert (n
->refcount
== REFCOUNT_INFINITY
);
1372 assert (j
< n
->tgt
->list_count
);
1373 /* Re-map the data if there is an 'always' modifier or if it a
1374 null pointer was there and non a nonnull has been found; that
1375 permits transparent re-mapping for Fortran array descriptors
1376 which were previously mapped unallocated. */
1377 for (j
= i
+ 1; j
< mapnum
; j
++)
1379 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1380 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1382 || !GOMP_MAP_POINTER_P (ptr_kind
)
1383 || *(void **) hostaddrs
[j
] == NULL
))
1385 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1386 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1387 > cur_node
.host_end
))
1391 has_always_ptrset
= true;
1396 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1397 kind
& typemask
, always_to_cnt
> 0, implicit
,
1398 NULL
, refcount_set
);
1403 tgt
->list
[i
].key
= NULL
;
1405 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1407 /* Not present, hence, skip entry - including its MAP_POINTER,
1409 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1411 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1412 == GOMP_MAP_POINTER
))
1415 tgt
->list
[i
].key
= NULL
;
1416 tgt
->list
[i
].offset
= 0;
1420 size_t align
= (size_t) 1 << (kind
>> rshift
);
1422 if (tgt_align
< align
)
1424 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1426 && gomp_to_device_kind_p (kind
& typemask
))
1427 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1428 cur_node
.host_end
- cur_node
.host_start
);
1429 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1430 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1434 for (j
= i
+ 1; j
< mapnum
; j
++)
1435 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1436 kinds
, j
)) & typemask
))
1437 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1439 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1440 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1441 > cur_node
.host_end
))
1445 tgt
->list
[j
].key
= NULL
;
1452 /* For non-contiguous arrays. Each data row is one target item, separated
1453 from the normal map clause items, hence we order them after mapnum. */
1456 struct target_var_desc
*next_var_desc
= &tgt
->list
[mapnum
];
1457 for (i
= 0; i
< nca_info
->num_ncarray
; i
++)
1459 struct goacc_ncarray
*nca
= &nca_info
->ncarray
[i
];
1460 int kind
= get_kind (short_mapkind
, kinds
, nca
->map_index
);
1461 size_t align
= (size_t) 1 << (kind
>> rshift
);
1462 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1463 tgt_size
+= nca
->ptrblock_size
;
1465 for (size_t j
= 0; j
< nca
->data_row_num
; j
++)
1467 struct target_var_desc
*row_desc
= next_var_desc
++;
1468 void *row
= nca
->data_rows
[j
];
1469 cur_node
.host_start
= (uintptr_t) row
;
1470 cur_node
.host_end
= cur_node
.host_start
+ nca
->data_row_size
;
1471 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1474 assert (n
->refcount
!= REFCOUNT_LINK
);
1475 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, row_desc
,
1476 kind
& typemask
, false, false,
1477 /* TODO: cbuf? */ NULL
,
1482 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1483 tgt_size
+= nca
->data_row_size
;
1488 assert (next_var_desc
== &tgt
->list
[mapnum
+ nca_info
->num_data_rows
]);
1495 gomp_mutex_unlock (&devicep
->lock
);
1496 gomp_fatal ("unexpected aggregation");
1498 tgt
->to_free
= devaddrs
[0];
1499 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1500 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1502 else if (not_found_cnt
|| (pragma_kind
& GOMP_MAP_VARS_TARGET
))
1504 /* Allocate tgt_align aligned tgt_size block of memory. */
1505 /* FIXME: Perhaps change interface to allocate properly aligned
1507 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1508 tgt_size
+ tgt_align
- 1);
1511 gomp_mutex_unlock (&devicep
->lock
);
1512 gomp_fatal ("device memory allocation fail");
1515 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1516 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1517 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1519 if (cbuf
.use_cnt
== 1)
1521 if (cbuf
.chunk_cnt
> 0)
1524 = cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
;
1525 if (always_pinned_mode
)
1527 cbuf
.buf
= gomp_page_locked_host_alloc_dev (devicep
, sz
, false);
1530 gomp_mutex_unlock (&devicep
->lock
);
1531 exit (EXIT_FAILURE
);
1535 cbuf
.buf
= malloc (sz
);
1545 tgt
->to_free
= NULL
;
1551 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
1552 tgt_size
= mapnum
* sizeof (void *);
1555 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1558 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1559 splay_tree_node array
= tgt
->array
;
1560 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1561 uintptr_t field_tgt_base
= 0;
1562 splay_tree_key field_tgt_structelem_first
= NULL
;
1564 for (i
= 0; i
< mapnum
; i
++)
1565 if (has_always_ptrset
1567 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1568 == GOMP_MAP_TO_PSET
)
1570 splay_tree_key k
= tgt
->list
[i
].key
;
1571 bool has_nullptr
= false;
1573 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1574 if (k
->tgt
->list
[j
].key
== k
)
1576 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1579 if (k
->tgt
->list_count
== 0)
1582 assert (j
< k
->tgt
->list_count
);
1584 tgt
->list
[i
].has_null_ptr_assoc
= false;
1585 for (j
= i
+ 1; j
< mapnum
; j
++)
1587 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1588 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1590 || !GOMP_MAP_POINTER_P (ptr_kind
)
1591 || *(void **) hostaddrs
[j
] == NULL
))
1593 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1594 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1599 if (*(void **) hostaddrs
[j
] == NULL
)
1600 tgt
->list
[i
].has_null_ptr_assoc
= true;
1601 tgt
->list
[j
].key
= k
;
1602 tgt
->list
[j
].copy_from
= false;
1603 tgt
->list
[j
].always_copy_from
= false;
1604 tgt
->list
[j
].is_attach
= false;
1605 gomp_increment_refcount (k
, refcount_set
);
1606 gomp_map_pointer (k
->tgt
, aq
,
1607 (uintptr_t) *(void **) hostaddrs
[j
],
1608 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1610 sizes
[j
], cbufp
, false);
1615 else if (tgt
->list
[i
].key
== NULL
)
1617 int kind
= get_kind (short_mapkind
, kinds
, i
);
1618 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1619 if (hostaddrs
[i
] == NULL
)
1621 if (tgt
->list
[i
].offset
== OFFSET_USM
)
1623 switch (kind
& typemask
)
1625 size_t align
, len
, first
, last
;
1627 case GOMP_MAP_FIRSTPRIVATE
:
1628 align
= (size_t) 1 << (kind
>> rshift
);
1629 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1630 tgt
->list
[i
].offset
= tgt_size
;
1632 gomp_copy_host2dev (devicep
, aq
,
1633 (void *) (tgt
->tgt_start
+ tgt_size
),
1634 (void *) hostaddrs
[i
], len
, false, cbufp
);
1635 /* Save device address in hostaddr to permit latter availablity
1636 when doing a deep-firstprivate with pointer attach. */
1637 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1640 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1641 firstprivate to hostaddrs[i+1], which is assumed to contain a
1645 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1647 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1648 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1650 <https://inbox.sourceware.org/gcc-patches/87o7pe12ke.fsf@euler.schwinge.homeip.net>
1651 "OpenMP: Handle descriptors in target's firstprivate [PR104949]"
1652 this probably needs revision for 'aq' usage. */
1654 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1655 sizeof (void *), false, cbufp
);
1659 case GOMP_MAP_FIRSTPRIVATE_INT
:
1660 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1662 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1663 /* The OpenACC 'host_data' construct only allows 'use_device'
1664 "mapping" clauses, so in the first loop, 'not_found_cnt'
1665 must always have been zero, so all OpenACC 'use_device'
1666 clauses have already been handled. (We can only easily test
1667 'use_device' with 'if_present' clause here.) */
1668 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1669 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1670 code conceptually simple, similar to the first loop. */
1671 case GOMP_MAP_USE_DEVICE_PTR
:
1672 if (tgt
->list
[i
].offset
== 0)
1674 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1675 cur_node
.host_end
= cur_node
.host_start
;
1676 n
= gomp_map_lookup (mem_map
, &cur_node
);
1679 cur_node
.host_start
-= n
->host_start
;
1681 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1682 + cur_node
.host_start
);
1684 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1686 gomp_mutex_unlock (&devicep
->lock
);
1687 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1689 else if ((kind
& typemask
)
1690 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1691 /* If not present, continue using the host address. */
1694 __builtin_unreachable ();
1695 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1698 case GOMP_MAP_STRUCT
:
1700 last
= i
+ sizes
[i
];
1701 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1702 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1704 if (tgt
->list
[first
].key
!= NULL
)
1706 n
= splay_tree_lookup (mem_map
, &cur_node
);
1709 size_t align
= (size_t) 1 << (kind
>> rshift
);
1710 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1711 - (uintptr_t) hostaddrs
[i
];
1712 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1713 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1714 - (uintptr_t) hostaddrs
[i
];
1715 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1716 field_tgt_offset
= tgt_size
;
1717 field_tgt_clear
= last
;
1718 field_tgt_structelem_first
= NULL
;
1719 tgt_size
+= cur_node
.host_end
1720 - (uintptr_t) hostaddrs
[first
];
1723 for (i
= first
; i
<= last
; i
++)
1724 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1725 sizes
, kinds
, cbufp
, refcount_set
);
1728 case GOMP_MAP_ALWAYS_POINTER
:
1729 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1730 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1731 n
= splay_tree_lookup (mem_map
, &cur_node
);
1733 || n
->host_start
> cur_node
.host_start
1734 || n
->host_end
< cur_node
.host_end
)
1736 gomp_mutex_unlock (&devicep
->lock
);
1737 gomp_fatal ("always pointer not mapped");
1740 && ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1741 != GOMP_MAP_ALWAYS_POINTER
))
1742 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1743 if (cur_node
.tgt_offset
)
1744 cur_node
.tgt_offset
-= sizes
[i
];
1745 gomp_copy_host2dev (devicep
, aq
,
1746 (void *) (n
->tgt
->tgt_start
1748 + cur_node
.host_start
1750 (void *) &cur_node
.tgt_offset
,
1751 sizeof (void *), true, cbufp
);
1752 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1753 + cur_node
.host_start
- n
->host_start
;
1755 case GOMP_MAP_IF_PRESENT
:
1756 /* Not present - otherwise handled above. Skip over its
1757 MAP_POINTER as well. */
1759 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1760 == GOMP_MAP_POINTER
))
1763 case GOMP_MAP_ATTACH
:
1764 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1766 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1767 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1768 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1771 tgt
->list
[i
].key
= n
;
1772 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1773 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1774 tgt
->list
[i
].copy_from
= false;
1775 tgt
->list
[i
].always_copy_from
= false;
1776 tgt
->list
[i
].is_attach
= true;
1777 /* OpenACC 'attach'/'detach' doesn't affect
1778 structured/dynamic reference counts ('n->refcount',
1779 'n->dynamic_refcount'). */
1782 = ((kind
& typemask
)
1783 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1784 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1785 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1788 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1790 gomp_mutex_unlock (&devicep
->lock
);
1791 gomp_fatal ("outer struct not mapped for attach");
1796 if (tgt
->list
[i
].offset
== OFFSET_INLINED
1802 if (GOMP_MAP_NONCONTIG_ARRAY_P (kind
& typemask
))
1804 tgt
->list
[i
].key
= &array
->key
;
1805 tgt
->list
[i
].key
->tgt
= tgt
;
1810 splay_tree_key k
= &array
->key
;
1811 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1812 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1813 k
->host_end
= k
->host_start
+ sizes
[i
];
1815 k
->host_end
= k
->host_start
+ sizeof (void *);
1816 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1817 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1818 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1819 kind
& typemask
, false, implicit
, cbufp
,
1824 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1826 /* Replace target address of the pointer with target address
1827 of mapped object in the splay tree. */
1828 splay_tree_remove (mem_map
, n
);
1830 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1831 k
->aux
->link_key
= n
;
1833 size_t align
= (size_t) 1 << (kind
>> rshift
);
1834 tgt
->list
[i
].key
= k
;
1837 k
->dynamic_refcount
= 0;
1838 k
->page_locked_host_p
= false;
1839 if (always_pinned_mode
)
1841 void *ptr
= (void *) k
->host_start
;
1842 size_t size
= k
->host_end
- k
->host_start
;
1843 int page_locked_host_p
= 0;
1845 page_locked_host_p
= gomp_page_locked_host_register_dev
1846 (devicep
, ptr
, size
, kind
& typemask
);
1847 if (page_locked_host_p
< 0)
1849 gomp_mutex_unlock (&devicep
->lock
);
1850 exit (EXIT_FAILURE
);
1852 if (page_locked_host_p
)
1853 k
->page_locked_host_p
= true;
1855 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1857 k
->tgt_offset
= k
->host_start
- field_tgt_base
1861 k
->refcount
= REFCOUNT_STRUCTELEM
;
1862 if (field_tgt_structelem_first
== NULL
)
1864 /* Set to first structure element of sequence. */
1865 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1866 field_tgt_structelem_first
= k
;
1869 /* Point to refcount of leading element, but do not
1871 k
->structelem_refcount_ptr
1872 = &field_tgt_structelem_first
->structelem_refcount
;
1874 if (i
== field_tgt_clear
)
1876 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1877 field_tgt_structelem_first
= NULL
;
1880 if (i
== field_tgt_clear
)
1881 field_tgt_clear
= FIELD_TGT_EMPTY
;
1885 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1886 k
->tgt_offset
= tgt_size
;
1887 tgt_size
+= k
->host_end
- k
->host_start
;
1889 /* First increment, from 0 to 1. gomp_increment_refcount
1890 encapsulates the different increment cases, so use this
1891 instead of directly setting 1 during initialization. */
1892 gomp_increment_refcount (k
, refcount_set
);
1894 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1895 tgt
->list
[i
].always_copy_from
1896 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1897 tgt
->list
[i
].is_attach
= false;
1898 tgt
->list
[i
].offset
= 0;
1899 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1902 array
->right
= NULL
;
1903 splay_tree_insert (mem_map
, array
);
1904 switch (kind
& typemask
)
1906 case GOMP_MAP_ALLOC
:
1908 case GOMP_MAP_FORCE_ALLOC
:
1909 case GOMP_MAP_FORCE_FROM
:
1910 case GOMP_MAP_ALWAYS_FROM
:
1913 case GOMP_MAP_TOFROM
:
1914 case GOMP_MAP_FORCE_TO
:
1915 case GOMP_MAP_FORCE_TOFROM
:
1916 case GOMP_MAP_ALWAYS_TO
:
1917 case GOMP_MAP_ALWAYS_TOFROM
:
1918 gomp_copy_host2dev (devicep
, aq
,
1919 (void *) (tgt
->tgt_start
1921 (void *) k
->host_start
,
1922 k
->host_end
- k
->host_start
,
1925 case GOMP_MAP_POINTER
:
1926 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1928 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1929 k
->tgt_offset
, sizes
[i
], cbufp
,
1931 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1933 case GOMP_MAP_TO_PSET
:
1934 gomp_copy_host2dev (devicep
, aq
,
1935 (void *) (tgt
->tgt_start
1937 (void *) k
->host_start
,
1938 k
->host_end
- k
->host_start
,
1940 tgt
->list
[i
].has_null_ptr_assoc
= false;
1942 for (j
= i
+ 1; j
< mapnum
; j
++)
1944 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1946 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1947 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1949 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1950 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1955 tgt
->list
[j
].key
= k
;
1956 tgt
->list
[j
].copy_from
= false;
1957 tgt
->list
[j
].always_copy_from
= false;
1958 tgt
->list
[j
].is_attach
= false;
1959 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1960 /* For OpenMP, the use of refcount_sets causes
1961 errors if we set k->refcount = 1 above but also
1962 increment it again here, for decrementing will
1963 not properly match, since we decrement only once
1964 for each key's refcount. Therefore avoid this
1965 increment for OpenMP constructs. */
1967 gomp_increment_refcount (k
, refcount_set
);
1968 gomp_map_pointer (tgt
, aq
,
1969 (uintptr_t) *(void **) hostaddrs
[j
],
1971 + ((uintptr_t) hostaddrs
[j
]
1973 sizes
[j
], cbufp
, false);
1978 case GOMP_MAP_FORCE_PRESENT
:
1980 /* We already looked up the memory region above and it
1982 size_t size
= k
->host_end
- k
->host_start
;
1983 gomp_mutex_unlock (&devicep
->lock
);
1984 #ifdef HAVE_INTTYPES_H
1985 gomp_fatal ("present clause: !acc_is_present (%p, "
1986 "%"PRIu64
" (0x%"PRIx64
"))",
1987 (void *) k
->host_start
,
1988 (uint64_t) size
, (uint64_t) size
);
1990 gomp_fatal ("present clause: !acc_is_present (%p, "
1991 "%lu (0x%lx))", (void *) k
->host_start
,
1992 (unsigned long) size
, (unsigned long) size
);
1996 case GOMP_MAP_PRESENT_ALLOC
:
1997 case GOMP_MAP_PRESENT_TO
:
1998 case GOMP_MAP_PRESENT_FROM
:
1999 case GOMP_MAP_PRESENT_TOFROM
:
2000 case GOMP_MAP_ALWAYS_PRESENT_TO
:
2001 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
2002 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
2003 /* We already looked up the memory region above and it
2005 gomp_mutex_unlock (&devicep
->lock
);
2006 gomp_fatal ("present clause: !omp_target_is_present "
2008 (void *) k
->host_start
, devicep
->target_id
);
2010 case GOMP_MAP_FORCE_DEVICEPTR
:
2011 assert (k
->host_end
- k
->host_start
== sizeof (void *));
2012 gomp_copy_host2dev (devicep
, aq
,
2013 (void *) (tgt
->tgt_start
2015 (void *) k
->host_start
,
2016 sizeof (void *), false, cbufp
);
2019 gomp_mutex_unlock (&devicep
->lock
);
2020 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
2024 if (k
->aux
&& k
->aux
->link_key
)
2026 /* Set link pointer on target to the device address of the
2028 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
2029 /* We intentionally do not use coalescing here, as it's not
2030 data allocated by the current call to this function. */
2031 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
2032 &tgt_addr
, sizeof (void *), true, NULL
);
2038 /* Processing of non-contiguous array rows. */
2041 struct target_var_desc
*next_var_desc
= &tgt
->list
[mapnum
];
2042 for (i
= 0; i
< nca_info
->num_ncarray
; i
++)
2044 struct goacc_ncarray
*nca
= &nca_info
->ncarray
[i
];
2045 int kind
= get_kind (short_mapkind
, kinds
, nca
->map_index
);
2046 size_t align
= (size_t) 1 << (kind
>> rshift
);
2047 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2049 assert (nca
->ptr
== hostaddrs
[nca
->map_index
]);
2051 /* For the map of the non-contiguous array itself, adjust so that
2052 the passed device address points to the beginning of the
2053 ptrblock. Remember to adjust the first-dimension's bias here. */
2054 tgt
->list
[nca
->map_index
].key
->tgt_offset
2055 = tgt_size
- nca
->descr
->dims
[0].base
;
2057 void *target_ptrblock
= (void*) tgt
->tgt_start
+ tgt_size
;
2058 tgt_size
+= nca
->ptrblock_size
;
2060 /* Add splay key for each data row in current non-contiguous
2062 for (size_t j
= 0; j
< nca
->data_row_num
; j
++)
2064 struct target_var_desc
*row_desc
= next_var_desc
++;
2065 void *row
= nca
->data_rows
[j
];
2066 cur_node
.host_start
= (uintptr_t) row
;
2067 cur_node
.host_end
= cur_node
.host_start
+ nca
->data_row_size
;
2068 splay_tree_key k
= splay_tree_lookup (mem_map
, &cur_node
);
2071 assert (k
->refcount
!= REFCOUNT_LINK
);
2072 gomp_map_vars_existing (devicep
, aq
, k
, &cur_node
, row_desc
,
2073 kind
& typemask
, false, false,
2074 cbufp
, refcount_set
);
2079 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
2082 k
->host_start
= (uintptr_t) row
;
2083 k
->host_end
= k
->host_start
+ nca
->data_row_size
;
2087 k
->dynamic_refcount
= 0;
2089 k
->tgt_offset
= tgt_size
;
2091 tgt_size
+= nca
->data_row_size
;
2095 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
2096 row_desc
->always_copy_from
2097 = GOMP_MAP_COPY_FROM_P (kind
& typemask
);
2098 row_desc
->is_attach
= false;
2099 row_desc
->offset
= 0;
2100 row_desc
->length
= nca
->data_row_size
;
2103 array
->right
= NULL
;
2104 splay_tree_insert (mem_map
, array
);
2106 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2107 gomp_copy_host2dev (devicep
, aq
,
2108 (void *) tgt
->tgt_start
+ k
->tgt_offset
,
2109 (void *) k
->host_start
,
2110 nca
->data_row_size
, false,
2114 nca
->tgt_data_rows
[j
]
2115 = (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
);
2118 /* Now we have the target memory allocated, and target offsets of all
2119 row blocks assigned and calculated, we can construct the
2120 accelerator side ptrblock and copy it in. */
2121 if (nca
->ptrblock_size
)
2123 void *ptrblock
= gomp_malloc (nca
->ptrblock_size
);
2124 goacc_noncontig_array_create_ptrblock
2125 (nca
, ptrblock
, target_ptrblock
);
2126 gomp_copy_host2dev (devicep
, aq
, target_ptrblock
, ptrblock
,
2127 nca
->ptrblock_size
, false, cbufp
);
2129 /* Free once the transfer has completed. */
2130 devicep
->openacc
.async
.queue_callback_func (aq
, free
, ptrblock
);
2138 if (pragma_kind
& GOMP_MAP_VARS_TARGET
)
2140 for (i
= 0; i
< mapnum
; i
++)
2142 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
2143 gomp_copy_host2dev (devicep
, aq
,
2144 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
2145 (void *) &cur_node
.tgt_offset
, sizeof (void *),
2153 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
2154 gomp_copy_host2dev (devicep
, aq
,
2155 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
2156 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
2157 - cbuf
.chunks
[0].start
),
2158 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
2160 if (always_pinned_mode
)
2162 if (!gomp_page_locked_host_free_dev (devicep
, cbuf
.buf
, aq
))
2164 gomp_mutex_unlock (&devicep
->lock
);
2165 exit (EXIT_FAILURE
);
2171 /* Free once the transfer has completed. */
2172 devicep
->openacc
.async
.queue_callback_func (aq
, free
, cbuf
.buf
);
2180 /* If the variable from "omp target enter data" map-list was already mapped,
2181 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
2183 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
2189 gomp_mutex_unlock (&devicep
->lock
);
/* Map MAPNUM host variables described by HOSTADDRS/SIZES/KINDS onto device
   DEVICEP, returning the resulting target memory descriptor.  Thin wrapper
   around gomp_map_vars_internal that supplies a locally created refcount
   hash set when the caller did not provide one.
   NOTE(review): this chunk is a mangled extraction; the function's opening
   brace, the final `return tgt;` and closing brace are not visible here --
   confirm against the pristine source before relying on exact layout.  */
2193 static struct target_mem_desc
*
2194 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
2195 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
2196 bool short_mapkind
, htab_t
*refcount_set
,
2197 enum gomp_map_vars_kind pragma_kind
)
2199 /* This management of a local refcount_set is for convenience of callers
2200 who do not share a refcount_set over multiple map/unmap uses. */
2201 htab_t local_refcount_set
= NULL
;
2202 if (refcount_set
== NULL
)
/* Caller passed no set: create a transient one sized for MAPNUM entries.  */
2204 local_refcount_set
= htab_create (mapnum
);
2205 refcount_set
= &local_refcount_set
;
2208 struct target_mem_desc
*tgt
;
/* Delegate the real mapping work; aq (async queue) and nca_info are NULL
   here -- this entry point is the synchronous, non-OpenACC path.  */
2209 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
2210 sizes
, kinds
, NULL
, short_mapkind
,
2211 refcount_set
, pragma_kind
);
/* Free the transient set, if we created one above.  */
2212 if (local_refcount_set
)
2213 htab_free (local_refcount_set
);
/* OpenACC entry point for mapping variables: forwards to
   gomp_map_vars_internal with the async queue AQ, non-contiguous array
   info NCA_INFO, a NULL refcount set, and GOMP_MAP_VARS_OPENACC OR-ed
   into PRAGMA_KIND so the internal routine takes the OpenACC paths.
   NOTE(review): mangled extraction -- one parameter line (originally
   `bool short_mapkind`, line 2223) and the brace lines are missing from
   this view; verify against the pristine source.  */
2218 attribute_hidden
struct target_mem_desc
*
2219 goacc_map_vars (struct gomp_device_descr
*devicep
,
2220 struct goacc_asyncqueue
*aq
, size_t mapnum
,
2221 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
2222 void *kinds
, struct goacc_ncarray_info
*nca_info
,
2224 enum gomp_map_vars_kind pragma_kind
)
2226 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
2227 sizes
, kinds
, nca_info
, short_mapkind
, NULL
,
/* Tag the call as OpenACC on top of whatever kind the caller requested.  */
2228 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
2232 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
2234 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
2236 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
2243 gomp_unref_tgt (void *ptr
)
2245 bool is_tgt_unmapped
= false;
2247 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
2249 if (tgt
->refcount
> 1)
2253 gomp_unmap_tgt (tgt
);
2254 is_tgt_unmapped
= true;
2257 return is_tgt_unmapped
;
/* void-returning adapter over gomp_unref_tgt, suitable for use as an
   asynchronous-queue callback where the unmap result is not needed.  */
static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
2267 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
2269 splay_tree_remove (sp
, k
);
2272 if (k
->aux
->link_key
)
2273 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
2274 if (k
->aux
->attach_count
)
2275 free (k
->aux
->attach_count
);
2281 static inline __attribute__((always_inline
)) bool
2282 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2283 struct goacc_asyncqueue
*aq
)
2285 bool is_tgt_unmapped
= false;
2287 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
2289 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
2290 /* Infer the splay_tree_key of the first structelem key using the
2291 pointer to the first structleme_refcount. */
2292 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
2293 - offsetof (struct splay_tree_key_s
,
2294 structelem_refcount
));
2295 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
2297 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
2298 with the splay_tree_keys embedded inside. */
2299 splay_tree_node node
=
2300 (splay_tree_node
) ((char *) k
2301 - offsetof (struct splay_tree_node_s
, key
));
2304 /* Starting from the _FIRST key, and continue for all following
2306 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
2308 if (always_pinned_mode
)
2310 if (k
->page_locked_host_p
)
2312 void *ptr
= (void *) k
->host_start
;
2313 size_t size
= k
->host_end
- k
->host_start
;
2314 if (!gomp_page_locked_host_unregister_dev (devicep
,
2317 gomp_mutex_unlock (&devicep
->lock
);
2318 exit (EXIT_FAILURE
);
2320 k
->page_locked_host_p
= false;
2324 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
2332 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
2334 if (always_pinned_mode
)
2336 if (k
->page_locked_host_p
)
2338 void *ptr
= (void *) k
->host_start
;
2339 size_t size
= k
->host_end
- k
->host_start
;
2340 if (!gomp_page_locked_host_unregister_dev (devicep
,
2343 gomp_mutex_unlock (&devicep
->lock
);
2344 exit (EXIT_FAILURE
);
2346 k
->page_locked_host_p
= false;
2352 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2355 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
2356 return is_tgt_unmapped
;
2359 attribute_hidden
bool
2360 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
2362 return gomp_remove_var_internal (devicep
, k
, NULL
);
2365 /* Remove a variable asynchronously. This actually removes the variable
2366 mapping immediately, but retains the linked target_mem_desc until the
2367 asynchronous operation has completed (as it may still refer to target
2368 memory). The device lock must be held before entry, and remains locked on
2371 attribute_hidden
void
2372 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
2373 struct goacc_asyncqueue
*aq
)
2375 (void) gomp_remove_var_internal (devicep
, k
, aq
);
2378 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
2379 variables back from device to host: if it is false, it is assumed that this
2380 has been done already. */
2382 static inline __attribute__((always_inline
)) void
2383 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2384 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
2386 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
2388 if (tgt
->list_count
== 0)
2394 gomp_mutex_lock (&devicep
->lock
);
2395 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2397 gomp_mutex_unlock (&devicep
->lock
);
2405 /* We must perform detachments before any copies back to the host. */
2406 for (i
= 0; i
< tgt
->list_count
; i
++)
2408 splay_tree_key k
= tgt
->list
[i
].key
;
2410 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
2411 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
2412 + tgt
->list
[i
].offset
,
2417 splay_tree_key remove_vars
[tgt
->list_count
];
2419 for (i
= 0; i
< tgt
->list_count
; i
++)
2421 splay_tree_key k
= tgt
->list
[i
].key
;
2425 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
2426 counts ('n->refcount', 'n->dynamic_refcount'). */
2427 if (tgt
->list
[i
].is_attach
)
2430 bool do_copy
, do_remove
;
2431 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
2433 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
2434 || tgt
->list
[i
].always_copy_from
)
2435 gomp_copy_dev2host (devicep
, aq
,
2436 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
2437 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
2438 + tgt
->list
[i
].offset
),
2439 tgt
->list
[i
].length
);
2440 /* Queue all removals together for processing below.
2441 We may unregister page-locked host memory only after all device to
2442 host memory transfers have completed.
2443 See also 'gomp_exit_data'. */
2445 remove_vars
[nrmvars
++] = k
;
2448 for (i
= 0; i
< nrmvars
; i
++)
2450 splay_tree_key k
= remove_vars
[i
];
2451 struct target_mem_desc
*k_tgt
= k
->tgt
;
2452 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
2453 /* It would be bad if TGT got unmapped while we're still iterating over
2454 its LIST_COUNT, and also expect to use it in the following code. */
2455 assert (!is_tgt_unmapped
2460 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2463 gomp_unref_tgt ((void *) tgt
);
2465 gomp_mutex_unlock (&devicep
->lock
);
2469 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2470 htab_t
*refcount_set
)
2472 /* This management of a local refcount_set is for convenience of callers
2473 who do not share a refcount_set over multiple map/unmap uses. */
2474 htab_t local_refcount_set
= NULL
;
2475 if (refcount_set
== NULL
)
2477 local_refcount_set
= htab_create (tgt
->list_count
);
2478 refcount_set
= &local_refcount_set
;
2481 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2483 if (local_refcount_set
)
2484 htab_free (local_refcount_set
);
2487 attribute_hidden
void
2488 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2489 struct goacc_asyncqueue
*aq
)
2491 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
2495 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2496 size_t *sizes
, void *kinds
, bool short_mapkind
)
2499 struct splay_tree_key_s cur_node
;
2500 const int typemask
= short_mapkind
? 0xff : 0x7;
2508 gomp_mutex_lock (&devicep
->lock
);
2509 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2511 gomp_mutex_unlock (&devicep
->lock
);
2515 for (i
= 0; i
< mapnum
; i
++)
2518 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2519 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2520 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2523 int kind
= get_kind (short_mapkind
, kinds
, i
);
2524 if (n
->host_start
> cur_node
.host_start
2525 || n
->host_end
< cur_node
.host_end
)
2527 gomp_mutex_unlock (&devicep
->lock
);
2528 gomp_fatal ("Trying to update [%p..%p) object when "
2529 "only [%p..%p) is mapped",
2530 (void *) cur_node
.host_start
,
2531 (void *) cur_node
.host_end
,
2532 (void *) n
->host_start
,
2533 (void *) n
->host_end
);
2536 if (n
->aux
&& n
->aux
->attach_count
)
2538 uintptr_t addr
= cur_node
.host_start
;
2539 while (addr
< cur_node
.host_end
)
2541 /* We have to be careful not to overwrite still attached
2542 pointers during host<->device updates. */
2543 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2544 if (n
->aux
->attach_count
[i
] == 0)
2546 void *devaddr
= (void *) (n
->tgt
->tgt_start
2548 + addr
- n
->host_start
);
2549 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2550 gomp_copy_host2dev (devicep
, NULL
,
2551 devaddr
, (void *) addr
,
2552 sizeof (void *), false, NULL
);
2553 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2554 gomp_copy_dev2host (devicep
, NULL
,
2555 (void *) addr
, devaddr
,
2558 addr
+= sizeof (void *);
2563 void *hostaddr
= (void *) cur_node
.host_start
;
2564 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2565 + cur_node
.host_start
2567 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2569 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2570 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2572 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2573 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2578 int kind
= get_kind (short_mapkind
, kinds
, i
);
2580 if (GOMP_MAP_PRESENT_P (kind
))
2582 /* We already looked up the memory region above and it
2584 gomp_mutex_unlock (&devicep
->lock
);
2585 gomp_fatal ("present clause: !omp_target_is_present "
2587 (void *) hostaddrs
[i
], devicep
->target_id
);
2591 gomp_mutex_unlock (&devicep
->lock
);
2594 static struct gomp_offload_icv_list
*
2595 gomp_get_offload_icv_item (int dev_num
)
2597 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2598 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2604 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2605 depending on the device num and the variable hierarchy
2606 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2607 device and thus no item with that device number is contained in
2608 gomp_offload_icv_list, then a new item is created and added to the list. */
2610 static struct gomp_offload_icvs
*
2611 get_gomp_offload_icvs (int dev_num
)
2613 struct gomp_icv_list
*dev
2614 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2615 struct gomp_icv_list
*all
2616 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2617 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2618 struct gomp_offload_icv_list
*offload_icvs
2619 = gomp_get_offload_icv_item (dev_num
);
2621 if (offload_icvs
!= NULL
)
2622 return &offload_icvs
->icvs
;
2624 struct gomp_offload_icv_list
*new;
2625 size_t size
= sizeof (struct gomp_offload_icv_list
);
2626 if (always_pinned_mode
)
2628 struct gomp_device_descr
*device
= &devices
[dev_num
];
2629 new = gomp_page_locked_host_alloc_dev (device
, size
, false);
2631 exit (EXIT_FAILURE
);
2634 new = gomp_malloc (size
);
2636 new->device_num
= dev_num
;
2637 new->icvs
.device_num
= dev_num
;
2638 new->next
= gomp_offload_icv_list
;
2640 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2641 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2642 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2643 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2644 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2645 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2647 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
2650 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2651 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2652 else if (dev
!= NULL
2653 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2654 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2655 else if (all
!= NULL
2656 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2657 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2659 new->icvs
.teams_thread_limit
2660 = gomp_default_icv_values
.teams_thread_limit_var
;
2663 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2664 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2665 else if (dev
!= NULL
2666 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2667 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2668 else if (all
!= NULL
2669 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2670 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2672 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
2674 gomp_offload_icv_list
= new;
2678 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2679 And insert to splay tree the mapping between addresses from HOST_TABLE and
2680 from loaded target image. We rely in the host and device compiler
2681 emitting variable and functions in the same order. */
2684 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2685 const void *host_table
, const void *target_data
,
2686 bool is_register_lock
)
2688 gomp_debug (0, "%s: devicep=%p (%s)\n",
2689 __FUNCTION__
, devicep
, devicep
->name
);
2690 void **host_func_table
= ((void ***) host_table
)[0];
2691 void **host_funcs_end
= ((void ***) host_table
)[1];
2692 void **host_var_table
= ((void ***) host_table
)[2];
2693 void **host_vars_end
= ((void ***) host_table
)[3];
2695 /* The func table contains only addresses, the var table contains addresses
2696 and corresponding sizes. */
2697 int num_funcs
= host_funcs_end
- host_func_table
;
2698 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2700 /* Load image to device and get target addresses for the image. */
2701 struct addr_pair
*target_table
= NULL
;
2702 uint64_t *rev_target_fn_table
= NULL
;
2703 int i
, num_target_entries
;
2705 /* With reverse offload, insert also target-host addresses. */
2706 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2709 = devicep
->load_image_func (devicep
->target_id
, version
,
2710 target_data
, &target_table
,
2711 rev_lookup
? &rev_target_fn_table
: NULL
);
2713 if (num_target_entries
!= num_funcs
+ num_vars
2714 /* "+1" due to the additional ICV struct. */
2715 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2717 gomp_mutex_unlock (&devicep
->lock
);
2718 if (is_register_lock
)
2719 gomp_mutex_unlock (®ister_lock
);
2720 gomp_fatal ("Cannot map target functions or variables"
2721 " (expected %u, have %u)", num_funcs
+ num_vars
,
2722 num_target_entries
);
2725 /* Insert host-target address mapping into splay tree. */
2726 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2727 /* "+1" due to the additional ICV struct. */
2728 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2729 * sizeof (*tgt
->array
));
2730 if (rev_target_fn_table
)
2731 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2733 tgt
->rev_array
= NULL
;
2734 tgt
->refcount
= REFCOUNT_INFINITY
;
2737 tgt
->to_free
= NULL
;
2739 tgt
->list_count
= 0;
2740 tgt
->device_descr
= devicep
;
2741 splay_tree_node array
= tgt
->array
;
2742 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2744 for (i
= 0; i
< num_funcs
; i
++)
2746 splay_tree_key k
= &array
->key
;
2747 k
->host_start
= (uintptr_t) host_func_table
[i
];
2748 k
->host_end
= k
->host_start
+ 1;
2750 k
->tgt_offset
= target_table
[i
].start
;
2751 k
->refcount
= REFCOUNT_INFINITY
;
2752 k
->dynamic_refcount
= 0;
2754 k
->page_locked_host_p
= false;
2756 array
->right
= NULL
;
2757 splay_tree_insert (&devicep
->mem_map
, array
);
2758 if (rev_target_fn_table
)
2760 reverse_splay_tree_key k2
= &rev_array
->key
;
2761 k2
->dev
= rev_target_fn_table
[i
];
2763 rev_array
->left
= NULL
;
2764 rev_array
->right
= NULL
;
2766 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2772 /* Most significant bit of the size in host and target tables marks
2773 "omp declare target link" variables. */
2774 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2775 const uintptr_t size_mask
= ~link_bit
;
2777 for (i
= 0; i
< num_vars
; i
++)
2779 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2780 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2781 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2783 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2785 gomp_mutex_unlock (&devicep
->lock
);
2786 if (is_register_lock
)
2787 gomp_mutex_unlock (®ister_lock
);
2788 gomp_fatal ("Cannot map target variables (size mismatch)");
2791 splay_tree_key k
= &array
->key
;
2792 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2794 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2796 k
->tgt_offset
= target_var
->start
;
2797 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2798 k
->dynamic_refcount
= 0;
2800 k
->page_locked_host_p
= false;
2801 if (always_pinned_mode
)
2803 void *ptr
= (void *) k
->host_start
;
2804 size_t size
= k
->host_end
- k
->host_start
;
2805 gomp_debug (0, " var %d: ptr=%p, size=%llu, is_link_var=%d\n",
2806 i
, ptr
, (unsigned long long) size
, is_link_var
);
2809 /* '#pragma omp declare target' variables typically are
2810 read/write, but in particular artificial ones, like Fortran
2811 array constructors, may be placed in section '.rodata'.
2812 We don't have the actual mapping kind available here, so we
2813 use a magic number. */
2814 const int kind
= -1;
2815 int page_locked_host_p
= gomp_page_locked_host_register_dev
2816 (devicep
, ptr
, size
, kind
);
2817 if (page_locked_host_p
< 0)
2819 gomp_mutex_unlock (&devicep
->lock
);
2820 if (is_register_lock
)
2821 gomp_mutex_unlock (®ister_lock
);
2822 exit (EXIT_FAILURE
);
2824 if (page_locked_host_p
)
2825 k
->page_locked_host_p
= true;
2829 array
->right
= NULL
;
2830 splay_tree_insert (&devicep
->mem_map
, array
);
2834 /* Last entry is for a ICVs variable.
2835 Tolerate case where plugin does not return those entries. */
2836 if (num_funcs
+ num_vars
< num_target_entries
)
2838 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2840 /* Start address will be non-zero for the ICVs variable if
2841 the variable was found in this image. */
2842 if (var
->start
!= 0)
2844 /* The index of the devicep within devices[] is regarded as its
2845 'device number', which is different from the per-device type
2846 devicep->target_id. */
2847 int dev_num
= (int) (devicep
- &devices
[0]);
2848 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2851 gomp_mutex_unlock (&devicep
->lock
);
2852 if (is_register_lock
)
2853 gomp_mutex_unlock (®ister_lock
);
2854 gomp_fatal ("'get_gomp_offload_icvs' failed");
2856 size_t var_size
= var
->end
- var
->start
;
2857 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2859 gomp_mutex_unlock (&devicep
->lock
);
2860 if (is_register_lock
)
2861 gomp_mutex_unlock (®ister_lock
);
2862 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2865 /* Copy the ICVs variable to place on device memory, hereby
2866 actually designating its device number into effect. */
2867 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2868 var_size
, false, NULL
);
2869 splay_tree_key k
= &array
->key
;
2870 k
->host_start
= (uintptr_t) icvs
;
2872 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2874 k
->tgt_offset
= var
->start
;
2875 k
->refcount
= REFCOUNT_INFINITY
;
2876 k
->dynamic_refcount
= 0;
2878 /* 'always_pinned_mode' handled via 'get_gomp_offload_icvs'. */
2879 k
->page_locked_host_p
= always_pinned_mode
;
2881 array
->right
= NULL
;
2882 splay_tree_insert (&devicep
->mem_map
, array
);
2887 free (target_table
);
2890 /* Unload the mappings described by target_data from device DEVICE_P.
2891 The device must be locked. */
2894 gomp_unload_image_from_device (struct gomp_device_descr
*devicep
,
2896 const void *host_table
, const void *target_data
)
2898 void **host_func_table
= ((void ***) host_table
)[0];
2899 void **host_funcs_end
= ((void ***) host_table
)[1];
2900 void **host_var_table
= ((void ***) host_table
)[2];
2901 void **host_vars_end
= ((void ***) host_table
)[3];
2903 /* The func table contains only addresses, the var table contains addresses
2904 and corresponding sizes. */
2905 int num_funcs
= host_funcs_end
- host_func_table
;
2906 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2908 struct splay_tree_key_s k
;
2909 splay_tree_key node
= NULL
;
2911 /* Find mapping at start of node array */
2912 if (num_funcs
|| num_vars
)
2914 k
.host_start
= (num_funcs
? (uintptr_t) host_func_table
[0]
2915 : (uintptr_t) host_var_table
[0]);
2916 k
.host_end
= k
.host_start
+ 1;
2917 node
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2920 if (!devicep
->unload_image_func (devicep
->target_id
, version
, target_data
))
2922 gomp_mutex_unlock (&devicep
->lock
);
2923 gomp_fatal ("image unload fail");
2925 if (devicep
->mem_map_rev
.root
)
2927 /* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
2929 assert (node
&& node
->tgt
&& node
->tgt
->rev_array
);
2930 assert (devicep
->mem_map_rev
.root
->key
.k
->tgt
== node
->tgt
);
2931 free (node
->tgt
->rev_array
);
2932 devicep
->mem_map_rev
.root
= NULL
;
2935 /* Remove mappings from splay tree. */
2937 for (i
= 0; i
< num_funcs
; i
++)
2939 k
.host_start
= (uintptr_t) host_func_table
[i
];
2940 k
.host_end
= k
.host_start
+ 1;
2941 splay_tree_remove (&devicep
->mem_map
, &k
);
2944 /* Most significant bit of the size in host and target tables marks
2945 "omp declare target link" variables. */
2946 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2947 const uintptr_t size_mask
= ~link_bit
;
2948 bool is_tgt_unmapped
= false;
2950 for (i
= 0; i
< num_vars
; i
++)
2952 k
.host_start
= (uintptr_t) host_var_table
[i
* 2];
2954 = k
.host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2956 if (!(link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1]))
2957 splay_tree_remove (&devicep
->mem_map
, &k
);
2960 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2961 is_tgt_unmapped
= gomp_remove_var (devicep
, n
);
2965 if (node
&& !is_tgt_unmapped
)
2973 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2975 char *end
= buf
+ size
, *p
= buf
;
2976 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2977 p
+= snprintf (p
, end
- p
, "unified_address");
2978 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2979 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2980 (p
== buf
? "" : ", "));
2981 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2982 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2983 (p
== buf
? "" : ", "));
2986 /* This function should be called from every offload image while loading.
2987 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2988 the target, and DATA. */
2991 GOMP_offload_register_ver (unsigned version
, const void *host_table
,
2992 int target_type
, const void *data
)
2996 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
2997 gomp_fatal ("Library too old for offload (version %u < %u)",
2998 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
3001 const void *target_data
;
3002 if (GOMP_VERSION_LIB (version
) > 1)
3004 omp_req
= (int) (size_t) ((void **) data
)[0];
3005 target_data
= &((void **) data
)[1];
3013 gomp_mutex_lock (®ister_lock
);
3015 if (omp_req
&& omp_requires_mask
&& omp_requires_mask
!= omp_req
)
3017 char buf1
[sizeof ("unified_address, unified_shared_memory, "
3018 "reverse_offload")];
3019 char buf2
[sizeof ("unified_address, unified_shared_memory, "
3020 "reverse_offload")];
3021 gomp_requires_to_name (buf2
, sizeof (buf2
),
3022 omp_req
!= GOMP_REQUIRES_TARGET_USED
3023 ? omp_req
: omp_requires_mask
);
3024 if (omp_req
!= GOMP_REQUIRES_TARGET_USED
3025 && omp_requires_mask
!= GOMP_REQUIRES_TARGET_USED
)
3027 gomp_requires_to_name (buf1
, sizeof (buf1
), omp_requires_mask
);
3028 gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
3029 "in multiple compilation units: '%s' vs. '%s'",
3033 gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
3034 "some compilation units", buf2
);
3036 omp_requires_mask
= omp_req
;
3038 /* Load image to all initialized devices. */
3039 for (i
= 0; i
< num_devices
; i
++)
3041 struct gomp_device_descr
*devicep
= &devices
[i
];
3042 gomp_mutex_lock (&devicep
->lock
);
3043 if (devicep
->type
== target_type
3044 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3045 gomp_load_image_to_device (devicep
, version
,
3046 host_table
, target_data
, true);
3047 gomp_mutex_unlock (&devicep
->lock
);
3050 /* Insert image to array of pending images. */
3052 = gomp_realloc_unlock (offload_images
,
3053 (num_offload_images
+ 1)
3054 * sizeof (struct offload_image_descr
));
3055 offload_images
[num_offload_images
].version
= version
;
3056 offload_images
[num_offload_images
].type
= target_type
;
3057 offload_images
[num_offload_images
].host_table
= host_table
;
3058 offload_images
[num_offload_images
].target_data
= target_data
;
3060 num_offload_images
++;
3061 gomp_mutex_unlock (®ister_lock
);
3064 /* Legacy entry point. */
/* Legacy entry point: version-0 registration.  */
void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
3073 /* This function should be called from every offload image while unloading.
3074 It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
3075 the target, and DATA. */
3078 GOMP_offload_unregister_ver (unsigned version
, const void *host_table
,
3079 int target_type
, const void *data
)
3083 if (GOMP_VERSION_LIB (version
) > GOMP_VERSION
)
3084 gomp_fatal ("Library too old for offload (version %u < %u)",
3085 GOMP_VERSION
, GOMP_VERSION_LIB (version
));
3087 const void *target_data
;
3088 if (GOMP_VERSION_LIB (version
) > 1)
3089 target_data
= &((void **) data
)[1];
3093 gomp_mutex_lock (®ister_lock
);
3095 /* Unload image from all initialized devices. */
3096 for (i
= 0; i
< num_devices
; i
++)
3098 struct gomp_device_descr
*devicep
= &devices
[i
];
3099 gomp_mutex_lock (&devicep
->lock
);
3100 if (devicep
->type
== target_type
3101 && devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3102 gomp_unload_image_from_device (devicep
, version
,
3103 host_table
, target_data
);
3104 gomp_mutex_unlock (&devicep
->lock
);
3107 /* Remove image from array of pending images. */
3108 for (i
= 0; i
< num_offload_images
; i
++)
3109 if (offload_images
[i
].target_data
== target_data
)
3111 offload_images
[i
] = offload_images
[--num_offload_images
];
3115 gomp_mutex_unlock (®ister_lock
);
3118 /* Legacy entry point. */
/* Legacy entry point: version-0 unregistration.  */
void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
3127 /* This function initializes the target device, specified by DEVICEP. DEVICEP
3128 must be locked on entry, and remains locked on return. */
3130 attribute_hidden
void
3131 gomp_init_device (struct gomp_device_descr
*devicep
)
3134 if (!devicep
->init_device_func (devicep
->target_id
))
3136 gomp_mutex_unlock (&devicep
->lock
);
3137 gomp_fatal ("device initialization failed");
3140 /* Load to device all images registered by the moment. */
3141 for (i
= 0; i
< num_offload_images
; i
++)
3143 struct offload_image_descr
*image
= &offload_images
[i
];
3144 if (image
->type
== devicep
->type
)
3145 gomp_load_image_to_device (devicep
, image
->version
,
3146 image
->host_table
, image
->target_data
,
3150 /* Initialize OpenACC asynchronous queues. */
3151 goacc_init_asyncqueues (devicep
);
3153 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
3156 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
3157 must be locked on entry, and remains locked on return. */
3159 attribute_hidden
bool
3160 gomp_fini_device (struct gomp_device_descr
*devicep
)
3162 bool ret
= goacc_fini_asyncqueues (devicep
);
3163 ret
&= devicep
->fini_device_func (devicep
->target_id
);
3164 devicep
->state
= GOMP_DEVICE_FINALIZED
;
3168 attribute_hidden
void
3169 gomp_unload_device (struct gomp_device_descr
*devicep
)
3171 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
3175 /* Unload from device all images registered at the moment. */
3176 for (i
= 0; i
< num_offload_images
; i
++)
3178 struct offload_image_descr
*image
= &offload_images
[i
];
3179 if (image
->type
== devicep
->type
)
3180 gomp_unload_image_from_device (devicep
, image
->version
,
3182 image
->target_data
);
3187 /* Host fallback for GOMP_target{,_ext} routines. */
3190 gomp_target_fallback (void (*fn
) (void *), void **hostaddrs
,
3191 struct gomp_device_descr
*devicep
, void **args
)
3193 struct gomp_thread old_thr
, *thr
= gomp_thread ();
3195 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3197 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3198 "be used for offloading");
3201 memset (thr
, '\0', sizeof (*thr
));
3202 if (gomp_places_list
)
3204 thr
->place
= old_thr
.place
;
3205 thr
->ts
.place_partition_len
= gomp_places_list_len
;
3210 intptr_t id
= (intptr_t) *args
++, val
;
3211 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3212 val
= (intptr_t) *args
++;
3214 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
3215 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
3217 id
&= GOMP_TARGET_ARG_ID_MASK
;
3218 if (id
!= GOMP_TARGET_ARG_THREAD_LIMIT
)
3220 val
= val
> INT_MAX
? INT_MAX
: val
;
3222 gomp_icv (true)->thread_limit_var
= val
;
3227 gomp_free_thread (thr
);
3231 /* Calculate alignment and size requirements of a private copy of data shared
3232 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
3235 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
3236 unsigned short *kinds
, size_t *tgt_align
,
3240 for (i
= 0; i
< mapnum
; i
++)
3241 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
3243 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3244 if (*tgt_align
< align
)
3246 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
3247 *tgt_size
+= sizes
[i
];
3251 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
3254 copy_firstprivate_data (char *tgt
, size_t mapnum
, void **hostaddrs
,
3255 size_t *sizes
, unsigned short *kinds
, size_t tgt_align
,
3258 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3260 tgt
+= tgt_align
- al
;
3263 for (i
= 0; i
< mapnum
; i
++)
3264 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
&& hostaddrs
[i
] != NULL
)
3266 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3267 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3268 memcpy (tgt
+ tgt_size
, hostaddrs
[i
], sizes
[i
]);
3269 hostaddrs
[i
] = tgt
+ tgt_size
;
3270 tgt_size
= tgt_size
+ sizes
[i
];
3271 if (i
+ 1 < mapnum
&& (kinds
[i
+1] & 0xff) == GOMP_MAP_ATTACH
)
3273 *(*(uintptr_t**) hostaddrs
[i
+1] + sizes
[i
+1]) = (uintptr_t) hostaddrs
[i
];
3279 /* Helper function of GOMP_target{,_ext} routines. */
3282 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
3283 void (*host_fn
) (void *))
3285 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
3286 return (void *) host_fn
;
3289 gomp_mutex_lock (&devicep
->lock
);
3290 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3292 gomp_mutex_unlock (&devicep
->lock
);
3296 struct splay_tree_key_s k
;
3297 k
.host_start
= (uintptr_t) host_fn
;
3298 k
.host_end
= k
.host_start
+ 1;
3299 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
3300 gomp_mutex_unlock (&devicep
->lock
);
3304 return (void *) tgt_fn
->tgt_offset
;
3308 /* Called when encountering a target directive. If DEVICE
3309 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
3310 GOMP_DEVICE_HOST_FALLBACK (or any value
3311 larger than last available hw device), use host fallback.
3312 FN is address of host code, UNUSED is part of the current ABI, but
3313 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
3314 with MAPNUM entries, with addresses of the host objects,
3315 sizes of the host objects (resp. for pointer kind pointer bias
3316 and assumed sizeof (void *) size) and kinds. */
3319 GOMP_target (int device
, void (*fn
) (void *), const void *unused
,
3320 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
3321 unsigned char *kinds
)
3323 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3327 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3328 /* All shared memory devices should use the GOMP_target_ext function. */
3329 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
3330 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
)))
3331 return gomp_target_fallback (fn
, hostaddrs
, devicep
, NULL
);
3333 htab_t refcount_set
= htab_create (mapnum
);
3334 struct target_mem_desc
*tgt_vars
3335 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3336 &refcount_set
, GOMP_MAP_VARS_TARGET
);
3337 devicep
->run_func (devicep
->target_id
, fn_addr
, (void *) tgt_vars
->tgt_start
,
3339 htab_clear (refcount_set
);
3340 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3341 htab_free (refcount_set
);
3344 static inline unsigned int
3345 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
3347 /* If we cannot run asynchronously, simply ignore nowait. */
3348 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
3349 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
3355 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
3357 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3361 void *host_ptr
= &item
->icvs
;
3362 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
3363 if (dev_ptr
!= NULL
)
3364 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
3365 sizeof (struct gomp_offload_icvs
));
3368 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
3369 and several arguments have been added:
3370 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
3371 DEPEND is array of dependencies, see GOMP_task for details.
3373 ARGS is a pointer to an array consisting of a variable number of both
3374 device-independent and device-specific arguments, which can take one two
3375 elements where the first specifies for which device it is intended, the type
3376 and optionally also the value. If the value is not present in the first
3377 one, the whole second element the actual value. The last element of the
3378 array is a single NULL. Among the device independent can be for example
3379 NUM_TEAMS and THREAD_LIMIT.
3381 NUM_TEAMS is positive if GOMP_teams will be called in the body with
3382 that value, or 1 if teams construct is not present, or 0, if
3383 teams construct does not have num_teams clause and so the choice is
3384 implementation defined, and -1 if it can't be determined on the host
3385 what value will GOMP_teams have on the device.
3386 THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
3387 body with that value, or 0, if teams construct does not have thread_limit
3388 clause or the teams construct is not present, or -1 if it can't be
3389 determined on the host what value will GOMP_teams have on the device. */
3392 GOMP_target_ext (int device
, void (*fn
) (void *), size_t mapnum
,
3393 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3394 unsigned int flags
, void **depend
, void **args
)
3396 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3397 size_t tgt_align
= 0, tgt_size
= 0;
3398 bool fpc_done
= false;
3400 /* Obtain the original TEAMS and THREADS values from ARGS. */
3401 intptr_t orig_teams
= 1, orig_threads
= 0;
3402 size_t num_args
= 0, len
= 1, teams_len
= 1, threads_len
= 1;
3403 void **tmpargs
= args
;
3406 intptr_t id
= (intptr_t) *tmpargs
++, val
;
3407 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3409 val
= (intptr_t) *tmpargs
++;
3414 val
= id
>> GOMP_TARGET_ARG_VALUE_SHIFT
;
3418 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) != GOMP_TARGET_ARG_DEVICE_ALL
)
3420 val
= val
> INT_MAX
? INT_MAX
: val
;
3421 if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
)
3426 else if ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
)
3433 intptr_t new_teams
= orig_teams
, new_threads
= orig_threads
;
3434 /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
3435 ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
3436 value could not be determined. No change.
3437 ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
3438 Set device-specific value.
3439 ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.
3441 if (orig_teams
== -2)
3443 else if (orig_teams
== 0)
3445 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3447 new_teams
= item
->icvs
.nteams
;
3449 /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
3450 region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
3451 e.g. a THREAD_LIMIT clause. */
3452 if (orig_teams
> -2 && orig_threads
== 0)
3454 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
3456 new_threads
= item
->icvs
.teams_thread_limit
;
3459 /* Copy and change the arguments list only if TEAMS or THREADS need to be
3461 void **new_args
= args
;
3462 if (orig_teams
!= new_teams
|| orig_threads
!= new_threads
)
3464 size_t tms_len
= (orig_teams
== new_teams
3466 : (new_teams
> -(1 << 15) && new_teams
< (1 << 15)
3468 size_t ths_len
= (orig_threads
== new_threads
3470 : (new_threads
> -(1 << 15) && new_threads
< (1 << 15)
3472 /* One additional item after the last arg must be NULL. */
3473 size_t new_args_cnt
= num_args
- teams_len
- threads_len
+ tms_len
3475 new_args
= (void **) gomp_alloca (new_args_cnt
* sizeof (void*));
3478 void **tmp_new_args
= new_args
;
3479 /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied
3480 too if they have not been changed and skipped otherwise. */
3483 intptr_t id
= (intptr_t) *tmpargs
;
3484 if (((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_NUM_TEAMS
3485 && orig_teams
!= new_teams
)
3486 || ((id
& GOMP_TARGET_ARG_ID_MASK
) == GOMP_TARGET_ARG_THREAD_LIMIT
3487 && orig_threads
!= new_threads
))
3490 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3495 *tmp_new_args
++ = *tmpargs
++;
3496 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
3497 *tmp_new_args
++ = *tmpargs
++;
3501 /* Add the new TEAMS arg to the new args list if it has been changed. */
3502 if (orig_teams
!= new_teams
)
3504 intptr_t new_val
= new_teams
;
3507 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3508 | GOMP_TARGET_ARG_NUM_TEAMS
;
3509 *tmp_new_args
++ = (void *) new_val
;
3513 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3514 | GOMP_TARGET_ARG_NUM_TEAMS
);
3515 *tmp_new_args
++ = (void *) new_val
;
3519 /* Add the new THREADS arg to the new args list if it has been changed. */
3520 if (orig_threads
!= new_threads
)
3522 intptr_t new_val
= new_threads
;
3525 new_val
= (new_val
<< GOMP_TARGET_ARG_VALUE_SHIFT
)
3526 | GOMP_TARGET_ARG_THREAD_LIMIT
;
3527 *tmp_new_args
++ = (void *) new_val
;
3531 *tmp_new_args
++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
3532 | GOMP_TARGET_ARG_THREAD_LIMIT
);
3533 *tmp_new_args
++ = (void *) new_val
;
3537 *tmp_new_args
= NULL
;
3540 flags
= clear_unsupported_flags (devicep
, flags
);
3542 /* For 'nowait' we supposedly have to unregister/free page-locked host memory
3543 via 'GOMP_PLUGIN_target_task_completion'. There is no current
3544 configuration exercising this (and thus, infeasible to test). */
3545 assert (!(flags
& GOMP_TARGET_FLAG_NOWAIT
)
3546 || !(devicep
&& devicep
->page_locked_host_register_func
));
3548 if (flags
& GOMP_TARGET_FLAG_NOWAIT
)
3550 struct gomp_thread
*thr
= gomp_thread ();
3551 /* Create a team if we don't have any around, as nowait
3552 target tasks make sense to run asynchronously even when
3553 outside of any parallel. */
3554 if (__builtin_expect (thr
->ts
.team
== NULL
, 0))
3556 struct gomp_team
*team
= gomp_new_team (1);
3557 struct gomp_task
*task
= thr
->task
;
3558 struct gomp_task
**implicit_task
= &task
;
3559 struct gomp_task_icv
*icv
= task
? &task
->icv
: &gomp_global_icv
;
3560 team
->prev_ts
= thr
->ts
;
3561 thr
->ts
.team
= team
;
3562 thr
->ts
.team_id
= 0;
3563 thr
->ts
.work_share
= &team
->work_shares
[0];
3564 thr
->ts
.last_work_share
= NULL
;
3565 #ifdef HAVE_SYNC_BUILTINS
3566 thr
->ts
.single_count
= 0;
3568 thr
->ts
.static_trip
= 0;
3569 thr
->task
= &team
->implicit_task
[0];
3570 gomp_init_task (thr
->task
, NULL
, icv
);
3571 while (*implicit_task
3572 && (*implicit_task
)->kind
!= GOMP_TASK_IMPLICIT
)
3573 implicit_task
= &(*implicit_task
)->parent
;
3576 thr
->task
= *implicit_task
;
3578 free (*implicit_task
);
3579 thr
->task
= &team
->implicit_task
[0];
3582 pthread_setspecific (gomp_thread_destructor
, thr
);
3583 if (implicit_task
!= &task
)
3585 *implicit_task
= thr
->task
;
3590 && !thr
->task
->final_task
)
3592 gomp_create_target_task (devicep
, fn
, mapnum
, hostaddrs
,
3593 sizes
, kinds
, flags
, depend
, new_args
,
3594 GOMP_TARGET_TASK_BEFORE_MAP
);
3599 /* If there are depend clauses, but nowait is not present
3600 (or we are in a final task), block the parent task until the
3601 dependencies are resolved and then just continue with the rest
3602 of the function as if it is a merged task. */
3605 struct gomp_thread
*thr
= gomp_thread ();
3606 if (thr
->task
&& thr
->task
->depend_hash
)
3608 /* If we might need to wait, copy firstprivate now. */
3609 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3610 &tgt_align
, &tgt_size
);
3613 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3614 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3615 tgt_align
, tgt_size
);
3618 gomp_task_maybe_wait_for_dependencies (depend
);
3624 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3625 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, fn
))
3626 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
3630 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3631 &tgt_align
, &tgt_size
);
3634 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3635 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3636 tgt_align
, tgt_size
);
3639 gomp_target_fallback (fn
, hostaddrs
, devicep
, new_args
);
3643 struct target_mem_desc
*tgt_vars
;
3644 htab_t refcount_set
= NULL
;
3646 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3650 calculate_firstprivate_requirements (mapnum
, sizes
, kinds
,
3651 &tgt_align
, &tgt_size
);
3654 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3655 copy_firstprivate_data (tgt
, mapnum
, hostaddrs
, sizes
, kinds
,
3656 tgt_align
, tgt_size
);
3663 refcount_set
= htab_create (mapnum
);
3664 tgt_vars
= gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
3665 true, &refcount_set
, GOMP_MAP_VARS_TARGET
);
3667 devicep
->run_func (devicep
->target_id
, fn_addr
,
3668 tgt_vars
? (void *) tgt_vars
->tgt_start
: hostaddrs
,
3672 htab_clear (refcount_set
);
3673 gomp_unmap_vars (tgt_vars
, true, &refcount_set
);
3676 htab_free (refcount_set
);
3678 /* Copy back ICVs from device to host.
3679 HOST_PTR is expected to exist since it was added in
3680 gomp_load_image_to_device if not already available. */
3681 gomp_copy_back_icvs (devicep
, device
);
3686 /* Reverse lookup (device addr -> host addr) for reverse offload. We avoid
3687 keeping track of all variable handling - assuming that reverse offload occurs
3688 ony very rarely. Downside is that the reverse search is slow. */
3690 struct gomp_splay_tree_rev_lookup_data
{
3691 uintptr_t tgt_start
;
3697 gomp_splay_tree_rev_lookup (splay_tree_key key
, void *d
)
3699 struct gomp_splay_tree_rev_lookup_data
*data
;
3700 data
= (struct gomp_splay_tree_rev_lookup_data
*)d
;
3701 uintptr_t tgt_start
= key
->tgt
->tgt_start
+ key
->tgt_offset
;
3703 if (tgt_start
> data
->tgt_start
|| key
->tgt
->list_count
== 0)
3707 for (j
= 0; j
< key
->tgt
->list_count
; j
++)
3708 if (key
->tgt
->list
[j
].key
== key
)
3710 assert (j
< key
->tgt
->list_count
);
3711 uintptr_t tgt_end
= tgt_start
+ key
->tgt
->list
[j
].length
;
3713 if ((tgt_start
== data
->tgt_start
&& tgt_end
== data
->tgt_end
)
3714 || (tgt_end
> data
->tgt_start
&& tgt_start
< data
->tgt_end
))
3722 static inline splay_tree_key
3723 gomp_map_rev_lookup (splay_tree mem_map
, uint64_t tgt_start
, uint64_t tgt_end
,
3726 struct gomp_splay_tree_rev_lookup_data data
;
3728 data
.tgt_start
= tgt_start
;
3729 data
.tgt_end
= tgt_end
;
3731 if (tgt_start
!= tgt_end
)
3733 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3738 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
3739 if (data
.key
!= NULL
|| zero_len
)
3744 splay_tree_foreach_lazy (mem_map
, gomp_splay_tree_rev_lookup
, &data
);
/* Per-mapping bookkeeping used while translating reverse-offload device
   addresses to host addresses (see gomp_target_rev).  */
struct cpy_data
{
  /* Original device address of the mapping.  */
  uint64_t devaddr;
  /* PRESENT: a host counterpart already existed; ALIGNED: the host copy was
     allocated with gomp_aligned_alloc (and must be freed accordingly).  */
  bool present, aligned;
};
3755 /* Search just mapped reverse-offload data; returns index if found,
3759 gomp_map_cdata_lookup_int (struct cpy_data
*d
, uint64_t *devaddrs
,
3760 unsigned short *kinds
, uint64_t *sizes
, size_t n
,
3761 uint64_t tgt_start
, uint64_t tgt_end
)
3763 const bool short_mapkind
= true;
3764 const int typemask
= short_mapkind
? 0xff : 0x7;
3766 for (i
= 0; i
< n
; i
++)
3768 bool is_struct
= ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3769 == GOMP_MAP_STRUCT
);
3772 dev_end
= d
[i
].devaddr
+ sizes
[i
];
3775 if (i
+ sizes
[i
] < n
)
3776 dev_end
= d
[i
+ sizes
[i
]].devaddr
+ sizes
[i
+ sizes
[i
]];
3778 dev_end
= devaddrs
[i
+ sizes
[i
]] + sizes
[i
+ sizes
[i
]];
3780 if ((d
[i
].devaddr
== tgt_start
&& dev_end
== tgt_end
)
3781 || (dev_end
> tgt_start
&& d
[i
].devaddr
< tgt_end
))
/* Wrapper around gomp_map_cdata_lookup_int mirroring gomp_map_rev_lookup's
   handling of zero-sized ranges: for TGT_START == TGT_END first search one
   byte forward, then (unless ZERO_LEN) one byte backward.  Returns the
   matching index or N.  */

static inline size_t
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
		       bool zero_len)
{
  size_t i;
  if (tgt_start != tgt_end)
    return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				      tgt_start, tgt_end);
  /* Zero-sized range: try one byte forward first.  */
  tgt_end++;
  i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				 tgt_start, tgt_end);
  if (i < n || zero_len)
    return i;
  tgt_end--;

  /* Then one byte backward.  */
  tgt_start--;
  return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				    tgt_start, tgt_end);
}
3811 /* Handle reverse offload. This is called by the device plugins for a
3812 reverse offload; it is not called if the outer target runs on the host.
3813 The mapping is simplified device-affecting constructs (except for target
3814 with device(ancestor:1)) must not be encountered; in particular not
3815 target (enter/exit) data. */
3818 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3819 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3820 struct goacc_asyncqueue
*aq
)
3822 /* Return early if there is no offload code. */
3823 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3825 /* Currently, this fails because of calculate_firstprivate_requirements
3826 below; it could be fixed but additional code needs to be updated to
3827 handle 32bit hosts - thus, it is not worthwhile. */
3828 if (sizeof (void *) != sizeof (uint64_t))
3829 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3831 struct cpy_data
*cdata
= NULL
;
3834 unsigned short *kinds
;
3835 const bool short_mapkind
= true;
3836 const int typemask
= short_mapkind
? 0xff : 0x7;
3837 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3839 reverse_splay_tree_key n
;
3840 struct reverse_splay_tree_key_s k
;
3843 gomp_mutex_lock (&devicep
->lock
);
3844 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3845 gomp_mutex_unlock (&devicep
->lock
);
3848 gomp_fatal ("Cannot find reverse-offload function");
3849 void (*host_fn
)() = (void (*)()) n
->k
->host_start
;
3851 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) || mapnum
== 0)
3853 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3854 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3855 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3859 size_t devaddrs_size
= mapnum
* sizeof (uint64_t);
3860 size_t sizes_size
= mapnum
* sizeof (uint64_t);
3861 size_t kinds_size
= mapnum
* sizeof (unsigned short);
3862 if (always_pinned_mode
)
3864 if (!(devaddrs
= gomp_page_locked_host_alloc_dev (devicep
,
3867 || !(sizes
= gomp_page_locked_host_alloc_dev (devicep
,
3870 || !(kinds
= gomp_page_locked_host_alloc_dev (devicep
,
3873 exit (EXIT_FAILURE
);
3877 devaddrs
= gomp_malloc (devaddrs_size
);
3878 sizes
= gomp_malloc (sizes_size
);
3879 kinds
= gomp_malloc (kinds_size
);
3881 gomp_copy_dev2host (devicep
, aq
, devaddrs
,
3882 (const void *) (uintptr_t) devaddrs_ptr
,
3884 gomp_copy_dev2host (devicep
, aq
, sizes
,
3885 (const void *) (uintptr_t) sizes_ptr
,
3887 gomp_copy_dev2host (devicep
, aq
, kinds
,
3888 (const void *) (uintptr_t) kinds_ptr
,
3890 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3891 exit (EXIT_FAILURE
);
3894 size_t tgt_align
= 0, tgt_size
= 0;
3896 /* If actually executed on 32bit systems, the casts lead to wrong code;
3897 but 32bit with offloading is not supported; see top of this function. */
3898 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3899 (void *) (uintptr_t) kinds
,
3900 &tgt_align
, &tgt_size
);
3904 size_t tgt_alloc_size
= tgt_size
+ tgt_align
- 1;
3905 char *tgt
= gomp_alloca (tgt_alloc_size
);
3906 if (always_pinned_mode
)
3908 /* TODO: See 'gomp_copy_host2dev' re "page-locking on the spot".
3909 On the other hand, performance isn't really a concern, here. */
3910 int page_locked_host_p
= 0;
3911 if (tgt_alloc_size
!= 0)
3913 page_locked_host_p
= gomp_page_locked_host_register_dev
3914 (devicep
, tgt
, tgt_alloc_size
, GOMP_MAP_TOFROM
);
3915 if (page_locked_host_p
< 0)
3916 exit (EXIT_FAILURE
);
3917 /* 'gomp_alloca' isn't already page-locked host memory. */
3918 assert (page_locked_host_p
);
3921 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3923 tgt
+= tgt_align
- al
;
3925 for (uint64_t i
= 0; i
< mapnum
; i
++)
3926 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3927 && devaddrs
[i
] != 0)
3929 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3930 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3931 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3932 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3936 gomp_copy_dev2host (devicep
, aq
, tgt
+ tgt_size
,
3937 (void *) (uintptr_t) devaddrs
[i
],
3939 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
3940 exit (EXIT_FAILURE
);
3942 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3943 tgt_size
= tgt_size
+ sizes
[i
];
3944 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3946 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3947 == GOMP_MAP_ATTACH
))
3949 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3950 = (uint64_t) devaddrs
[i
];
3954 if (always_pinned_mode
)
3956 if (tgt_alloc_size
!= 0
3957 && !gomp_page_locked_host_unregister_dev (devicep
,
3958 tgt
, tgt_alloc_size
,
3960 exit (EXIT_FAILURE
);
3964 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
3966 size_t j
, struct_cpy
= 0;
3968 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3969 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3970 gomp_mutex_lock (&devicep
->lock
);
3971 for (uint64_t i
= 0; i
< mapnum
; i
++)
3973 if (devaddrs
[i
] == 0)
3976 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3979 case GOMP_MAP_FIRSTPRIVATE
:
3980 case GOMP_MAP_FIRSTPRIVATE_INT
:
3983 case GOMP_MAP_DELETE
:
3984 case GOMP_MAP_RELEASE
:
3985 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3986 /* Assume it is present; look it up - but ignore unless the
3987 present clause is there. */
3988 case GOMP_MAP_ALLOC
:
3990 case GOMP_MAP_FORCE_ALLOC
:
3991 case GOMP_MAP_FORCE_FROM
:
3992 case GOMP_MAP_ALWAYS_FROM
:
3994 case GOMP_MAP_TOFROM
:
3995 case GOMP_MAP_FORCE_TO
:
3996 case GOMP_MAP_FORCE_TOFROM
:
3997 case GOMP_MAP_ALWAYS_TO
:
3998 case GOMP_MAP_ALWAYS_TOFROM
:
3999 case GOMP_MAP_PRESENT_FROM
:
4000 case GOMP_MAP_PRESENT_TO
:
4001 case GOMP_MAP_PRESENT_TOFROM
:
4002 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
4003 case GOMP_MAP_ALWAYS_PRESENT_TO
:
4004 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
4005 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
4006 cdata
[i
].devaddr
= devaddrs
[i
];
4007 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
4008 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
4009 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
4011 devaddrs
[i
] + sizes
[i
], zero_len
);
4015 cdata
[i
].present
= true;
4016 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
4020 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
4022 devaddrs
[i
] + sizes
[i
], zero_len
);
4023 cdata
[i
].present
= n2
!= NULL
;
4025 if (!cdata
[i
].present
&& GOMP_MAP_PRESENT_P (kind
))
4027 gomp_mutex_unlock (&devicep
->lock
);
4028 #ifdef HAVE_INTTYPES_H
4029 gomp_fatal ("present clause: no corresponding data on "
4030 "parent device at %p with size %"PRIu64
,
4031 (void *) (uintptr_t) devaddrs
[i
],
4032 (uint64_t) sizes
[i
]);
4034 gomp_fatal ("present clause: no corresponding data on "
4035 "parent device at %p with size %lu",
4036 (void *) (uintptr_t) devaddrs
[i
],
4037 (unsigned long) sizes
[i
]);
4041 else if (!cdata
[i
].present
4042 && kind
!= GOMP_MAP_DELETE
4043 && kind
!= GOMP_MAP_RELEASE
4044 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
4046 cdata
[i
].aligned
= true;
4047 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
4049 if (always_pinned_mode
)
4051 ptr
= gomp_page_locked_host_aligned_alloc_dev
4052 (devicep
, align
, sizes
[i
]);
4055 gomp_mutex_unlock (&devicep
->lock
);
4056 exit (EXIT_FAILURE
);
4060 ptr
= gomp_aligned_alloc (align
, sizes
[i
]);
4061 devaddrs
[i
] = (uint64_t) (uintptr_t) ptr
;
4063 else if (n2
!= NULL
)
4064 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
4065 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
4066 if (((!cdata
[i
].present
|| struct_cpy
)
4067 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
4068 || kind
== GOMP_MAP_FORCE_TO
4069 || kind
== GOMP_MAP_FORCE_TOFROM
4070 || GOMP_MAP_ALWAYS_TO_P (kind
))
4072 gomp_copy_dev2host (devicep
, aq
,
4073 (void *) (uintptr_t) devaddrs
[i
],
4074 (void *) (uintptr_t) cdata
[i
].devaddr
,
4076 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
4078 gomp_mutex_unlock (&devicep
->lock
);
4079 exit (EXIT_FAILURE
);
4085 case GOMP_MAP_ATTACH
:
4086 case GOMP_MAP_POINTER
:
4087 case GOMP_MAP_ALWAYS_POINTER
:
4088 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
4089 devaddrs
[i
] + sizes
[i
],
4090 devaddrs
[i
] + sizes
[i
]
4091 + sizeof (void*), false);
4092 cdata
[i
].present
= n2
!= NULL
;
4093 cdata
[i
].devaddr
= devaddrs
[i
];
4095 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
4096 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
4099 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
4100 devaddrs
[i
] + sizes
[i
],
4101 devaddrs
[i
] + sizes
[i
]
4102 + sizeof (void*), false);
4105 cdata
[i
].present
= true;
4106 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
4107 - cdata
[j
].devaddr
);
4110 if (!cdata
[i
].present
)
4113 size_t size
= sizeof (void *);
4114 if (always_pinned_mode
)
4116 ptr
= gomp_page_locked_host_alloc_dev (devicep
,
4120 gomp_mutex_unlock (&devicep
->lock
);
4121 exit (EXIT_FAILURE
);
4125 ptr
= gomp_malloc (size
);
4126 devaddrs
[i
] = (uintptr_t) ptr
;
4128 /* Assume that when present, the pointer is already correct. */
4130 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
4133 case GOMP_MAP_TO_PSET
:
4134 /* Assume that when present, the pointers are fine and no 'to:'
4136 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
4137 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
4139 cdata
[i
].present
= n2
!= NULL
;
4140 cdata
[i
].devaddr
= devaddrs
[i
];
4142 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
4143 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
4146 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
4148 devaddrs
[i
] + sizes
[i
], false);
4151 cdata
[i
].present
= true;
4152 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
4153 - cdata
[j
].devaddr
);
4156 if (!cdata
[i
].present
)
4158 cdata
[i
].aligned
= true;
4159 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
4161 if (always_pinned_mode
)
4163 ptr
= gomp_page_locked_host_aligned_alloc_dev
4164 (devicep
, align
, sizes
[i
]);
4167 gomp_mutex_unlock (&devicep
->lock
);
4168 exit (EXIT_FAILURE
);
4172 ptr
= gomp_aligned_alloc (align
, sizes
[i
]);
4173 devaddrs
[i
] = (uint64_t) (uintptr_t) ptr
;
4174 gomp_copy_dev2host (devicep
, aq
,
4175 (void *) (uintptr_t) devaddrs
[i
],
4176 (void *) (uintptr_t) cdata
[i
].devaddr
,
4178 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
4180 gomp_mutex_unlock (&devicep
->lock
);
4181 exit (EXIT_FAILURE
);
4184 for (j
= i
+ 1; j
< mapnum
; j
++)
4186 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
4187 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
4188 && !GOMP_MAP_POINTER_P (kind
))
4190 if (devaddrs
[j
] < devaddrs
[i
])
4192 if (cdata
[i
].present
)
4194 if (devaddrs
[j
] == 0)
4196 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
4201 /* Dereference devaddrs[j] to get the device addr. */
4202 assert (devaddrs
[j
] - sizes
[j
] == cdata
[i
].devaddr
);
4203 devaddrs
[j
] = *(uint64_t *) (uintptr_t) (devaddrs
[i
]
4205 cdata
[j
].present
= true;
4206 cdata
[j
].devaddr
= devaddrs
[j
];
4207 if (devaddrs
[j
] == 0)
4209 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
4211 devaddrs
[j
] + sizeof (void*),
4214 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
4215 - cdata
[k
].devaddr
);
4218 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
4220 devaddrs
[j
] + sizeof (void*),
4224 gomp_mutex_unlock (&devicep
->lock
);
4225 gomp_fatal ("Pointer target wasn't mapped");
4227 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
4228 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
4230 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
4231 = (void *) (uintptr_t) devaddrs
[j
];
4235 case GOMP_MAP_STRUCT
:
4236 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
4237 devaddrs
[i
+ sizes
[i
]]
4238 + sizes
[i
+ sizes
[i
]], false);
4239 cdata
[i
].present
= n2
!= NULL
;
4240 cdata
[i
].devaddr
= devaddrs
[i
];
4241 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
4244 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
4246 + sizes
[i
+ sizes
[i
]]);
4247 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
4248 cdata
[i
].aligned
= true;
4250 if (always_pinned_mode
)
4252 ptr
= gomp_page_locked_host_aligned_alloc_dev
4253 (devicep
, align
, sz
);
4256 gomp_mutex_unlock (&devicep
->lock
);
4257 exit (EXIT_FAILURE
);
4261 ptr
= gomp_aligned_alloc (align
, sz
);
4262 devaddrs
[i
] = (uintptr_t) ptr
;
4263 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
4266 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
4267 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
4270 gomp_mutex_unlock (&devicep
->lock
);
4271 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
4274 gomp_mutex_unlock (&devicep
->lock
);
4279 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
) && mapnum
> 0)
4281 uint64_t struct_cpy
= 0;
4282 bool clean_struct
= false;
4283 for (uint64_t i
= 0; i
< mapnum
; i
++)
4285 if (cdata
[i
].devaddr
== 0)
4287 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
4288 bool copy
= !cdata
[i
].present
|| struct_cpy
;
4291 case GOMP_MAP_FORCE_FROM
:
4292 case GOMP_MAP_FORCE_TOFROM
:
4293 case GOMP_MAP_ALWAYS_FROM
:
4294 case GOMP_MAP_ALWAYS_TOFROM
:
4295 case GOMP_MAP_PRESENT_FROM
:
4296 case GOMP_MAP_PRESENT_TOFROM
:
4297 case GOMP_MAP_ALWAYS_PRESENT_FROM
:
4298 case GOMP_MAP_ALWAYS_PRESENT_TOFROM
:
4302 case GOMP_MAP_TOFROM
:
4305 gomp_copy_host2dev (devicep
, aq
,
4306 (void *) (uintptr_t) cdata
[i
].devaddr
,
4307 (void *) (uintptr_t) devaddrs
[i
],
4308 sizes
[i
], false, NULL
);
4309 if (aq
&& !devicep
->openacc
.async
.synchronize_func (aq
))
4310 exit (EXIT_FAILURE
);
4320 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
4322 clean_struct
= true;
4323 struct_cpy
= sizes
[i
];
4325 else if (!cdata
[i
].present
&& cdata
[i
].aligned
)
4327 void *ptr
= (void *) (uintptr_t) devaddrs
[i
];
4328 if (always_pinned_mode
)
4330 if (!gomp_page_locked_host_aligned_free_dev (devicep
,
4333 exit (EXIT_FAILURE
);
4336 gomp_aligned_free (ptr
);
4338 else if (!cdata
[i
].present
)
4340 void *ptr
= (void *) (uintptr_t) devaddrs
[i
];
4341 if (always_pinned_mode
)
4343 if (!gomp_page_locked_host_free_dev (devicep
, ptr
, aq
))
4344 exit (EXIT_FAILURE
);
4351 for (uint64_t i
= 0; i
< mapnum
; i
++)
4352 if (!cdata
[i
].present
4353 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
4354 == GOMP_MAP_STRUCT
))
4356 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
4357 void *ptr
= (void *) (uintptr_t) devaddrs
[i
];
4358 if (always_pinned_mode
)
4360 if (!gomp_page_locked_host_aligned_free_dev (devicep
,
4362 exit (EXIT_FAILURE
);
4365 gomp_aligned_free (ptr
);
4368 if (always_pinned_mode
)
4370 if (!gomp_page_locked_host_free_dev (devicep
, devaddrs
, aq
)
4371 || !gomp_page_locked_host_free_dev (devicep
, sizes
, aq
)
4372 || !gomp_page_locked_host_free_dev (devicep
, kinds
, aq
))
4373 exit (EXIT_FAILURE
);
4384 /* Host fallback for GOMP_target_data{,_ext} routines. */
4387 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
4389 struct gomp_task_icv
*icv
= gomp_icv (false);
4391 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
4393 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
4394 "be used for offloading");
4396 if (icv
->target_data
)
4398 /* Even when doing a host fallback, if there are any active
4399 #pragma omp target data constructs, need to remember the
4400 new #pragma omp target data, otherwise GOMP_target_end_data
4401 would get out of sync. */
4402 struct target_mem_desc
*tgt
4403 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
4404 NULL
, GOMP_MAP_VARS_DATA
);
4405 tgt
->prev
= icv
->target_data
;
4406 icv
->target_data
= tgt
;
4411 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
4412 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
4414 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4417 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4418 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
4419 return gomp_target_data_fallback (devicep
);
4421 struct target_mem_desc
*tgt
4422 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
4423 NULL
, GOMP_MAP_VARS_DATA
);
4424 struct gomp_task_icv
*icv
= gomp_icv (true);
4425 tgt
->prev
= icv
->target_data
;
4426 icv
->target_data
= tgt
;
4430 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
4431 size_t *sizes
, unsigned short *kinds
)
4433 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
4436 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4437 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4438 return gomp_target_data_fallback (devicep
);
4440 struct target_mem_desc
*tgt
4441 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
4442 NULL
, GOMP_MAP_VARS_DATA
);
4443 struct gomp_task_icv
*icv
= gomp_icv (true);
4444 tgt
->prev
= icv
->target_data
;
4445 icv
->target_data
= tgt
;
4449 GOMP_target_end_data (void)
4451 struct gomp_task_icv
*icv
= gomp_icv (false);
4452 if (icv
->target_data
)
4454 struct target_mem_desc
*tgt
= icv
->target_data
;
4455 icv
->target_data
= tgt
->prev
;
4456 gomp_unmap_vars (tgt
, true, NULL
);
/* Handle '#pragma omp target update' (legacy GOMP_4.0 entry point,
   8-bit kinds, no depend/nowait support).  Copies data between host and
   DEVICE for already-mapped variables.  */

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* Nothing to do when running on the host or a shared-memory device.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  /* 'false': legacy 8-bit map kinds.  */
  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
/* Handle '#pragma omp target update' (GOMP_4.5+ entry point) with
   support for depend clauses and nowait: either defer the update as a
   target task or wait for dependencies and perform it inline.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer as an asynchronous target task; if that succeeds we
		 are done here.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to update on the host or a shared-memory device.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* 'true': 16-bit map kinds.  */
  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
/* Handle the unmapping phase of '#pragma omp target exit data' on
   DEVICEP: detach pointers, decrement refcounts, copy 'from' data back
   to the host, and queue fully-released mappings for removal.  Called
   with the caller NOT holding DEVICEP->lock; takes and releases it.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds,
		htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  /* First pass: process all detach operations, before any data is copied
     back or mappings removed.  */
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
	struct splay_tree_key_s cur_node;
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizeof (void *);
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

	if (n)
	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
			       false, NULL);
      }

  int nrmvars = 0;
  /* VLA sized by the map count: at most one removal per list item.  */
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);

	  if (!k)
	    continue;

	  bool delete_p = (kind == GOMP_MAP_DELETE
			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
	  bool do_copy, do_remove;
	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
				   &do_remove);

	  if ((kind == GOMP_MAP_FROM && do_copy)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    {
	      if (k->aux && k->aux->attach_count)
		{
		  /* We have to be careful not to overwrite still attached
		     pointers during the copyback to host.  Copy word by
		     word, skipping slots with live attachments.  */
		  uintptr_t addr = k->host_start;
		  while (addr < k->host_end)
		    {
		      size_t i = (addr - k->host_start) / sizeof (void *);
		      if (k->aux->attach_count[i] == 0)
			gomp_copy_dev2host (devicep, NULL, (void *) addr,
					    (void *) (k->tgt->tgt_start
						      + k->tgt_offset
						      + addr - k->host_start),
					    sizeof (void *));
		      addr += sizeof (void *);
		    }
		}
	      else
		gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
				    (void *) (k->tgt->tgt_start + k->tgt_offset
					      + cur_node.host_start
					      - k->host_start),
				    cur_node.host_end - cur_node.host_start);
	    }

	  /* Structure elements lists are removed altogether at once, which
	     may cause immediate deallocation of the target_mem_desc, causing
	     errors if we still have following element siblings to copy back.
	     While we're at it, it also seems more disciplined to simply
	     queue all removals together for processing below.  */
	  if (do_remove)
	    remove_vars[nrmvars++] = k;
	  break;

	case GOMP_MAP_DETACH:
	  /* Handled in the first pass above.  */
	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  /* Now process all queued removals in one go.  */
  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}
/* Handle '#pragma omp target enter data' and '#pragma omp target exit
   data' (distinguished by GOMP_TARGET_FLAG_EXIT_DATA in FLAGS), with
   optional depend/nowait handling via target tasks.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device, true);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer as an asynchronous target task.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* Nothing to map on the host or a shared-memory device.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  htab_t refcount_set = htab_create (mapnum);

  /* The variables are mapped separately such that they can be released
     independently.  */
  size_t i, j;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  /* A struct entry covers itself plus sizes[i] field entries.  */
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
	{
	  /* A pointer-set entry is mapped together with all following
	     pointer entries that belong to it.  */
	  for (j = i + 1; j < mapnum; j++)
	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
	      break;
	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, &refcount_set,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += j - i - 1;
	}
      else if (i + 1 < mapnum
	       && ((kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH
		   || ((kinds[i + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER
		       && (kinds[i] & 0xff) != GOMP_MAP_ALWAYS_POINTER)))
	{
	  /* An attach operation must be processed together with the mapped
	     base-pointer list item.  */
	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	  i++;
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
  htab_free (refcount_set);
}
/* Task function for asynchronous target tasks (DATA is a struct
   gomp_target_task).  For kernel-launch tasks (ttask->fn != NULL) this
   runs twice: first to map variables and fire off the async kernel
   (returns true, meaning "still running"); then, once the plugin signals
   completion, to unmap (returns false).  For data tasks it performs the
   deferred enter/exit/update work inline and returns false.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* Cannot run on the device: execute the region on the host.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
				ttask->args);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  /* Second invocation: the kernel has completed; clean up.  */
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true, NULL);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      NULL, GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      assert (devicep->async_run_func);
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else
    {
      htab_t refcount_set = htab_create (ttask->mapnum);
      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
	for (i = 0; i < ttask->mapnum; i++)
	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	    {
	      /* A struct entry covers itself plus sizes[i] fields.  */
	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
	      i += ttask->sizes[i];
	    }
	  else
	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
			   &ttask->sizes[i], &ttask->kinds[i], true,
			   &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
      else
	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
			ttask->kinds, &refcount_set);
      htab_free (refcount_set);
    }
  return false;
}
/* Legacy host handler for '#pragma omp teams': only records the
   thread-limit ICV; NUM_TEAMS is ignored on the host.  */

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      /* Clamp to UINT_MAX rather than truncating values above INT_MAX.  */
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}
/* Host handler for '#pragma omp teams' (OpenMP 5.1 num_teams lower/upper
   bound form).  On the FIRST call it records the thread-limit ICV and
   initializes the team loop (the host runs teams sequentially); on
   subsequent calls it advances to the next team.  Returns true while
   another team body should be executed, false when done.  */

bool
GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
	     unsigned int thread_limit, bool first)
{
  struct gomp_thread *thr = gomp_thread ();
  if (first)
    {
      if (thread_limit)
	{
	  struct gomp_task_icv *icv = gomp_icv (true);
	  icv->thread_limit_var
	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
	}
      /* Host fallback runs the lower bound number of teams; the upper
	 bound is irrelevant here.  */
      (void) num_teams_high;
      if (num_teams_low == 0)
	num_teams_low = 1;
      thr->num_teams = num_teams_low - 1;
      thr->team_num = 0;
    }
  else if (thr->team_num == thr->num_teams)
    return false;
  else
    ++thr->team_num;
  return true;
}
/* OpenMP 5.x omp_target_alloc: allocate SIZE bytes on DEVICE_NUM.
   Falls back to host malloc for the initial/host device and for
   shared-memory devices; returns NULL for an invalid device.  */

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return malloc (size);

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  /* Device allocation is serialized on the device lock.  */
  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
/* OpenMP 5.x omp_target_free: release memory obtained from
   omp_target_alloc on DEVICE_NUM.  Host and shared-memory devices use
   plain free; NULL pointers and invalid devices are no-ops.  */

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL || device_ptr == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}
/* Allocate SIZE bytes of unified shared memory usable by DEVICE_NUM.
   Falls back to host malloc for the host device and shared-memory
   devices; returns NULL if the plugin provides no USM allocator.  */

void *
gomp_usm_alloc (size_t size, int device_num)
{
  if (device_num == gomp_get_num_devices ())
    return malloc (size);

  struct gomp_device_descr *devicep = resolve_device (device_num, true);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  void *ret = NULL;
  gomp_mutex_lock (&devicep->lock);
  /* usm_alloc_func is optional plugin functionality.  */
  if (devicep->usm_alloc_func)
    ret = devicep->usm_alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
/* Free unified shared memory obtained from gomp_usm_alloc for
   DEVICE_NUM.  Host / shared-memory devices use plain free; a plugin
   failure is fatal.  */

void
gomp_usm_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == gomp_get_num_devices ())
    {
      free (device_ptr);
      return;
    }

  struct gomp_device_descr *devicep = resolve_device (device_num, true);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->usm_free_func
      && !devicep->usm_free_func (devicep->target_id, device_ptr))
    {
      /* Drop the lock before aborting so gomp_fatal can run cleanly.  */
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", device_ptr);
    }
  gomp_mutex_unlock (&devicep->lock);
}
/* Allocate page-locked host memory via DEVICE.
   Returns the allocation, or NULL on failure.  If ALLOW_NULL, a NULL
   result is acceptable to the caller; otherwise it is diagnosed.  */

static void *
gomp_page_locked_host_alloc_dev (struct gomp_device_descr *device,
				 size_t size, bool allow_null)
{
  gomp_debug (0, "%s: device=%p (%s), size=%llu\n",
	      __FUNCTION__, device, device->name, (unsigned long long) size);

  void *ret;
  if (!device->page_locked_host_alloc_func (&ret, size))
    {
      const char *fmt
	= "Failed to allocate page-locked host memory via %s libgomp plugin";
      /* NOTE(review): the condition selecting fatal vs. non-fatal
	 diagnosis was lost in extraction — confirm against the original
	 source.  */
      if (!allow_null)
	gomp_fatal (fmt, device->name);
      else
	gomp_error (fmt, device->name);
      ret = NULL;
    }
  else if (ret == NULL && !allow_null)
    gomp_error ("Out of memory allocating %lu bytes"
		" page-locked host memory"
		" via %s libgomp plugin",
		(unsigned long) size, device->name);

  gomp_debug (0, " -> ret=[%p, %p)\n",
	      ret, ret + size);
  return ret;
}
/* Free page-locked host memory via DEVICE, optionally asynchronously on
   queue AQ.  Returns false (after diagnosing) if the plugin fails.  */

static bool
gomp_page_locked_host_free_dev (struct gomp_device_descr *device,
				void *ptr,
				struct goacc_asyncqueue *aq)
{
  gomp_debug (0, "%s: device=%p (%s), ptr=%p, aq=%p\n",
	      __FUNCTION__, device, device->name, ptr, aq);

  if (!device->page_locked_host_free_func (ptr, aq))
    {
      gomp_error ("Failed to free page-locked host memory"
		  " via %s libgomp plugin",
		  device->name);
      return false;
    }
  return true;
}
/* Allocate aligned page-locked host memory via DEVICE.

   That is, 'gomp_aligned_alloc' (see 'alloc.c') for page-locked host
   memory: over-allocate by AL, align the result, and stash the raw
   pointer one word before the aligned pointer for later freeing.  */

static void *
gomp_page_locked_host_aligned_alloc_dev (struct gomp_device_descr *device,
					 size_t al, size_t size)
{
  gomp_debug (0, "%s: device=%p (%s), al=%llu, size=%llu\n",
	      __FUNCTION__, device, device->name,
	      (unsigned long long) al, (unsigned long long) size);

  void *ret = NULL;
  /* Need at least pointer alignment to store the back-pointer.  */
  if (al < sizeof (void *))
    al = sizeof (void *);
  /* Only power-of-two alignments and non-zero sizes are supported.  */
  if ((al & (al - 1)) == 0 && size)
    {
      void *p = gomp_page_locked_host_alloc_dev (device, size + al, true);
      if (p)
	{
	  /* Round up to the next AL boundary; [-1] holds the raw
	     pointer for gomp_page_locked_host_aligned_free_dev.  */
	  void *ap = (void *) (((uintptr_t) p + al) & -al);
	  ((void **) ap)[-1] = p;
	  ret = ap;
	}
    }
  if (ret == NULL)
    gomp_error ("Out of memory allocating %lu bytes", (unsigned long) size);

  gomp_debug (0, " -> ret=[%p, %p)\n",
	      ret, ret + size);
  return ret;
}
/* Free aligned page-locked host memory via DEVICE.

   That is, 'gomp_aligned_free' (see 'alloc.c') for page-locked host
   memory: recover the raw pointer stored one word before PTR and free
   that.  Returns false on plugin failure.  */

static bool
gomp_page_locked_host_aligned_free_dev (struct gomp_device_descr *device,
					void *ptr,
					struct goacc_asyncqueue *aq)
{
  gomp_debug (0, "%s: device=%p (%s), ptr=%p, aq=%p\n",
	      __FUNCTION__, device, device->name, ptr, aq);

  if (ptr)
    {
      /* The raw allocation was stashed just before the aligned pointer.  */
      ptr = ((void **) ptr)[-1];
      gomp_debug (0, " ptr=%p\n",
		  ptr);
      if (!gomp_page_locked_host_free_dev (device, ptr, aq))
	return false;
    }
  return true;
}
/* Register page-locked host memory via DEVICE.
   Pins [PTR, PTR+SIZE) for transfers of map kind KIND.  Returns the
   plugin's result: positive if registered, zero if not necessary,
   negative (after diagnosing) on failure.  */

attribute_hidden int
gomp_page_locked_host_register_dev (struct gomp_device_descr *device,
				    void *ptr, size_t size, int kind)
{
  gomp_debug (0, "%s: device=%p (%s), ptr=%p, size=%llu, kind=%d\n",
	      __FUNCTION__, device, device->name,
	      ptr, (unsigned long long) size, kind);

  int ret = device->page_locked_host_register_func (device->target_id,
						    ptr, size, kind);
  /* NOTE(review): the exact failure condition tested here was lost in
     extraction; callers treat negative results as failure.  */
  if (ret < 0)
    gomp_error ("Failed to register page-locked host memory"
		" via %s libgomp plugin",
		device->name);
  return ret;
}
/* Unregister page-locked host memory via DEVICE.
   Undoes gomp_page_locked_host_register_dev for [PTR, PTR+SIZE),
   optionally asynchronously on AQ.  Returns false on plugin failure.  */

attribute_hidden bool
gomp_page_locked_host_unregister_dev (struct gomp_device_descr *device,
				      void *ptr, size_t size,
				      struct goacc_asyncqueue *aq)
{
  gomp_debug (0, "%s: device=%p (%s), ptr=%p, size=%llu, aq=%p\n",
	      __FUNCTION__, device, device->name,
	      ptr, (unsigned long long) size, aq);

  if (!device->page_locked_host_unregister_func (ptr, size, aq))
    {
      gomp_error ("Failed to unregister page-locked host memory"
		  " via %s libgomp plugin",
		  device->name);
      return false;
    }
  return true;
}
/* Device (really: libgomp plugin) to use for paged-locked memory.  We
   assume there is either none or exactly one such device for the lifetime of
   the process.  Lazily initialized; (void *) -1 means "not yet
   determined".  */

static struct gomp_device_descr *device_for_page_locked
  = /* uninitialized */ (void *) -1;

/* Return the device providing page-locked memory functionality, or NULL
   if there is none.  Thread-safe via relaxed atomics when
   HAVE_SYNC_BUILTINS; a benign race may initialize twice with the same
   result.  */

static struct gomp_device_descr *
get_device_for_page_locked (void)
{
  gomp_debug (0, "%s\n",
	      __FUNCTION__);

  struct gomp_device_descr *device;
#ifdef HAVE_SYNC_BUILTINS
  device
    = __atomic_load_n (&device_for_page_locked, MEMMODEL_RELAXED);
  if (device == (void *) -1)
    {
      gomp_debug (0, " init\n");

      gomp_init_targets_once ();

      device = NULL;
      for (int i = 0; i < num_devices; ++i)
	{
	  gomp_debug (0, " i=%d, target_id=%d\n",
		      i, devices[i].target_id);

	  /* We consider only the first device of potentially several of the
	     same type as this functionality is not specific to an individual
	     offloading device, but instead relates to the host-side
	     implementation of the respective offloading implementation.  */
	  if (devices[i].target_id != 0)
	    continue;

	  /* Skip plugins without page-locking support.  */
	  if (!devices[i].page_locked_host_alloc_func)
	    continue;

	  gomp_debug (0, " found device: %p (%s)\n",
		      &devices[i], devices[i].name);
	  if (device)
	    gomp_fatal ("Unclear how %s and %s libgomp plugins may"
			" simultaneously provide functionality"
			" for page-locked memory",
			device->name, devices[i].name);
	  else
	    device = &devices[i];
	}

      struct gomp_device_descr *device_old
	= __atomic_exchange_n (&device_for_page_locked, device,
			       MEMMODEL_RELAXED);
      gomp_debug (0, " old device_for_page_locked: %p\n",
		  device_old);
      assert (device_old == (void *) -1
	      /* We shouldn't have concurrently found a different or no
		 device.  */
	      || device_old == device);
    }
#else /* !HAVE_SYNC_BUILTINS */
  gomp_debug (0, " not implemented for '!HAVE_SYNC_BUILTINS'\n");
  (void) &device_for_page_locked;
  device = NULL;
#endif /* HAVE_SYNC_BUILTINS */

  gomp_debug (0, " -> device=%p (%s)\n",
	      device, device ? device->name : "[none]");
  return device;
}
/* Allocate page-locked host memory.
   Returns whether we have a device capable of that.  On success *PTR
   receives the allocation (possibly NULL under memory pressure, as
   allow_null is passed to the device-level allocator).  */

attribute_hidden bool
gomp_page_locked_host_alloc (void **ptr, size_t size)
{
  gomp_debug (0, "%s: ptr=%p, size=%llu\n",
	      __FUNCTION__, ptr, (unsigned long long) size);

  struct gomp_device_descr *device = get_device_for_page_locked ();
  gomp_debug (0, " device=%p (%s)\n",
	      device, device ? device->name : "[none]");
  if (device)
    {
      gomp_mutex_lock (&device->lock);
      if (device->state == GOMP_DEVICE_UNINITIALIZED)
	gomp_init_device (device);
      else if (device->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&device->lock);
	  /* NOTE(review): "for for" typo is in the original message.  */
	  gomp_fatal ("Device %s used for for page-locked memory is finalized",
		      device->name);
	}
      gomp_mutex_unlock (&device->lock);

      *ptr = gomp_page_locked_host_alloc_dev (device, size, true);
    }
  return device != NULL;
}
/* Free page-locked host memory.
   This must only be called if 'gomp_page_locked_host_alloc' returned
   true.  A plugin failure to free is fatal.  */

attribute_hidden void
gomp_page_locked_host_free (void *ptr)
{
  gomp_debug (0, "%s: ptr=%p\n",
	      __FUNCTION__, ptr);

  struct gomp_device_descr *device = get_device_for_page_locked ();
  gomp_debug (0, " device=%p (%s)\n",
	      device, device ? device->name : "[none]");
  assert (device);

  gomp_mutex_lock (&device->lock);
  assert (device->state != GOMP_DEVICE_UNINITIALIZED);
  if (device->state == GOMP_DEVICE_FINALIZED)
    {
      /* Nothing to free once the device has shut down.  */
      gomp_mutex_unlock (&device->lock);
      return;
    }
  gomp_mutex_unlock (&device->lock);

  if (!gomp_page_locked_host_free_dev (device, ptr, NULL))
    exit (EXIT_FAILURE);
}
/* OpenMP omp_target_is_present: return non-zero if PTR has a
   corresponding mapping on DEVICE_NUM (always true for the host and
   shared-memory devices).  */

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (device_num == omp_initial_device
      || device_num == gomp_get_num_devices ())
    return 1;

  struct gomp_device_descr *devicep = resolve_device (device_num, false);
  if (devicep == NULL)
    return 0;

  if (ptr == NULL)
    return 1;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  /* Zero-length lookup: host_end == host_start.  */
  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
/* Validate the device numbers for omp_target_memcpy* and resolve them
   into device descriptors.  A NULL *DST_DEVICEP / *SRC_DEVICEP on
   return means "treat as host memory".  Returns 0 on success, EINVAL
   for an invalid device number.  */

static int
omp_target_memcpy_check (int dst_device_num, int src_device_num,
			 struct gomp_device_descr **dst_devicep,
			 struct gomp_device_descr **src_devicep)
{
  if (dst_device_num != gomp_get_num_devices ()
      /* Above gomp_get_num_devices has to be called unconditionally.  */
      && dst_device_num != omp_initial_device)
    {
      *dst_devicep = resolve_device (dst_device_num, false);
      if (*dst_devicep == NULL)
	return EINVAL;

      /* Shared-memory / host-fallback devices are handled as host.  */
      if (!((*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*dst_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*dst_devicep = NULL;
    }

  if (src_device_num != num_devices_openmp
      && src_device_num != omp_initial_device)
    {
      *src_devicep = resolve_device (src_device_num, false);
      if (*src_devicep == NULL)
	return EINVAL;

      if (!((*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || (*src_devicep)->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	*src_devicep = NULL;
    }

  return 0;
}
/* Perform the actual copy for omp_target_memcpy: host<->host,
   host->device, device->host, or device->device (same device only).
   NULL devicep means host memory.  In 'always_pinned_mode' host buffers
   are temporarily registered (pinned) around the transfer.  Returns 0
   on success, EINVAL on transfer failure.  */

static int
omp_target_memcpy_copy (void *dst, const void *src, size_t length,
			size_t dst_offset, size_t src_offset,
			struct gomp_device_descr *dst_devicep,
			struct gomp_device_descr *src_devicep)
{
  bool ret;
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      /* No 'gomp_verify_always_pinned_mode' here.  */
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      /* Host -> device.  */
      gomp_mutex_lock (&dst_devicep->lock);

      void *src_ptr = (void *) src + src_offset;
      int src_ptr_page_locked_host_p = 0;
      if (always_pinned_mode)
	{
	  src_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
	    (dst_devicep, src_ptr, length, GOMP_MAP_TO);
	  if (src_ptr_page_locked_host_p < 0)
	    {
	      gomp_mutex_unlock (&dst_devicep->lock);
	      /* NOTE(review): exact error code at this dropped line not
		 recoverable; failure propagates to the caller.  */
	      return ENOMEM;
	    }
	}
      /* No 'gomp_verify_always_pinned_mode' here; have just registered.  */
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					src_ptr, length);
      if (src_ptr_page_locked_host_p
	  && !gomp_page_locked_host_unregister_dev (dst_devicep,
						    src_ptr, length, NULL))
	{
	  gomp_mutex_unlock (&dst_devicep->lock);
	  /* NOTE(review): dropped line — failure handling reconstructed.  */
	  exit (EXIT_FAILURE);
	}
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      /* Device -> host.  */
      gomp_mutex_lock (&src_devicep->lock);

      void *dst_ptr = (void *) dst + dst_offset;
      int dst_ptr_page_locked_host_p = 0;
      if (always_pinned_mode)
	{
	  dst_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
	    (src_devicep, dst_ptr, length, GOMP_MAP_FROM);
	  if (dst_ptr_page_locked_host_p < 0)
	    {
	      gomp_mutex_unlock (&src_devicep->lock);
	      /* NOTE(review): exact error code at this dropped line not
		 recoverable.  */
	      return ENOMEM;
	    }
	}
      /* No 'gomp_verify_always_pinned_mode' here; have just registered.  */
      ret = src_devicep->dev2host_func (src_devicep->target_id, dst_ptr,
					(char *) src + src_offset, length);
      if (dst_ptr_page_locked_host_p
	  && !gomp_page_locked_host_unregister_dev (src_devicep,
						    dst_ptr, length, NULL))
	{
	  gomp_mutex_unlock (&src_devicep->lock);
	  /* NOTE(review): dropped line — failure handling reconstructed.  */
	  exit (EXIT_FAILURE);
	}
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      /* Device -> same device.  */
      gomp_mutex_lock (&src_devicep->lock);
      /* No 'gomp_verify_always_pinned_mode' here.  */
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  /* Copies between two different devices are not supported.  */
  return EINVAL;
}
/* OpenMP omp_target_memcpy: copy LENGTH bytes from SRC (+SRC_OFFSET) on
   SRC_DEVICE_NUM to DST (+DST_OFFSET) on DST_DEVICE_NUM.  Returns 0 on
   success or an errno-style error code.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
		   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
				dst_devicep, src_devicep);
  return ret;
}
/* Argument bundle passed (by value, via GOMP_task copy) to the
   asynchronous omp_target_memcpy helper task.  */

typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;

/* Task function executing a deferred omp_target_memcpy; a failed copy
   is fatal because the async API has no way to report it.  */

static void
omp_target_memcpy_async_helper (void *args)
{
  omp_target_memcpy_data *a = args;
  if (omp_target_memcpy_copy (a->dst, a->src, a->length, a->dst_offset,
			      a->src_offset, a->dst_devicep, a->src_devicep))
    gomp_fatal ("omp_target_memcpy failed");
}
/* OpenMP 5.1 omp_target_memcpy_async: like omp_target_memcpy but
   executed as a task, optionally dependent on DEPOBJ_LIST.  Returns 0
   on success or an error code from the device checks.  */

int
omp_target_memcpy_async (void *dst, const void *src, size_t length,
			 size_t dst_offset, size_t src_offset,
			 int dst_device_num, int src_device_num,
			 int depobj_count, omp_depend_t *depobj_list)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  unsigned int flags = 0;
  /* GOMP depend array layout: 5 header slots + one per depobj.  */
  void *depend[depobj_count + 5];
  int i;
  int check = omp_target_memcpy_check (dst_device_num, src_device_num,
				       &dst_devicep, &src_devicep);
  if (check)
    return check;

  omp_target_memcpy_data s = {
    .dst = dst,
    .src = src,
    .length = length,
    .dst_offset = dst_offset,
    .src_offset = src_offset,
    .dst_devicep = dst_devicep,
    .src_devicep = src_devicep
  };

  if (depobj_count > 0 && depobj_list != NULL)
    {
      flags |= GOMP_TASK_FLAG_DEPEND;
      depend[0] = 0;
      depend[1] = (void *) (uintptr_t) depobj_count;
      depend[2] = depend[3] = depend[4] = 0;
      for (i = 0; i < depobj_count; ++i)
	depend[i + 5] = &depobj_list[i];
    }

  /* 'true' for if_clause: the task may be deferred.  */
  GOMP_task (omp_target_memcpy_async_helper, &s, NULL, sizeof (s),
	     __alignof__ (s), true, flags, depend, 0, NULL);

  return 0;
}
/* Recursive worker for omp_target_memcpy_rect: copy an NUM_DIMS-
   dimensional rectangular subvolume described by VOLUME / *_OFFSETS /
   *_DIMENSIONS.  The base case (num_dims == 1) performs the actual
   transfer (with temporary host pinning in 'always_pinned_mode');
   higher dimensions recurse slice by slice.  Returns 0 or EINVAL.  */

static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      /* Overflow-checked byte length and offsets.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  /* No 'gomp_verify_always_pinned_mode' here.  */
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	{
	  /* Host -> device; pin the host source if required.  */
	  void *src_ptr = (void *) src + src_off;
	  int src_ptr_page_locked_host_p = 0;
	  if (always_pinned_mode)
	    {
	      src_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
		(dst_devicep, src_ptr, length, GOMP_MAP_TO);
	      if (src_ptr_page_locked_host_p < 0)
		/* NOTE(review): dropped line — error path reconstructed.  */
		return EINVAL;
	    }
	  /* No 'gomp_verify_always_pinned_mode' here; have just registered.  */
	  ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					    (char *) dst + dst_off,
					    src_ptr, length);
	  if (src_ptr_page_locked_host_p
	      && !gomp_page_locked_host_unregister_dev (dst_devicep,
							src_ptr, length, NULL))
	    /* NOTE(review): dropped line — failure handling reconstructed.  */
	    exit (EXIT_FAILURE);
	}
      else if (dst_devicep == NULL)
	{
	  /* Device -> host; pin the host destination if required.  */
	  void *dst_ptr = (void *) dst + dst_off;
	  int dst_ptr_page_locked_host_p = 0;
	  if (always_pinned_mode)
	    {
	      dst_ptr_page_locked_host_p = gomp_page_locked_host_register_dev
		(src_devicep, dst_ptr, length, GOMP_MAP_FROM);
	      if (dst_ptr_page_locked_host_p < 0)
		/* NOTE(review): dropped line — error path reconstructed.  */
		return EINVAL;
	    }
	  /* No 'gomp_verify_always_pinned_mode' here; have just registered.  */
	  ret = src_devicep->dev2host_func (src_devicep->target_id,
					    dst_ptr,
					    (const char *) src + src_off,
					    length);
	  if (dst_ptr_page_locked_host_p
	      && !gomp_page_locked_host_unregister_dev (src_devicep,
							dst_ptr, length, NULL))
	    /* NOTE(review): dropped line — failure handling reconstructed.  */
	    exit (EXIT_FAILURE);
	}
      else if (src_devicep == dst_devicep)
	/* No 'gomp_verify_always_pinned_mode' here.  */
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
/* Validate parameters for omp_target_memcpy_rect*.  NULL DST and SRC is
   the standard's capability query (returns the max supported
   dimensions).  Rejects copies between two distinct devices.  */

static int
omp_target_memcpy_rect_check (void *dst, const void *src, int dst_device_num,
			      int src_device_num,
			      struct gomp_device_descr **dst_devicep,
			      struct gomp_device_descr **src_devicep)
{
  /* Per spec: NULL/NULL queries the supported number of dimensions.  */
  if (!dst && !src)
    return INT_MAX;

  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
				     dst_devicep, src_devicep);
  if (ret)
    return ret;

  /* Copies between two different devices are not supported.  */
  if (*src_devicep != NULL && *dst_devicep != NULL && *src_devicep != *dst_devicep)
    return EINVAL;

  return 0;
}
/* Perform a rectangular copy under the appropriate device lock (at most
   one of SRC_DEVICEP / DST_DEVICEP is non-NULL after the check), then
   release it.  Returns the worker's result.  */

static int
omp_target_memcpy_rect_copy (void *dst, const void *src,
			     size_t element_size, int num_dims,
			     const size_t *volume, const size_t *dst_offsets,
			     const size_t *src_offsets,
			     const size_t *dst_dimensions,
			     const size_t *src_dimensions,
			     struct gomp_device_descr *dst_devicep,
			     struct gomp_device_descr *src_devicep)
{
  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);

  return ret;
}
5691 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
5692 int num_dims
, const size_t *volume
,
5693 const size_t *dst_offsets
,
5694 const size_t *src_offsets
,
5695 const size_t *dst_dimensions
,
5696 const size_t *src_dimensions
,
5697 int dst_device_num
, int src_device_num
)
5699 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
5701 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
5702 src_device_num
, &dst_devicep
,
5708 int ret
= omp_target_memcpy_rect_copy (dst
, src
, element_size
, num_dims
,
5709 volume
, dst_offsets
, src_offsets
,
5710 dst_dimensions
, src_dimensions
,
5711 dst_devicep
, src_devicep
);
5720 size_t element_size
;
5721 const size_t *volume
;
5722 const size_t *dst_offsets
;
5723 const size_t *src_offsets
;
5724 const size_t *dst_dimensions
;
5725 const size_t *src_dimensions
;
5726 struct gomp_device_descr
*dst_devicep
;
5727 struct gomp_device_descr
*src_devicep
;
5729 } omp_target_memcpy_rect_data
;
5732 omp_target_memcpy_rect_async_helper (void *args
)
5734 omp_target_memcpy_rect_data
*a
= args
;
5735 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
5736 a
->num_dims
, a
->volume
, a
->dst_offsets
,
5737 a
->src_offsets
, a
->dst_dimensions
,
5738 a
->src_dimensions
, a
->dst_devicep
,
5741 gomp_fatal ("omp_target_memcpy_rect failed");
5745 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
5746 int num_dims
, const size_t *volume
,
5747 const size_t *dst_offsets
,
5748 const size_t *src_offsets
,
5749 const size_t *dst_dimensions
,
5750 const size_t *src_dimensions
,
5751 int dst_device_num
, int src_device_num
,
5752 int depobj_count
, omp_depend_t
*depobj_list
)
5754 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
5756 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
5757 src_device_num
, &dst_devicep
,
5759 void *depend
[depobj_count
+ 5];
5762 omp_target_memcpy_rect_data s
= {
5765 .element_size
= element_size
,
5766 .num_dims
= num_dims
,
5768 .dst_offsets
= dst_offsets
,
5769 .src_offsets
= src_offsets
,
5770 .dst_dimensions
= dst_dimensions
,
5771 .src_dimensions
= src_dimensions
,
5772 .dst_devicep
= dst_devicep
,
5773 .src_devicep
= src_devicep
5779 if (depobj_count
> 0 && depobj_list
!= NULL
)
5781 flags
|= GOMP_TASK_FLAG_DEPEND
;
5783 depend
[1] = (void *) (uintptr_t) depobj_count
;
5784 depend
[2] = depend
[3] = depend
[4] = 0;
5785 for (i
= 0; i
< depobj_count
; ++i
)
5786 depend
[i
+ 5] = &depobj_list
[i
];
5789 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
5790 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
5796 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
5797 size_t size
, size_t device_offset
, int device_num
)
5799 if (device_num
== omp_initial_device
5800 || device_num
== gomp_get_num_devices ())
5803 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5804 if (devicep
== NULL
)
5807 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5808 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5811 gomp_mutex_lock (&devicep
->lock
);
5813 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5814 struct splay_tree_key_s cur_node
;
5817 cur_node
.host_start
= (uintptr_t) host_ptr
;
5818 cur_node
.host_end
= cur_node
.host_start
+ size
;
5819 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
5822 if (n
->tgt
->tgt_start
+ n
->tgt_offset
5823 == (uintptr_t) device_ptr
+ device_offset
5824 && n
->host_start
<= cur_node
.host_start
5825 && n
->host_end
>= cur_node
.host_end
)
5830 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
5831 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
5835 tgt
->to_free
= NULL
;
5837 tgt
->list_count
= 0;
5838 tgt
->device_descr
= devicep
;
5839 splay_tree_node array
= tgt
->array
;
5840 splay_tree_key k
= &array
->key
;
5841 k
->host_start
= cur_node
.host_start
;
5842 k
->host_end
= cur_node
.host_end
;
5844 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
5845 k
->refcount
= REFCOUNT_INFINITY
;
5846 k
->dynamic_refcount
= 0;
5848 k
->page_locked_host_p
= false;
5850 array
->right
= NULL
;
5851 splay_tree_insert (&devicep
->mem_map
, array
);
5854 gomp_mutex_unlock (&devicep
->lock
);
5859 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
5861 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5862 if (devicep
== NULL
)
5865 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5868 gomp_mutex_lock (&devicep
->lock
);
5870 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5871 struct splay_tree_key_s cur_node
;
5874 cur_node
.host_start
= (uintptr_t) ptr
;
5875 cur_node
.host_end
= cur_node
.host_start
;
5876 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
5878 && n
->host_start
== cur_node
.host_start
5879 && n
->refcount
== REFCOUNT_INFINITY
5880 && n
->tgt
->tgt_start
== 0
5881 && n
->tgt
->to_free
== NULL
5882 && n
->tgt
->refcount
== 1
5883 && n
->tgt
->list_count
== 0)
5885 splay_tree_remove (&devicep
->mem_map
, n
);
5886 gomp_unmap_tgt (n
->tgt
);
5890 gomp_mutex_unlock (&devicep
->lock
);
5895 omp_get_mapped_ptr (const void *ptr
, int device_num
)
5897 if (device_num
== omp_initial_device
5898 || device_num
== omp_get_initial_device ())
5899 return (void *) ptr
;
5901 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5902 if (devicep
== NULL
)
5905 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5906 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5907 return (void *) ptr
;
5909 gomp_mutex_lock (&devicep
->lock
);
5911 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
5912 struct splay_tree_key_s cur_node
;
5915 cur_node
.host_start
= (uintptr_t) ptr
;
5916 cur_node
.host_end
= cur_node
.host_start
;
5917 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
5921 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
5922 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
5925 gomp_mutex_unlock (&devicep
->lock
);
5931 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
5933 if (device_num
== omp_initial_device
5934 || device_num
== gomp_get_num_devices ())
5937 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5938 if (devicep
== NULL
)
5941 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
5944 if (devicep
->is_usm_ptr_func
&& devicep
->is_usm_ptr_func ((void *) ptr
))
5951 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
5954 if (device_num
== omp_initial_device
5955 || device_num
== gomp_get_num_devices ())
5956 return gomp_pause_host ();
5958 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
5959 if (devicep
== NULL
)
5962 /* Do nothing for target devices for now. */
5967 omp_pause_resource_all (omp_pause_resource_t kind
)
5970 if (gomp_pause_host ())
5972 /* Do nothing for target devices for now. */
5976 ialias (omp_pause_resource
)
5977 ialias (omp_pause_resource_all
)
5980 GOMP_evaluate_target_device (int device_num
, const char *kind
,
5981 const char *arch
, const char *isa
)
5986 device_num
= omp_get_default_device ();
5988 if (kind
&& strcmp (kind
, "any") == 0)
5991 gomp_debug (1, "%s: device_num = %u, kind=%s, arch=%s, isa=%s",
5992 __FUNCTION__
, device_num
, kind
, arch
, isa
);
5994 if (omp_get_device_num () == device_num
)
5995 result
= GOMP_evaluate_current_device (kind
, arch
, isa
);
5998 if (!omp_is_initial_device ())
5999 /* Accelerators are not expected to know about other devices. */
6003 struct gomp_device_descr
*device
= resolve_device (device_num
, true);
6006 else if (device
->evaluate_device_func
)
6007 result
= device
->evaluate_device_func (device_num
, kind
, arch
,
6012 gomp_debug (1, " -> %s\n", result
? "true" : "false");
6016 #ifdef PLUGIN_SUPPORT
6018 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
6020 The handles of the found functions are stored in the corresponding fields
6021 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
6024 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
6025 const char *plugin_name
)
6027 const char *err
= NULL
, *last_missing
= NULL
;
6029 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
6031 #if OFFLOAD_DEFAULTED
6037 /* Check if all required functions are available in the plugin and store
6038 their handlers. None of the symbols can legitimately be NULL,
6039 so we don't need to check dlerror all the time. */
6041 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
6043 /* Similar, but missing functions are not an error. Return false if
6044 failed, true otherwise. */
6045 #define DLSYM_OPT(f, n) \
6046 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
6047 || (last_missing = #n, 0))
6050 if (device
->version_func () != GOMP_VERSION
)
6052 err
= "plugin version mismatch";
6059 DLSYM (get_num_devices
);
6060 DLSYM (init_device
);
6061 DLSYM (fini_device
);
6063 DLSYM (unload_image
);
6066 DLSYM_OPT (usm_alloc
, usm_alloc
);
6067 DLSYM_OPT (usm_free
, usm_free
);
6068 DLSYM_OPT (is_usm_ptr
, is_usm_ptr
);
6069 DLSYM_OPT (page_locked_host_alloc
, page_locked_host_alloc
);
6070 DLSYM_OPT (page_locked_host_free
, page_locked_host_free
);
6071 DLSYM_OPT (page_locked_host_register
, page_locked_host_register
);
6072 DLSYM_OPT (page_locked_host_unregister
, page_locked_host_unregister
);
6073 DLSYM_OPT (page_locked_host_p
, page_locked_host_p
);
6076 DLSYM (evaluate_device
);
6077 device
->capabilities
= device
->get_caps_func ();
6078 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
6081 DLSYM_OPT (async_run
, async_run
);
6082 DLSYM_OPT (can_run
, can_run
);
6085 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
6087 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
6088 || !DLSYM_OPT (openacc
.create_thread_data
,
6089 openacc_create_thread_data
)
6090 || !DLSYM_OPT (openacc
.destroy_thread_data
,
6091 openacc_destroy_thread_data
)
6092 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
6093 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
6094 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
6095 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
6096 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
6097 || !DLSYM_OPT (openacc
.async
.queue_callback
,
6098 openacc_async_queue_callback
)
6099 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
6100 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
6101 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
6102 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
6104 /* Require all the OpenACC handlers if we have
6105 GOMP_OFFLOAD_CAP_OPENACC_200. */
6106 err
= "plugin missing OpenACC handler function";
6111 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
6112 openacc_cuda_get_current_device
);
6113 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
6114 openacc_cuda_get_current_context
);
6115 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
6116 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
6117 if (cuda
&& cuda
!= 4)
6119 /* Make sure all the CUDA functions are there if any of them are. */
6120 err
= "plugin missing OpenACC CUDA handler function";
6132 gomp_error ("while loading %s: %s", plugin_name
, err
);
6134 gomp_error ("missing function was %s", last_missing
);
6136 dlclose (plugin_handle
);
6141 /* This function finalizes all initialized devices. */
6144 gomp_target_fini (void)
6147 for (i
= 0; i
< num_devices
; i
++)
6150 struct gomp_device_descr
*devicep
= &devices
[i
];
6151 gomp_mutex_lock (&devicep
->lock
);
6152 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
6153 ret
= gomp_fini_device (devicep
);
6154 gomp_mutex_unlock (&devicep
->lock
);
6156 gomp_fatal ("device finalization failed");
6160 /* This function initializes the runtime for offloading.
6161 It parses the list of offload plugins, and tries to load these.
6162 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
6163 will be set, and the array DEVICES initialized, containing descriptors for
6164 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
6168 gomp_target_init (void)
6170 const char *prefix
="libgomp-plugin-";
6171 const char *suffix
= SONAME_SUFFIX (1);
6172 const char *cur
, *next
;
6174 int i
, new_num_devs
;
6175 int num_devs
= 0, num_devs_openmp
;
6176 struct gomp_device_descr
*devs
= NULL
;
6178 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
6181 cur
= OFFLOAD_PLUGINS
;
6185 struct gomp_device_descr current_device
;
6186 size_t prefix_len
, suffix_len
, cur_len
;
6188 next
= strchr (cur
, ',');
6190 prefix_len
= strlen (prefix
);
6191 cur_len
= next
? next
- cur
: strlen (cur
);
6192 suffix_len
= strlen (suffix
);
6194 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
6201 memcpy (plugin_name
, prefix
, prefix_len
);
6202 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
6203 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
6205 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
6207 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
6208 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
6209 if (gomp_debug_var
> 0 && new_num_devs
< 0)
6212 int type
= current_device
.get_type_func ();
6213 for (int img
= 0; img
< num_offload_images
; img
++)
6214 if (type
== offload_images
[img
].type
)
6218 char buf
[sizeof ("unified_address, unified_shared_memory, "
6219 "reverse_offload")];
6220 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
6221 char *name
= (char *) malloc (cur_len
+ 1);
6222 memcpy (name
, cur
, cur_len
);
6223 name
[cur_len
] = '\0';
6225 "%s devices present but 'omp requires %s' "
6226 "cannot be fulfilled\n", name
, buf
);
6230 else if (new_num_devs
>= 1)
6232 /* Augment DEVICES and NUM_DEVICES. */
6234 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
6235 * sizeof (struct gomp_device_descr
));
6243 current_device
.name
= current_device
.get_name_func ();
6244 /* current_device.capabilities has already been set. */
6245 current_device
.type
= current_device
.get_type_func ();
6246 current_device
.mem_map
.root
= NULL
;
6247 current_device
.mem_map_rev
.root
= NULL
;
6248 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
6249 for (i
= 0; i
< new_num_devs
; i
++)
6251 current_device
.target_id
= i
;
6252 devs
[num_devs
] = current_device
;
6253 gomp_mutex_init (&devs
[num_devs
].lock
);
6264 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
6265 NUM_DEVICES_OPENMP. */
6266 struct gomp_device_descr
*devs_s
6267 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
6274 num_devs_openmp
= 0;
6275 for (i
= 0; i
< num_devs
; i
++)
6276 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
6277 devs_s
[num_devs_openmp
++] = devs
[i
];
6278 int num_devs_after_openmp
= num_devs_openmp
;
6279 for (i
= 0; i
< num_devs
; i
++)
6280 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
6281 devs_s
[num_devs_after_openmp
++] = devs
[i
];
6285 for (i
= 0; i
< num_devs
; i
++)
6287 /* The 'devices' array can be moved (by the realloc call) until we have
6288 found all the plugins, so registering with the OpenACC runtime (which
6289 takes a copy of the pointer argument) must be delayed until now. */
6290 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
6291 goacc_register (&devs
[i
]);
6294 num_devices
= num_devs
;
6295 num_devices_openmp
= num_devs_openmp
;
6297 if (atexit (gomp_target_fini
) != 0)
6298 gomp_fatal ("atexit failed");
6301 #else /* PLUGIN_SUPPORT */
6302 /* If dlfcn.h is unavailable we always fallback to host execution.
6303 GOMP_target* routines are just stubs for this case. */
6305 gomp_target_init (void)
6308 #endif /* PLUGIN_SUPPORT */