1 /* Copyright (C) 2013-2023 Free Software Foundation, Inc.
2 Contributed by Jakub Jelinek <jakub@redhat.com>.
4 This file is part of the GNU Offloading and Multi Processing Library
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
26 /* This file contains the support of offloading. */
29 #include "oacc-plugin.h"
31 #include "gomp-constants.h"
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h> /* For PRIu64. */
39 #include <stdio.h> /* For snprintf. */
45 #include "plugin-suffix.h"
48 /* Define another splay tree instantiation - for reverse offload. */
49 #define splay_tree_prefix reverse
51 #include "splay-tree.h"
54 typedef uintptr_t *hash_entry_type
;
/* Allocation hook for the libgomp hash table implementation.
   Delegates to gomp_malloc, which aborts the process instead of
   returning NULL on allocation failure, so callers need not check
   the result.  */
static inline void *
htab_alloc (size_t sz)
{
  return gomp_malloc (sz);
}
/* Deallocation hook for the libgomp hash table implementation.
   Releases memory obtained from htab_alloc; free (NULL) is a no-op,
   so no guard is needed.  */
static inline void
htab_free (void *p)
{
  free (p);
}
59 ialias_redirect (GOMP_task
)
61 static inline hashval_t
62 htab_hash (hash_entry_type element
)
64 return hash_pointer ((void *) element
);
68 htab_eq (hash_entry_type x
, hash_entry_type y
)
73 #define FIELD_TGT_EMPTY (~(size_t) 0)
75 static void gomp_target_init (void);
77 /* The whole initialization code for offloading plugins is only run once. */
78 static pthread_once_t gomp_is_initialized
= PTHREAD_ONCE_INIT
;
80 /* Mutex for offload image registration. */
81 static gomp_mutex_t register_lock
;
83 /* This structure describes an offload image.
84 It contains type of the target device, pointer to host table descriptor, and
85 pointer to target data. */
86 struct offload_image_descr
{
88 enum offload_target_type type
;
89 const void *host_table
;
90 const void *target_data
;
93 /* Array of descriptors of offload images. */
94 static struct offload_image_descr
*offload_images
;
96 /* Total number of offload images. */
97 static int num_offload_images
;
99 /* Array of descriptors for all available devices. */
100 static struct gomp_device_descr
*devices
;
102 /* Total number of available devices. */
103 static int num_devices
;
105 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
106 static int num_devices_openmp
;
108 /* OpenMP requires mask. */
109 static int omp_requires_mask
;
111 /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
114 gomp_realloc_unlock (void *old
, size_t size
)
116 void *ret
= realloc (old
, size
);
119 gomp_mutex_unlock (®ister_lock
);
120 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size
);
125 attribute_hidden
void
126 gomp_init_targets_once (void)
128 (void) pthread_once (&gomp_is_initialized
, gomp_target_init
);
132 gomp_get_num_devices (void)
134 gomp_init_targets_once ();
135 return num_devices_openmp
;
138 static struct gomp_device_descr
*
139 resolve_device (int device_id
, bool remapped
)
141 if (remapped
&& device_id
== GOMP_DEVICE_ICV
)
143 struct gomp_task_icv
*icv
= gomp_icv (false);
144 device_id
= icv
->default_device_var
;
150 if (device_id
== (remapped
? GOMP_DEVICE_HOST_FALLBACK
151 : omp_initial_device
))
153 if (device_id
== omp_invalid_device
)
154 gomp_fatal ("omp_invalid_device encountered");
155 else if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
156 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
157 "but device not found");
161 else if (device_id
>= gomp_get_num_devices ())
163 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
164 && device_id
!= num_devices_openmp
)
165 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
166 "but device not found");
171 gomp_mutex_lock (&devices
[device_id
].lock
);
172 if (devices
[device_id
].state
== GOMP_DEVICE_UNINITIALIZED
)
173 gomp_init_device (&devices
[device_id
]);
174 else if (devices
[device_id
].state
== GOMP_DEVICE_FINALIZED
)
176 gomp_mutex_unlock (&devices
[device_id
].lock
);
178 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
)
179 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
180 "but device is finalized");
184 gomp_mutex_unlock (&devices
[device_id
].lock
);
186 return &devices
[device_id
];
190 static inline splay_tree_key
191 gomp_map_lookup (splay_tree mem_map
, splay_tree_key key
)
193 if (key
->host_start
!= key
->host_end
)
194 return splay_tree_lookup (mem_map
, key
);
197 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
202 n
= splay_tree_lookup (mem_map
, key
);
206 return splay_tree_lookup (mem_map
, key
);
209 static inline reverse_splay_tree_key
210 gomp_map_lookup_rev (reverse_splay_tree mem_map_rev
, reverse_splay_tree_key key
)
212 return reverse_splay_tree_lookup (mem_map_rev
, key
);
215 static inline splay_tree_key
216 gomp_map_0len_lookup (splay_tree mem_map
, splay_tree_key key
)
218 if (key
->host_start
!= key
->host_end
)
219 return splay_tree_lookup (mem_map
, key
);
222 splay_tree_key n
= splay_tree_lookup (mem_map
, key
);
228 gomp_device_copy (struct gomp_device_descr
*devicep
,
229 bool (*copy_func
) (int, void *, const void *, size_t),
230 const char *dst
, void *dstaddr
,
231 const char *src
, const void *srcaddr
,
234 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
))
236 gomp_mutex_unlock (&devicep
->lock
);
237 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
238 src
, srcaddr
, srcaddr
+ size
, dst
, dstaddr
, dstaddr
+ size
);
243 goacc_device_copy_async (struct gomp_device_descr
*devicep
,
244 bool (*copy_func
) (int, void *, const void *, size_t,
245 struct goacc_asyncqueue
*),
246 const char *dst
, void *dstaddr
,
247 const char *src
, const void *srcaddr
,
248 const void *srcaddr_orig
,
249 size_t size
, struct goacc_asyncqueue
*aq
)
251 if (!copy_func (devicep
->target_id
, dstaddr
, srcaddr
, size
, aq
))
253 gomp_mutex_unlock (&devicep
->lock
);
254 if (srcaddr_orig
&& srcaddr_orig
!= srcaddr
)
255 gomp_fatal ("Copying of %s object [%p..%p)"
256 " via buffer %s object [%p..%p)"
257 " to %s object [%p..%p) failed",
258 src
, srcaddr_orig
, srcaddr_orig
+ size
,
259 src
, srcaddr
, srcaddr
+ size
,
260 dst
, dstaddr
, dstaddr
+ size
);
262 gomp_fatal ("Copying of %s object [%p..%p)"
263 " to %s object [%p..%p) failed",
264 src
, srcaddr
, srcaddr
+ size
,
265 dst
, dstaddr
, dstaddr
+ size
);
269 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
270 host to device memory transfers. */
272 struct gomp_coalesce_chunk
274 /* The starting and ending point of a coalesced chunk of memory. */
278 struct gomp_coalesce_buf
280 /* Buffer into which gomp_copy_host2dev will memcpy data and from which
281 it will be copied to the device. */
283 struct target_mem_desc
*tgt
;
284 /* Array with offsets, chunks[i].start is the starting offset and
285 chunks[i].end ending offset relative to tgt->tgt_start device address
286 of chunks which are to be copied to buf and later copied to device. */
287 struct gomp_coalesce_chunk
*chunks
;
288 /* Number of chunks in chunks array, or -1 if coalesce buffering should not
291 /* During construction of chunks array, how many memory regions are within
292 the last chunk. If there is just one memory region for a chunk, we copy
293 it directly to device rather than going through buf. */
297 /* Maximum size of memory region considered for coalescing. Larger copies
298 are performed directly. */
299 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
301 /* Maximum size of a gap in between regions to consider them being copied
302 within the same chunk. All the device offsets considered are within
303 newly allocated device memory, so it isn't fatal if we copy some padding
304 in between from host to device. The gaps come either from alignment
305 padding or from memory regions which are not supposed to be copied from
306 host to device (e.g. map(alloc:), map(from:) etc.). */
307 #define MAX_COALESCE_BUF_GAP (4 * 1024)
309 /* Add region with device tgt_start relative offset and length to CBUF.
311 This must not be used for asynchronous copies, because the host data might
312 not be computed yet (by an earlier asynchronous compute region, for
314 TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question:
315 is it more performant to use libgomp CBUF buffering or individual device
316 asynchronous copying?) */
319 gomp_coalesce_buf_add (struct gomp_coalesce_buf
*cbuf
, size_t start
, size_t len
)
321 if (len
> MAX_COALESCE_BUF_SIZE
|| len
== 0)
325 if (cbuf
->chunk_cnt
< 0)
327 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
329 cbuf
->chunk_cnt
= -1;
332 if (start
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
+ MAX_COALESCE_BUF_GAP
)
334 cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
= start
+ len
;
338 /* If the last chunk is only used by one mapping, discard it,
339 as it will be one host to device copy anyway and
340 memcpying it around will only waste cycles. */
341 if (cbuf
->use_cnt
== 1)
344 cbuf
->chunks
[cbuf
->chunk_cnt
].start
= start
;
345 cbuf
->chunks
[cbuf
->chunk_cnt
].end
= start
+ len
;
350 /* Return true for mapping kinds which need to copy data from the
351 host to device for regions that weren't previously mapped. */
354 gomp_to_device_kind_p (int kind
)
360 case GOMP_MAP_FORCE_ALLOC
:
361 case GOMP_MAP_FORCE_FROM
:
362 case GOMP_MAP_ALWAYS_FROM
:
369 /* Copy host memory to an offload device. In asynchronous mode (if AQ is
370 non-NULL), when the source data is stack or may otherwise be deallocated
371 before the asynchronous copy takes place, EPHEMERAL must be passed as
374 attribute_hidden
void
375 gomp_copy_host2dev (struct gomp_device_descr
*devicep
,
376 struct goacc_asyncqueue
*aq
,
377 void *d
, const void *h
, size_t sz
,
378 bool ephemeral
, struct gomp_coalesce_buf
*cbuf
)
380 if (__builtin_expect (aq
!= NULL
, 0))
382 /* See 'gomp_coalesce_buf_add'. */
385 void *h_buf
= (void *) h
;
388 /* We're queueing up an asynchronous copy from data that may
389 disappear before the transfer takes place (i.e. because it is a
390 stack local in a function that is no longer executing). Make a
391 copy of the data into a temporary buffer in those cases. */
392 h_buf
= gomp_malloc (sz
);
393 memcpy (h_buf
, h
, sz
);
395 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.host2dev_func
,
396 "dev", d
, "host", h_buf
, h
, sz
, aq
);
398 /* Free temporary buffer once the transfer has completed. */
399 devicep
->openacc
.async
.queue_callback_func (aq
, free
, h_buf
);
406 uintptr_t doff
= (uintptr_t) d
- cbuf
->tgt
->tgt_start
;
407 if (doff
< cbuf
->chunks
[cbuf
->chunk_cnt
- 1].end
)
410 long last
= cbuf
->chunk_cnt
- 1;
411 while (first
<= last
)
413 long middle
= (first
+ last
) >> 1;
414 if (cbuf
->chunks
[middle
].end
<= doff
)
416 else if (cbuf
->chunks
[middle
].start
<= doff
)
418 if (doff
+ sz
> cbuf
->chunks
[middle
].end
)
420 gomp_mutex_unlock (&devicep
->lock
);
421 gomp_fatal ("internal libgomp cbuf error");
423 memcpy ((char *) cbuf
->buf
+ (doff
- cbuf
->chunks
[0].start
),
433 gomp_device_copy (devicep
, devicep
->host2dev_func
, "dev", d
, "host", h
, sz
);
436 attribute_hidden
void
437 gomp_copy_dev2host (struct gomp_device_descr
*devicep
,
438 struct goacc_asyncqueue
*aq
,
439 void *h
, const void *d
, size_t sz
)
441 if (__builtin_expect (aq
!= NULL
, 0))
442 goacc_device_copy_async (devicep
, devicep
->openacc
.async
.dev2host_func
,
443 "host", h
, "dev", d
, NULL
, sz
, aq
);
445 gomp_device_copy (devicep
, devicep
->dev2host_func
, "host", h
, "dev", d
, sz
);
449 gomp_free_device_memory (struct gomp_device_descr
*devicep
, void *devptr
)
451 if (!devicep
->free_func (devicep
->target_id
, devptr
))
453 gomp_mutex_unlock (&devicep
->lock
);
454 gomp_fatal ("error in freeing device memory block at %p", devptr
);
458 /* Increment reference count of a splay_tree_key region K by 1.
459 If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
460 increment the value if refcount is not yet contained in the set (used for
461 OpenMP 5.0, which specifies that a region's refcount is adjusted at most
462 once for each construct). */
465 gomp_increment_refcount (splay_tree_key k
, htab_t
*refcount_set
)
467 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
470 uintptr_t *refcount_ptr
= &k
->refcount
;
472 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
473 refcount_ptr
= &k
->structelem_refcount
;
474 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
475 refcount_ptr
= k
->structelem_refcount_ptr
;
479 if (htab_find (*refcount_set
, refcount_ptr
))
481 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
482 *slot
= refcount_ptr
;
489 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
490 is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
491 track already seen refcounts, and only adjust the value if refcount is not
492 yet contained in the set (like gomp_increment_refcount).
494 Return out-values: set *DO_COPY to true if we set the refcount to zero, or
495 it is already zero and we know we decremented it earlier. This signals that
496 associated maps should be copied back to host.
498 *DO_REMOVE is set to true when this is the first handling of this refcount
499 and we are setting it to zero. This signals a removal of this key from the
502 Copy and removal are separated due to cases like handling of structure
503 elements, e.g. each map of a structure element representing a possible copy
504 out of a structure field has to be handled individually, but we only signal
505 removal for one (the first encountered) sibling map. */
508 gomp_decrement_refcount (splay_tree_key k
, htab_t
*refcount_set
, bool delete_p
,
509 bool *do_copy
, bool *do_remove
)
511 if (k
== NULL
|| k
->refcount
== REFCOUNT_INFINITY
)
513 *do_copy
= *do_remove
= false;
517 uintptr_t *refcount_ptr
= &k
->refcount
;
519 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
))
520 refcount_ptr
= &k
->structelem_refcount
;
521 else if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
522 refcount_ptr
= k
->structelem_refcount_ptr
;
524 bool new_encountered_refcount
;
525 bool set_to_zero
= false;
526 bool is_zero
= false;
528 uintptr_t orig_refcount
= *refcount_ptr
;
532 if (htab_find (*refcount_set
, refcount_ptr
))
534 new_encountered_refcount
= false;
538 uintptr_t **slot
= htab_find_slot (refcount_set
, refcount_ptr
, INSERT
);
539 *slot
= refcount_ptr
;
540 new_encountered_refcount
= true;
543 /* If no refcount_set being used, assume all keys are being decremented
544 for the first time. */
545 new_encountered_refcount
= true;
549 else if (*refcount_ptr
> 0)
553 if (*refcount_ptr
== 0)
555 if (orig_refcount
> 0)
561 *do_copy
= (set_to_zero
|| (!new_encountered_refcount
&& is_zero
));
562 *do_remove
= (new_encountered_refcount
&& set_to_zero
);
565 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
566 gomp_map_0len_lookup found oldn for newn.
567 Helper function of gomp_map_vars. */
570 gomp_map_vars_existing (struct gomp_device_descr
*devicep
,
571 struct goacc_asyncqueue
*aq
, splay_tree_key oldn
,
572 splay_tree_key newn
, struct target_var_desc
*tgt_var
,
573 unsigned char kind
, bool always_to_flag
, bool implicit
,
574 struct gomp_coalesce_buf
*cbuf
,
575 htab_t
*refcount_set
)
577 assert (kind
!= GOMP_MAP_ATTACH
578 || kind
!= GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
581 tgt_var
->copy_from
= GOMP_MAP_COPY_FROM_P (kind
);
582 tgt_var
->always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind
);
583 tgt_var
->is_attach
= false;
584 tgt_var
->offset
= newn
->host_start
- oldn
->host_start
;
586 /* For implicit maps, old contained in new is valid. */
587 bool implicit_subset
= (implicit
588 && newn
->host_start
<= oldn
->host_start
589 && oldn
->host_end
<= newn
->host_end
);
591 tgt_var
->length
= oldn
->host_end
- oldn
->host_start
;
593 tgt_var
->length
= newn
->host_end
- newn
->host_start
;
595 if ((kind
& GOMP_MAP_FLAG_FORCE
)
596 /* For implicit maps, old contained in new is valid. */
598 /* Otherwise, new contained inside old is considered valid. */
599 || (oldn
->host_start
<= newn
->host_start
600 && newn
->host_end
<= oldn
->host_end
)))
602 gomp_mutex_unlock (&devicep
->lock
);
603 gomp_fatal ("Trying to map into device [%p..%p) object when "
604 "[%p..%p) is already mapped",
605 (void *) newn
->host_start
, (void *) newn
->host_end
,
606 (void *) oldn
->host_start
, (void *) oldn
->host_end
);
609 if (GOMP_MAP_ALWAYS_TO_P (kind
) || always_to_flag
)
611 /* Implicit + always should not happen. If this does occur, below
612 address/length adjustment is a TODO. */
613 assert (!implicit_subset
);
615 if (oldn
->aux
&& oldn
->aux
->attach_count
)
617 /* We have to be careful not to overwrite still attached pointers
618 during the copyback to host. */
619 uintptr_t addr
= newn
->host_start
;
620 while (addr
< newn
->host_end
)
622 size_t i
= (addr
- oldn
->host_start
) / sizeof (void *);
623 if (oldn
->aux
->attach_count
[i
] == 0)
624 gomp_copy_host2dev (devicep
, aq
,
625 (void *) (oldn
->tgt
->tgt_start
627 + addr
- oldn
->host_start
),
629 sizeof (void *), false, cbuf
);
630 addr
+= sizeof (void *);
634 gomp_copy_host2dev (devicep
, aq
,
635 (void *) (oldn
->tgt
->tgt_start
+ oldn
->tgt_offset
636 + newn
->host_start
- oldn
->host_start
),
637 (void *) newn
->host_start
,
638 newn
->host_end
- newn
->host_start
, false, cbuf
);
641 gomp_increment_refcount (oldn
, refcount_set
);
645 get_kind (bool short_mapkind
, void *kinds
, int idx
)
648 return ((unsigned char *) kinds
)[idx
];
650 int val
= ((unsigned short *) kinds
)[idx
];
651 if (GOMP_MAP_IMPLICIT_P (val
))
652 val
&= ~GOMP_MAP_IMPLICIT
;
658 get_implicit (bool short_mapkind
, void *kinds
, int idx
)
663 int val
= ((unsigned short *) kinds
)[idx
];
664 return GOMP_MAP_IMPLICIT_P (val
);
668 gomp_map_pointer (struct target_mem_desc
*tgt
, struct goacc_asyncqueue
*aq
,
669 uintptr_t host_ptr
, uintptr_t target_offset
, uintptr_t bias
,
670 struct gomp_coalesce_buf
*cbuf
,
671 bool allow_zero_length_array_sections
)
673 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
674 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
675 struct splay_tree_key_s cur_node
;
677 cur_node
.host_start
= host_ptr
;
678 if (cur_node
.host_start
== (uintptr_t) NULL
)
680 cur_node
.tgt_offset
= (uintptr_t) NULL
;
681 gomp_copy_host2dev (devicep
, aq
,
682 (void *) (tgt
->tgt_start
+ target_offset
),
683 (void *) &cur_node
.tgt_offset
, sizeof (void *),
687 /* Add bias to the pointer value. */
688 cur_node
.host_start
+= bias
;
689 cur_node
.host_end
= cur_node
.host_start
;
690 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
693 if (allow_zero_length_array_sections
)
694 cur_node
.tgt_offset
= 0;
697 gomp_mutex_unlock (&devicep
->lock
);
698 gomp_fatal ("Pointer target of array section wasn't mapped");
703 cur_node
.host_start
-= n
->host_start
;
705 = n
->tgt
->tgt_start
+ n
->tgt_offset
+ cur_node
.host_start
;
706 /* At this point tgt_offset is target address of the
707 array section. Now subtract bias to get what we want
708 to initialize the pointer with. */
709 cur_node
.tgt_offset
-= bias
;
711 gomp_copy_host2dev (devicep
, aq
, (void *) (tgt
->tgt_start
+ target_offset
),
712 (void *) &cur_node
.tgt_offset
, sizeof (void *),
717 gomp_map_fields_existing (struct target_mem_desc
*tgt
,
718 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
719 size_t first
, size_t i
, void **hostaddrs
,
720 size_t *sizes
, void *kinds
,
721 struct gomp_coalesce_buf
*cbuf
, htab_t
*refcount_set
)
723 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
724 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
725 struct splay_tree_key_s cur_node
;
728 const bool short_mapkind
= true;
729 const int typemask
= short_mapkind
? 0xff : 0x7;
731 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
732 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
733 splay_tree_key n2
= splay_tree_lookup (mem_map
, &cur_node
);
734 kind
= get_kind (short_mapkind
, kinds
, i
);
735 implicit
= get_implicit (short_mapkind
, kinds
, i
);
738 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
740 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
741 kind
& typemask
, false, implicit
, cbuf
,
747 if (cur_node
.host_start
> (uintptr_t) hostaddrs
[first
- 1])
749 cur_node
.host_start
--;
750 n2
= splay_tree_lookup (mem_map
, &cur_node
);
751 cur_node
.host_start
++;
754 && n2
->host_start
- n
->host_start
755 == n2
->tgt_offset
- n
->tgt_offset
)
757 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
758 kind
& typemask
, false, implicit
, cbuf
,
764 n2
= splay_tree_lookup (mem_map
, &cur_node
);
768 && n2
->host_start
- n
->host_start
== n2
->tgt_offset
- n
->tgt_offset
)
770 gomp_map_vars_existing (devicep
, aq
, n2
, &cur_node
, &tgt
->list
[i
],
771 kind
& typemask
, false, implicit
, cbuf
,
776 gomp_mutex_unlock (&devicep
->lock
);
777 gomp_fatal ("Trying to map into device [%p..%p) structure element when "
778 "other mapped elements from the same structure weren't mapped "
779 "together with it", (void *) cur_node
.host_start
,
780 (void *) cur_node
.host_end
);
783 attribute_hidden
void
784 gomp_attach_pointer (struct gomp_device_descr
*devicep
,
785 struct goacc_asyncqueue
*aq
, splay_tree mem_map
,
786 splay_tree_key n
, uintptr_t attach_to
, size_t bias
,
787 struct gomp_coalesce_buf
*cbufp
,
788 bool allow_zero_length_array_sections
)
790 struct splay_tree_key_s s
;
795 gomp_mutex_unlock (&devicep
->lock
);
796 gomp_fatal ("enclosing struct not mapped for attach");
799 size
= (n
->host_end
- n
->host_start
+ sizeof (void *) - 1) / sizeof (void *);
800 /* We might have a pointer in a packed struct: however we cannot have more
801 than one such pointer in each pointer-sized portion of the struct, so
803 idx
= (attach_to
- n
->host_start
) / sizeof (void *);
806 n
->aux
= gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
808 if (!n
->aux
->attach_count
)
810 = gomp_malloc_cleared (sizeof (*n
->aux
->attach_count
) * size
);
812 if (n
->aux
->attach_count
[idx
] < UINTPTR_MAX
)
813 n
->aux
->attach_count
[idx
]++;
816 gomp_mutex_unlock (&devicep
->lock
);
817 gomp_fatal ("attach count overflow");
820 if (n
->aux
->attach_count
[idx
] == 1)
822 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ attach_to
824 uintptr_t target
= (uintptr_t) *(void **) attach_to
;
828 if ((void *) target
== NULL
)
830 gomp_mutex_unlock (&devicep
->lock
);
831 gomp_fatal ("attempt to attach null pointer");
834 s
.host_start
= target
+ bias
;
835 s
.host_end
= s
.host_start
+ 1;
836 tn
= splay_tree_lookup (mem_map
, &s
);
840 if (allow_zero_length_array_sections
)
841 /* When allowing attachment to zero-length array sections, we
842 allow attaching to NULL pointers when the target region is not
847 gomp_mutex_unlock (&devicep
->lock
);
848 gomp_fatal ("pointer target not mapped for attach");
852 data
= tn
->tgt
->tgt_start
+ tn
->tgt_offset
+ target
- tn
->host_start
;
855 "%s: attaching host %p, target %p (struct base %p) to %p\n",
856 __FUNCTION__
, (void *) attach_to
, (void *) devptr
,
857 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
), (void *) data
);
859 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &data
,
860 sizeof (void *), true, cbufp
);
863 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
864 (void *) attach_to
, (int) n
->aux
->attach_count
[idx
]);
867 attribute_hidden
void
868 gomp_detach_pointer (struct gomp_device_descr
*devicep
,
869 struct goacc_asyncqueue
*aq
, splay_tree_key n
,
870 uintptr_t detach_from
, bool finalize
,
871 struct gomp_coalesce_buf
*cbufp
)
877 gomp_mutex_unlock (&devicep
->lock
);
878 gomp_fatal ("enclosing struct not mapped for detach");
881 idx
= (detach_from
- n
->host_start
) / sizeof (void *);
883 if (!n
->aux
|| !n
->aux
->attach_count
)
885 gomp_mutex_unlock (&devicep
->lock
);
886 gomp_fatal ("no attachment counters for struct");
890 n
->aux
->attach_count
[idx
] = 1;
892 if (n
->aux
->attach_count
[idx
] == 0)
894 gomp_mutex_unlock (&devicep
->lock
);
895 gomp_fatal ("attach count underflow");
898 n
->aux
->attach_count
[idx
]--;
900 if (n
->aux
->attach_count
[idx
] == 0)
902 uintptr_t devptr
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ detach_from
904 uintptr_t target
= (uintptr_t) *(void **) detach_from
;
907 "%s: detaching host %p, target %p (struct base %p) to %p\n",
908 __FUNCTION__
, (void *) detach_from
, (void *) devptr
,
909 (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
),
912 gomp_copy_host2dev (devicep
, aq
, (void *) devptr
, (void *) &target
,
913 sizeof (void *), true, cbufp
);
916 gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__
,
917 (void *) detach_from
, (int) n
->aux
->attach_count
[idx
]);
920 attribute_hidden
uintptr_t
921 gomp_map_val (struct target_mem_desc
*tgt
, void **hostaddrs
, size_t i
)
923 if (tgt
->list
[i
].key
!= NULL
)
924 return tgt
->list
[i
].key
->tgt
->tgt_start
925 + tgt
->list
[i
].key
->tgt_offset
926 + tgt
->list
[i
].offset
;
928 switch (tgt
->list
[i
].offset
)
931 return (uintptr_t) hostaddrs
[i
];
937 return tgt
->list
[i
+ 1].key
->tgt
->tgt_start
938 + tgt
->list
[i
+ 1].key
->tgt_offset
939 + tgt
->list
[i
+ 1].offset
940 + (uintptr_t) hostaddrs
[i
]
941 - (uintptr_t) hostaddrs
[i
+ 1];
944 return tgt
->tgt_start
+ tgt
->list
[i
].offset
;
948 static inline __attribute__((always_inline
)) struct target_mem_desc
*
949 gomp_map_vars_internal (struct gomp_device_descr
*devicep
,
950 struct goacc_asyncqueue
*aq
, size_t mapnum
,
951 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
952 void *kinds
, bool short_mapkind
,
953 htab_t
*refcount_set
,
954 enum gomp_map_vars_kind pragma_kind
)
956 size_t i
, tgt_align
, tgt_size
, not_found_cnt
= 0;
957 bool has_firstprivate
= false;
958 bool has_always_ptrset
= false;
959 bool openmp_p
= (pragma_kind
& GOMP_MAP_VARS_OPENACC
) == 0;
960 const int rshift
= short_mapkind
? 8 : 3;
961 const int typemask
= short_mapkind
? 0xff : 0x7;
962 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
963 struct splay_tree_key_s cur_node
;
964 struct target_mem_desc
*tgt
965 = gomp_malloc (sizeof (*tgt
) + sizeof (tgt
->list
[0]) * mapnum
);
966 tgt
->list_count
= mapnum
;
967 tgt
->refcount
= (pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) ? 0 : 1;
968 tgt
->device_descr
= devicep
;
970 struct gomp_coalesce_buf cbuf
, *cbufp
= NULL
;
979 tgt_align
= sizeof (void *);
985 if (mapnum
> 1 || pragma_kind
== GOMP_MAP_VARS_TARGET
)
987 size_t chunks_size
= (mapnum
+ 1) * sizeof (struct gomp_coalesce_chunk
);
988 cbuf
.chunks
= (struct gomp_coalesce_chunk
*) gomp_alloca (chunks_size
);
991 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
993 size_t align
= 4 * sizeof (void *);
995 tgt_size
= mapnum
* sizeof (void *);
997 cbuf
.use_cnt
= 1 + (mapnum
> 1);
998 cbuf
.chunks
[0].start
= 0;
999 cbuf
.chunks
[0].end
= tgt_size
;
1002 gomp_mutex_lock (&devicep
->lock
);
1003 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1005 gomp_mutex_unlock (&devicep
->lock
);
1010 for (i
= 0; i
< mapnum
; i
++)
1012 int kind
= get_kind (short_mapkind
, kinds
, i
);
1013 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1014 if (hostaddrs
[i
] == NULL
1015 || (kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE_INT
)
1017 tgt
->list
[i
].key
= NULL
;
1018 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1021 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
1022 || (kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1024 tgt
->list
[i
].key
= NULL
;
1027 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
1028 on a separate construct prior to using use_device_{addr,ptr}.
1029 In OpenMP 5.0, map directives need to be ordered by the
1030 middle-end before the use_device_* clauses. If
1031 !not_found_cnt, all mappings requested (if any) are already
1032 mapped, so use_device_{addr,ptr} can be resolved right away.
1033 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1034 now but would succeed after performing the mappings in the
1035 following loop. We can't defer this always to the second
1036 loop, because it is not even invoked when !not_found_cnt
1037 after the first loop. */
1038 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1039 cur_node
.host_end
= cur_node
.host_start
;
1040 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
1043 cur_node
.host_start
-= n
->host_start
;
1045 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1046 + cur_node
.host_start
);
1048 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1050 gomp_mutex_unlock (&devicep
->lock
);
1051 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1053 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1054 /* If not present, continue using the host address. */
1057 __builtin_unreachable ();
1058 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1061 tgt
->list
[i
].offset
= 0;
1064 else if ((kind
& typemask
) == GOMP_MAP_STRUCT
)
1066 size_t first
= i
+ 1;
1067 size_t last
= i
+ sizes
[i
];
1068 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1069 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1071 tgt
->list
[i
].key
= NULL
;
1072 tgt
->list
[i
].offset
= OFFSET_STRUCT
;
1073 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1076 size_t align
= (size_t) 1 << (kind
>> rshift
);
1077 if (tgt_align
< align
)
1079 tgt_size
-= (uintptr_t) hostaddrs
[first
] - cur_node
.host_start
;
1080 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1081 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1082 not_found_cnt
+= last
- i
;
1083 for (i
= first
; i
<= last
; i
++)
1085 tgt
->list
[i
].key
= NULL
;
1087 && gomp_to_device_kind_p (get_kind (short_mapkind
, kinds
, i
)
1089 gomp_coalesce_buf_add (&cbuf
,
1090 tgt_size
- cur_node
.host_end
1091 + (uintptr_t) hostaddrs
[i
],
1097 for (i
= first
; i
<= last
; i
++)
1098 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1099 sizes
, kinds
, NULL
, refcount_set
);
1103 else if ((kind
& typemask
) == GOMP_MAP_ALWAYS_POINTER
)
1105 tgt
->list
[i
].key
= NULL
;
1106 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1107 has_firstprivate
= true;
1110 else if ((kind
& typemask
) == GOMP_MAP_ATTACH
1111 || ((kind
& typemask
)
1112 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
))
1114 tgt
->list
[i
].key
= NULL
;
1115 has_firstprivate
= true;
1118 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1119 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1120 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
1122 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1123 if ((kind
& typemask
) == GOMP_MAP_FIRSTPRIVATE
)
1125 tgt
->list
[i
].key
= NULL
;
1127 size_t align
= (size_t) 1 << (kind
>> rshift
);
1128 if (tgt_align
< align
)
1130 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1132 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1133 cur_node
.host_end
- cur_node
.host_start
);
1134 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1135 has_firstprivate
= true;
1139 if ((kind
& typemask
) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
1141 n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
1144 tgt
->list
[i
].key
= NULL
;
1145 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1150 n
= splay_tree_lookup (mem_map
, &cur_node
);
1151 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1153 int always_to_cnt
= 0;
1154 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1156 bool has_nullptr
= false;
1158 for (j
= 0; j
< n
->tgt
->list_count
; j
++)
1159 if (n
->tgt
->list
[j
].key
== n
)
1161 has_nullptr
= n
->tgt
->list
[j
].has_null_ptr_assoc
;
1164 if (n
->tgt
->list_count
== 0)
1166 /* 'declare target'; assume has_nullptr; it could also be
1167 statically assigned pointer, but that it should be to
1168 the equivalent variable on the host. */
1169 assert (n
->refcount
== REFCOUNT_INFINITY
);
1173 assert (j
< n
->tgt
->list_count
);
1174 /* Re-map the data if there is an 'always' modifier or if it a
1175 null pointer was there and non a nonnull has been found; that
1176 permits transparent re-mapping for Fortran array descriptors
1177 which were previously mapped unallocated. */
1178 for (j
= i
+ 1; j
< mapnum
; j
++)
1180 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1181 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1183 || !GOMP_MAP_POINTER_P (ptr_kind
)
1184 || *(void **) hostaddrs
[j
] == NULL
))
1186 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1187 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1188 > cur_node
.host_end
))
1192 has_always_ptrset
= true;
1197 gomp_map_vars_existing (devicep
, aq
, n
, &cur_node
, &tgt
->list
[i
],
1198 kind
& typemask
, always_to_cnt
> 0, implicit
,
1199 NULL
, refcount_set
);
1204 tgt
->list
[i
].key
= NULL
;
1206 if ((kind
& typemask
) == GOMP_MAP_IF_PRESENT
)
1208 /* Not present, hence, skip entry - including its MAP_POINTER,
1210 tgt
->list
[i
].offset
= OFFSET_POINTER
;
1212 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1213 == GOMP_MAP_POINTER
))
1216 tgt
->list
[i
].key
= NULL
;
1217 tgt
->list
[i
].offset
= 0;
1221 size_t align
= (size_t) 1 << (kind
>> rshift
);
1223 if (tgt_align
< align
)
1225 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1227 && gomp_to_device_kind_p (kind
& typemask
))
1228 gomp_coalesce_buf_add (&cbuf
, tgt_size
,
1229 cur_node
.host_end
- cur_node
.host_start
);
1230 tgt_size
+= cur_node
.host_end
- cur_node
.host_start
;
1231 if ((kind
& typemask
) == GOMP_MAP_TO_PSET
)
1235 for (j
= i
+ 1; j
< mapnum
; j
++)
1236 if (!GOMP_MAP_POINTER_P ((kind
= (get_kind (short_mapkind
,
1237 kinds
, j
)) & typemask
))
1238 && !GOMP_MAP_ALWAYS_POINTER_P (kind
))
1240 else if ((uintptr_t) hostaddrs
[j
] < cur_node
.host_start
1241 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1242 > cur_node
.host_end
))
1246 tgt
->list
[j
].key
= NULL
;
1257 gomp_mutex_unlock (&devicep
->lock
);
1258 gomp_fatal ("unexpected aggregation");
1260 tgt
->to_free
= devaddrs
[0];
1261 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1262 tgt
->tgt_end
= tgt
->tgt_start
+ sizes
[0];
1264 else if (not_found_cnt
|| pragma_kind
== GOMP_MAP_VARS_TARGET
)
1266 /* Allocate tgt_align aligned tgt_size block of memory. */
1267 /* FIXME: Perhaps change interface to allocate properly aligned
1269 tgt
->to_free
= devicep
->alloc_func (devicep
->target_id
,
1270 tgt_size
+ tgt_align
- 1);
1273 gomp_mutex_unlock (&devicep
->lock
);
1274 gomp_fatal ("device memory allocation fail");
1277 tgt
->tgt_start
= (uintptr_t) tgt
->to_free
;
1278 tgt
->tgt_start
= (tgt
->tgt_start
+ tgt_align
- 1) & ~(tgt_align
- 1);
1279 tgt
->tgt_end
= tgt
->tgt_start
+ tgt_size
;
1281 if (cbuf
.use_cnt
== 1)
1283 if (cbuf
.chunk_cnt
> 0)
1286 = malloc (cbuf
.chunks
[cbuf
.chunk_cnt
- 1].end
- cbuf
.chunks
[0].start
);
1296 tgt
->to_free
= NULL
;
1302 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1303 tgt_size
= mapnum
* sizeof (void *);
1306 if (not_found_cnt
|| has_firstprivate
|| has_always_ptrset
)
1309 tgt
->array
= gomp_malloc (not_found_cnt
* sizeof (*tgt
->array
));
1310 splay_tree_node array
= tgt
->array
;
1311 size_t j
, field_tgt_offset
= 0, field_tgt_clear
= FIELD_TGT_EMPTY
;
1312 uintptr_t field_tgt_base
= 0;
1313 splay_tree_key field_tgt_structelem_first
= NULL
;
1315 for (i
= 0; i
< mapnum
; i
++)
1316 if (has_always_ptrset
1318 && (get_kind (short_mapkind
, kinds
, i
) & typemask
)
1319 == GOMP_MAP_TO_PSET
)
1321 splay_tree_key k
= tgt
->list
[i
].key
;
1322 bool has_nullptr
= false;
1324 for (j
= 0; j
< k
->tgt
->list_count
; j
++)
1325 if (k
->tgt
->list
[j
].key
== k
)
1327 has_nullptr
= k
->tgt
->list
[j
].has_null_ptr_assoc
;
1330 if (k
->tgt
->list_count
== 0)
1333 assert (j
< k
->tgt
->list_count
);
1335 tgt
->list
[i
].has_null_ptr_assoc
= false;
1336 for (j
= i
+ 1; j
< mapnum
; j
++)
1338 int ptr_kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
1339 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
)
1341 || !GOMP_MAP_POINTER_P (ptr_kind
)
1342 || *(void **) hostaddrs
[j
] == NULL
))
1344 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1345 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1350 if (*(void **) hostaddrs
[j
] == NULL
)
1351 tgt
->list
[i
].has_null_ptr_assoc
= true;
1352 tgt
->list
[j
].key
= k
;
1353 tgt
->list
[j
].copy_from
= false;
1354 tgt
->list
[j
].always_copy_from
= false;
1355 tgt
->list
[j
].is_attach
= false;
1356 gomp_increment_refcount (k
, refcount_set
);
1357 gomp_map_pointer (k
->tgt
, aq
,
1358 (uintptr_t) *(void **) hostaddrs
[j
],
1359 k
->tgt_offset
+ ((uintptr_t) hostaddrs
[j
]
1361 sizes
[j
], cbufp
, false);
1366 else if (tgt
->list
[i
].key
== NULL
)
1368 int kind
= get_kind (short_mapkind
, kinds
, i
);
1369 bool implicit
= get_implicit (short_mapkind
, kinds
, i
);
1370 if (hostaddrs
[i
] == NULL
)
1372 switch (kind
& typemask
)
1374 size_t align
, len
, first
, last
;
1376 case GOMP_MAP_FIRSTPRIVATE
:
1377 align
= (size_t) 1 << (kind
>> rshift
);
1378 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1379 tgt
->list
[i
].offset
= tgt_size
;
1381 gomp_copy_host2dev (devicep
, aq
,
1382 (void *) (tgt
->tgt_start
+ tgt_size
),
1383 (void *) hostaddrs
[i
], len
, false, cbufp
);
1384 /* Save device address in hostaddr to permit latter availablity
1385 when doing a deep-firstprivate with pointer attach. */
1386 hostaddrs
[i
] = (void *) (tgt
->tgt_start
+ tgt_size
);
1389 /* If followed by GOMP_MAP_ATTACH, pointer assign this
1390 firstprivate to hostaddrs[i+1], which is assumed to contain a
1394 == (typemask
& get_kind (short_mapkind
, kinds
, i
+1))))
1396 uintptr_t target
= (uintptr_t) hostaddrs
[i
];
1397 void *devptr
= *(void**) hostaddrs
[i
+1] + sizes
[i
+1];
1398 gomp_copy_host2dev (devicep
, aq
, devptr
, &target
,
1399 sizeof (void *), false, cbufp
);
1403 case GOMP_MAP_FIRSTPRIVATE_INT
:
1404 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
1406 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
:
1407 /* The OpenACC 'host_data' construct only allows 'use_device'
1408 "mapping" clauses, so in the first loop, 'not_found_cnt'
1409 must always have been zero, so all OpenACC 'use_device'
1410 clauses have already been handled. (We can only easily test
1411 'use_device' with 'if_present' clause here.) */
1412 assert (tgt
->list
[i
].offset
== OFFSET_INLINED
);
1413 /* Nevertheless, FALLTHRU to the normal handling, to keep the
1414 code conceptually simple, similar to the first loop. */
1415 case GOMP_MAP_USE_DEVICE_PTR
:
1416 if (tgt
->list
[i
].offset
== 0)
1418 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1419 cur_node
.host_end
= cur_node
.host_start
;
1420 n
= gomp_map_lookup (mem_map
, &cur_node
);
1423 cur_node
.host_start
-= n
->host_start
;
1425 = (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1426 + cur_node
.host_start
);
1428 else if ((kind
& typemask
) == GOMP_MAP_USE_DEVICE_PTR
)
1430 gomp_mutex_unlock (&devicep
->lock
);
1431 gomp_fatal ("use_device_ptr pointer wasn't mapped");
1433 else if ((kind
& typemask
)
1434 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT
)
1435 /* If not present, continue using the host address. */
1438 __builtin_unreachable ();
1439 tgt
->list
[i
].offset
= OFFSET_INLINED
;
1442 case GOMP_MAP_STRUCT
:
1444 last
= i
+ sizes
[i
];
1445 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1446 cur_node
.host_end
= (uintptr_t) hostaddrs
[last
]
1448 if (tgt
->list
[first
].key
!= NULL
)
1450 n
= splay_tree_lookup (mem_map
, &cur_node
);
1453 size_t align
= (size_t) 1 << (kind
>> rshift
);
1454 tgt_size
-= (uintptr_t) hostaddrs
[first
]
1455 - (uintptr_t) hostaddrs
[i
];
1456 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1457 tgt_size
+= (uintptr_t) hostaddrs
[first
]
1458 - (uintptr_t) hostaddrs
[i
];
1459 field_tgt_base
= (uintptr_t) hostaddrs
[first
];
1460 field_tgt_offset
= tgt_size
;
1461 field_tgt_clear
= last
;
1462 field_tgt_structelem_first
= NULL
;
1463 tgt_size
+= cur_node
.host_end
1464 - (uintptr_t) hostaddrs
[first
];
1467 for (i
= first
; i
<= last
; i
++)
1468 gomp_map_fields_existing (tgt
, aq
, n
, first
, i
, hostaddrs
,
1469 sizes
, kinds
, cbufp
, refcount_set
);
1472 case GOMP_MAP_ALWAYS_POINTER
:
1473 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1474 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1475 n
= splay_tree_lookup (mem_map
, &cur_node
);
1477 || n
->host_start
> cur_node
.host_start
1478 || n
->host_end
< cur_node
.host_end
)
1480 gomp_mutex_unlock (&devicep
->lock
);
1481 gomp_fatal ("always pointer not mapped");
1483 if ((get_kind (short_mapkind
, kinds
, i
- 1) & typemask
)
1484 != GOMP_MAP_ALWAYS_POINTER
)
1485 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
- 1);
1486 if (cur_node
.tgt_offset
)
1487 cur_node
.tgt_offset
-= sizes
[i
];
1488 gomp_copy_host2dev (devicep
, aq
,
1489 (void *) (n
->tgt
->tgt_start
1491 + cur_node
.host_start
1493 (void *) &cur_node
.tgt_offset
,
1494 sizeof (void *), true, cbufp
);
1495 cur_node
.tgt_offset
= n
->tgt
->tgt_start
+ n
->tgt_offset
1496 + cur_node
.host_start
- n
->host_start
;
1498 case GOMP_MAP_IF_PRESENT
:
1499 /* Not present - otherwise handled above. Skip over its
1500 MAP_POINTER as well. */
1502 && ((typemask
& get_kind (short_mapkind
, kinds
, i
+ 1))
1503 == GOMP_MAP_POINTER
))
1506 case GOMP_MAP_ATTACH
:
1507 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
:
1509 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1510 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1511 splay_tree_key n
= splay_tree_lookup (mem_map
, &cur_node
);
1514 tgt
->list
[i
].key
= n
;
1515 tgt
->list
[i
].offset
= cur_node
.host_start
- n
->host_start
;
1516 tgt
->list
[i
].length
= n
->host_end
- n
->host_start
;
1517 tgt
->list
[i
].copy_from
= false;
1518 tgt
->list
[i
].always_copy_from
= false;
1519 tgt
->list
[i
].is_attach
= true;
1520 /* OpenACC 'attach'/'detach' doesn't affect
1521 structured/dynamic reference counts ('n->refcount',
1522 'n->dynamic_refcount'). */
1525 = ((kind
& typemask
)
1526 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION
);
1527 gomp_attach_pointer (devicep
, aq
, mem_map
, n
,
1528 (uintptr_t) hostaddrs
[i
], sizes
[i
],
1531 else if ((pragma_kind
& GOMP_MAP_VARS_OPENACC
) != 0)
1533 gomp_mutex_unlock (&devicep
->lock
);
1534 gomp_fatal ("outer struct not mapped for attach");
1541 splay_tree_key k
= &array
->key
;
1542 k
->host_start
= (uintptr_t) hostaddrs
[i
];
1543 if (!GOMP_MAP_POINTER_P (kind
& typemask
))
1544 k
->host_end
= k
->host_start
+ sizes
[i
];
1546 k
->host_end
= k
->host_start
+ sizeof (void *);
1547 splay_tree_key n
= splay_tree_lookup (mem_map
, k
);
1548 if (n
&& n
->refcount
!= REFCOUNT_LINK
)
1549 gomp_map_vars_existing (devicep
, aq
, n
, k
, &tgt
->list
[i
],
1550 kind
& typemask
, false, implicit
, cbufp
,
1555 if (n
&& n
->refcount
== REFCOUNT_LINK
)
1557 /* Replace target address of the pointer with target address
1558 of mapped object in the splay tree. */
1559 splay_tree_remove (mem_map
, n
);
1561 = gomp_malloc_cleared (sizeof (struct splay_tree_aux
));
1562 k
->aux
->link_key
= n
;
1564 size_t align
= (size_t) 1 << (kind
>> rshift
);
1565 tgt
->list
[i
].key
= k
;
1568 k
->dynamic_refcount
= 0;
1569 if (field_tgt_clear
!= FIELD_TGT_EMPTY
)
1571 k
->tgt_offset
= k
->host_start
- field_tgt_base
1575 k
->refcount
= REFCOUNT_STRUCTELEM
;
1576 if (field_tgt_structelem_first
== NULL
)
1578 /* Set to first structure element of sequence. */
1579 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_FIRST
;
1580 field_tgt_structelem_first
= k
;
1583 /* Point to refcount of leading element, but do not
1585 k
->structelem_refcount_ptr
1586 = &field_tgt_structelem_first
->structelem_refcount
;
1588 if (i
== field_tgt_clear
)
1590 k
->refcount
|= REFCOUNT_STRUCTELEM_FLAG_LAST
;
1591 field_tgt_structelem_first
= NULL
;
1594 if (i
== field_tgt_clear
)
1595 field_tgt_clear
= FIELD_TGT_EMPTY
;
1599 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
1600 k
->tgt_offset
= tgt_size
;
1601 tgt_size
+= k
->host_end
- k
->host_start
;
1603 /* First increment, from 0 to 1. gomp_increment_refcount
1604 encapsulates the different increment cases, so use this
1605 instead of directly setting 1 during initialization. */
1606 gomp_increment_refcount (k
, refcount_set
);
1608 tgt
->list
[i
].copy_from
= GOMP_MAP_COPY_FROM_P (kind
& typemask
);
1609 tgt
->list
[i
].always_copy_from
1610 = GOMP_MAP_ALWAYS_FROM_P (kind
& typemask
);
1611 tgt
->list
[i
].is_attach
= false;
1612 tgt
->list
[i
].offset
= 0;
1613 tgt
->list
[i
].length
= k
->host_end
- k
->host_start
;
1616 array
->right
= NULL
;
1617 splay_tree_insert (mem_map
, array
);
1618 switch (kind
& typemask
)
1620 case GOMP_MAP_ALLOC
:
1622 case GOMP_MAP_FORCE_ALLOC
:
1623 case GOMP_MAP_FORCE_FROM
:
1624 case GOMP_MAP_ALWAYS_FROM
:
1627 case GOMP_MAP_TOFROM
:
1628 case GOMP_MAP_FORCE_TO
:
1629 case GOMP_MAP_FORCE_TOFROM
:
1630 case GOMP_MAP_ALWAYS_TO
:
1631 case GOMP_MAP_ALWAYS_TOFROM
:
1632 gomp_copy_host2dev (devicep
, aq
,
1633 (void *) (tgt
->tgt_start
1635 (void *) k
->host_start
,
1636 k
->host_end
- k
->host_start
,
1639 case GOMP_MAP_POINTER
:
1640 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
:
1642 (tgt
, aq
, (uintptr_t) *(void **) k
->host_start
,
1643 k
->tgt_offset
, sizes
[i
], cbufp
,
1645 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION
));
1647 case GOMP_MAP_TO_PSET
:
1648 gomp_copy_host2dev (devicep
, aq
,
1649 (void *) (tgt
->tgt_start
1651 (void *) k
->host_start
,
1652 k
->host_end
- k
->host_start
,
1654 tgt
->list
[i
].has_null_ptr_assoc
= false;
1656 for (j
= i
+ 1; j
< mapnum
; j
++)
1658 int ptr_kind
= (get_kind (short_mapkind
, kinds
, j
)
1660 if (!GOMP_MAP_POINTER_P (ptr_kind
)
1661 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind
))
1663 else if ((uintptr_t) hostaddrs
[j
] < k
->host_start
1664 || ((uintptr_t) hostaddrs
[j
] + sizeof (void *)
1669 tgt
->list
[j
].key
= k
;
1670 tgt
->list
[j
].copy_from
= false;
1671 tgt
->list
[j
].always_copy_from
= false;
1672 tgt
->list
[j
].is_attach
= false;
1673 tgt
->list
[i
].has_null_ptr_assoc
|= !(*(void **) hostaddrs
[j
]);
1674 /* For OpenMP, the use of refcount_sets causes
1675 errors if we set k->refcount = 1 above but also
1676 increment it again here, for decrementing will
1677 not properly match, since we decrement only once
1678 for each key's refcount. Therefore avoid this
1679 increment for OpenMP constructs. */
1681 gomp_increment_refcount (k
, refcount_set
);
1682 gomp_map_pointer (tgt
, aq
,
1683 (uintptr_t) *(void **) hostaddrs
[j
],
1685 + ((uintptr_t) hostaddrs
[j
]
1687 sizes
[j
], cbufp
, false);
1692 case GOMP_MAP_FORCE_PRESENT
:
1694 /* We already looked up the memory region above and it
1696 size_t size
= k
->host_end
- k
->host_start
;
1697 gomp_mutex_unlock (&devicep
->lock
);
1698 #ifdef HAVE_INTTYPES_H
1699 gomp_fatal ("present clause: !acc_is_present (%p, "
1700 "%"PRIu64
" (0x%"PRIx64
"))",
1701 (void *) k
->host_start
,
1702 (uint64_t) size
, (uint64_t) size
);
1704 gomp_fatal ("present clause: !acc_is_present (%p, "
1705 "%lu (0x%lx))", (void *) k
->host_start
,
1706 (unsigned long) size
, (unsigned long) size
);
1710 case GOMP_MAP_FORCE_DEVICEPTR
:
1711 assert (k
->host_end
- k
->host_start
== sizeof (void *));
1712 gomp_copy_host2dev (devicep
, aq
,
1713 (void *) (tgt
->tgt_start
1715 (void *) k
->host_start
,
1716 sizeof (void *), false, cbufp
);
1719 gomp_mutex_unlock (&devicep
->lock
);
1720 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__
,
1724 if (k
->aux
&& k
->aux
->link_key
)
1726 /* Set link pointer on target to the device address of the
1728 void *tgt_addr
= (void *) (tgt
->tgt_start
+ k
->tgt_offset
);
1729 /* We intentionally do not use coalescing here, as it's not
1730 data allocated by the current call to this function. */
1731 gomp_copy_host2dev (devicep
, aq
, (void *) n
->tgt_offset
,
1732 &tgt_addr
, sizeof (void *), true, NULL
);
1739 if (pragma_kind
== GOMP_MAP_VARS_TARGET
)
1741 for (i
= 0; i
< mapnum
; i
++)
1743 cur_node
.tgt_offset
= gomp_map_val (tgt
, hostaddrs
, i
);
1744 gomp_copy_host2dev (devicep
, aq
,
1745 (void *) (tgt
->tgt_start
+ i
* sizeof (void *)),
1746 (void *) &cur_node
.tgt_offset
, sizeof (void *),
1753 /* See 'gomp_coalesce_buf_add'. */
1757 for (c
= 0; c
< cbuf
.chunk_cnt
; ++c
)
1758 gomp_copy_host2dev (devicep
, aq
,
1759 (void *) (tgt
->tgt_start
+ cbuf
.chunks
[c
].start
),
1760 (char *) cbuf
.buf
+ (cbuf
.chunks
[c
].start
1761 - cbuf
.chunks
[0].start
),
1762 cbuf
.chunks
[c
].end
- cbuf
.chunks
[c
].start
,
1769 /* If the variable from "omp target enter data" map-list was already mapped,
1770 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
1772 if ((pragma_kind
& GOMP_MAP_VARS_ENTER_DATA
) && tgt
->refcount
== 0)
1778 gomp_mutex_unlock (&devicep
->lock
);
1782 static struct target_mem_desc
*
1783 gomp_map_vars (struct gomp_device_descr
*devicep
, size_t mapnum
,
1784 void **hostaddrs
, void **devaddrs
, size_t *sizes
, void *kinds
,
1785 bool short_mapkind
, htab_t
*refcount_set
,
1786 enum gomp_map_vars_kind pragma_kind
)
1788 /* This management of a local refcount_set is for convenience of callers
1789 who do not share a refcount_set over multiple map/unmap uses. */
1790 htab_t local_refcount_set
= NULL
;
1791 if (refcount_set
== NULL
)
1793 local_refcount_set
= htab_create (mapnum
);
1794 refcount_set
= &local_refcount_set
;
1797 struct target_mem_desc
*tgt
;
1798 tgt
= gomp_map_vars_internal (devicep
, NULL
, mapnum
, hostaddrs
, devaddrs
,
1799 sizes
, kinds
, short_mapkind
, refcount_set
,
1801 if (local_refcount_set
)
1802 htab_free (local_refcount_set
);
1807 attribute_hidden
struct target_mem_desc
*
1808 goacc_map_vars (struct gomp_device_descr
*devicep
,
1809 struct goacc_asyncqueue
*aq
, size_t mapnum
,
1810 void **hostaddrs
, void **devaddrs
, size_t *sizes
,
1811 void *kinds
, bool short_mapkind
,
1812 enum gomp_map_vars_kind pragma_kind
)
1814 return gomp_map_vars_internal (devicep
, aq
, mapnum
, hostaddrs
, devaddrs
,
1815 sizes
, kinds
, short_mapkind
, NULL
,
1816 GOMP_MAP_VARS_OPENACC
| pragma_kind
);
1820 gomp_unmap_tgt (struct target_mem_desc
*tgt
)
1822 /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1824 gomp_free_device_memory (tgt
->device_descr
, tgt
->to_free
);
1831 gomp_unref_tgt (void *ptr
)
1833 bool is_tgt_unmapped
= false;
1835 struct target_mem_desc
*tgt
= (struct target_mem_desc
*) ptr
;
1837 if (tgt
->refcount
> 1)
1841 gomp_unmap_tgt (tgt
);
1842 is_tgt_unmapped
= true;
1845 return is_tgt_unmapped
;
/* void-returning wrapper around gomp_unref_tgt, usable where a callback
   with 'void (*) (void *)' signature is required.  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}
1855 gomp_remove_splay_tree_key (splay_tree sp
, splay_tree_key k
)
1857 splay_tree_remove (sp
, k
);
1860 if (k
->aux
->link_key
)
1861 splay_tree_insert (sp
, (splay_tree_node
) k
->aux
->link_key
);
1862 if (k
->aux
->attach_count
)
1863 free (k
->aux
->attach_count
);
1869 static inline __attribute__((always_inline
)) bool
1870 gomp_remove_var_internal (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1871 struct goacc_asyncqueue
*aq
)
1873 bool is_tgt_unmapped
= false;
1875 if (REFCOUNT_STRUCTELEM_P (k
->refcount
))
1877 if (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
) == false)
1878 /* Infer the splay_tree_key of the first structelem key using the
1879 pointer to the first structleme_refcount. */
1880 k
= (splay_tree_key
) ((char *) k
->structelem_refcount_ptr
1881 - offsetof (struct splay_tree_key_s
,
1882 structelem_refcount
));
1883 assert (REFCOUNT_STRUCTELEM_FIRST_P (k
->refcount
));
1885 /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1886 with the splay_tree_keys embedded inside. */
1887 splay_tree_node node
=
1888 (splay_tree_node
) ((char *) k
1889 - offsetof (struct splay_tree_node_s
, key
));
1892 /* Starting from the _FIRST key, and continue for all following
1894 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1895 if (REFCOUNT_STRUCTELEM_LAST_P (k
->refcount
))
1902 gomp_remove_splay_tree_key (&devicep
->mem_map
, k
);
1905 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
1908 is_tgt_unmapped
= gomp_unref_tgt ((void *) k
->tgt
);
1909 return is_tgt_unmapped
;
1912 attribute_hidden
bool
1913 gomp_remove_var (struct gomp_device_descr
*devicep
, splay_tree_key k
)
1915 return gomp_remove_var_internal (devicep
, k
, NULL
);
1918 /* Remove a variable asynchronously. This actually removes the variable
1919 mapping immediately, but retains the linked target_mem_desc until the
1920 asynchronous operation has completed (as it may still refer to target
1921 memory). The device lock must be held before entry, and remains locked on
1924 attribute_hidden
void
1925 gomp_remove_var_async (struct gomp_device_descr
*devicep
, splay_tree_key k
,
1926 struct goacc_asyncqueue
*aq
)
1928 (void) gomp_remove_var_internal (devicep
, k
, aq
);
1931 /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
1932 variables back from device to host: if it is false, it is assumed that this
1933 has been done already. */
1935 static inline __attribute__((always_inline
)) void
1936 gomp_unmap_vars_internal (struct target_mem_desc
*tgt
, bool do_copyfrom
,
1937 htab_t
*refcount_set
, struct goacc_asyncqueue
*aq
)
1939 struct gomp_device_descr
*devicep
= tgt
->device_descr
;
1941 if (tgt
->list_count
== 0)
1947 gomp_mutex_lock (&devicep
->lock
);
1948 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
1950 gomp_mutex_unlock (&devicep
->lock
);
1958 /* We must perform detachments before any copies back to the host. */
1959 for (i
= 0; i
< tgt
->list_count
; i
++)
1961 splay_tree_key k
= tgt
->list
[i
].key
;
1963 if (k
!= NULL
&& tgt
->list
[i
].is_attach
)
1964 gomp_detach_pointer (devicep
, aq
, k
, tgt
->list
[i
].key
->host_start
1965 + tgt
->list
[i
].offset
,
1969 for (i
= 0; i
< tgt
->list_count
; i
++)
1971 splay_tree_key k
= tgt
->list
[i
].key
;
1975 /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1976 counts ('n->refcount', 'n->dynamic_refcount'). */
1977 if (tgt
->list
[i
].is_attach
)
1980 bool do_copy
, do_remove
;
1981 gomp_decrement_refcount (k
, refcount_set
, false, &do_copy
, &do_remove
);
1983 if ((do_copy
&& do_copyfrom
&& tgt
->list
[i
].copy_from
)
1984 || tgt
->list
[i
].always_copy_from
)
1985 gomp_copy_dev2host (devicep
, aq
,
1986 (void *) (k
->host_start
+ tgt
->list
[i
].offset
),
1987 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
1988 + tgt
->list
[i
].offset
),
1989 tgt
->list
[i
].length
);
1992 struct target_mem_desc
*k_tgt
= k
->tgt
;
1993 bool is_tgt_unmapped
= gomp_remove_var (devicep
, k
);
1994 /* It would be bad if TGT got unmapped while we're still iterating
1995 over its LIST_COUNT, and also expect to use it in the following
1997 assert (!is_tgt_unmapped
2003 devicep
->openacc
.async
.queue_callback_func (aq
, gomp_unref_tgt_void
,
2006 gomp_unref_tgt ((void *) tgt
);
2008 gomp_mutex_unlock (&devicep
->lock
);
2012 gomp_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2013 htab_t
*refcount_set
)
2015 /* This management of a local refcount_set is for convenience of callers
2016 who do not share a refcount_set over multiple map/unmap uses. */
2017 htab_t local_refcount_set
= NULL
;
2018 if (refcount_set
== NULL
)
2020 local_refcount_set
= htab_create (tgt
->list_count
);
2021 refcount_set
= &local_refcount_set
;
2024 gomp_unmap_vars_internal (tgt
, do_copyfrom
, refcount_set
, NULL
);
2026 if (local_refcount_set
)
2027 htab_free (local_refcount_set
);
2030 attribute_hidden
void
2031 goacc_unmap_vars (struct target_mem_desc
*tgt
, bool do_copyfrom
,
2032 struct goacc_asyncqueue
*aq
)
2034 gomp_unmap_vars_internal (tgt
, do_copyfrom
, NULL
, aq
);
2038 gomp_update (struct gomp_device_descr
*devicep
, size_t mapnum
, void **hostaddrs
,
2039 size_t *sizes
, void *kinds
, bool short_mapkind
)
2042 struct splay_tree_key_s cur_node
;
2043 const int typemask
= short_mapkind
? 0xff : 0x7;
2051 gomp_mutex_lock (&devicep
->lock
);
2052 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2054 gomp_mutex_unlock (&devicep
->lock
);
2058 for (i
= 0; i
< mapnum
; i
++)
2061 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
2062 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
2063 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
2066 int kind
= get_kind (short_mapkind
, kinds
, i
);
2067 if (n
->host_start
> cur_node
.host_start
2068 || n
->host_end
< cur_node
.host_end
)
2070 gomp_mutex_unlock (&devicep
->lock
);
2071 gomp_fatal ("Trying to update [%p..%p) object when "
2072 "only [%p..%p) is mapped",
2073 (void *) cur_node
.host_start
,
2074 (void *) cur_node
.host_end
,
2075 (void *) n
->host_start
,
2076 (void *) n
->host_end
);
2079 if (n
->aux
&& n
->aux
->attach_count
)
2081 uintptr_t addr
= cur_node
.host_start
;
2082 while (addr
< cur_node
.host_end
)
2084 /* We have to be careful not to overwrite still attached
2085 pointers during host<->device updates. */
2086 size_t i
= (addr
- cur_node
.host_start
) / sizeof (void *);
2087 if (n
->aux
->attach_count
[i
] == 0)
2089 void *devaddr
= (void *) (n
->tgt
->tgt_start
2091 + addr
- n
->host_start
);
2092 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2093 gomp_copy_host2dev (devicep
, NULL
,
2094 devaddr
, (void *) addr
,
2095 sizeof (void *), false, NULL
);
2096 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2097 gomp_copy_dev2host (devicep
, NULL
,
2098 (void *) addr
, devaddr
,
2101 addr
+= sizeof (void *);
2106 void *hostaddr
= (void *) cur_node
.host_start
;
2107 void *devaddr
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
2108 + cur_node
.host_start
2110 size_t size
= cur_node
.host_end
- cur_node
.host_start
;
2112 if (GOMP_MAP_COPY_TO_P (kind
& typemask
))
2113 gomp_copy_host2dev (devicep
, NULL
, devaddr
, hostaddr
, size
,
2115 if (GOMP_MAP_COPY_FROM_P (kind
& typemask
))
2116 gomp_copy_dev2host (devicep
, NULL
, hostaddr
, devaddr
, size
);
2120 gomp_mutex_unlock (&devicep
->lock
);
2123 static struct gomp_offload_icv_list
*
2124 gomp_get_offload_icv_item (int dev_num
)
2126 struct gomp_offload_icv_list
*l
= gomp_offload_icv_list
;
2127 while (l
!= NULL
&& l
->device_num
!= dev_num
)
2133 /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values
2134 depending on the device num and the variable hierarchy
2135 (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given
2136 device and thus no item with that device number is contained in
2137 gomp_offload_icv_list, then a new item is created and added to the list. */
2139 static struct gomp_offload_icvs
*
2140 get_gomp_offload_icvs (int dev_num
)
2142 struct gomp_icv_list
*dev
2143 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV
);
2144 struct gomp_icv_list
*all
2145 = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL
);
2146 struct gomp_icv_list
*dev_x
= gomp_get_initial_icv_item (dev_num
);
2147 struct gomp_offload_icv_list
*offload_icvs
2148 = gomp_get_offload_icv_item (dev_num
);
2150 if (offload_icvs
!= NULL
)
2151 return &offload_icvs
->icvs
;
2153 struct gomp_offload_icv_list
*new
2154 = (struct gomp_offload_icv_list
*) gomp_malloc (sizeof (struct gomp_offload_icv_list
));
2156 new->device_num
= dev_num
;
2157 new->icvs
.device_num
= dev_num
;
2158 new->next
= gomp_offload_icv_list
;
2160 if (dev_x
!= NULL
&& gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_NTEAMS
))
2161 new->icvs
.nteams
= dev_x
->icvs
.nteams_var
;
2162 else if (dev
!= NULL
&& gomp_get_icv_flag (dev
->flags
, GOMP_ICV_NTEAMS
))
2163 new->icvs
.nteams
= dev
->icvs
.nteams_var
;
2164 else if (all
!= NULL
&& gomp_get_icv_flag (all
->flags
, GOMP_ICV_NTEAMS
))
2165 new->icvs
.nteams
= all
->icvs
.nteams_var
;
2167 new->icvs
.nteams
= gomp_default_icv_values
.nteams_var
;
2170 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2171 new->icvs
.teams_thread_limit
= dev_x
->icvs
.teams_thread_limit_var
;
2172 else if (dev
!= NULL
2173 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2174 new->icvs
.teams_thread_limit
= dev
->icvs
.teams_thread_limit_var
;
2175 else if (all
!= NULL
2176 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_TEAMS_THREAD_LIMIT
))
2177 new->icvs
.teams_thread_limit
= all
->icvs
.teams_thread_limit_var
;
2179 new->icvs
.teams_thread_limit
2180 = gomp_default_icv_values
.teams_thread_limit_var
;
2183 && gomp_get_icv_flag (dev_x
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2184 new->icvs
.default_device
= dev_x
->icvs
.default_device_var
;
2185 else if (dev
!= NULL
2186 && gomp_get_icv_flag (dev
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2187 new->icvs
.default_device
= dev
->icvs
.default_device_var
;
2188 else if (all
!= NULL
2189 && gomp_get_icv_flag (all
->flags
, GOMP_ICV_DEFAULT_DEVICE
))
2190 new->icvs
.default_device
= all
->icvs
.default_device_var
;
2192 new->icvs
.default_device
= gomp_default_icv_values
.default_device_var
;
2194 gomp_offload_icv_list
= new;
2198 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2199 And insert to splay tree the mapping between addresses from HOST_TABLE and
2200 from loaded target image. We rely in the host and device compiler
2201 emitting variable and functions in the same order. */
2204 gomp_load_image_to_device (struct gomp_device_descr
*devicep
, unsigned version
,
2205 const void *host_table
, const void *target_data
,
2206 bool is_register_lock
)
2208 void **host_func_table
= ((void ***) host_table
)[0];
2209 void **host_funcs_end
= ((void ***) host_table
)[1];
2210 void **host_var_table
= ((void ***) host_table
)[2];
2211 void **host_vars_end
= ((void ***) host_table
)[3];
2213 /* The func table contains only addresses, the var table contains addresses
2214 and corresponding sizes. */
2215 int num_funcs
= host_funcs_end
- host_func_table
;
2216 int num_vars
= (host_vars_end
- host_var_table
) / 2;
2218 /* Load image to device and get target addresses for the image. */
2219 struct addr_pair
*target_table
= NULL
;
2220 uint64_t *rev_target_fn_table
= NULL
;
2221 int i
, num_target_entries
;
2223 /* With reverse offload, insert also target-host addresses. */
2224 bool rev_lookup
= omp_requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
;
2227 = devicep
->load_image_func (devicep
->target_id
, version
,
2228 target_data
, &target_table
,
2229 rev_lookup
? &rev_target_fn_table
: NULL
);
2231 if (num_target_entries
!= num_funcs
+ num_vars
2232 /* "+1" due to the additional ICV struct. */
2233 && num_target_entries
!= num_funcs
+ num_vars
+ 1)
2235 gomp_mutex_unlock (&devicep
->lock
);
2236 if (is_register_lock
)
2237 gomp_mutex_unlock (®ister_lock
);
2238 gomp_fatal ("Cannot map target functions or variables"
2239 " (expected %u, have %u)", num_funcs
+ num_vars
,
2240 num_target_entries
);
2243 /* Insert host-target address mapping into splay tree. */
2244 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
2245 /* "+1" due to the additional ICV struct. */
2246 tgt
->array
= gomp_malloc ((num_funcs
+ num_vars
+ 1)
2247 * sizeof (*tgt
->array
));
2248 if (rev_target_fn_table
)
2249 tgt
->rev_array
= gomp_malloc (num_funcs
* sizeof (*tgt
->rev_array
));
2251 tgt
->rev_array
= NULL
;
2252 tgt
->refcount
= REFCOUNT_INFINITY
;
2255 tgt
->to_free
= NULL
;
2257 tgt
->list_count
= 0;
2258 tgt
->device_descr
= devicep
;
2259 splay_tree_node array
= tgt
->array
;
2260 reverse_splay_tree_node rev_array
= tgt
->rev_array
;
2262 for (i
= 0; i
< num_funcs
; i
++)
2264 splay_tree_key k
= &array
->key
;
2265 k
->host_start
= (uintptr_t) host_func_table
[i
];
2266 k
->host_end
= k
->host_start
+ 1;
2268 k
->tgt_offset
= target_table
[i
].start
;
2269 k
->refcount
= REFCOUNT_INFINITY
;
2270 k
->dynamic_refcount
= 0;
2273 array
->right
= NULL
;
2274 splay_tree_insert (&devicep
->mem_map
, array
);
2275 if (rev_target_fn_table
)
2277 reverse_splay_tree_key k2
= &rev_array
->key
;
2278 k2
->dev
= rev_target_fn_table
[i
];
2280 rev_array
->left
= NULL
;
2281 rev_array
->right
= NULL
;
2283 reverse_splay_tree_insert (&devicep
->mem_map_rev
, rev_array
);
2289 /* Most significant bit of the size in host and target tables marks
2290 "omp declare target link" variables. */
2291 const uintptr_t link_bit
= 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__
- 1);
2292 const uintptr_t size_mask
= ~link_bit
;
2294 for (i
= 0; i
< num_vars
; i
++)
2296 struct addr_pair
*target_var
= &target_table
[num_funcs
+ i
];
2297 uintptr_t target_size
= target_var
->end
- target_var
->start
;
2298 bool is_link_var
= link_bit
& (uintptr_t) host_var_table
[i
* 2 + 1];
2300 if (!is_link_var
&& (uintptr_t) host_var_table
[i
* 2 + 1] != target_size
)
2302 gomp_mutex_unlock (&devicep
->lock
);
2303 if (is_register_lock
)
2304 gomp_mutex_unlock (®ister_lock
);
2305 gomp_fatal ("Cannot map target variables (size mismatch)");
2308 splay_tree_key k
= &array
->key
;
2309 k
->host_start
= (uintptr_t) host_var_table
[i
* 2];
2311 = k
->host_start
+ (size_mask
& (uintptr_t) host_var_table
[i
* 2 + 1]);
2313 k
->tgt_offset
= target_var
->start
;
2314 k
->refcount
= is_link_var
? REFCOUNT_LINK
: REFCOUNT_INFINITY
;
2315 k
->dynamic_refcount
= 0;
2318 array
->right
= NULL
;
2319 splay_tree_insert (&devicep
->mem_map
, array
);
2323 /* Last entry is for a ICVs variable.
2324 Tolerate case where plugin does not return those entries. */
2325 if (num_funcs
+ num_vars
< num_target_entries
)
2327 struct addr_pair
*var
= &target_table
[num_funcs
+ num_vars
];
2329 /* Start address will be non-zero for the ICVs variable if
2330 the variable was found in this image. */
2331 if (var
->start
!= 0)
2333 /* The index of the devicep within devices[] is regarded as its
2334 'device number', which is different from the per-device type
2335 devicep->target_id. */
2336 int dev_num
= (int) (devicep
- &devices
[0]);
2337 struct gomp_offload_icvs
*icvs
= get_gomp_offload_icvs (dev_num
);
2338 size_t var_size
= var
->end
- var
->start
;
2339 if (var_size
!= sizeof (struct gomp_offload_icvs
))
2341 gomp_mutex_unlock (&devicep
->lock
);
2342 if (is_register_lock
)
2343 gomp_mutex_unlock (®ister_lock
);
2344 gomp_fatal ("offload plugin managed 'icv struct' not of expected "
2347 /* Copy the ICVs variable to place on device memory, hereby
2348 actually designating its device number into effect. */
2349 gomp_copy_host2dev (devicep
, NULL
, (void *) var
->start
, icvs
,
2350 var_size
, false, NULL
);
2351 splay_tree_key k
= &array
->key
;
2352 k
->host_start
= (uintptr_t) icvs
;
2354 k
->host_start
+ (size_mask
& sizeof (struct gomp_offload_icvs
));
2356 k
->tgt_offset
= var
->start
;
2357 k
->refcount
= REFCOUNT_INFINITY
;
2358 k
->dynamic_refcount
= 0;
2361 array
->right
= NULL
;
2362 splay_tree_insert (&devicep
->mem_map
, array
);
2367 free (target_table
);
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked. */
/* NOTE(review): this chunk is a lossy extraction; some tokens (return type,
   the 'version' parameter, braces, the trailing gomp_fatal) were dropped by
   the extractor.  Only comments are added here; the surviving token stream
   is unchanged.  */
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       const void *host_table, const void *target_data)
/* HOST_TABLE is a 4-slot descriptor: [0]/[1] delimit the host function
   address array, [2]/[3] delimit the host variable table.  */
void **host_func_table = ((void ***) host_table)[0];
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
/* The func table contains only addresses, the var table contains addresses
   and corresponding sizes. */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
/* Reusable lookup key; functions are registered as 1-byte [addr, addr+1)
   ranges, so such a key finds them.  */
struct splay_tree_key_s k;
splay_tree_key node = NULL;
/* Find mapping at start of node array */
if (num_funcs || num_vars)
k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		: (uintptr_t) host_var_table[0]);
k.host_end = k.host_start + 1;
node = splay_tree_lookup (&devicep->mem_map, &k);
/* Let the plugin drop the image; fatal (after releasing the device lock)
   on failure.  */
if (!devicep->unload_image_func (devicep->target_id, version, target_data))
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("image unload fail");
if (devicep->mem_map_rev.root)
/* Free reverse offload splay tree + data; 'tgt->rev_array' is the only
   backing allocation (per the asserts below).  */
assert (node && node->tgt && node->tgt->rev_array);
assert (devicep->mem_map_rev.root->key.k->tgt == node->tgt);
free (node->tgt->rev_array);
devicep->mem_map_rev.root = NULL;
/* Remove mappings from splay tree. */
for (i = 0; i < num_funcs; i++)
k.host_start = (uintptr_t) host_func_table[i];
k.host_end = k.host_start + 1;
splay_tree_remove (&devicep->mem_map, &k);
/* Most significant bit of the size in host and target tables marks
   "omp declare target link" variables. */
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
const uintptr_t size_mask = ~link_bit;
bool is_tgt_unmapped = false;
for (i = 0; i < num_vars; i++)
k.host_start = (uintptr_t) host_var_table[i * 2];
/* (extractor dropped the 'k.host_end' left-hand side of this assignment) */
= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
/* Plain variables are simply removed from the map; the other branch goes
   through gomp_remove_var so the target descriptor is torn down too.  */
if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
splay_tree_remove (&devicep->mem_map, &k);
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
is_tgt_unmapped = gomp_remove_var (devicep, n);
/* (extractor dropped the statement guarded by this condition)  */
if (node && !is_tgt_unmapped)
2453 gomp_requires_to_name (char *buf
, size_t size
, int requires_mask
)
2455 char *end
= buf
+ size
, *p
= buf
;
2456 if (requires_mask
& GOMP_REQUIRES_UNIFIED_ADDRESS
)
2457 p
+= snprintf (p
, end
- p
, "unified_address");
2458 if (requires_mask
& GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
)
2459 p
+= snprintf (p
, end
- p
, "%sunified_shared_memory",
2460 (p
== buf
? "" : ", "));
2461 if (requires_mask
& GOMP_REQUIRES_REVERSE_OFFLOAD
)
2462 p
+= snprintf (p
, end
- p
, "%sreverse_offload",
2463 (p
== buf
? "" : ", "));
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA. */
/* NOTE(review): lossy extraction -- return type, braces and a few statements
   (e.g. the version <= 1 'else' arms) were dropped; comments only are added
   below, the surviving tokens are unchanged.  */
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *data)
/* Refuse images produced by a libgomp newer than this one.  */
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
gomp_fatal ("Library too old for offload (version %u < %u)",
	    GOMP_VERSION, GOMP_VERSION_LIB (version));
const void *target_data;
/* Version > 1 images prefix the target descriptor with the OpenMP
   'requires' mask word in slot 0.  */
if (GOMP_VERSION_LIB (version) > 1)
omp_req = (int) (size_t) ((void **) data)[0];
target_data = &((void **) data)[1];
gomp_mutex_lock (&register_lock);
/* Diagnose 'requires' clauses that differ between compilation units.  */
if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
char buf1[sizeof ("unified_address, unified_shared_memory, "
		  "reverse_offload")];
char buf2[sizeof ("unified_address, unified_shared_memory, "
		  "reverse_offload")];
gomp_requires_to_name (buf2, sizeof (buf2),
		       omp_req != GOMP_REQUIRES_TARGET_USED
		       ? omp_req : omp_requires_mask);
if (omp_req != GOMP_REQUIRES_TARGET_USED
    && omp_requires_mask != GOMP_REQUIRES_TARGET_USED)
gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask);
gomp_fatal ("OpenMP 'requires' directive with non-identical clauses "
	    "in multiple compilation units: '%s' vs. '%s'",
gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in "
	    "some compilation units", buf2);
omp_requires_mask = omp_req;
/* Load image to all initialized devices. */
for (i = 0; i < num_devices; i++)
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type
    && devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_load_image_to_device (devicep, version,
			   host_table, target_data, true);
gomp_mutex_unlock (&devicep->lock);
/* Insert image to array of pending images. */
/* (extractor dropped the 'offload_images' left-hand side here) */
= gomp_realloc_unlock (offload_images,
		       (num_offload_images + 1)
		       * sizeof (struct offload_image_descr));
offload_images[num_offload_images].version = version;
offload_images[num_offload_images].type = target_type;
offload_images[num_offload_images].host_table = host_table;
offload_images[num_offload_images].target_data = target_data;
num_offload_images++;
gomp_mutex_unlock (&register_lock);
/* Legacy entry point: forwards to the versioned registration routine with
   version 0, under which DATA is the target descriptor itself.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and DATA. */
/* NOTE(review): lossy extraction -- return type, braces and some statements
   were dropped; comments only are added, surviving tokens unchanged.  */
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *data)
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
gomp_fatal ("Library too old for offload (version %u < %u)",
	    GOMP_VERSION, GOMP_VERSION_LIB (version));
const void *target_data;
/* Version > 1 images carry the 'requires' mask in slot 0; skip past it.  */
if (GOMP_VERSION_LIB (version) > 1)
target_data = &((void **) data)[1];
gomp_mutex_lock (&register_lock);
/* Unload image from all initialized devices. */
for (i = 0; i < num_devices; i++)
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type
    && devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_unload_image_from_device (devicep, version,
			       host_table, target_data);
gomp_mutex_unlock (&devicep->lock);
/* Remove image from array of pending images. */
/* Swap-with-last removal; ordering of the pending array is irrelevant.  */
for (i = 0; i < num_offload_images; i++)
if (offload_images[i].target_data == target_data)
offload_images[i] = offload_images[--num_offload_images];
gomp_mutex_unlock (&register_lock);
/* Legacy entry point: forwards to the versioned unregistration routine with
   version 0, under which DATA is the target descriptor itself.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
2607 /* This function initializes the target device, specified by DEVICEP. DEVICEP
2608 must be locked on entry, and remains locked on return. */
2610 attribute_hidden
void
2611 gomp_init_device (struct gomp_device_descr
*devicep
)
2614 if (!devicep
->init_device_func (devicep
->target_id
))
2616 gomp_mutex_unlock (&devicep
->lock
);
2617 gomp_fatal ("device initialization failed");
2620 /* Load to device all images registered by the moment. */
2621 for (i
= 0; i
< num_offload_images
; i
++)
2623 struct offload_image_descr
*image
= &offload_images
[i
];
2624 if (image
->type
== devicep
->type
)
2625 gomp_load_image_to_device (devicep
, image
->version
,
2626 image
->host_table
, image
->target_data
,
2630 /* Initialize OpenACC asynchronous queues. */
2631 goacc_init_asyncqueues (devicep
);
2633 devicep
->state
= GOMP_DEVICE_INITIALIZED
;
2636 /* This function finalizes the target device, specified by DEVICEP. DEVICEP
2637 must be locked on entry, and remains locked on return. */
2639 attribute_hidden
bool
2640 gomp_fini_device (struct gomp_device_descr
*devicep
)
2642 bool ret
= goacc_fini_asyncqueues (devicep
);
2643 ret
&= devicep
->fini_device_func (devicep
->target_id
);
2644 devicep
->state
= GOMP_DEVICE_FINALIZED
;
2648 attribute_hidden
void
2649 gomp_unload_device (struct gomp_device_descr
*devicep
)
2651 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
2655 /* Unload from device all images registered at the moment. */
2656 for (i
= 0; i
< num_offload_images
; i
++)
2658 struct offload_image_descr
*image
= &offload_images
[i
];
2659 if (image
->type
== devicep
->type
)
2660 gomp_unload_image_from_device (devicep
, image
->version
,
2662 image
->target_data
);
/* Host fallback for GOMP_target{,_ext} routines. */
/* NOTE(review): lossy extraction -- return type, braces, the loop head over
   ARGS and the thread-state restore were dropped; comments only added.  */
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
		      struct gomp_device_descr *devicep, void **args)
struct gomp_thread old_thr, *thr = gomp_thread ();
/* Under mandatory offload the host fallback is an error, not a fallback.
   (the second half of this condition was lost in extraction)  */
if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
	    "be used for offloading");
/* Run FN in a pristine thread state (the old one is saved in old_thr),
   keeping only the place binding.  */
memset (thr, '\0', sizeof (*thr));
if (gomp_places_list)
thr->place = old_thr.place;
thr->ts.place_partition_len = gomp_places_list_len;
/* Decode the device-specific ARGS entries; each entry is one element, or
   two when GOMP_TARGET_ARG_SUBSEQUENT_PARAM carries the value separately.  */
intptr_t id = (intptr_t) *args++, val;
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
val = (intptr_t) *args++;
val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
id &= GOMP_TARGET_ARG_ID_MASK;
if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
/* Clamp to int range before storing into the thread_limit ICV.  */
val = val > INT_MAX ? INT_MAX : val;
gomp_icv (true)->thread_limit_var = val;
/* Release resources of the temporary thread state.  */
gomp_free_thread (thr);
2711 /* Calculate alignment and size requirements of a private copy of data shared
2712 as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
2715 calculate_firstprivate_requirements (size_t mapnum
, size_t *sizes
,
2716 unsigned short *kinds
, size_t *tgt_align
,
2720 for (i
= 0; i
< mapnum
; i
++)
2721 if ((kinds
[i
] & 0xff) == GOMP_MAP_FIRSTPRIVATE
)
2723 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
2724 if (*tgt_align
< align
)
2726 *tgt_size
= (*tgt_size
+ align
- 1) & ~(align
- 1);
2727 *tgt_size
+= sizes
[i
];
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
/* NOTE(review): lossy extraction -- the trailing tgt_size parameter, the
   alignment guard, braces and a loop-index advance were dropped; comments
   only added, surviving tokens unchanged.  */
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
/* Align the scratch buffer TGT to the largest required alignment.  */
uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
tgt += tgt_align - al;
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE
    && hostaddrs[i] != NULL)
/* High byte of the kind encodes log2 alignment of this entry.  */
size_t align = (size_t) 1 << (kinds[i] >> 8);
tgt_size = (tgt_size + align - 1) & ~(align - 1);
memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
/* Redirect the host address to the private copy just made.  */
hostaddrs[i] = tgt + tgt_size;
tgt_size = tgt_size + sizes[i];
/* An immediately following ATTACH entry refers to this firstprivate
   copy: patch the pointer (at bias sizes[i+1]) through to it.  */
if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH)
*(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i];
2759 /* Helper function of GOMP_target{,_ext} routines. */
2762 gomp_get_target_fn_addr (struct gomp_device_descr
*devicep
,
2763 void (*host_fn
) (void *))
2765 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
)
2766 return (void *) host_fn
;
2769 gomp_mutex_lock (&devicep
->lock
);
2770 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
2772 gomp_mutex_unlock (&devicep
->lock
);
2776 struct splay_tree_key_s k
;
2777 k
.host_start
= (uintptr_t) host_fn
;
2778 k
.host_end
= k
.host_start
+ 1;
2779 splay_tree_key tgt_fn
= splay_tree_lookup (&devicep
->mem_map
, &k
);
2780 gomp_mutex_unlock (&devicep
->lock
);
2784 return (void *) tgt_fn
->tgt_offset
;
/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds. */
/* NOTE(review): lossy extraction -- return type, the fn_addr declaration,
   the leading 'devicep == NULL' test and braces were dropped; comments only
   added, surviving tokens unchanged.  */
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
struct gomp_device_descr *devicep = resolve_device (device, true);
/* Fall back to host execution when there is no usable device or device
   function address.  (head of this condition lost in extraction)  */
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
/* All shared memory devices should use the GOMP_target_ext function. */
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
/* Map all variables, run, then unmap; the refcount set de-duplicates
   reference counting within this single construct.  */
htab_t refcount_set = htab_create (mapnum);
struct target_mem_desc *tgt_vars
  = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		   &refcount_set, GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
htab_clear (refcount_set);
gomp_unmap_vars (tgt_vars, true, &refcount_set);
htab_free (refcount_set);
2824 static inline unsigned int
2825 clear_unsupported_flags (struct gomp_device_descr
*devicep
, unsigned int flags
)
2827 /* If we cannot run asynchronously, simply ignore nowait. */
2828 if (devicep
!= NULL
&& devicep
->async_run_func
== NULL
)
2829 flags
&= ~GOMP_TARGET_FLAG_NOWAIT
;
2835 gomp_copy_back_icvs (struct gomp_device_descr
*devicep
, int device
)
2837 struct gomp_offload_icv_list
*item
= gomp_get_offload_icv_item (device
);
2841 void *host_ptr
= &item
->icvs
;
2842 void *dev_ptr
= omp_get_mapped_ptr (host_ptr
, device
);
2843 if (dev_ptr
!= NULL
)
2844 gomp_copy_dev2host (devicep
, NULL
, host_ptr
, dev_ptr
,
2845 sizeof (struct gomp_offload_icvs
));
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one two
   elements where the first specifies for which device it is intended, the type
   and optionally also the value.  If the value is not present in the first
   one, the whole second element the actual value.  The last element of the
   array is a single NULL.  Among the device independent can be for example
   NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device. */
/* NOTE(review): lossy extraction -- return type, braces, loop heads and
   several statements were dropped by the extractor; only comments were
   added, the surviving token stream is unchanged.  */
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
struct gomp_device_descr *devicep = resolve_device (device, true);
size_t tgt_align = 0, tgt_size = 0;
bool fpc_done = false;
/* Obtain the original TEAMS and THREADS values from ARGS. */
intptr_t orig_teams = 1, orig_threads = 0;
size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1;
void **tmpargs = args;
/* (loop over the NULL-terminated ARGS list; head lost in extraction)  */
intptr_t id = (intptr_t) *tmpargs++, val;
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
val = (intptr_t) *tmpargs++;
val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
val = val > INT_MAX ? INT_MAX : val;
if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS)
else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT)
intptr_t new_teams = orig_teams, new_threads = orig_threads;
/* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1.
   ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the
   value could not be determined. No change.
   ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause.
   Set device-specific value.
   ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause.  */
if (orig_teams == -2)
else if (orig_teams == 0)
struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
new_teams = item->icvs.nteams;
/* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS
   region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by
   e.g. a THREAD_LIMIT clause. */
if (orig_teams > -2 && orig_threads == 0)
struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device);
new_threads = item->icvs.teams_thread_limit;
/* Copy and change the arguments list only if TEAMS or THREADS need to be
   updated.  */
void **new_args = args;
if (orig_teams != new_teams || orig_threads != new_threads)
/* Encoded length of each entry: 1 element when the value fits beside the
   id word, 2 when a subsequent-parameter slot is needed.  */
size_t tms_len = (orig_teams == new_teams
: (new_teams > -(1 << 15) && new_teams < (1 << 15)
size_t ths_len = (orig_threads == new_threads
: (new_threads > -(1 << 15) && new_threads < (1 << 15)
/* One additional item after the last arg must be NULL. */
size_t new_args_cnt = num_args - teams_len - threads_len + tms_len
new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*));
void **tmp_new_args = new_args;
/* Copy all args except TEAMS and THREADS.  TEAMS and THREADS are copied
   too if they have not been changed and skipped otherwise. */
intptr_t id = (intptr_t) *tmpargs;
if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS
     && orig_teams != new_teams)
    || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT
	&& orig_threads != new_threads))
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
*tmp_new_args++ = *tmpargs++;
if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
*tmp_new_args++ = *tmpargs++;
/* Add the new TEAMS arg to the new args list if it has been changed. */
if (orig_teams != new_teams)
intptr_t new_val = new_teams;
new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
	  | GOMP_TARGET_ARG_NUM_TEAMS;
*tmp_new_args++ = (void *) new_val;
*tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
			    | GOMP_TARGET_ARG_NUM_TEAMS);
*tmp_new_args++ = (void *) new_val;
/* Add the new THREADS arg to the new args list if it has been changed. */
if (orig_threads != new_threads)
intptr_t new_val = new_threads;
new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT)
	  | GOMP_TARGET_ARG_THREAD_LIMIT;
*tmp_new_args++ = (void *) new_val;
*tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM
			    | GOMP_TARGET_ARG_THREAD_LIMIT);
*tmp_new_args++ = (void *) new_val;
*tmp_new_args = NULL;
flags = clear_unsupported_flags (devicep, flags);
if (flags & GOMP_TARGET_FLAG_NOWAIT)
struct gomp_thread *thr = gomp_thread ();
/* Create a team if we don't have any around, as nowait
   target tasks make sense to run asynchronously even when
   outside of any parallel. */
if (__builtin_expect (thr->ts.team == NULL, 0))
struct gomp_team *team = gomp_new_team (1);
struct gomp_task *task = thr->task;
struct gomp_task **implicit_task = &task;
struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
team->prev_ts = thr->ts;
thr->ts.team = team;
thr->ts.team_id = 0;
thr->ts.work_share = &team->work_shares[0];
thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
thr->ts.single_count = 0;
thr->ts.static_trip = 0;
thr->task = &team->implicit_task[0];
gomp_init_task (thr->task, NULL, icv);
/* Walk up to the nearest enclosing implicit task, if any.  */
while (*implicit_task
       && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
implicit_task = &(*implicit_task)->parent;
thr->task = *implicit_task;
free (*implicit_task);
thr->task = &team->implicit_task[0];
pthread_setspecific (gomp_thread_destructor, thr);
if (implicit_task != &task)
*implicit_task = thr->task;
/* (head of the nowait-task condition lost in extraction)  */
&& !thr->task->final_task)
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
			 sizes, kinds, flags, depend, new_args,
			 GOMP_TARGET_TASK_BEFORE_MAP);
/* If there are depend clauses, but nowait is not present
   (or we are in a final task), block the parent task until the
   dependencies are resolved and then just continue with the rest
   of the function as if it is a merged task. */
struct gomp_thread *thr = gomp_thread ();
if (thr->task && thr->task->depend_hash)
/* If we might need to wait, copy firstprivate now. */
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			tgt_align, tgt_size);
gomp_task_maybe_wait_for_dependencies (depend);
/* Host fallback when the device or function cannot run this region.
   (head of this condition lost in extraction)  */
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			tgt_align, tgt_size);
gomp_target_fallback (fn, hostaddrs, devicep, new_args);
struct target_mem_desc *tgt_vars;
htab_t refcount_set = NULL;
/* Shared-memory devices only need the firstprivate copies, no mapping.  */
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
calculate_firstprivate_requirements (mapnum, sizes, kinds,
				     &tgt_align, &tgt_size);
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
			tgt_align, tgt_size);
refcount_set = htab_create (mapnum);
tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			  true, &refcount_set, GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr,
		   tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
htab_clear (refcount_set);
gomp_unmap_vars (tgt_vars, true, &refcount_set);
htab_free (refcount_set);
/* Copy back ICVs from device to host.
   HOST_PTR is expected to exist since it was added in
   gomp_load_image_to_device if not already available. */
gomp_copy_back_icvs (devicep, device);
/* Reverse lookup (device addr -> host addr) for reverse offload.  We avoid
   keeping track of all variable handling - assuming that reverse offload
   occurs only very rarely.  Downside is that the reverse search is slow. */

/* Search parameters and result slot for the reverse-lookup tree walk below.
   NOTE(review): the extractor dropped the remaining members (at least the
   range end and the result key, judging by data->tgt_end / data.key uses
   below) and the closing brace.  */
struct gomp_splay_tree_rev_lookup_data {
uintptr_t tgt_start;  /* Device-side start address being searched for.  */
/* Splay-tree foreach callback: test whether KEY's device address range
   matches the [tgt_start, tgt_end) range in D (which points to a
   gomp_splay_tree_rev_lookup_data).
   NOTE(review): lossy extraction -- return type, braces, early returns and
   the result-recording statement were dropped; comments only added.  */
gomp_splay_tree_rev_lookup (splay_tree_key key, void *d)
struct gomp_splay_tree_rev_lookup_data *data;
data = (struct gomp_splay_tree_rev_lookup_data *)d;
/* Device-side start address of this key's mapping.  */
uintptr_t tgt_start = key->tgt->tgt_start + key->tgt_offset;
/* Quick reject: the key starts beyond the searched address, or its
   descriptor carries no list entries to take a length from.  */
if (tgt_start > data->tgt_start || key->tgt->list_count == 0)
/* Find this key's own entry in its descriptor list to get its length.  */
for (j = 0; j < key->tgt->list_count; j++)
if (key->tgt->list[j].key == key)
assert (j < key->tgt->list_count);
uintptr_t tgt_end = tgt_start + key->tgt->list[j].length;
/* Match either an exact (possibly empty) range or any overlap.  */
if ((tgt_start == data->tgt_start && tgt_end == data->tgt_end)
    || (tgt_end > data->tgt_start && tgt_start < data->tgt_end))
/* Reverse-map a device address range [tgt_start, tgt_end) to its splay-tree
   key by walking the whole map with gomp_splay_tree_rev_lookup.
   NOTE(review): lossy extraction -- the trailing parameter(s) (apparently a
   zero_len flag, judging by its use below), braces, the range adjustments
   between the retries and the returns were dropped; comments only added.  */
static inline splay_tree_key
gomp_map_rev_lookup (splay_tree mem_map, uint64_t tgt_start, uint64_t tgt_end,
struct gomp_splay_tree_rev_lookup_data data;
data.tgt_start = tgt_start;
data.tgt_end = tgt_end;
/* Non-empty range: a single overlap walk suffices.  */
if (tgt_start != tgt_end)
splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
/* Empty range: search once, and -- presumably after adjusting the probe
   range (those statements were lost) -- retry below if needed.  */
splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
if (data.key != NULL || zero_len)
splay_tree_foreach_lazy (mem_map, gomp_splay_tree_rev_lookup, &data);
3225 bool present
, aligned
;
/* Search just mapped reverse-offload data; returns index if found,
   (remainder of the original comment was lost in extraction). */
/* NOTE(review): lossy extraction -- return type, braces, 'continue'/'return'
   statements and the is_struct branch skeleton were dropped; comments only
   added, surviving tokens unchanged.  */
gomp_map_cdata_lookup_int (struct cpy_data *d, uint64_t *devaddrs,
			   unsigned short *kinds, uint64_t *sizes, size_t n,
			   uint64_t tgt_start, uint64_t tgt_end)
const bool short_mapkind = true;
const int typemask = short_mapkind ? 0xff : 0x7;
for (i = 0; i < n; i++)
bool is_struct = ((get_kind (short_mapkind, kinds, i) & typemask)
		  == GOMP_MAP_STRUCT);
dev_end = d[i].devaddr + sizes[i];
/* For GOMP_MAP_STRUCT entries sizes[i] is used as an element count
   (note the d[i + sizes[i]] indexing): the range end is taken from the
   last member's entry.  */
if (i + sizes[i] < n)
dev_end = d[i + sizes[i]].devaddr + sizes[i + sizes[i]];
dev_end = devaddrs[i + sizes[i]] + sizes[i + sizes[i]];
/* Match on exact range equality or any overlap.  */
if ((d[i].devaddr == tgt_start && dev_end == tgt_end)
    || (dev_end > tgt_start && d[i].devaddr < tgt_end))
/* Wrapper around gomp_map_cdata_lookup_int mirroring the empty-range retry
   logic of gomp_map_rev_lookup.
   NOTE(review): lossy extraction -- the trailing parameter(s) (apparently a
   zero_len flag), braces and the range-adjusting statements between the two
   probes were dropped; comments only added, surviving tokens unchanged.  */
gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs,
		       unsigned short *kinds, uint64_t *sizes,
		       size_t n, uint64_t tgt_start, uint64_t tgt_end,
/* Non-empty range: a single search suffices.  */
if (tgt_start != tgt_end)
return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				  tgt_start, tgt_end);
/* Empty range: probe once, then -- presumably with an adjusted range (the
   adjusting statements were lost) -- probe again unless already found.  */
i = gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
			       tgt_start, tgt_end);
if (i < n || zero_len)
return gomp_map_cdata_lookup_int (d, devaddrs, kinds, sizes, n,
				  tgt_start, tgt_end);
3285 /* Handle reverse offload. This is called by the device plugins for a
3286 reverse offload; it is not called if the outer target runs on the host.
3287 The mapping is simplified device-affecting constructs (except for target
3288 with device(ancestor:1)) must not be encountered; in particular not
3289 target (enter/exit) data. */
3292 gomp_target_rev (uint64_t fn_ptr
, uint64_t mapnum
, uint64_t devaddrs_ptr
,
3293 uint64_t sizes_ptr
, uint64_t kinds_ptr
, int dev_num
,
3294 void (*dev_to_host_cpy
) (void *, const void *, size_t, void*),
3295 void (*host_to_dev_cpy
) (void *, const void *, size_t, void*),
3298 /* Return early if there is no offload code. */
3299 if (sizeof (OFFLOAD_PLUGINS
) == sizeof (""))
3301 /* Currently, this fails because of calculate_firstprivate_requirements
3302 below; it could be fixed but additional code needs to be updated to
3303 handle 32bit hosts - thus, it is not worthwhile. */
3304 if (sizeof (void *) != sizeof (uint64_t))
3305 gomp_fatal ("Reverse offload of 32bit hosts not supported.");
3307 struct cpy_data
*cdata
= NULL
;
3310 unsigned short *kinds
;
3311 const bool short_mapkind
= true;
3312 const int typemask
= short_mapkind
? 0xff : 0x7;
3313 struct gomp_device_descr
*devicep
= resolve_device (dev_num
, false);
3315 reverse_splay_tree_key n
;
3316 struct reverse_splay_tree_key_s k
;
3319 gomp_mutex_lock (&devicep
->lock
);
3320 n
= gomp_map_lookup_rev (&devicep
->mem_map_rev
, &k
);
3321 gomp_mutex_unlock (&devicep
->lock
);
3324 gomp_fatal ("Cannot find reverse-offload function");
3325 void (*host_fn
)() = (void (*)()) n
->k
->host_start
;
3327 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3329 devaddrs
= (uint64_t *) (uintptr_t) devaddrs_ptr
;
3330 sizes
= (uint64_t *) (uintptr_t) sizes_ptr
;
3331 kinds
= (unsigned short *) (uintptr_t) kinds_ptr
;
3335 devaddrs
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3336 sizes
= (uint64_t *) gomp_malloc (mapnum
* sizeof (uint64_t));
3337 kinds
= (unsigned short *) gomp_malloc (mapnum
* sizeof (unsigned short));
3338 if (dev_to_host_cpy
)
3340 dev_to_host_cpy (devaddrs
, (const void *) (uintptr_t) devaddrs_ptr
,
3341 mapnum
* sizeof (uint64_t), token
);
3342 dev_to_host_cpy (sizes
, (const void *) (uintptr_t) sizes_ptr
,
3343 mapnum
* sizeof (uint64_t), token
);
3344 dev_to_host_cpy (kinds
, (const void *) (uintptr_t) kinds_ptr
,
3345 mapnum
* sizeof (unsigned short), token
);
3349 gomp_copy_dev2host (devicep
, NULL
, devaddrs
,
3350 (const void *) (uintptr_t) devaddrs_ptr
,
3351 mapnum
* sizeof (uint64_t));
3352 gomp_copy_dev2host (devicep
, NULL
, sizes
,
3353 (const void *) (uintptr_t) sizes_ptr
,
3354 mapnum
* sizeof (uint64_t));
3355 gomp_copy_dev2host (devicep
, NULL
, kinds
, (const void *) (uintptr_t) kinds_ptr
,
3356 mapnum
* sizeof (unsigned short));
3360 size_t tgt_align
= 0, tgt_size
= 0;
3362 /* If actually executed on 32bit systems, the casts lead to wrong code;
3363 but 32bit with offloading is not supported; see top of this function. */
3364 calculate_firstprivate_requirements (mapnum
, (void *) (uintptr_t) sizes
,
3365 (void *) (uintptr_t) kinds
,
3366 &tgt_align
, &tgt_size
);
3370 char *tgt
= gomp_alloca (tgt_size
+ tgt_align
- 1);
3371 uintptr_t al
= (uintptr_t) tgt
& (tgt_align
- 1);
3373 tgt
+= tgt_align
- al
;
3375 for (uint64_t i
= 0; i
< mapnum
; i
++)
3376 if (get_kind (short_mapkind
, kinds
, i
) == GOMP_MAP_FIRSTPRIVATE
3377 && devaddrs
[i
] != 0)
3379 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3380 tgt_size
= (tgt_size
+ align
- 1) & ~(align
- 1);
3381 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3382 memcpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3384 else if (dev_to_host_cpy
)
3385 dev_to_host_cpy (tgt
+ tgt_size
, (void *) (uintptr_t) devaddrs
[i
],
3386 (size_t) sizes
[i
], token
);
3388 gomp_copy_dev2host (devicep
, NULL
, tgt
+ tgt_size
,
3389 (void *) (uintptr_t) devaddrs
[i
],
3391 devaddrs
[i
] = (uint64_t) (uintptr_t) tgt
+ tgt_size
;
3392 tgt_size
= tgt_size
+ sizes
[i
];
3393 if ((devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3395 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3396 == GOMP_MAP_ATTACH
))
3398 *(uint64_t*) (uintptr_t) (devaddrs
[i
+1] + sizes
[i
+1])
3399 = (uint64_t) devaddrs
[i
];
3405 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3407 size_t j
, struct_cpy
= 0;
3409 cdata
= gomp_alloca (sizeof (*cdata
) * mapnum
);
3410 memset (cdata
, '\0', sizeof (*cdata
) * mapnum
);
3411 gomp_mutex_lock (&devicep
->lock
);
3412 for (uint64_t i
= 0; i
< mapnum
; i
++)
3414 if (devaddrs
[i
] == 0)
3417 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3420 case GOMP_MAP_FIRSTPRIVATE
:
3421 case GOMP_MAP_FIRSTPRIVATE_INT
:
3424 case GOMP_MAP_DELETE
:
3425 case GOMP_MAP_RELEASE
:
3426 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3427 /* Assume it is present; look it up - but ignore otherwise. */
3428 case GOMP_MAP_ALLOC
:
3430 case GOMP_MAP_FORCE_ALLOC
:
3431 case GOMP_MAP_FORCE_FROM
:
3432 case GOMP_MAP_ALWAYS_FROM
:
3434 case GOMP_MAP_TOFROM
:
3435 case GOMP_MAP_FORCE_TO
:
3436 case GOMP_MAP_FORCE_TOFROM
:
3437 case GOMP_MAP_ALWAYS_TO
:
3438 case GOMP_MAP_ALWAYS_TOFROM
:
3439 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3440 cdata
[i
].devaddr
= devaddrs
[i
];
3441 bool zero_len
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3442 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
);
3443 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3445 devaddrs
[i
] + sizes
[i
], zero_len
);
3449 cdata
[i
].present
= true;
3450 devaddrs
[i
] = devaddrs
[j
] + devaddrs
[i
] - cdata
[j
].devaddr
;
3454 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3456 devaddrs
[i
] + sizes
[i
], zero_len
);
3457 cdata
[i
].present
= n2
!= NULL
;
3459 if (!cdata
[i
].present
3460 && kind
!= GOMP_MAP_DELETE
3461 && kind
!= GOMP_MAP_RELEASE
3462 && kind
!= GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
)
3464 cdata
[i
].aligned
= true;
3465 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3467 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3470 else if (n2
!= NULL
)
3471 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3472 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3473 if (((!cdata
[i
].present
|| struct_cpy
)
3474 && (kind
== GOMP_MAP_TO
|| kind
== GOMP_MAP_TOFROM
))
3475 || kind
== GOMP_MAP_FORCE_TO
3476 || kind
== GOMP_MAP_FORCE_TOFROM
3477 || kind
== GOMP_MAP_ALWAYS_TO
3478 || kind
== GOMP_MAP_ALWAYS_TOFROM
)
3480 if (dev_to_host_cpy
)
3481 dev_to_host_cpy ((void *) (uintptr_t) devaddrs
[i
],
3482 (void *) (uintptr_t) cdata
[i
].devaddr
,
3485 gomp_copy_dev2host (devicep
, NULL
,
3486 (void *) (uintptr_t) devaddrs
[i
],
3487 (void *) (uintptr_t) cdata
[i
].devaddr
,
3493 case GOMP_MAP_ATTACH
:
3494 case GOMP_MAP_POINTER
:
3495 case GOMP_MAP_ALWAYS_POINTER
:
3496 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3497 devaddrs
[i
] + sizes
[i
],
3498 devaddrs
[i
] + sizes
[i
]
3499 + sizeof (void*), false);
3500 cdata
[i
].present
= n2
!= NULL
;
3501 cdata
[i
].devaddr
= devaddrs
[i
];
3503 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3504 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3507 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3508 devaddrs
[i
] + sizes
[i
],
3509 devaddrs
[i
] + sizes
[i
]
3510 + sizeof (void*), false);
3513 cdata
[i
].present
= true;
3514 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3515 - cdata
[j
].devaddr
);
3518 if (!cdata
[i
].present
)
3519 devaddrs
[i
] = (uintptr_t) gomp_malloc (sizeof (void*));
3520 /* Assume that when present, the pointer is already correct. */
3522 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[i
])
3525 case GOMP_MAP_TO_PSET
:
3526 /* Assume that when present, the pointers are fine and no 'to:'
3528 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3529 devaddrs
[i
], devaddrs
[i
] + sizes
[i
],
3531 cdata
[i
].present
= n2
!= NULL
;
3532 cdata
[i
].devaddr
= devaddrs
[i
];
3534 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3535 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3538 j
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, i
,
3540 devaddrs
[i
] + sizes
[i
], false);
3543 cdata
[i
].present
= true;
3544 devaddrs
[i
] = (devaddrs
[j
] + devaddrs
[i
]
3545 - cdata
[j
].devaddr
);
3548 if (!cdata
[i
].present
)
3550 cdata
[i
].aligned
= true;
3551 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3553 = (uint64_t) (uintptr_t) gomp_aligned_alloc (align
,
3555 if (dev_to_host_cpy
)
3556 dev_to_host_cpy ((void *) (uintptr_t) devaddrs
[i
],
3557 (void *) (uintptr_t) cdata
[i
].devaddr
,
3560 gomp_copy_dev2host (devicep
, NULL
,
3561 (void *) (uintptr_t) devaddrs
[i
],
3562 (void *) (uintptr_t) cdata
[i
].devaddr
,
3565 for (j
= i
+ 1; j
< mapnum
; j
++)
3567 kind
= get_kind (short_mapkind
, kinds
, j
) & typemask
;
3568 if (!GOMP_MAP_ALWAYS_POINTER_P (kind
)
3569 && !GOMP_MAP_POINTER_P (kind
))
3571 if (devaddrs
[j
] < devaddrs
[i
])
3573 if (cdata
[i
].present
)
3575 if (devaddrs
[j
] == 0)
3577 *(uint64_t *) (uintptr_t) (devaddrs
[i
] + sizes
[j
]) = 0;
3582 cdata
[i
].present
= true;
3583 cdata
[j
].devaddr
= devaddrs
[j
];
3584 k
= gomp_map_cdata_lookup (cdata
, devaddrs
, kinds
, sizes
, j
,
3586 devaddrs
[j
] + sizeof (void*),
3589 devaddrs
[j
] = (devaddrs
[k
] + devaddrs
[j
]
3590 - cdata
[k
].devaddr
);
3593 n2
= gomp_map_rev_lookup (&devicep
->mem_map
,
3595 devaddrs
[j
] + sizeof (void*),
3599 gomp_mutex_unlock (&devicep
->lock
);
3600 gomp_fatal ("Pointer target wasn't mapped");
3602 devaddrs
[j
] = (n2
->host_start
+ cdata
[j
].devaddr
3603 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3605 *(void **) (uintptr_t) (devaddrs
[i
] + sizes
[j
])
3606 = (void *) (uintptr_t) devaddrs
[j
];
3610 case GOMP_MAP_STRUCT
:
3611 n2
= gomp_map_rev_lookup (&devicep
->mem_map
, devaddrs
[i
+1],
3612 devaddrs
[i
+ sizes
[i
]]
3613 + sizes
[i
+ sizes
[i
]], false);
3614 cdata
[i
].present
= n2
!= NULL
;
3615 cdata
[i
].devaddr
= devaddrs
[i
];
3616 struct_cpy
= cdata
[i
].present
? 0 : sizes
[i
];
3619 size_t sz
= (size_t) (devaddrs
[i
+ sizes
[i
]]
3621 + sizes
[i
+ sizes
[i
]]);
3622 size_t align
= (size_t) 1 << (kinds
[i
] >> 8);
3623 cdata
[i
].aligned
= true;
3624 devaddrs
[i
] = (uintptr_t) gomp_aligned_alloc (align
, sz
);
3625 devaddrs
[i
] -= devaddrs
[i
+1] - cdata
[i
].devaddr
;
3628 devaddrs
[i
] = (n2
->host_start
+ cdata
[i
].devaddr
3629 - (n2
->tgt
->tgt_start
+ n2
->tgt_offset
));
3632 gomp_mutex_unlock (&devicep
->lock
);
3633 gomp_fatal ("gomp_target_rev unhandled kind 0x%.4x", kinds
[i
]);
3636 gomp_mutex_unlock (&devicep
->lock
);
3641 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3643 uint64_t struct_cpy
= 0;
3644 bool clean_struct
= false;
3645 for (uint64_t i
= 0; i
< mapnum
; i
++)
3647 if (cdata
[i
].devaddr
== 0)
3649 int kind
= get_kind (short_mapkind
, kinds
, i
) & typemask
;
3650 bool copy
= !cdata
[i
].present
|| struct_cpy
;
3653 case GOMP_MAP_FORCE_FROM
:
3654 case GOMP_MAP_FORCE_TOFROM
:
3655 case GOMP_MAP_ALWAYS_FROM
:
3656 case GOMP_MAP_ALWAYS_TOFROM
:
3660 case GOMP_MAP_TOFROM
:
3661 if (copy
&& host_to_dev_cpy
)
3662 host_to_dev_cpy ((void *) (uintptr_t) cdata
[i
].devaddr
,
3663 (void *) (uintptr_t) devaddrs
[i
],
3666 gomp_copy_host2dev (devicep
, NULL
,
3667 (void *) (uintptr_t) cdata
[i
].devaddr
,
3668 (void *) (uintptr_t) devaddrs
[i
],
3669 sizes
[i
], false, NULL
);
3678 if (kind
== GOMP_MAP_STRUCT
&& !cdata
[i
].present
)
3680 clean_struct
= true;
3681 struct_cpy
= sizes
[i
];
3683 else if (cdata
[i
].aligned
)
3684 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3685 else if (!cdata
[i
].present
)
3686 free ((void *) (uintptr_t) devaddrs
[i
]);
3689 for (uint64_t i
= 0; i
< mapnum
; i
++)
3690 if (!cdata
[i
].present
3691 && ((get_kind (short_mapkind
, kinds
, i
) & typemask
)
3692 == GOMP_MAP_STRUCT
))
3694 devaddrs
[i
] += cdata
[i
+1].devaddr
- cdata
[i
].devaddr
;
3695 gomp_aligned_free ((void *) (uintptr_t) devaddrs
[i
]);
3704 /* Host fallback for GOMP_target_data{,_ext} routines. */
3707 gomp_target_data_fallback (struct gomp_device_descr
*devicep
)
3709 struct gomp_task_icv
*icv
= gomp_icv (false);
3711 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_MANDATORY
3713 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
3714 "be used for offloading");
3716 if (icv
->target_data
)
3718 /* Even when doing a host fallback, if there are any active
3719 #pragma omp target data constructs, need to remember the
3720 new #pragma omp target data, otherwise GOMP_target_end_data
3721 would get out of sync. */
3722 struct target_mem_desc
*tgt
3723 = gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, false,
3724 NULL
, GOMP_MAP_VARS_DATA
);
3725 tgt
->prev
= icv
->target_data
;
3726 icv
->target_data
= tgt
;
3731 GOMP_target_data (int device
, const void *unused
, size_t mapnum
,
3732 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3734 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3737 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3738 || (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
))
3739 return gomp_target_data_fallback (devicep
);
3741 struct target_mem_desc
*tgt
3742 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, false,
3743 NULL
, GOMP_MAP_VARS_DATA
);
3744 struct gomp_task_icv
*icv
= gomp_icv (true);
3745 tgt
->prev
= icv
->target_data
;
3746 icv
->target_data
= tgt
;
3750 GOMP_target_data_ext (int device
, size_t mapnum
, void **hostaddrs
,
3751 size_t *sizes
, unsigned short *kinds
)
3753 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3756 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3757 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3758 return gomp_target_data_fallback (devicep
);
3760 struct target_mem_desc
*tgt
3761 = gomp_map_vars (devicep
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
3762 NULL
, GOMP_MAP_VARS_DATA
);
3763 struct gomp_task_icv
*icv
= gomp_icv (true);
3764 tgt
->prev
= icv
->target_data
;
3765 icv
->target_data
= tgt
;
3769 GOMP_target_end_data (void)
3771 struct gomp_task_icv
*icv
= gomp_icv (false);
3772 if (icv
->target_data
)
3774 struct target_mem_desc
*tgt
= icv
->target_data
;
3775 icv
->target_data
= tgt
->prev
;
3776 gomp_unmap_vars (tgt
, true, NULL
);
3781 GOMP_target_update (int device
, const void *unused
, size_t mapnum
,
3782 void **hostaddrs
, size_t *sizes
, unsigned char *kinds
)
3784 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3787 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3788 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3791 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, false);
3795 GOMP_target_update_ext (int device
, size_t mapnum
, void **hostaddrs
,
3796 size_t *sizes
, unsigned short *kinds
,
3797 unsigned int flags
, void **depend
)
3799 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3801 /* If there are depend clauses, but nowait is not present,
3802 block the parent task until the dependencies are resolved
3803 and then just continue with the rest of the function as if it
3804 is a merged task. Until we are able to schedule task during
3805 variable mapping or unmapping, ignore nowait if depend clauses
3809 struct gomp_thread
*thr
= gomp_thread ();
3810 if (thr
->task
&& thr
->task
->depend_hash
)
3812 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
3814 && !thr
->task
->final_task
)
3816 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
3817 mapnum
, hostaddrs
, sizes
, kinds
,
3818 flags
| GOMP_TARGET_FLAG_UPDATE
,
3819 depend
, NULL
, GOMP_TARGET_TASK_DATA
))
3824 struct gomp_team
*team
= thr
->ts
.team
;
3825 /* If parallel or taskgroup has been cancelled, don't start new
3827 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3829 if (gomp_team_barrier_cancelled (&team
->barrier
))
3831 if (thr
->task
->taskgroup
)
3833 if (thr
->task
->taskgroup
->cancelled
)
3835 if (thr
->task
->taskgroup
->workshare
3836 && thr
->task
->taskgroup
->prev
3837 && thr
->task
->taskgroup
->prev
->cancelled
)
3842 gomp_task_maybe_wait_for_dependencies (depend
);
3848 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
3849 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
3852 struct gomp_thread
*thr
= gomp_thread ();
3853 struct gomp_team
*team
= thr
->ts
.team
;
3854 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
3855 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
3857 if (gomp_team_barrier_cancelled (&team
->barrier
))
3859 if (thr
->task
->taskgroup
)
3861 if (thr
->task
->taskgroup
->cancelled
)
3863 if (thr
->task
->taskgroup
->workshare
3864 && thr
->task
->taskgroup
->prev
3865 && thr
->task
->taskgroup
->prev
->cancelled
)
3870 gomp_update (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, true);
3874 gomp_exit_data (struct gomp_device_descr
*devicep
, size_t mapnum
,
3875 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
3876 htab_t
*refcount_set
)
3878 const int typemask
= 0xff;
3880 gomp_mutex_lock (&devicep
->lock
);
3881 if (devicep
->state
== GOMP_DEVICE_FINALIZED
)
3883 gomp_mutex_unlock (&devicep
->lock
);
3887 for (i
= 0; i
< mapnum
; i
++)
3888 if ((kinds
[i
] & typemask
) == GOMP_MAP_DETACH
)
3890 struct splay_tree_key_s cur_node
;
3891 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3892 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
3893 splay_tree_key n
= splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3896 gomp_detach_pointer (devicep
, NULL
, n
, (uintptr_t) hostaddrs
[i
],
3901 splay_tree_key remove_vars
[mapnum
];
3903 for (i
= 0; i
< mapnum
; i
++)
3905 struct splay_tree_key_s cur_node
;
3906 unsigned char kind
= kinds
[i
] & typemask
;
3910 case GOMP_MAP_ALWAYS_FROM
:
3911 case GOMP_MAP_DELETE
:
3912 case GOMP_MAP_RELEASE
:
3913 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION
:
3914 case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
:
3915 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
3916 cur_node
.host_end
= cur_node
.host_start
+ sizes
[i
];
3917 splay_tree_key k
= (kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
3918 || kind
== GOMP_MAP_ZERO_LEN_ARRAY_SECTION
)
3919 ? gomp_map_0len_lookup (&devicep
->mem_map
, &cur_node
)
3920 : splay_tree_lookup (&devicep
->mem_map
, &cur_node
);
3924 bool delete_p
= (kind
== GOMP_MAP_DELETE
3925 || kind
== GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
);
3926 bool do_copy
, do_remove
;
3927 gomp_decrement_refcount (k
, refcount_set
, delete_p
, &do_copy
,
3930 if ((kind
== GOMP_MAP_FROM
&& do_copy
)
3931 || kind
== GOMP_MAP_ALWAYS_FROM
)
3933 if (k
->aux
&& k
->aux
->attach_count
)
3935 /* We have to be careful not to overwrite still attached
3936 pointers during the copyback to host. */
3937 uintptr_t addr
= k
->host_start
;
3938 while (addr
< k
->host_end
)
3940 size_t i
= (addr
- k
->host_start
) / sizeof (void *);
3941 if (k
->aux
->attach_count
[i
] == 0)
3942 gomp_copy_dev2host (devicep
, NULL
, (void *) addr
,
3943 (void *) (k
->tgt
->tgt_start
3945 + addr
- k
->host_start
),
3947 addr
+= sizeof (void *);
3951 gomp_copy_dev2host (devicep
, NULL
, (void *) cur_node
.host_start
,
3952 (void *) (k
->tgt
->tgt_start
+ k
->tgt_offset
3953 + cur_node
.host_start
3955 cur_node
.host_end
- cur_node
.host_start
);
3958 /* Structure elements lists are removed altogether at once, which
3959 may cause immediate deallocation of the target_mem_desc, causing
3960 errors if we still have following element siblings to copy back.
3961 While we're at it, it also seems more disciplined to simply
3962 queue all removals together for processing below.
3964 Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3965 not have this problem, since they maintain an additional
3966 tgt->refcount = 1 reference to the target_mem_desc to start with.
3969 remove_vars
[nrmvars
++] = k
;
3972 case GOMP_MAP_DETACH
:
3975 gomp_mutex_unlock (&devicep
->lock
);
3976 gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3981 for (int i
= 0; i
< nrmvars
; i
++)
3982 gomp_remove_var (devicep
, remove_vars
[i
]);
3984 gomp_mutex_unlock (&devicep
->lock
);
3988 GOMP_target_enter_exit_data (int device
, size_t mapnum
, void **hostaddrs
,
3989 size_t *sizes
, unsigned short *kinds
,
3990 unsigned int flags
, void **depend
)
3992 struct gomp_device_descr
*devicep
= resolve_device (device
, true);
3994 /* If there are depend clauses, but nowait is not present,
3995 block the parent task until the dependencies are resolved
3996 and then just continue with the rest of the function as if it
3997 is a merged task. Until we are able to schedule task during
3998 variable mapping or unmapping, ignore nowait if depend clauses
4002 struct gomp_thread
*thr
= gomp_thread ();
4003 if (thr
->task
&& thr
->task
->depend_hash
)
4005 if ((flags
& GOMP_TARGET_FLAG_NOWAIT
)
4007 && !thr
->task
->final_task
)
4009 if (gomp_create_target_task (devicep
, (void (*) (void *)) NULL
,
4010 mapnum
, hostaddrs
, sizes
, kinds
,
4011 flags
, depend
, NULL
,
4012 GOMP_TARGET_TASK_DATA
))
4017 struct gomp_team
*team
= thr
->ts
.team
;
4018 /* If parallel or taskgroup has been cancelled, don't start new
4020 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4022 if (gomp_team_barrier_cancelled (&team
->barrier
))
4024 if (thr
->task
->taskgroup
)
4026 if (thr
->task
->taskgroup
->cancelled
)
4028 if (thr
->task
->taskgroup
->workshare
4029 && thr
->task
->taskgroup
->prev
4030 && thr
->task
->taskgroup
->prev
->cancelled
)
4035 gomp_task_maybe_wait_for_dependencies (depend
);
4041 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4042 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4045 struct gomp_thread
*thr
= gomp_thread ();
4046 struct gomp_team
*team
= thr
->ts
.team
;
4047 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
4048 if (__builtin_expect (gomp_cancel_var
, 0) && team
)
4050 if (gomp_team_barrier_cancelled (&team
->barrier
))
4052 if (thr
->task
->taskgroup
)
4054 if (thr
->task
->taskgroup
->cancelled
)
4056 if (thr
->task
->taskgroup
->workshare
4057 && thr
->task
->taskgroup
->prev
4058 && thr
->task
->taskgroup
->prev
->cancelled
)
4063 htab_t refcount_set
= htab_create (mapnum
);
4065 /* The variables are mapped separately such that they can be released
4068 if ((flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4069 for (i
= 0; i
< mapnum
; i
++)
4070 if ((kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4072 gomp_map_vars (devicep
, sizes
[i
] + 1, &hostaddrs
[i
], NULL
, &sizes
[i
],
4073 &kinds
[i
], true, &refcount_set
,
4074 GOMP_MAP_VARS_ENTER_DATA
);
4077 else if ((kinds
[i
] & 0xff) == GOMP_MAP_TO_PSET
)
4079 for (j
= i
+ 1; j
< mapnum
; j
++)
4080 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds
, j
) & 0xff)
4081 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds
, j
) & 0xff))
4083 gomp_map_vars (devicep
, j
-i
, &hostaddrs
[i
], NULL
, &sizes
[i
],
4084 &kinds
[i
], true, &refcount_set
,
4085 GOMP_MAP_VARS_ENTER_DATA
);
4088 else if (i
+ 1 < mapnum
&& (kinds
[i
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
4090 /* An attach operation must be processed together with the mapped
4091 base-pointer list item. */
4092 gomp_map_vars (devicep
, 2, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4093 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4097 gomp_map_vars (devicep
, 1, &hostaddrs
[i
], NULL
, &sizes
[i
], &kinds
[i
],
4098 true, &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4100 gomp_exit_data (devicep
, mapnum
, hostaddrs
, sizes
, kinds
, &refcount_set
);
4101 htab_free (refcount_set
);
4105 gomp_target_task_fn (void *data
)
4107 struct gomp_target_task
*ttask
= (struct gomp_target_task
*) data
;
4108 struct gomp_device_descr
*devicep
= ttask
->devicep
;
4110 if (ttask
->fn
!= NULL
)
4114 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4115 || !(fn_addr
= gomp_get_target_fn_addr (devicep
, ttask
->fn
))
4116 || (devicep
->can_run_func
&& !devicep
->can_run_func (fn_addr
)))
4118 ttask
->state
= GOMP_TARGET_TASK_FALLBACK
;
4119 gomp_target_fallback (ttask
->fn
, ttask
->hostaddrs
, devicep
,
4124 if (ttask
->state
== GOMP_TARGET_TASK_FINISHED
)
4127 gomp_unmap_vars (ttask
->tgt
, true, NULL
);
4131 void *actual_arguments
;
4132 if (devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4135 actual_arguments
= ttask
->hostaddrs
;
4139 ttask
->tgt
= gomp_map_vars (devicep
, ttask
->mapnum
, ttask
->hostaddrs
,
4140 NULL
, ttask
->sizes
, ttask
->kinds
, true,
4141 NULL
, GOMP_MAP_VARS_TARGET
);
4142 actual_arguments
= (void *) ttask
->tgt
->tgt_start
;
4144 ttask
->state
= GOMP_TARGET_TASK_READY_TO_RUN
;
4146 assert (devicep
->async_run_func
);
4147 devicep
->async_run_func (devicep
->target_id
, fn_addr
, actual_arguments
,
4148 ttask
->args
, (void *) ttask
);
4151 else if (devicep
== NULL
4152 || !(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4153 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4157 if (ttask
->flags
& GOMP_TARGET_FLAG_UPDATE
)
4158 gomp_update (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4159 ttask
->kinds
, true);
4162 htab_t refcount_set
= htab_create (ttask
->mapnum
);
4163 if ((ttask
->flags
& GOMP_TARGET_FLAG_EXIT_DATA
) == 0)
4164 for (i
= 0; i
< ttask
->mapnum
; i
++)
4165 if ((ttask
->kinds
[i
] & 0xff) == GOMP_MAP_STRUCT
)
4167 gomp_map_vars (devicep
, ttask
->sizes
[i
] + 1, &ttask
->hostaddrs
[i
],
4168 NULL
, &ttask
->sizes
[i
], &ttask
->kinds
[i
], true,
4169 &refcount_set
, GOMP_MAP_VARS_ENTER_DATA
);
4170 i
+= ttask
->sizes
[i
];
4173 gomp_map_vars (devicep
, 1, &ttask
->hostaddrs
[i
], NULL
, &ttask
->sizes
[i
],
4174 &ttask
->kinds
[i
], true, &refcount_set
,
4175 GOMP_MAP_VARS_ENTER_DATA
);
4177 gomp_exit_data (devicep
, ttask
->mapnum
, ttask
->hostaddrs
, ttask
->sizes
,
4178 ttask
->kinds
, &refcount_set
);
4179 htab_free (refcount_set
);
4185 GOMP_teams (unsigned int num_teams
, unsigned int thread_limit
)
4189 struct gomp_task_icv
*icv
= gomp_icv (true);
4190 icv
->thread_limit_var
4191 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4197 GOMP_teams4 (unsigned int num_teams_low
, unsigned int num_teams_high
,
4198 unsigned int thread_limit
, bool first
)
4200 struct gomp_thread
*thr
= gomp_thread ();
4205 struct gomp_task_icv
*icv
= gomp_icv (true);
4206 icv
->thread_limit_var
4207 = thread_limit
> INT_MAX
? UINT_MAX
: thread_limit
;
4209 (void) num_teams_high
;
4210 if (num_teams_low
== 0)
4212 thr
->num_teams
= num_teams_low
- 1;
4215 else if (thr
->team_num
== thr
->num_teams
)
4223 omp_target_alloc (size_t size
, int device_num
)
4225 if (device_num
== omp_initial_device
4226 || device_num
== gomp_get_num_devices ())
4227 return malloc (size
);
4229 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4230 if (devicep
== NULL
)
4233 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4234 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4235 return malloc (size
);
4237 gomp_mutex_lock (&devicep
->lock
);
4238 void *ret
= devicep
->alloc_func (devicep
->target_id
, size
);
4239 gomp_mutex_unlock (&devicep
->lock
);
4244 omp_target_free (void *device_ptr
, int device_num
)
4246 if (device_num
== omp_initial_device
4247 || device_num
== gomp_get_num_devices ())
4253 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4254 if (devicep
== NULL
|| device_ptr
== NULL
)
4257 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4258 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4264 gomp_mutex_lock (&devicep
->lock
);
4265 gomp_free_device_memory (devicep
, device_ptr
);
4266 gomp_mutex_unlock (&devicep
->lock
);
4270 omp_target_is_present (const void *ptr
, int device_num
)
4272 if (device_num
== omp_initial_device
4273 || device_num
== gomp_get_num_devices ())
4276 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4277 if (devicep
== NULL
)
4283 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4284 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4287 gomp_mutex_lock (&devicep
->lock
);
4288 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4289 struct splay_tree_key_s cur_node
;
4291 cur_node
.host_start
= (uintptr_t) ptr
;
4292 cur_node
.host_end
= cur_node
.host_start
;
4293 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4294 int ret
= n
!= NULL
;
4295 gomp_mutex_unlock (&devicep
->lock
);
4300 omp_target_memcpy_check (int dst_device_num
, int src_device_num
,
4301 struct gomp_device_descr
**dst_devicep
,
4302 struct gomp_device_descr
**src_devicep
)
4304 if (dst_device_num
!= gomp_get_num_devices ()
4305 /* Above gomp_get_num_devices has to be called unconditionally. */
4306 && dst_device_num
!= omp_initial_device
)
4308 *dst_devicep
= resolve_device (dst_device_num
, false);
4309 if (*dst_devicep
== NULL
)
4312 if (!((*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4313 || (*dst_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4314 *dst_devicep
= NULL
;
4317 if (src_device_num
!= num_devices_openmp
4318 && src_device_num
!= omp_initial_device
)
4320 *src_devicep
= resolve_device (src_device_num
, false);
4321 if (*src_devicep
== NULL
)
4324 if (!((*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4325 || (*src_devicep
)->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4326 *src_devicep
= NULL
;
4333 omp_target_memcpy_copy (void *dst
, const void *src
, size_t length
,
4334 size_t dst_offset
, size_t src_offset
,
4335 struct gomp_device_descr
*dst_devicep
,
4336 struct gomp_device_descr
*src_devicep
)
4339 if (src_devicep
== NULL
&& dst_devicep
== NULL
)
4341 memcpy ((char *) dst
+ dst_offset
, (char *) src
+ src_offset
, length
);
4344 if (src_devicep
== NULL
)
4346 gomp_mutex_lock (&dst_devicep
->lock
);
4347 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4348 (char *) dst
+ dst_offset
,
4349 (char *) src
+ src_offset
, length
);
4350 gomp_mutex_unlock (&dst_devicep
->lock
);
4351 return (ret
? 0 : EINVAL
);
4353 if (dst_devicep
== NULL
)
4355 gomp_mutex_lock (&src_devicep
->lock
);
4356 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4357 (char *) dst
+ dst_offset
,
4358 (char *) src
+ src_offset
, length
);
4359 gomp_mutex_unlock (&src_devicep
->lock
);
4360 return (ret
? 0 : EINVAL
);
4362 if (src_devicep
== dst_devicep
)
4364 gomp_mutex_lock (&src_devicep
->lock
);
4365 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4366 (char *) dst
+ dst_offset
,
4367 (char *) src
+ src_offset
, length
);
4368 gomp_mutex_unlock (&src_devicep
->lock
);
4369 return (ret
? 0 : EINVAL
);
/* OpenMP API: copy LENGTH bytes from SRC (+SRC_OFFSET) on SRC_DEVICE_NUM
   to DST (+DST_OFFSET) on DST_DEVICE_NUM.  Returns 0 on success or an
   errno value on failure.  */

int
omp_target_memcpy (void *dst, const void *src, size_t length, size_t dst_offset,
                   size_t src_offset, int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  int ret = omp_target_memcpy_check (dst_device_num, src_device_num,
                                     &dst_devicep, &src_devicep);
  if (ret)
    return ret;

  ret = omp_target_memcpy_copy (dst, src, length, dst_offset, src_offset,
                                dst_devicep, src_devicep);

  return ret;
}
/* Argument bundle passed to omp_target_memcpy_async_helper when the
   copy is deferred as an OpenMP task.  */
typedef struct
{
  void *dst;
  const void *src;
  size_t length;
  size_t dst_offset;
  size_t src_offset;
  struct gomp_device_descr *dst_devicep;
  struct gomp_device_descr *src_devicep;
} omp_target_memcpy_data;
4403 omp_target_memcpy_async_helper (void *args
)
4405 omp_target_memcpy_data
*a
= args
;
4406 if (omp_target_memcpy_copy (a
->dst
, a
->src
, a
->length
, a
->dst_offset
,
4407 a
->src_offset
, a
->dst_devicep
, a
->src_devicep
))
4408 gomp_fatal ("omp_target_memcpy failed");
4412 omp_target_memcpy_async (void *dst
, const void *src
, size_t length
,
4413 size_t dst_offset
, size_t src_offset
,
4414 int dst_device_num
, int src_device_num
,
4415 int depobj_count
, omp_depend_t
*depobj_list
)
4417 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4418 unsigned int flags
= 0;
4419 void *depend
[depobj_count
+ 5];
4421 int check
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4422 &dst_devicep
, &src_devicep
);
4424 omp_target_memcpy_data s
= {
4428 .dst_offset
= dst_offset
,
4429 .src_offset
= src_offset
,
4430 .dst_devicep
= dst_devicep
,
4431 .src_devicep
= src_devicep
4437 if (depobj_count
> 0 && depobj_list
!= NULL
)
4439 flags
|= GOMP_TASK_FLAG_DEPEND
;
4441 depend
[1] = (void *) (uintptr_t) depobj_count
;
4442 depend
[2] = depend
[3] = depend
[4] = 0;
4443 for (i
= 0; i
< depobj_count
; ++i
)
4444 depend
[i
+ 5] = &depobj_list
[i
];
4447 GOMP_task (omp_target_memcpy_async_helper
, &s
, NULL
, sizeof (s
),
4448 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4454 omp_target_memcpy_rect_worker (void *dst
, const void *src
, size_t element_size
,
4455 int num_dims
, const size_t *volume
,
4456 const size_t *dst_offsets
,
4457 const size_t *src_offsets
,
4458 const size_t *dst_dimensions
,
4459 const size_t *src_dimensions
,
4460 struct gomp_device_descr
*dst_devicep
,
4461 struct gomp_device_descr
*src_devicep
)
4463 size_t dst_slice
= element_size
;
4464 size_t src_slice
= element_size
;
4465 size_t j
, dst_off
, src_off
, length
;
4470 if (__builtin_mul_overflow (element_size
, volume
[0], &length
)
4471 || __builtin_mul_overflow (element_size
, dst_offsets
[0], &dst_off
)
4472 || __builtin_mul_overflow (element_size
, src_offsets
[0], &src_off
))
4474 if (dst_devicep
== NULL
&& src_devicep
== NULL
)
4476 memcpy ((char *) dst
+ dst_off
, (const char *) src
+ src_off
,
4480 else if (src_devicep
== NULL
)
4481 ret
= dst_devicep
->host2dev_func (dst_devicep
->target_id
,
4482 (char *) dst
+ dst_off
,
4483 (const char *) src
+ src_off
,
4485 else if (dst_devicep
== NULL
)
4486 ret
= src_devicep
->dev2host_func (src_devicep
->target_id
,
4487 (char *) dst
+ dst_off
,
4488 (const char *) src
+ src_off
,
4490 else if (src_devicep
== dst_devicep
)
4491 ret
= src_devicep
->dev2dev_func (src_devicep
->target_id
,
4492 (char *) dst
+ dst_off
,
4493 (const char *) src
+ src_off
,
4497 return ret
? 0 : EINVAL
;
4500 /* FIXME: it would be nice to have some plugin function to handle
4501 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can
4502 be handled in the generic recursion below, and for host-host it
4503 should be used even for any num_dims >= 2. */
4505 for (i
= 1; i
< num_dims
; i
++)
4506 if (__builtin_mul_overflow (dst_slice
, dst_dimensions
[i
], &dst_slice
)
4507 || __builtin_mul_overflow (src_slice
, src_dimensions
[i
], &src_slice
))
4509 if (__builtin_mul_overflow (dst_slice
, dst_offsets
[0], &dst_off
)
4510 || __builtin_mul_overflow (src_slice
, src_offsets
[0], &src_off
))
4512 for (j
= 0; j
< volume
[0]; j
++)
4514 ret
= omp_target_memcpy_rect_worker ((char *) dst
+ dst_off
,
4515 (const char *) src
+ src_off
,
4516 element_size
, num_dims
- 1,
4517 volume
+ 1, dst_offsets
+ 1,
4518 src_offsets
+ 1, dst_dimensions
+ 1,
4519 src_dimensions
+ 1, dst_devicep
,
4523 dst_off
+= dst_slice
;
4524 src_off
+= src_slice
;
4530 omp_target_memcpy_rect_check (void *dst
, const void *src
, int dst_device_num
,
4532 struct gomp_device_descr
**dst_devicep
,
4533 struct gomp_device_descr
**src_devicep
)
4538 int ret
= omp_target_memcpy_check (dst_device_num
, src_device_num
,
4539 dst_devicep
, src_devicep
);
4543 if (*src_devicep
!= NULL
&& *dst_devicep
!= NULL
&& *src_devicep
!= *dst_devicep
)
4550 omp_target_memcpy_rect_copy (void *dst
, const void *src
,
4551 size_t element_size
, int num_dims
,
4552 const size_t *volume
, const size_t *dst_offsets
,
4553 const size_t *src_offsets
,
4554 const size_t *dst_dimensions
,
4555 const size_t *src_dimensions
,
4556 struct gomp_device_descr
*dst_devicep
,
4557 struct gomp_device_descr
*src_devicep
)
4560 gomp_mutex_lock (&src_devicep
->lock
);
4561 else if (dst_devicep
)
4562 gomp_mutex_lock (&dst_devicep
->lock
);
4563 int ret
= omp_target_memcpy_rect_worker (dst
, src
, element_size
, num_dims
,
4564 volume
, dst_offsets
, src_offsets
,
4565 dst_dimensions
, src_dimensions
,
4566 dst_devicep
, src_devicep
);
4568 gomp_mutex_unlock (&src_devicep
->lock
);
4569 else if (dst_devicep
)
4570 gomp_mutex_unlock (&dst_devicep
->lock
);
4576 omp_target_memcpy_rect (void *dst
, const void *src
, size_t element_size
,
4577 int num_dims
, const size_t *volume
,
4578 const size_t *dst_offsets
,
4579 const size_t *src_offsets
,
4580 const size_t *dst_dimensions
,
4581 const size_t *src_dimensions
,
4582 int dst_device_num
, int src_device_num
)
4584 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4586 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4587 src_device_num
, &dst_devicep
,
4593 int ret
= omp_target_memcpy_rect_copy (dst
, src
, element_size
, num_dims
,
4594 volume
, dst_offsets
, src_offsets
,
4595 dst_dimensions
, src_dimensions
,
4596 dst_devicep
, src_devicep
);
4605 size_t element_size
;
4606 const size_t *volume
;
4607 const size_t *dst_offsets
;
4608 const size_t *src_offsets
;
4609 const size_t *dst_dimensions
;
4610 const size_t *src_dimensions
;
4611 struct gomp_device_descr
*dst_devicep
;
4612 struct gomp_device_descr
*src_devicep
;
4614 } omp_target_memcpy_rect_data
;
4617 omp_target_memcpy_rect_async_helper (void *args
)
4619 omp_target_memcpy_rect_data
*a
= args
;
4620 int ret
= omp_target_memcpy_rect_copy (a
->dst
, a
->src
, a
->element_size
,
4621 a
->num_dims
, a
->volume
, a
->dst_offsets
,
4622 a
->src_offsets
, a
->dst_dimensions
,
4623 a
->src_dimensions
, a
->dst_devicep
,
4626 gomp_fatal ("omp_target_memcpy_rect failed");
4630 omp_target_memcpy_rect_async (void *dst
, const void *src
, size_t element_size
,
4631 int num_dims
, const size_t *volume
,
4632 const size_t *dst_offsets
,
4633 const size_t *src_offsets
,
4634 const size_t *dst_dimensions
,
4635 const size_t *src_dimensions
,
4636 int dst_device_num
, int src_device_num
,
4637 int depobj_count
, omp_depend_t
*depobj_list
)
4639 struct gomp_device_descr
*dst_devicep
= NULL
, *src_devicep
= NULL
;
4641 int check
= omp_target_memcpy_rect_check (dst
, src
, dst_device_num
,
4642 src_device_num
, &dst_devicep
,
4644 void *depend
[depobj_count
+ 5];
4647 omp_target_memcpy_rect_data s
= {
4650 .element_size
= element_size
,
4651 .num_dims
= num_dims
,
4653 .dst_offsets
= dst_offsets
,
4654 .src_offsets
= src_offsets
,
4655 .dst_dimensions
= dst_dimensions
,
4656 .src_dimensions
= src_dimensions
,
4657 .dst_devicep
= dst_devicep
,
4658 .src_devicep
= src_devicep
4664 if (depobj_count
> 0 && depobj_list
!= NULL
)
4666 flags
|= GOMP_TASK_FLAG_DEPEND
;
4668 depend
[1] = (void *) (uintptr_t) depobj_count
;
4669 depend
[2] = depend
[3] = depend
[4] = 0;
4670 for (i
= 0; i
< depobj_count
; ++i
)
4671 depend
[i
+ 5] = &depobj_list
[i
];
4674 GOMP_task (omp_target_memcpy_rect_async_helper
, &s
, NULL
, sizeof (s
),
4675 __alignof__ (s
), true, flags
, depend
, 0, NULL
);
4681 omp_target_associate_ptr (const void *host_ptr
, const void *device_ptr
,
4682 size_t size
, size_t device_offset
, int device_num
)
4684 if (device_num
== omp_initial_device
4685 || device_num
== gomp_get_num_devices ())
4688 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4689 if (devicep
== NULL
)
4692 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4693 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4696 gomp_mutex_lock (&devicep
->lock
);
4698 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4699 struct splay_tree_key_s cur_node
;
4702 cur_node
.host_start
= (uintptr_t) host_ptr
;
4703 cur_node
.host_end
= cur_node
.host_start
+ size
;
4704 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4707 if (n
->tgt
->tgt_start
+ n
->tgt_offset
4708 == (uintptr_t) device_ptr
+ device_offset
4709 && n
->host_start
<= cur_node
.host_start
4710 && n
->host_end
>= cur_node
.host_end
)
4715 struct target_mem_desc
*tgt
= gomp_malloc (sizeof (*tgt
));
4716 tgt
->array
= gomp_malloc (sizeof (*tgt
->array
));
4720 tgt
->to_free
= NULL
;
4722 tgt
->list_count
= 0;
4723 tgt
->device_descr
= devicep
;
4724 splay_tree_node array
= tgt
->array
;
4725 splay_tree_key k
= &array
->key
;
4726 k
->host_start
= cur_node
.host_start
;
4727 k
->host_end
= cur_node
.host_end
;
4729 k
->tgt_offset
= (uintptr_t) device_ptr
+ device_offset
;
4730 k
->refcount
= REFCOUNT_INFINITY
;
4731 k
->dynamic_refcount
= 0;
4734 array
->right
= NULL
;
4735 splay_tree_insert (&devicep
->mem_map
, array
);
4738 gomp_mutex_unlock (&devicep
->lock
);
4743 omp_target_disassociate_ptr (const void *ptr
, int device_num
)
4745 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4746 if (devicep
== NULL
)
4749 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
4752 gomp_mutex_lock (&devicep
->lock
);
4754 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4755 struct splay_tree_key_s cur_node
;
4758 cur_node
.host_start
= (uintptr_t) ptr
;
4759 cur_node
.host_end
= cur_node
.host_start
;
4760 splay_tree_key n
= gomp_map_lookup (mem_map
, &cur_node
);
4762 && n
->host_start
== cur_node
.host_start
4763 && n
->refcount
== REFCOUNT_INFINITY
4764 && n
->tgt
->tgt_start
== 0
4765 && n
->tgt
->to_free
== NULL
4766 && n
->tgt
->refcount
== 1
4767 && n
->tgt
->list_count
== 0)
4769 splay_tree_remove (&devicep
->mem_map
, n
);
4770 gomp_unmap_tgt (n
->tgt
);
4774 gomp_mutex_unlock (&devicep
->lock
);
4779 omp_get_mapped_ptr (const void *ptr
, int device_num
)
4781 if (device_num
== omp_initial_device
4782 || device_num
== omp_get_initial_device ())
4783 return (void *) ptr
;
4785 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4786 if (devicep
== NULL
)
4789 if (!(devicep
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4790 || devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
4791 return (void *) ptr
;
4793 gomp_mutex_lock (&devicep
->lock
);
4795 struct splay_tree_s
*mem_map
= &devicep
->mem_map
;
4796 struct splay_tree_key_s cur_node
;
4799 cur_node
.host_start
= (uintptr_t) ptr
;
4800 cur_node
.host_end
= cur_node
.host_start
;
4801 splay_tree_key n
= gomp_map_0len_lookup (mem_map
, &cur_node
);
4805 uintptr_t offset
= cur_node
.host_start
- n
->host_start
;
4806 ret
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
);
4809 gomp_mutex_unlock (&devicep
->lock
);
4815 omp_target_is_accessible (const void *ptr
, size_t size
, int device_num
)
4817 if (device_num
== omp_initial_device
4818 || device_num
== gomp_get_num_devices ())
4821 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4822 if (devicep
== NULL
)
4825 /* TODO: Unified shared memory must be handled when available. */
4827 return devicep
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
;
4831 omp_pause_resource (omp_pause_resource_t kind
, int device_num
)
4834 if (device_num
== omp_initial_device
4835 || device_num
== gomp_get_num_devices ())
4836 return gomp_pause_host ();
4838 struct gomp_device_descr
*devicep
= resolve_device (device_num
, false);
4839 if (devicep
== NULL
)
4842 /* Do nothing for target devices for now. */
4847 omp_pause_resource_all (omp_pause_resource_t kind
)
4850 if (gomp_pause_host ())
4852 /* Do nothing for target devices for now. */
4856 ialias (omp_pause_resource
)
4857 ialias (omp_pause_resource_all
)
4859 #ifdef PLUGIN_SUPPORT
4861 /* This function tries to load a plugin for DEVICE. Name of plugin is passed
4863 The handles of the found functions are stored in the corresponding fields
4864 of DEVICE. The function returns TRUE on success and FALSE otherwise. */
4867 gomp_load_plugin_for_device (struct gomp_device_descr
*device
,
4868 const char *plugin_name
)
4870 const char *err
= NULL
, *last_missing
= NULL
;
4872 void *plugin_handle
= dlopen (plugin_name
, RTLD_LAZY
);
4874 #if OFFLOAD_DEFAULTED
4880 /* Check if all required functions are available in the plugin and store
4881 their handlers. None of the symbols can legitimately be NULL,
4882 so we don't need to check dlerror all the time. */
4884 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
4886 /* Similar, but missing functions are not an error. Return false if
4887 failed, true otherwise. */
4888 #define DLSYM_OPT(f, n) \
4889 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
4890 || (last_missing = #n, 0))
4893 if (device
->version_func () != GOMP_VERSION
)
4895 err
= "plugin version mismatch";
4902 DLSYM (get_num_devices
);
4903 DLSYM (init_device
);
4904 DLSYM (fini_device
);
4906 DLSYM (unload_image
);
4911 device
->capabilities
= device
->get_caps_func ();
4912 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
4915 DLSYM_OPT (async_run
, async_run
);
4916 DLSYM_OPT (can_run
, can_run
);
4919 if (device
->capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
4921 if (!DLSYM_OPT (openacc
.exec
, openacc_exec
)
4922 || !DLSYM_OPT (openacc
.create_thread_data
,
4923 openacc_create_thread_data
)
4924 || !DLSYM_OPT (openacc
.destroy_thread_data
,
4925 openacc_destroy_thread_data
)
4926 || !DLSYM_OPT (openacc
.async
.construct
, openacc_async_construct
)
4927 || !DLSYM_OPT (openacc
.async
.destruct
, openacc_async_destruct
)
4928 || !DLSYM_OPT (openacc
.async
.test
, openacc_async_test
)
4929 || !DLSYM_OPT (openacc
.async
.synchronize
, openacc_async_synchronize
)
4930 || !DLSYM_OPT (openacc
.async
.serialize
, openacc_async_serialize
)
4931 || !DLSYM_OPT (openacc
.async
.queue_callback
,
4932 openacc_async_queue_callback
)
4933 || !DLSYM_OPT (openacc
.async
.exec
, openacc_async_exec
)
4934 || !DLSYM_OPT (openacc
.async
.dev2host
, openacc_async_dev2host
)
4935 || !DLSYM_OPT (openacc
.async
.host2dev
, openacc_async_host2dev
)
4936 || !DLSYM_OPT (openacc
.get_property
, openacc_get_property
))
4938 /* Require all the OpenACC handlers if we have
4939 GOMP_OFFLOAD_CAP_OPENACC_200. */
4940 err
= "plugin missing OpenACC handler function";
4945 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_device
,
4946 openacc_cuda_get_current_device
);
4947 cuda
+= DLSYM_OPT (openacc
.cuda
.get_current_context
,
4948 openacc_cuda_get_current_context
);
4949 cuda
+= DLSYM_OPT (openacc
.cuda
.get_stream
, openacc_cuda_get_stream
);
4950 cuda
+= DLSYM_OPT (openacc
.cuda
.set_stream
, openacc_cuda_set_stream
);
4951 if (cuda
&& cuda
!= 4)
4953 /* Make sure all the CUDA functions are there if any of them are. */
4954 err
= "plugin missing OpenACC CUDA handler function";
4966 gomp_error ("while loading %s: %s", plugin_name
, err
);
4968 gomp_error ("missing function was %s", last_missing
);
4970 dlclose (plugin_handle
);
4975 /* This function finalizes all initialized devices. */
4978 gomp_target_fini (void)
4981 for (i
= 0; i
< num_devices
; i
++)
4984 struct gomp_device_descr
*devicep
= &devices
[i
];
4985 gomp_mutex_lock (&devicep
->lock
);
4986 if (devicep
->state
== GOMP_DEVICE_INITIALIZED
)
4987 ret
= gomp_fini_device (devicep
);
4988 gomp_mutex_unlock (&devicep
->lock
);
4990 gomp_fatal ("device finalization failed");
4994 /* This function initializes the runtime for offloading.
4995 It parses the list of offload plugins, and tries to load these.
4996 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
4997 will be set, and the array DEVICES initialized, containing descriptors for
4998 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
5002 gomp_target_init (void)
5004 const char *prefix
="libgomp-plugin-";
5005 const char *suffix
= SONAME_SUFFIX (1);
5006 const char *cur
, *next
;
5008 int i
, new_num_devs
;
5009 int num_devs
= 0, num_devs_openmp
;
5010 struct gomp_device_descr
*devs
= NULL
;
5012 if (gomp_target_offload_var
== GOMP_TARGET_OFFLOAD_DISABLED
)
5015 cur
= OFFLOAD_PLUGINS
;
5019 struct gomp_device_descr current_device
;
5020 size_t prefix_len
, suffix_len
, cur_len
;
5022 next
= strchr (cur
, ',');
5024 prefix_len
= strlen (prefix
);
5025 cur_len
= next
? next
- cur
: strlen (cur
);
5026 suffix_len
= strlen (suffix
);
5028 plugin_name
= (char *) malloc (prefix_len
+ cur_len
+ suffix_len
+ 1);
5035 memcpy (plugin_name
, prefix
, prefix_len
);
5036 memcpy (plugin_name
+ prefix_len
, cur
, cur_len
);
5037 memcpy (plugin_name
+ prefix_len
+ cur_len
, suffix
, suffix_len
+ 1);
5039 if (gomp_load_plugin_for_device (¤t_device
, plugin_name
))
5041 int omp_req
= omp_requires_mask
& ~GOMP_REQUIRES_TARGET_USED
;
5042 new_num_devs
= current_device
.get_num_devices_func (omp_req
);
5043 if (gomp_debug_var
> 0 && new_num_devs
< 0)
5046 int type
= current_device
.get_type_func ();
5047 for (int img
= 0; img
< num_offload_images
; img
++)
5048 if (type
== offload_images
[img
].type
)
5052 char buf
[sizeof ("unified_address, unified_shared_memory, "
5053 "reverse_offload")];
5054 gomp_requires_to_name (buf
, sizeof (buf
), omp_req
);
5055 char *name
= (char *) malloc (cur_len
+ 1);
5056 memcpy (name
, cur
, cur_len
);
5057 name
[cur_len
] = '\0';
5059 "%s devices present but 'omp requires %s' "
5060 "cannot be fulfilled\n", name
, buf
);
5064 else if (new_num_devs
>= 1)
5066 /* Augment DEVICES and NUM_DEVICES. */
5068 devs
= realloc (devs
, (num_devs
+ new_num_devs
)
5069 * sizeof (struct gomp_device_descr
));
5077 current_device
.name
= current_device
.get_name_func ();
5078 /* current_device.capabilities has already been set. */
5079 current_device
.type
= current_device
.get_type_func ();
5080 current_device
.mem_map
.root
= NULL
;
5081 current_device
.mem_map_rev
.root
= NULL
;
5082 current_device
.state
= GOMP_DEVICE_UNINITIALIZED
;
5083 for (i
= 0; i
< new_num_devs
; i
++)
5085 current_device
.target_id
= i
;
5086 devs
[num_devs
] = current_device
;
5087 gomp_mutex_init (&devs
[num_devs
].lock
);
5098 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
5099 NUM_DEVICES_OPENMP. */
5100 struct gomp_device_descr
*devs_s
5101 = malloc (num_devs
* sizeof (struct gomp_device_descr
));
5108 num_devs_openmp
= 0;
5109 for (i
= 0; i
< num_devs
; i
++)
5110 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
)
5111 devs_s
[num_devs_openmp
++] = devs
[i
];
5112 int num_devs_after_openmp
= num_devs_openmp
;
5113 for (i
= 0; i
< num_devs
; i
++)
5114 if (!(devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENMP_400
))
5115 devs_s
[num_devs_after_openmp
++] = devs
[i
];
5119 for (i
= 0; i
< num_devs
; i
++)
5121 /* The 'devices' array can be moved (by the realloc call) until we have
5122 found all the plugins, so registering with the OpenACC runtime (which
5123 takes a copy of the pointer argument) must be delayed until now. */
5124 if (devs
[i
].capabilities
& GOMP_OFFLOAD_CAP_OPENACC_200
)
5125 goacc_register (&devs
[i
]);
5128 num_devices
= num_devs
;
5129 num_devices_openmp
= num_devs_openmp
;
5131 if (atexit (gomp_target_fini
) != 0)
5132 gomp_fatal ("atexit failed");
5135 #else /* PLUGIN_SUPPORT */
5136 /* If dlfcn.h is unavailable we always fallback to host execution.
5137 GOMP_target* routines are just stubs for this case. */
5139 gomp_target_init (void)
5142 #endif /* PLUGIN_SUPPORT */